aboutsummaryrefslogtreecommitdiff
path: root/dbstore
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2022-02-05 02:37:00 -0500
committerPaul Duncan <pabs@pablotron.org>2022-02-05 02:37:00 -0500
commit22fd269069b8e47bc40fbd8681243bdd4bd61a47 (patch)
treee16bbe17d985d1d567f83926fd1b7a417948083a /dbstore
parent56cc399430d127cb628b3abe84bc653d2b0ce59b (diff)
downloadcvez-22fd269069b8e47bc40fbd8681243bdd4bd61a47.tar.bz2
cvez-22fd269069b8e47bc40fbd8681243bdd4bd61a47.zip
mv internal/dbstore dbstore
Diffstat (limited to 'dbstore')
-rw-r--r--dbstore/cpesearchtype_string.go25
-rw-r--r--dbstore/dbstore.go255
-rw-r--r--dbstore/dbstore_test.go335
-rw-r--r--dbstore/sql/cpe-search-all.sql46
-rw-r--r--dbstore/sql/cpe-search-ref.sql47
-rw-r--r--dbstore/sql/cpe-search-title.sql44
-rw-r--r--dbstore/sql/fill-fts.sql10
-rw-r--r--dbstore/sql/init.sql180
-rw-r--r--dbstore/sql/insert-cpe.sql1
-rw-r--r--dbstore/sql/insert-ref.sql1
-rw-r--r--dbstore/sql/insert-title.sql1
-rw-r--r--dbstore/sql/old-cpe-search-all.sql69
-rw-r--r--dbstore/sql/table-exists.sql6
-rw-r--r--dbstore/testdata/sql/fill-fts.sql10
-rw-r--r--dbstore/testdata/sql/init.sql73
-rw-r--r--dbstore/testdata/sql/insert-cpe.sql1
-rw-r--r--dbstore/testdata/sql/insert-ref.sql1
-rw-r--r--dbstore/testdata/sql/insert-title.sql1
-rw-r--r--dbstore/testdata/sql/table-exists.sql6
-rw-r--r--dbstore/testdata/test-0.xml.gzbin0 -> 1359 bytes
20 files changed, 1112 insertions, 0 deletions
diff --git a/dbstore/cpesearchtype_string.go b/dbstore/cpesearchtype_string.go
new file mode 100644
index 0000000..1d4826e
--- /dev/null
+++ b/dbstore/cpesearchtype_string.go
@@ -0,0 +1,25 @@
+// Code generated by "stringer -linecomment -type=CpeSearchType"; DO NOT EDIT.
+
+package dbstore
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[CpeSearchAll-0]
+ _ = x[CpeSearchTitle-1]
+ _ = x[CpeSearchRef-2]
+}
+
+const _CpeSearchType_name = "cpe-search-allcpe-search-titlecpe-search-ref"
+
+var _CpeSearchType_index = [...]uint8{0, 14, 30, 44}
+
+func (i CpeSearchType) String() string {
+ if i >= CpeSearchType(len(_CpeSearchType_index)-1) {
+ return "CpeSearchType(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _CpeSearchType_name[_CpeSearchType_index[i]:_CpeSearchType_index[i+1]]
+}
diff --git a/dbstore/dbstore.go b/dbstore/dbstore.go
new file mode 100644
index 0000000..4df9fb9
--- /dev/null
+++ b/dbstore/dbstore.go
@@ -0,0 +1,255 @@
+// database storage
+package dbstore
+
+import (
+ "context"
+ db_sql "database/sql"
+ "embed"
+ "encoding/json"
+ "fmt"
+ _ "github.com/mattn/go-sqlite3"
+ "github.com/pablotron/cvez/cpedict"
+)
+
+//go:embed sql
+var sqlFs embed.FS
+
+type DbStore struct {
+ db *db_sql.DB
+}
+
+// open database
+func Open(path string) (DbStore, error) {
+ var r DbStore
+ // init db
+ if db, err := db_sql.Open("sqlite3", path); err != nil {
+ return r, err
+ } else {
+ r.db = db
+ return r, nil
+ }
+}
+
+// initialized database version
+const initDbVersion = 314159
+
+func (me DbStore) isInitialized(ctx context.Context) (bool, error) {
+ sql := "PRAGMA user_version;"
+ // get version
+ var version int32
+ if err := me.db.QueryRowContext(ctx, sql).Scan(&version); err != nil {
+ return false, err
+ }
+
+ // return result
+ return (version == initDbVersion), nil
+}
+
+// initialize database
+func (me DbStore) Init(ctx context.Context) error {
+ if inited, err := me.isInitialized(ctx); err != nil {
+ return err
+ } else if inited {
+ // already initialized, return success
+ return nil
+ }
+
+ // read init query
+ sql, err := sqlFs.ReadFile("sql/init.sql")
+ if err != nil {
+ return err
+ }
+
+ // exec init query, return result
+ _, err = me.db.ExecContext(ctx, string(sql))
+ return err
+}
+
+// get single query from embedded filesystem
+func getQuery(id string) (string, error) {
+ // read query
+ if data, err := sqlFs.ReadFile(fmt.Sprintf("sql/%s.sql", id)); err != nil {
+ return "", err
+ } else {
+ // return query
+ return string(data), nil
+ }
+}
+
+// return query map
+func getQueries(ids []string) (map[string]string, error) {
+ r := make(map[string]string)
+
+ for _, id := range(ids) {
+ // read query
+ if sql, err := getQuery(id); err != nil {
+ return r, fmt.Errorf("%s: %s", id, err.Error())
+ } else {
+ // save query
+ r[id] = sql
+ }
+ }
+
+ // return success
+ return r, nil
+}
+
+// import CPE dictionary
+func (me DbStore) AddCpeDictionary(ctx context.Context, dict cpedict.Dictionary) error {
+ // lazy-init db
+ if err := me.Init(ctx); err != nil {
+ return err
+ }
+
+ // build query map
+ queryIds := []string { "insert-cpe", "insert-title", "insert-ref" }
+ queries, err := getQueries(queryIds)
+ if err != nil {
+ return err
+ }
+
+ // begin context
+ tx, err := me.db.BeginTx(ctx, nil)
+ if err != nil {
+ return err
+ }
+
+ // build statements
+ sts := make(map[string]*db_sql.Stmt)
+ for id, sql := range(queries) {
+ if st, err := tx.PrepareContext(ctx, sql); err != nil {
+ return err
+ } else {
+ sts[id] = st
+ defer sts[id].Close()
+ }
+ }
+
+ // add items
+ for _, item := range(dict.Items) {
+ // add cpe
+ rs, err := sts["insert-cpe"].ExecContext(ctx, item.CpeUri, item.Cpe23Item.Name)
+ if err != nil {
+ return err
+ }
+
+ // get last row ID
+ id, err := rs.LastInsertId()
+ if err != nil {
+ return err
+ }
+
+ // add titles
+ for _, title := range(item.Titles) {
+ _, err := sts["insert-title"].ExecContext(ctx, id, title.Lang, title.Text)
+ if err != nil {
+ return err
+ }
+ }
+
+ // add refs
+ for _, ref := range(item.References) {
+ _, err := sts["insert-ref"].ExecContext(ctx, id, ref.Href, ref.Text)
+ if err != nil {
+ return err
+ }
+ }
+ }
+
+ // commit changes, return result
+ return tx.Commit()
+}
+
+//go:generate stringer -linecomment -type=CpeSearchType
+
+// CPE search type
+type CpeSearchType byte
+
+const (
+ CpeSearchAll CpeSearchType = iota //cpe-search-all
+ CpeSearchTitle // cpe-search-title
+ CpeSearchRef // cpe-search-ref
+)
+
+// title search result
+type CpeSearchRow struct {
+ // Database CPE ID
+ CpeId int64 `json:"cpe_id"`
+
+ // v2.3 formatting string
+ Cpe23 string `json:"cpe23"`
+
+ // titles
+ Titles []cpedict.Title `json:"titles"`
+
+ // references
+ Refs []cpedict.Reference `json:"refs"`
+
+ // search result rank
+ Rank float32 `json:"rank"`
+}
+
+// search CPEs
+func (me DbStore) CpeSearch(
+ ctx context.Context,
+ searchType CpeSearchType,
+ s string,
+) ([]CpeSearchRow, error) {
+ var r []CpeSearchRow
+
+ // lazy-init db
+ if err := me.Init(ctx); err != nil {
+ return r, err
+ }
+
+ // get query
+ sql, err := getQuery(searchType.String())
+ if err != nil {
+ return r, err
+ }
+
+ // exec search query
+ rows, err := me.db.QueryContext(ctx, sql, db_sql.Named("q", s))
+ if err != nil {
+ return r, err
+ }
+
+ // walk results
+ for rows.Next() {
+ var sr CpeSearchRow
+ var titles string
+ var refs string
+
+ // get row values
+ err = rows.Scan(&sr.CpeId, &sr.Cpe23, &titles, &refs, &sr.Rank)
+ if err != nil {
+ return r, err
+ }
+
+ // unmarshal titles
+ if err = json.Unmarshal([]byte(titles), &sr.Titles); err != nil {
+ return r, err
+ }
+
+ // unmarshal refs
+ if err = json.Unmarshal([]byte(refs), &sr.Refs); err != nil {
+ return r, err
+ }
+
+ // append to results
+ r = append(r, sr)
+ }
+
+ // close rows
+ if err = rows.Close(); err != nil {
+ return r, err
+ }
+
+ // check for iteration errors
+ if err = rows.Err(); err != nil {
+ return r, err
+ }
+
+ // return success
+ return r, nil
+}
diff --git a/dbstore/dbstore_test.go b/dbstore/dbstore_test.go
new file mode 100644
index 0000000..f957146
--- /dev/null
+++ b/dbstore/dbstore_test.go
@@ -0,0 +1,335 @@
+package dbstore
+
+import (
+ "compress/gzip"
+ "context"
+ db_sql "database/sql"
+ "encoding/xml"
+ "embed"
+ "errors"
+ "fmt"
+ _ "github.com/mattn/go-sqlite3"
+ "github.com/pablotron/cvez/cpedict"
+ io_fs "io/fs"
+ "os"
+ "reflect"
+ "testing"
+)
+
+func getTestDictionary(path string) (cpedict.Dictionary, error) {
+ var dict cpedict.Dictionary
+
+ // open test data
+ f, err := os.Open(path)
+ if err != nil {
+ return dict, err
+ }
+ defer f.Close()
+
+ // create zip reader
+ gz, err := gzip.NewReader(f)
+ if err != nil {
+ return dict, err
+ }
+ defer gz.Close()
+
+ // create xml decoder
+ d := xml.NewDecoder(gz)
+
+ // decode xml
+ if err = d.Decode(&dict); err != nil {
+ return dict, err
+ }
+
+ // return success
+ return dict, nil
+}
+//go:embed testdata/sql/*.sql
+var testSqlFs embed.FS
+
+var testSqlIds = map[string]bool {
+ "init": false,
+ "insert-cpe": true,
+ "insert-title": true,
+ "insert-ref": true,
+}
+
+func getTestQueries() (map[string]string, error) {
+ r := make(map[string]string)
+
+ for id, _ := range(testSqlIds) {
+ path := fmt.Sprintf("testdata/sql/%s.sql", id)
+ if data, err := testSqlFs.ReadFile(path); err != nil {
+ return r, err
+ } else {
+ r[id] = string(data)
+ }
+ }
+
+ return r, nil
+}
+
+func ignoreTestSimple(t *testing.T) {
+ testDbPath := "./testdata/foo.db"
+ // get queries
+ queries, err := getTestQueries()
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // load test CPEs
+ dict, err := getTestDictionary("testdata/test-0.xml.gz")
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // does test db exist?
+ if _, err = os.Stat(testDbPath); err != nil {
+ if !errors.Is(err, io_fs.ErrNotExist) {
+ t.Error(err)
+ return
+ }
+ } else if err == nil {
+ // remove test db
+ if err = os.Remove(testDbPath); err != nil {
+ t.Error(err)
+ return
+ }
+ }
+
+ // init db
+ db, err := db_sql.Open("sqlite3", testDbPath)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ defer db.Close()
+
+ // init tables
+ if _, err := db.Exec(queries["init"]); err != nil {
+ t.Error(err)
+ return
+ }
+
+ tx, err := db.Begin()
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // build statements
+ sts := make(map[string]*db_sql.Stmt)
+ for id, use := range(testSqlIds) {
+ if use {
+ if st, err := tx.Prepare(queries[id]); err != nil {
+ t.Error(err)
+ return
+ } else {
+ sts[id] = st
+ defer sts[id].Close()
+ }
+ }
+ }
+
+ // add items
+ for _, item := range(dict.Items) {
+ // add cpe
+ rs, err := sts["insert-cpe"].Exec(item.CpeUri, item.Cpe23Item.Name);
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // get last row ID
+ id, err := rs.LastInsertId()
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // add titles
+ for _, title := range(item.Titles) {
+ if _, err := sts["insert-title"].Exec(id, title.Lang, title.Text); err != nil {
+ t.Error(err)
+ return
+ }
+ }
+
+ // add refs
+ for _, ref := range(item.References) {
+ if _, err := sts["insert-ref"].Exec(id, ref.Href, ref.Text); err != nil {
+ t.Error(err)
+ return
+ }
+ }
+ }
+
+ // commit changes
+ if err = tx.Commit(); err != nil {
+ t.Error(err)
+ return
+ }
+}
+
+// remove file if it exists
+func removeFile(path string) error {
+ // remove file
+ err := os.Remove(path)
+ if err != nil && errors.Is(err, io_fs.ErrNotExist) {
+ return nil
+ }
+
+ return err
+}
+
+func createTestDb(ctx context.Context, path string) (DbStore, error) {
+ // remove existing file
+ if err := removeFile(path); err != nil {
+ return DbStore{}, err
+ }
+
+ // open db
+ return Open(path)
+}
+
+func seedTestDb(ctx context.Context, db DbStore) error {
+ // load test CPEs
+ dict, err := getTestDictionary("testdata/test-0.xml.gz")
+ if err != nil {
+ return err
+ }
+
+ // add cpe dictionary
+ return db.AddCpeDictionary(ctx, dict)
+
+ // TODO: seed with other data
+}
+
+func TestOpen(t *testing.T) {
+ path := "./testdata/test-new.db"
+ ctx := context.Background()
+
+ if _, err := createTestDb(ctx, path); err != nil {
+ t.Error(err)
+ return
+ }
+}
+
+func TestAddCpeDictionary(t *testing.T) {
+ path := "./testdata/test-addcpedict.db"
+ ctx := context.Background()
+
+ // create db
+ db, err := createTestDb(ctx, path)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // load test CPEs
+ dict, err := getTestDictionary("testdata/test-0.xml.gz")
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // add cpe dictionary
+ if err := db.AddCpeDictionary(ctx, dict); err != nil {
+ t.Error(err)
+ return
+ }
+}
+
+// sqlite> select a.cpe23 from cpes a join (select cpe_id, min(rank) as rank from cpe_fts_all where cpe_fts_all match 'advisory' group by cpe_id) b on (b.cpe_id = a.cpe_id) order by b.rank;
+// sqlite> select a.cpe23 from cpes a join (select cpe_id, min(rank) as rank from cpe_fts_all where cpe_fts_all match 'advisory AND book' group by cpe_id) b on (b.cpe_id = a.cpe_id) order by b.rank;
+// cpe:2.3:a:\$0.99_kindle_books_project:\$0.99_kindle_books:6:*:*:*:*:android:*:*
+//
+// sqlite> select c.cpe_id, c.cpe23, a.rank from cpe_titles_fts a join cpe_titles b on (b.cpe_title_id = a.rowid) join cpes c on (c.cpe_id = b.cpe_id) where cpe_titles_fts match 'project' order by a.rank;
+// 2|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:-:*:*:*:*:node.js:*:*|-0.775759508773217
+// 3|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.1.0:*:*:*:*:node.js:*:*|-0.66983333682734
+// 4|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.0:*:*:*:*:node.js:*:*|-0.66983333682734
+// 5|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.1:*:*:*:*:node.js:*:*|-0.66983333682734
+// 1|cpe:2.3:a:\$0.99_kindle_books_project:\$0.99_kindle_books:6:*:*:*:*:android:*:*|-0.545655647541265
+//
+// sqlite> select a.cpe23 from cpes a join (select cpe_id, min(rank) as rank from cpe_fts_refs where cpe_fts_refs match 'advisory' group by cpe_id) b on (b.cpe_id = a.cpe_id) order by b.rank;
+// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:-:*:*:*:*:node.js:*:*
+// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.1.0:*:*:*:*:node.js:*:*
+// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.0:*:*:*:*:node.js:*:*
+// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.1:*:*:*:*:node.js:*:*
+// cpe:2.3:a:360totalsecurity:360_total_security:12.1.0.1005:*:*:*:*:*:*:*
+// cpe:2.3:a:\$0.99_kindle_books_project:\$0.99_kindle_books:6:*:*:*:*:android:*:*
+
+
+func TestCpeSearch(t *testing.T) {
+ path := "./testdata/test-search.db"
+ ctx := context.Background()
+
+ tests := []struct {
+ t CpeSearchType // search type
+ q string // query string
+ exp []string // expected search results (cpe23s)
+ } {{
+ t: CpeSearchAll,
+ q: "advisory AND book",
+ exp: []string {
+ "cpe:2.3:a:\\$0.99_kindle_books_project:\\$0.99_kindle_books:6:*:*:*:*:android:*:*",
+ },
+ }, {
+ t: CpeSearchTitle,
+ q: "project",
+ exp: []string {
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:-:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.1.0:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.0:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.1:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:\\$0.99_kindle_books_project:\\$0.99_kindle_books:6:*:*:*:*:android:*:*",
+ },
+ }, {
+ t: CpeSearchRef,
+ q: "advisory",
+ exp: []string {
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:-:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.1.0:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.0:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.1:*:*:*:*:node.js:*:*",
+ "cpe:2.3:a:360totalsecurity:360_total_security:12.1.0.1005:*:*:*:*:*:*:*",
+ "cpe:2.3:a:\\$0.99_kindle_books_project:\\$0.99_kindle_books:6:*:*:*:*:android:*:*",
+ },
+ }}
+
+ // create db
+ db, err := createTestDb(ctx, path)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // seed test database
+ if err = seedTestDb(ctx, db); err != nil {
+ t.Error(err)
+ return
+ }
+
+ for _, test := range(tests) {
+ t.Run(test.t.String(), func(t *testing.T) {
+ rows, err := db.CpeSearch(ctx, test.t, test.q)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+
+ // build ids
+ got := make([]string, len(rows))
+ for i, row := range(rows) {
+ got[i] = row.Cpe23
+ }
+
+ if !reflect.DeepEqual(got, test.exp) {
+ t.Errorf("got \"%v\", exp \"%v\"", got, test.exp)
+ return
+ }
+ })
+ }
+}
diff --git a/dbstore/sql/cpe-search-all.sql b/dbstore/sql/cpe-search-all.sql
new file mode 100644
index 0000000..3260902
--- /dev/null
+++ b/dbstore/sql/cpe-search-all.sql
@@ -0,0 +1,46 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ FROM cpes a
+ JOIN (
+ -- limit to "best" match (e.g., lowest rank) for given CPE
+ -- NOTE: it's not clear to me whether cross-index bm25()
+ -- comparisons are valid
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_all a
+ WHERE cpe_fts_all MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/dbstore/sql/cpe-search-ref.sql b/dbstore/sql/cpe-search-ref.sql
new file mode 100644
index 0000000..a2d3be6
--- /dev/null
+++ b/dbstore/sql/cpe-search-ref.sql
@@ -0,0 +1,47 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ -- find matching cpes
+ FROM cpes a
+ JOIN (
+ -- there can be multiple matching references for a given
+ -- CPE, so use MIN() to limit the match to the most
+ -- "relevant", according to bm25()
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_refs
+ WHERE cpe_fts_refs MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/dbstore/sql/cpe-search-title.sql b/dbstore/sql/cpe-search-title.sql
new file mode 100644
index 0000000..7421ffc
--- /dev/null
+++ b/dbstore/sql/cpe-search-title.sql
@@ -0,0 +1,44 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ -- find matching cpes
+ FROM cpes a
+ JOIN (
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_titles
+ WHERE cpe_fts_titles MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank;
diff --git a/dbstore/sql/fill-fts.sql b/dbstore/sql/fill-fts.sql
new file mode 100644
index 0000000..5329ca7
--- /dev/null
+++ b/dbstore/sql/fill-fts.sql
@@ -0,0 +1,10 @@
+-- populate fts table
+INSERT INTO cpe_fts(rowid, title)
+ SELECT a.cpe_id,
+ COALESCE(b.val, '')
+
+ FROM cpes a
+ LEFT JOIN cpe_titles b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE b.lang = 'en-US';
diff --git a/dbstore/sql/init.sql b/dbstore/sql/init.sql
new file mode 100644
index 0000000..37eef54
--- /dev/null
+++ b/dbstore/sql/init.sql
@@ -0,0 +1,180 @@
+-- enable foreign keys
+PRAGMA foreign_keys = true;
+
+-- create cpes table
+CREATE TABLE cpes (
+ -- cpe unique ID
+ cpe_id INTEGER PRIMARY KEY,
+
+ -- cpe uri
+ cpe_uri TEXT UNIQUE NOT NULL,
+
+ -- cpe 2.3 formatting string
+ cpe23 TEXT UNIQUE NOT NULL
+);
+
+-- create all fts table (cross join across all
+-- titles and references, populated by triggers)
+CREATE VIRTUAL TABLE cpe_fts_all USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- title
+ title,
+
+ -- reference URL
+ href,
+
+ -- reference name
+ val,
+
+ -- use porter stemming
+ tokenize = 'porter'
+);
+
+-- create titles table
+CREATE TABLE cpe_titles (
+ -- title primary key
+ cpe_title_id INTEGER PRIMARY KEY,
+
+ -- external CPE id
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
+
+ -- language code
+ lang TEXT NOT NULL
+ CHECK (LENGTH(lang) > 0),
+
+ -- text value
+ val TEXT NOT NULL
+ CHECK (LENGTH(val) > 0),
+
+ -- prevent duplicate titles of the same name
+ UNIQUE (cpe_id, lang)
+);
+
+-- create titles fts table
+CREATE VIRTUAL TABLE cpe_fts_titles USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- title
+ title,
+
+ -- use porter stemming
+ tokenize = 'porter'
+);
+
+-- create titles all fts insert trigger
+CREATE TRIGGER tr_cpe_titles_insert_fts_all AFTER INSERT ON cpe_titles
+ FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN
+ INSERT INTO cpe_fts_all(cpe_id, title, href, val)
+ SELECT NEW.cpe_id,
+ NEW.val,
+ b.href,
+ b.val
+
+ FROM cpes a
+ JOIN cpe_refs b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE a.cpe_id = NEW.cpe_id;
+ END;
+
+-- create titles delete all fts trigger
+CREATE TRIGGER tr_cpe_titles_delete_fts_all BEFORE DELETE ON cpe_titles
+ FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN
+ DELETE FROM cpe_fts_all
+ WHERE cpe_id = OLD.cpe_id
+ AND title = OLD.val;
+ END;
+
+-- create titles all fts insert trigger
+CREATE TRIGGER tr_cpe_titles_insert_fts_titles AFTER INSERT ON cpe_titles
+ FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN
+ INSERT INTO cpe_fts_titles(cpe_id, title) VALUES (NEW.cpe_id, NEW.val);
+ END;
+
+-- create titles delete titles fts trigger
+CREATE TRIGGER tr_cpe_titles_delete_fts_titles BEFORE DELETE ON cpe_titles
+ FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN
+ DELETE FROM cpe_fts_titles
+ WHERE cpe_id = OLD.cpe_id
+ AND title = OLD.val;
+ END;
+
+-- create refs table
+CREATE TABLE cpe_refs (
+ -- reference unique id
+ cpe_ref_id INTEGER PRIMARY KEY,
+
+ -- external CPE id
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
+
+ -- url
+ href TEXT NOT NULL
+ CHECK (LENGTH(href) > 0),
+
+ -- text description
+ val TEXT NOT NULL
+ CHECK (LENGTH(val) > 0)
+);
+
+-- create refs fts table
+CREATE VIRTUAL TABLE cpe_fts_refs USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- reference URL
+ href,
+
+ -- reference name
+ val,
+
+ -- use porter stemming
+ tokenize = 'porter'
+);
+
+-- create refs insert trigger for all fts
+CREATE TRIGGER tr_cpe_refs_insert_fts_all AFTER INSERT ON cpe_refs
+ FOR EACH ROW BEGIN
+ INSERT INTO cpe_fts_all(cpe_id, title, href, val)
+ SELECT NEW.cpe_id,
+ b.val,
+ NEW.href,
+ NEW.val
+
+ FROM cpes a
+ JOIN cpe_titles b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE a.cpe_id = NEW.cpe_id
+ AND b.lang = 'en-US';
+ END;
+
+-- create refs delete trigger for all fts
+CREATE TRIGGER tr_cpe_refs_delete_fts_all BEFORE DELETE ON cpe_refs
+ FOR EACH ROW BEGIN
+ DELETE FROM cpe_fts_all
+ WHERE cpe_id = OLD.cpe_id
+ AND href = OLD.lang
+ AND val = OLD.val;
+ END;
+
+-- create refs insert trigger for refs fts
+CREATE TRIGGER tr_cpe_refs_insert_fts_refs AFTER INSERT ON cpe_refs
+ FOR EACH ROW BEGIN
+ INSERT INTO cpe_fts_refs(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val);
+ END;
+
+-- create refs delete trigger for refs fts
+CREATE TRIGGER tr_cpe_refs_delete_fts_refs BEFORE DELETE ON cpe_refs
+ FOR EACH ROW BEGIN
+ DELETE FROM cpe_fts_refs
+ WHERE cpe_id = OLD.cpe_id
+ AND href = OLD.lang
+ AND val = OLD.val;
+ END;
+
+PRAGMA user_version = 314159;
diff --git a/dbstore/sql/insert-cpe.sql b/dbstore/sql/insert-cpe.sql
new file mode 100644
index 0000000..dca1c2b
--- /dev/null
+++ b/dbstore/sql/insert-cpe.sql
@@ -0,0 +1 @@
+INSERT INTO cpes(cpe_uri, cpe23) VALUES (?, ?);
diff --git a/dbstore/sql/insert-ref.sql b/dbstore/sql/insert-ref.sql
new file mode 100644
index 0000000..a39ab1c
--- /dev/null
+++ b/dbstore/sql/insert-ref.sql
@@ -0,0 +1 @@
+INSERT INTO cpe_refs(cpe_id, href, val) VALUES (?, ?, ?);
diff --git a/dbstore/sql/insert-title.sql b/dbstore/sql/insert-title.sql
new file mode 100644
index 0000000..5045d95
--- /dev/null
+++ b/dbstore/sql/insert-title.sql
@@ -0,0 +1 @@
+INSERT INTO cpe_titles(cpe_id, lang, val) VALUES (?, ?, ?);
diff --git a/dbstore/sql/old-cpe-search-all.sql b/dbstore/sql/old-cpe-search-all.sql
new file mode 100644
index 0000000..18e286f
--- /dev/null
+++ b/dbstore/sql/old-cpe-search-all.sql
@@ -0,0 +1,69 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ FROM cpes a
+ JOIN (
+ -- limit to "best" match (e.g., lowest rank) for given CPE
+ -- NOTE: it's not clear to me whether cross-index bm25()
+ -- comparisons are valid
+ SELECT a.cpe_id,
+ MIN(a.rank) AS rank
+
+ FROM (
+ -- find title matches
+ SELECT b.cpe_id,
+ a.rank AS rank
+
+ FROM cpe_titles_fts a
+ JOIN cpe_titles b
+ ON (b.cpe_title_id = a.rowid)
+
+ WHERE cpe_titles_fts MATCH :q
+
+ UNION ALL
+
+ -- find reference matches
+ SELECT b.cpe_id,
+ a.rank
+
+ FROM cpe_refs_fts a
+ JOIN cpe_refs b
+ ON (b.cpe_ref_id = a.rowid)
+
+ WHERE cpe_refs_fts MATCH :q
+ ) a
+
+ GROUP BY a.cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/dbstore/sql/table-exists.sql b/dbstore/sql/table-exists.sql
new file mode 100644
index 0000000..09d32a4
--- /dev/null
+++ b/dbstore/sql/table-exists.sql
@@ -0,0 +1,6 @@
+SELECT EXISTS(
+ SELECT 1
+ FROM sqlite_schema
+ WHERE type = 'table'
+ AND name = ?
+)
diff --git a/dbstore/testdata/sql/fill-fts.sql b/dbstore/testdata/sql/fill-fts.sql
new file mode 100644
index 0000000..5329ca7
--- /dev/null
+++ b/dbstore/testdata/sql/fill-fts.sql
@@ -0,0 +1,10 @@
+-- populate fts table
+INSERT INTO cpe_fts(rowid, title)
+ SELECT a.cpe_id,
+ COALESCE(b.val, '')
+
+ FROM cpes a
+ LEFT JOIN cpe_titles b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE b.lang = 'en-US';
diff --git a/dbstore/testdata/sql/init.sql b/dbstore/testdata/sql/init.sql
new file mode 100644
index 0000000..a08b2f8
--- /dev/null
+++ b/dbstore/testdata/sql/init.sql
@@ -0,0 +1,73 @@
+-- enable foreign keys
+PRAGMA foreign_keys = true;
+
+-- create cpes table
+CREATE TABLE cpes (
+ cpe_id INTEGER PRIMARY KEY,
+
+ -- cpe uri
+ cpe_uri TEXT UNIQUE NOT NULL,
+
+ -- cpe 2.3 formatting string
+ cpe23 TEXT UNIQUE NOT NULL
+);
+
+-- create titles table
+CREATE TABLE cpe_titles (
+ -- external CPE id
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
+
+ -- language code
+ lang TEXT NOT NULL,
+
+ -- text value
+ val TEXT NOT NULL,
+
+ UNIQUE (cpe_id, lang)
+);
+
+-- create titles fts table
+CREATE VIRTUAL TABLE cpe_titles_fts USING fts5(title, tokenize = 'porter');
+
+-- create titles insert trigger
+CREATE TRIGGER tr_cpe_titles_insert AFTER INSERT ON cpe_titles
+ FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN
+ INSERT INTO cpe_titles_fts(rowid, title) VALUES (NEW.cpe_id, NEW.val);
+ END;
+
+-- create titles delete trigger
+CREATE TRIGGER tr_cpe_titles_delete BEFORE DELETE ON cpe_titles
+ FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN
+ DELETE FROM cpe_titles_fts WHERE rowid = OLD.cpe_id;
+ END;
+
+-- create refs table
+CREATE TABLE cpe_refs (
+ -- external CPE id
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
+
+ -- url
+ href TEXT NOT NULL,
+
+ -- text description
+ val TEXT NOT NULL
+);
+
+-- create refs fts
+CREATE VIRTUAL TABLE cpe_refs_fts USING fts5(cpe_id UNINDEXED, href, val);
+
+-- create refs insert trigger
+CREATE TRIGGER tr_cpe_refs_insert AFTER INSERT ON cpe_refs
+ FOR EACH ROW BEGIN
+ INSERT INTO cpe_refs_fts(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val);
+ END;
+
+-- create refs delete trigger
+CREATE TRIGGER tr_cpe_refs_delete BEFORE DELETE ON cpe_refs
+ FOR EACH ROW BEGIN
+ DELETE FROM cpe_refs_fts WHERE rowid = OLD.cpe_id;
+ END;
+
+PRAGMA user_version = 314159;
diff --git a/dbstore/testdata/sql/insert-cpe.sql b/dbstore/testdata/sql/insert-cpe.sql
new file mode 100644
index 0000000..dca1c2b
--- /dev/null
+++ b/dbstore/testdata/sql/insert-cpe.sql
@@ -0,0 +1 @@
+INSERT INTO cpes(cpe_uri, cpe23) VALUES (?, ?);
diff --git a/dbstore/testdata/sql/insert-ref.sql b/dbstore/testdata/sql/insert-ref.sql
new file mode 100644
index 0000000..a39ab1c
--- /dev/null
+++ b/dbstore/testdata/sql/insert-ref.sql
@@ -0,0 +1 @@
+INSERT INTO cpe_refs(cpe_id, href, val) VALUES (?, ?, ?);
diff --git a/dbstore/testdata/sql/insert-title.sql b/dbstore/testdata/sql/insert-title.sql
new file mode 100644
index 0000000..5045d95
--- /dev/null
+++ b/dbstore/testdata/sql/insert-title.sql
@@ -0,0 +1 @@
+INSERT INTO cpe_titles(cpe_id, lang, val) VALUES (?, ?, ?);
diff --git a/dbstore/testdata/sql/table-exists.sql b/dbstore/testdata/sql/table-exists.sql
new file mode 100644
index 0000000..09d32a4
--- /dev/null
+++ b/dbstore/testdata/sql/table-exists.sql
@@ -0,0 +1,6 @@
+SELECT EXISTS(
+ SELECT 1
+ FROM sqlite_schema
+ WHERE type = 'table'
+ AND name = ?
+)
diff --git a/dbstore/testdata/test-0.xml.gz b/dbstore/testdata/test-0.xml.gz
new file mode 100644
index 0000000..110e965
--- /dev/null
+++ b/dbstore/testdata/test-0.xml.gz
Binary files differ