diff options
Diffstat (limited to 'dbstore')
-rw-r--r-- | dbstore/cpesearchtype_string.go | 25 | ||||
-rw-r--r-- | dbstore/dbstore.go | 255 | ||||
-rw-r--r-- | dbstore/dbstore_test.go | 335 | ||||
-rw-r--r-- | dbstore/sql/cpe-search-all.sql | 46 | ||||
-rw-r--r-- | dbstore/sql/cpe-search-ref.sql | 47 | ||||
-rw-r--r-- | dbstore/sql/cpe-search-title.sql | 44 | ||||
-rw-r--r-- | dbstore/sql/fill-fts.sql | 10 | ||||
-rw-r--r-- | dbstore/sql/init.sql | 180 | ||||
-rw-r--r-- | dbstore/sql/insert-cpe.sql | 1 | ||||
-rw-r--r-- | dbstore/sql/insert-ref.sql | 1 | ||||
-rw-r--r-- | dbstore/sql/insert-title.sql | 1 | ||||
-rw-r--r-- | dbstore/sql/old-cpe-search-all.sql | 69 | ||||
-rw-r--r-- | dbstore/sql/table-exists.sql | 6 | ||||
-rw-r--r-- | dbstore/testdata/sql/fill-fts.sql | 10 | ||||
-rw-r--r-- | dbstore/testdata/sql/init.sql | 73 | ||||
-rw-r--r-- | dbstore/testdata/sql/insert-cpe.sql | 1 | ||||
-rw-r--r-- | dbstore/testdata/sql/insert-ref.sql | 1 | ||||
-rw-r--r-- | dbstore/testdata/sql/insert-title.sql | 1 | ||||
-rw-r--r-- | dbstore/testdata/sql/table-exists.sql | 6 | ||||
-rw-r--r-- | dbstore/testdata/test-0.xml.gz | bin | 0 -> 1359 bytes |
20 files changed, 1112 insertions, 0 deletions
diff --git a/dbstore/cpesearchtype_string.go b/dbstore/cpesearchtype_string.go new file mode 100644 index 0000000..1d4826e --- /dev/null +++ b/dbstore/cpesearchtype_string.go @@ -0,0 +1,25 @@ +// Code generated by "stringer -linecomment -type=CpeSearchType"; DO NOT EDIT. + +package dbstore + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[CpeSearchAll-0] + _ = x[CpeSearchTitle-1] + _ = x[CpeSearchRef-2] +} + +const _CpeSearchType_name = "cpe-search-allcpe-search-titlecpe-search-ref" + +var _CpeSearchType_index = [...]uint8{0, 14, 30, 44} + +func (i CpeSearchType) String() string { + if i >= CpeSearchType(len(_CpeSearchType_index)-1) { + return "CpeSearchType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _CpeSearchType_name[_CpeSearchType_index[i]:_CpeSearchType_index[i+1]] +} diff --git a/dbstore/dbstore.go b/dbstore/dbstore.go new file mode 100644 index 0000000..4df9fb9 --- /dev/null +++ b/dbstore/dbstore.go @@ -0,0 +1,255 @@ +// database storage +package dbstore + +import ( + "context" + db_sql "database/sql" + "embed" + "encoding/json" + "fmt" + _ "github.com/mattn/go-sqlite3" + "github.com/pablotron/cvez/cpedict" +) + +//go:embed sql +var sqlFs embed.FS + +type DbStore struct { + db *db_sql.DB +} + +// open database +func Open(path string) (DbStore, error) { + var r DbStore + // init db + if db, err := db_sql.Open("sqlite3", path); err != nil { + return r, err + } else { + r.db = db + return r, nil + } +} + +// initialized database version +const initDbVersion = 314159 + +func (me DbStore) isInitialized(ctx context.Context) (bool, error) { + sql := "PRAGMA user_version;" + // get version + var version int32 + if err := me.db.QueryRowContext(ctx, sql).Scan(&version); err != nil { + return false, err + } + + // return result + return (version == initDbVersion), nil +} + +// initialize database +func (me DbStore) Init(ctx context.Context) error { + if inited, err := me.isInitialized(ctx); err != nil { + return err + } else if inited { + // already initialized, return success + return nil + } + + // read init query + sql, err := sqlFs.ReadFile("sql/init.sql") + if err != nil { + return err + } + + // exec init query, return result + _, err = me.db.ExecContext(ctx, string(sql)) + return err +} + +// get single query from embedded filesystem +func getQuery(id string) (string, error) { + // read query + if data, err := sqlFs.ReadFile(fmt.Sprintf("sql/%s.sql", id)); err != nil { + return "", err + } else { + // return query + return string(data), nil + } +} + +// return query map +func getQueries(ids []string) (map[string]string, error) { + r := make(map[string]string) + + for _, id := range(ids) { + // read query + if sql, err := getQuery(id); err != nil { + return r, fmt.Errorf("%s: %s", id, err.Error()) + } else { + // save query + r[id] = sql + } + } + + // return success + return r, nil +} + +// import CPE dictionary +func (me DbStore) AddCpeDictionary(ctx context.Context, dict cpedict.Dictionary) error { + // lazy-init db + if err := me.Init(ctx); err != nil { + return err + } + + // build query map + queryIds := []string { "insert-cpe", "insert-title", "insert-ref" } + queries, err := getQueries(queryIds) + if err != nil { + return err + } + + // begin context + tx, err := me.db.BeginTx(ctx, nil) + if err != nil { + return err + } + + // build statements + sts := make(map[string]*db_sql.Stmt) + for id, sql := range(queries) { + if st, err := tx.PrepareContext(ctx, sql); err != nil { + return err + } else { + sts[id] = st + defer sts[id].Close() + } + } + + // add items + for _, item := range(dict.Items) { + // add cpe + rs, err := sts["insert-cpe"].ExecContext(ctx, item.CpeUri, item.Cpe23Item.Name) + if err != nil { + return err + } + + // get last row ID + id, err := rs.LastInsertId() + if err != nil { + return err + } + + // add titles + for _, title := range(item.Titles) { + _, err := sts["insert-title"].ExecContext(ctx, id, title.Lang, title.Text) + if err != nil { + return err + } + } + + // add refs + for _, ref := range(item.References) { + _, err := sts["insert-ref"].ExecContext(ctx, id, ref.Href, ref.Text) + if err != nil { + return err + } + } + } + + // commit changes, return result + return tx.Commit() +} + +//go:generate stringer -linecomment -type=CpeSearchType + +// CPE search type +type CpeSearchType byte + +const ( + CpeSearchAll CpeSearchType = iota //cpe-search-all + CpeSearchTitle // cpe-search-title + CpeSearchRef // cpe-search-ref +) + +// title search result +type CpeSearchRow struct { + // Database CPE ID + CpeId int64 `json:"cpe_id"` + + // v2.3 formatting string + Cpe23 string `json:"cpe23"` + + // titles + Titles []cpedict.Title `json:"titles"` + + // references + Refs []cpedict.Reference `json:"refs"` + + // search result rank + Rank float32 `json:"rank"` +} + +// search CPEs +func (me DbStore) CpeSearch( + ctx context.Context, + searchType CpeSearchType, + s string, +) ([]CpeSearchRow, error) { + var r []CpeSearchRow + + // lazy-init db + if err := me.Init(ctx); err != nil { + return r, err + } + + // get query + sql, err := getQuery(searchType.String()) + if err != nil { + return r, err + } + + // exec search query + rows, err := me.db.QueryContext(ctx, sql, db_sql.Named("q", s)) + if err != nil { + return r, err + } + + // walk results + for rows.Next() { + var sr CpeSearchRow + var titles string + var refs string + + // get row values + err = rows.Scan(&sr.CpeId, &sr.Cpe23, &titles, &refs, &sr.Rank) + if err != nil { + return r, err + } + + // unmarshal titles + if err = json.Unmarshal([]byte(titles), &sr.Titles); err != nil { + return r, err + } + + // unmarshal refs + if err = json.Unmarshal([]byte(refs), &sr.Refs); err != nil { + return r, err + } + + // append to results + r = append(r, sr) + } + + // close rows + if err = rows.Close(); err != nil { + return r, err + } + + // check for iteration errors + if err = rows.Err(); err != nil { + return r, err + } + + // return success + return r, nil +} diff --git a/dbstore/dbstore_test.go b/dbstore/dbstore_test.go new file mode 100644 index 0000000..f957146 --- /dev/null +++ b/dbstore/dbstore_test.go @@ -0,0 +1,335 @@ +package dbstore + +import ( + "compress/gzip" + "context" + db_sql "database/sql" + "encoding/xml" + "embed" + "errors" + "fmt" + _ "github.com/mattn/go-sqlite3" + "github.com/pablotron/cvez/cpedict" + io_fs "io/fs" + "os" + "reflect" + "testing" +) + +func getTestDictionary(path string) (cpedict.Dictionary, error) { + var dict cpedict.Dictionary + + // open test data + f, err := os.Open(path) + if err != nil { + return dict, err + } + defer f.Close() + + // create zip reader + gz, err := gzip.NewReader(f) + if err != nil { + return dict, err + } + defer gz.Close() + + // create xml decoder + d := xml.NewDecoder(gz) + + // decode xml + if err = d.Decode(&dict); err != nil { + return dict, err + } + + // return success + return dict, nil +} +//go:embed testdata/sql/*.sql +var testSqlFs embed.FS + +var testSqlIds = map[string]bool { + "init": false, + "insert-cpe": true, + "insert-title": true, + "insert-ref": true, +} + +func getTestQueries() (map[string]string, error) { + r := make(map[string]string) + + for id, _ := range(testSqlIds) { + path := fmt.Sprintf("testdata/sql/%s.sql", id) + if data, err := testSqlFs.ReadFile(path); err != nil { + return r, err + } else { + r[id] = string(data) + } + } + + return r, nil +} + +func ignoreTestSimple(t *testing.T) { + testDbPath := "./testdata/foo.db" + // get queries + queries, err := getTestQueries() + if err != nil { + t.Error(err) + return + } + + // load test CPEs + dict, err := getTestDictionary("testdata/test-0.xml.gz") + if err != nil { + t.Error(err) + return + } + + // does test db exist? + if _, err = os.Stat(testDbPath); err != nil { + if !errors.Is(err, io_fs.ErrNotExist) { + t.Error(err) + return + } + } else if err == nil { + // remove test db + if err = os.Remove(testDbPath); err != nil { + t.Error(err) + return + } + } + + // init db + db, err := db_sql.Open("sqlite3", testDbPath) + if err != nil { + t.Error(err) + return + } + defer db.Close() + + // init tables + if _, err := db.Exec(queries["init"]); err != nil { + t.Error(err) + return + } + + tx, err := db.Begin() + if err != nil { + t.Error(err) + return + } + + // build statements + sts := make(map[string]*db_sql.Stmt) + for id, use := range(testSqlIds) { + if use { + if st, err := tx.Prepare(queries[id]); err != nil { + t.Error(err) + return + } else { + sts[id] = st + defer sts[id].Close() + } + } + } + + // add items + for _, item := range(dict.Items) { + // add cpe + rs, err := sts["insert-cpe"].Exec(item.CpeUri, item.Cpe23Item.Name); + if err != nil { + t.Error(err) + return + } + + // get last row ID + id, err := rs.LastInsertId() + if err != nil { + t.Error(err) + return + } + + // add titles + for _, title := range(item.Titles) { + if _, err := sts["insert-title"].Exec(id, title.Lang, title.Text); err != nil { + t.Error(err) + return + } + } + + // add refs + for _, ref := range(item.References) { + if _, err := sts["insert-ref"].Exec(id, ref.Href, ref.Text); err != nil { + t.Error(err) + return + } + } + } + + // commit changes + if err = tx.Commit(); err != nil { + t.Error(err) + return + } +} + +// remove file if it exists +func removeFile(path string) error { + // remove file + err := os.Remove(path) + if err != nil && errors.Is(err, io_fs.ErrNotExist) { + return nil + } + + return err +} + +func createTestDb(ctx context.Context, path string) (DbStore, error) { + // remove existing file + if err := removeFile(path); err != nil { + return DbStore{}, err + } + + // open db + return Open(path) +} + +func seedTestDb(ctx context.Context, db DbStore) error { + // load test CPEs + dict, err := getTestDictionary("testdata/test-0.xml.gz") + if err != nil { + return err + } + + // add cpe dictionary + return db.AddCpeDictionary(ctx, dict) + + // TODO: seed with other data +} + +func TestOpen(t *testing.T) { + path := "./testdata/test-new.db" + ctx := context.Background() + + if _, err := createTestDb(ctx, path); err != nil { + t.Error(err) + return + } +} + +func TestAddCpeDictionary(t *testing.T) { + path := "./testdata/test-addcpedict.db" + ctx := context.Background() + + // create db + db, err := createTestDb(ctx, path) + if err != nil { + t.Error(err) + return + } + + // load test CPEs + dict, err := getTestDictionary("testdata/test-0.xml.gz") + if err != nil { + t.Error(err) + return + } + + // add cpe dictionary + if err := db.AddCpeDictionary(ctx, dict); err != nil { + t.Error(err) + return + } +} + +// sqlite> select a.cpe23 from cpes a join (select cpe_id, min(rank) as rank from cpe_fts_all where cpe_fts_all match 'advisory' group by cpe_id) b on (b.cpe_id = a.cpe_id) order by b.rank; +// sqlite> select a.cpe23 from cpes a join (select cpe_id, min(rank) as rank from cpe_fts_all where cpe_fts_all match 'advisory AND book' group by cpe_id) b on (b.cpe_id = a.cpe_id) order by b.rank; +// cpe:2.3:a:\$0.99_kindle_books_project:\$0.99_kindle_books:6:*:*:*:*:android:*:* +// +// sqlite> select c.cpe_id, c.cpe23, a.rank from cpe_titles_fts a join cpe_titles b on (b.cpe_title_id = a.rowid) join cpes c on (c.cpe_id = b.cpe_id) where cpe_titles_fts match 'project' order by a.rank; +// 2|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:-:*:*:*:*:node.js:*:*|-0.775759508773217 +// 3|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.1.0:*:*:*:*:node.js:*:*|-0.66983333682734 +// 4|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.0:*:*:*:*:node.js:*:*|-0.66983333682734 +// 5|cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.1:*:*:*:*:node.js:*:*|-0.66983333682734 +// 1|cpe:2.3:a:\$0.99_kindle_books_project:\$0.99_kindle_books:6:*:*:*:*:android:*:*|-0.545655647541265 +// +// sqlite> select a.cpe23 from cpes a join (select cpe_id, min(rank) as rank from cpe_fts_refs where cpe_fts_refs match 'advisory' group by cpe_id) b on (b.cpe_id = a.cpe_id) order by b.rank; +// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:-:*:*:*:*:node.js:*:* +// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.1.0:*:*:*:*:node.js:*:* +// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.0:*:*:*:*:node.js:*:* +// cpe:2.3:a:\@thi.ng\/egf_project:\@thi.ng\/egf:0.2.1:*:*:*:*:node.js:*:* +// cpe:2.3:a:360totalsecurity:360_total_security:12.1.0.1005:*:*:*:*:*:*:* +// cpe:2.3:a:\$0.99_kindle_books_project:\$0.99_kindle_books:6:*:*:*:*:android:*:* + + +func TestCpeSearch(t *testing.T) { + path := "./testdata/test-search.db" + ctx := context.Background() + + tests := []struct { + t CpeSearchType // search type + q string // query string + exp []string // expected search results (cpe23s) + } {{ + t: CpeSearchAll, + q: "advisory AND book", + exp: []string { + "cpe:2.3:a:\\$0.99_kindle_books_project:\\$0.99_kindle_books:6:*:*:*:*:android:*:*", + }, + }, { + t: CpeSearchTitle, + q: "project", + exp: []string { + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:-:*:*:*:*:node.js:*:*", + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.1.0:*:*:*:*:node.js:*:*", + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.0:*:*:*:*:node.js:*:*", + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.1:*:*:*:*:node.js:*:*", + "cpe:2.3:a:\\$0.99_kindle_books_project:\\$0.99_kindle_books:6:*:*:*:*:android:*:*", + }, + }, { + t: CpeSearchRef, + q: "advisory", + exp: []string { + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:-:*:*:*:*:node.js:*:*", + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.1.0:*:*:*:*:node.js:*:*", + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.0:*:*:*:*:node.js:*:*", + "cpe:2.3:a:\\@thi.ng\\/egf_project:\\@thi.ng\\/egf:0.2.1:*:*:*:*:node.js:*:*", + "cpe:2.3:a:360totalsecurity:360_total_security:12.1.0.1005:*:*:*:*:*:*:*", + "cpe:2.3:a:\\$0.99_kindle_books_project:\\$0.99_kindle_books:6:*:*:*:*:android:*:*", + }, + }} + + // create db + db, err := createTestDb(ctx, path) + if err != nil { + t.Error(err) + return + } + + // seed test database + if err = seedTestDb(ctx, db); err != nil { + t.Error(err) + return + } + + for _, test := range(tests) { + t.Run(test.t.String(), func(t *testing.T) { + rows, err := db.CpeSearch(ctx, test.t, test.q) + if err != nil { + t.Error(err) + return + } + + // build ids + got := make([]string, len(rows)) + for i, row := range(rows) { + got[i] = row.Cpe23 + } + + if !reflect.DeepEqual(got, test.exp) { + t.Errorf("got \"%v\", exp \"%v\"", got, test.exp) + return + } + }) + } +} diff --git a/dbstore/sql/cpe-search-all.sql b/dbstore/sql/cpe-search-all.sql new file mode 100644 index 0000000..3260902 --- /dev/null +++ b/dbstore/sql/cpe-search-all.sql @@ -0,0 +1,46 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + FROM cpes a + JOIN ( + -- limit to "best" match (e.g., lowest rank) for given CPE + -- NOTE: it's not clear to me whether cross-index bm25() + -- comparisons are valid + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_all a + WHERE cpe_fts_all MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/dbstore/sql/cpe-search-ref.sql b/dbstore/sql/cpe-search-ref.sql new file mode 100644 index 0000000..a2d3be6 --- /dev/null +++ b/dbstore/sql/cpe-search-ref.sql @@ -0,0 +1,47 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + -- find matching cpes + FROM cpes a + JOIN ( + -- there can be multiple matching references for a given + -- CPE, so use MIN() to limit the match to the most + -- "relevant", according to bm25() + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_refs + WHERE cpe_fts_refs MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/dbstore/sql/cpe-search-title.sql b/dbstore/sql/cpe-search-title.sql new file mode 100644 index 0000000..7421ffc --- /dev/null +++ b/dbstore/sql/cpe-search-title.sql @@ -0,0 +1,44 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + -- find matching cpes + FROM cpes a + JOIN ( + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_titles + WHERE cpe_fts_titles MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank; diff --git a/dbstore/sql/fill-fts.sql b/dbstore/sql/fill-fts.sql new file mode 100644 index 0000000..5329ca7 --- /dev/null +++ b/dbstore/sql/fill-fts.sql @@ -0,0 +1,10 @@ +-- populate fts table +INSERT INTO cpe_fts(rowid, title) + SELECT a.cpe_id, + COALESCE(b.val, '') + + FROM cpes a + LEFT JOIN cpe_titles b + ON (b.cpe_id = a.cpe_id) + + WHERE b.lang = 'en-US'; diff --git a/dbstore/sql/init.sql b/dbstore/sql/init.sql new file mode 100644 index 0000000..37eef54 --- /dev/null +++ b/dbstore/sql/init.sql @@ -0,0 +1,180 @@ +-- enable foreign keys +PRAGMA foreign_keys = true; + +-- create cpes table +CREATE TABLE cpes ( + -- cpe unique ID + cpe_id INTEGER PRIMARY KEY, + + -- cpe uri + cpe_uri TEXT UNIQUE NOT NULL, + + -- cpe 2.3 formatting string + cpe23 TEXT UNIQUE NOT NULL +); + +-- create all fts table (cross join across all +-- titles and references, populated by triggers) +CREATE VIRTUAL TABLE cpe_fts_all USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- title + title, + + -- reference URL + href, + + -- reference name + val, + + -- use porter stemming + tokenize = 'porter' +); + +-- create titles table +CREATE TABLE cpe_titles ( + -- title primary key + cpe_title_id INTEGER PRIMARY KEY, + + -- external CPE id + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), + + -- language code + lang TEXT NOT NULL + CHECK (LENGTH(lang) > 0), + + -- text value + val TEXT NOT NULL + CHECK (LENGTH(val) > 0), + + -- prevent duplicate titles of the same name + UNIQUE (cpe_id, lang) +); + +-- create titles fts table +CREATE VIRTUAL TABLE cpe_fts_titles USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- title + title, + + -- use porter stemming + tokenize = 'porter' +); + +-- create titles all fts insert trigger +CREATE TRIGGER tr_cpe_titles_insert_fts_all AFTER INSERT ON cpe_titles + FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN + INSERT INTO cpe_fts_all(cpe_id, title, href, val) + SELECT NEW.cpe_id, + NEW.val, + b.href, + b.val + + FROM cpes a + JOIN cpe_refs b + ON (b.cpe_id = a.cpe_id) + + WHERE a.cpe_id = NEW.cpe_id; + END; + +-- create titles delete all fts trigger +CREATE TRIGGER tr_cpe_titles_delete_fts_all BEFORE DELETE ON cpe_titles + FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN + DELETE FROM cpe_fts_all + WHERE cpe_id = OLD.cpe_id + AND title = OLD.val; + END; + +-- create titles all fts insert trigger +CREATE TRIGGER tr_cpe_titles_insert_fts_titles AFTER INSERT ON cpe_titles + FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN + INSERT INTO cpe_fts_titles(cpe_id, title) VALUES (NEW.cpe_id, NEW.val); + END; + +-- create titles delete titles fts trigger +CREATE TRIGGER tr_cpe_titles_delete_fts_titles BEFORE DELETE ON cpe_titles + FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN + DELETE FROM cpe_fts_titles + WHERE cpe_id = OLD.cpe_id + AND title = OLD.val; + END; + +-- create refs table +CREATE TABLE cpe_refs ( + -- reference unique id + cpe_ref_id INTEGER PRIMARY KEY, + + -- external CPE id + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), + + -- url + href TEXT NOT NULL + CHECK (LENGTH(href) > 0), + + -- text description + val TEXT NOT NULL + CHECK (LENGTH(val) > 0) +); + +-- create refs fts table +CREATE VIRTUAL TABLE cpe_fts_refs USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- reference URL + href, + + -- reference name + val, + + -- use porter stemming + tokenize = 'porter' +); + +-- create refs insert trigger for all fts +CREATE TRIGGER tr_cpe_refs_insert_fts_all AFTER INSERT ON cpe_refs + FOR EACH ROW BEGIN + INSERT INTO cpe_fts_all(cpe_id, title, href, val) + SELECT NEW.cpe_id, + b.val, + NEW.href, + NEW.val + + FROM cpes a + JOIN cpe_titles b + ON (b.cpe_id = a.cpe_id) + + WHERE a.cpe_id = NEW.cpe_id + AND b.lang = 'en-US'; + END; + +-- create refs delete trigger for all fts +CREATE TRIGGER tr_cpe_refs_delete_fts_all BEFORE DELETE ON cpe_refs + FOR EACH ROW BEGIN + DELETE FROM cpe_fts_all + WHERE cpe_id = OLD.cpe_id + AND href = OLD.lang + AND val = OLD.val; + END; + +-- create refs insert trigger for refs fts +CREATE TRIGGER tr_cpe_refs_insert_fts_refs AFTER INSERT ON cpe_refs + FOR EACH ROW BEGIN + INSERT INTO cpe_fts_refs(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val); + END; + +-- create refs delete trigger for refs fts +CREATE TRIGGER tr_cpe_refs_delete_fts_refs BEFORE DELETE ON cpe_refs + FOR EACH ROW BEGIN + DELETE FROM cpe_fts_refs + WHERE cpe_id = OLD.cpe_id + AND href = OLD.lang + AND val = OLD.val; + END; + +PRAGMA user_version = 314159; diff --git a/dbstore/sql/insert-cpe.sql b/dbstore/sql/insert-cpe.sql new file mode 100644 index 0000000..dca1c2b --- /dev/null +++ b/dbstore/sql/insert-cpe.sql @@ -0,0 +1 @@ +INSERT INTO cpes(cpe_uri, cpe23) VALUES (?, ?); diff --git a/dbstore/sql/insert-ref.sql b/dbstore/sql/insert-ref.sql new file mode 100644 index 0000000..a39ab1c --- /dev/null +++ b/dbstore/sql/insert-ref.sql @@ -0,0 +1 @@ +INSERT INTO cpe_refs(cpe_id, href, val) VALUES (?, ?, ?); diff --git a/dbstore/sql/insert-title.sql b/dbstore/sql/insert-title.sql new file mode 100644 index 0000000..5045d95 --- /dev/null +++ b/dbstore/sql/insert-title.sql @@ -0,0 +1 @@ +INSERT INTO cpe_titles(cpe_id, lang, val) VALUES (?, ?, ?); diff --git a/dbstore/sql/old-cpe-search-all.sql b/dbstore/sql/old-cpe-search-all.sql new file mode 100644 index 0000000..18e286f --- /dev/null +++ b/dbstore/sql/old-cpe-search-all.sql @@ -0,0 +1,69 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + FROM cpes a + JOIN ( + -- limit to "best" match (e.g., lowest rank) for given CPE + -- NOTE: it's not clear to me whether cross-index bm25() + -- comparisons are valid + SELECT a.cpe_id, + MIN(a.rank) AS rank + + FROM ( + -- find title matches + SELECT b.cpe_id, + a.rank AS rank + + FROM cpe_titles_fts a + JOIN cpe_titles b + ON (b.cpe_title_id = a.rowid) + + WHERE cpe_titles_fts MATCH :q + + UNION ALL + + -- find reference matches + SELECT b.cpe_id, + a.rank + + FROM cpe_refs_fts a + JOIN cpe_refs b + ON (b.cpe_ref_id = a.rowid) + + WHERE cpe_refs_fts MATCH :q + ) a + + GROUP BY a.cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/dbstore/sql/table-exists.sql b/dbstore/sql/table-exists.sql new file mode 100644 index 0000000..09d32a4 --- /dev/null +++ b/dbstore/sql/table-exists.sql @@ -0,0 +1,6 @@ +SELECT EXISTS( + SELECT 1 + FROM sqlite_schema + WHERE type = 'table' + AND name = ? +) diff --git a/dbstore/testdata/sql/fill-fts.sql b/dbstore/testdata/sql/fill-fts.sql new file mode 100644 index 0000000..5329ca7 --- /dev/null +++ b/dbstore/testdata/sql/fill-fts.sql @@ -0,0 +1,10 @@ +-- populate fts table +INSERT INTO cpe_fts(rowid, title) + SELECT a.cpe_id, + COALESCE(b.val, '') + + FROM cpes a + LEFT JOIN cpe_titles b + ON (b.cpe_id = a.cpe_id) + + WHERE b.lang = 'en-US'; diff --git a/dbstore/testdata/sql/init.sql b/dbstore/testdata/sql/init.sql new file mode 100644 index 0000000..a08b2f8 --- /dev/null +++ b/dbstore/testdata/sql/init.sql @@ -0,0 +1,73 @@ +-- enable foreign keys +PRAGMA foreign_keys = true; + +-- create cpes table +CREATE TABLE cpes ( + cpe_id INTEGER PRIMARY KEY, + + -- cpe uri + cpe_uri TEXT UNIQUE NOT NULL, + + -- cpe 2.3 formatting string + cpe23 TEXT UNIQUE NOT NULL +); + +-- create titles table +CREATE TABLE cpe_titles ( + -- external CPE id + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), + + -- language code + lang TEXT NOT NULL, + + -- text value + val TEXT NOT NULL, + + UNIQUE (cpe_id, lang) +); + +-- create titles fts table +CREATE VIRTUAL TABLE cpe_titles_fts USING fts5(title, tokenize = 'porter'); + +-- create titles insert trigger +CREATE TRIGGER tr_cpe_titles_insert AFTER INSERT ON cpe_titles + FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN + INSERT INTO cpe_titles_fts(rowid, title) VALUES (NEW.cpe_id, NEW.val); + END; + +-- create titles delete trigger +CREATE TRIGGER tr_cpe_titles_delete BEFORE DELETE ON cpe_titles + FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN + DELETE FROM cpe_titles_fts WHERE rowid = OLD.cpe_id; + END; + +-- create refs table +CREATE TABLE cpe_refs ( + -- external CPE id + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), + + -- url + href TEXT NOT NULL, + + -- text description + val TEXT NOT NULL +); + +-- create refs fts +CREATE VIRTUAL TABLE cpe_refs_fts USING fts5(cpe_id UNINDEXED, href, val); + +-- create refs insert trigger +CREATE TRIGGER tr_cpe_refs_insert AFTER INSERT ON cpe_refs + FOR EACH ROW BEGIN + INSERT INTO cpe_refs_fts(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val); + END; + +-- create refs delete trigger +CREATE TRIGGER tr_cpe_refs_delete BEFORE DELETE ON cpe_refs + FOR EACH ROW BEGIN + DELETE FROM cpe_refs_fts WHERE rowid = OLD.cpe_id; + END; + +PRAGMA user_version = 314159; diff --git a/dbstore/testdata/sql/insert-cpe.sql b/dbstore/testdata/sql/insert-cpe.sql new file mode 100644 index 0000000..dca1c2b --- /dev/null +++ b/dbstore/testdata/sql/insert-cpe.sql @@ -0,0 +1 @@ +INSERT INTO cpes(cpe_uri, cpe23) VALUES (?, ?); diff --git a/dbstore/testdata/sql/insert-ref.sql b/dbstore/testdata/sql/insert-ref.sql new file mode 100644 index 0000000..a39ab1c --- /dev/null +++ b/dbstore/testdata/sql/insert-ref.sql @@ -0,0 +1 @@ +INSERT INTO cpe_refs(cpe_id, href, val) VALUES (?, ?, ?); diff --git a/dbstore/testdata/sql/insert-title.sql b/dbstore/testdata/sql/insert-title.sql new file mode 100644 index 0000000..5045d95 --- /dev/null +++ b/dbstore/testdata/sql/insert-title.sql @@ -0,0 +1 @@ +INSERT INTO cpe_titles(cpe_id, lang, val) VALUES (?, ?, ?); diff --git a/dbstore/testdata/sql/table-exists.sql b/dbstore/testdata/sql/table-exists.sql new file mode 100644 index 0000000..09d32a4 --- /dev/null +++ b/dbstore/testdata/sql/table-exists.sql @@ -0,0 +1,6 @@ +SELECT EXISTS( + SELECT 1 + FROM sqlite_schema + WHERE type = 'table' + AND name = ? +) diff --git a/dbstore/testdata/test-0.xml.gz b/dbstore/testdata/test-0.xml.gz Binary files differnew file mode 100644 index 0000000..110e965 --- /dev/null +++ b/dbstore/testdata/test-0.xml.gz |