diff options
Diffstat (limited to 'dbstore/sql')
-rw-r--r-- | dbstore/sql/cpe-search-all.sql | 46 | ||||
-rw-r--r-- | dbstore/sql/cpe-search-ref.sql | 47 | ||||
-rw-r--r-- | dbstore/sql/cpe-search-title.sql | 44 | ||||
-rw-r--r-- | dbstore/sql/fill-fts.sql | 10 | ||||
-rw-r--r-- | dbstore/sql/init.sql | 180 | ||||
-rw-r--r-- | dbstore/sql/insert-cpe.sql | 1 | ||||
-rw-r--r-- | dbstore/sql/insert-ref.sql | 1 | ||||
-rw-r--r-- | dbstore/sql/insert-title.sql | 1 | ||||
-rw-r--r-- | dbstore/sql/old-cpe-search-all.sql | 69 | ||||
-rw-r--r-- | dbstore/sql/table-exists.sql | 6 |
10 files changed, 405 insertions, 0 deletions
diff --git a/dbstore/sql/cpe-search-all.sql b/dbstore/sql/cpe-search-all.sql new file mode 100644 index 0000000..3260902 --- /dev/null +++ b/dbstore/sql/cpe-search-all.sql @@ -0,0 +1,46 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + FROM cpes a + JOIN ( + -- limit to "best" match (e.g., lowest rank) for given CPE + -- NOTE: it's not clear to me whether cross-index bm25() + -- comparisons are valid + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_all a + WHERE cpe_fts_all MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/dbstore/sql/cpe-search-ref.sql b/dbstore/sql/cpe-search-ref.sql new file mode 100644 index 0000000..a2d3be6 --- /dev/null +++ b/dbstore/sql/cpe-search-ref.sql @@ -0,0 +1,47 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + -- find matching cpes + FROM cpes a + JOIN ( + -- there can be multiple matching references for a given + -- CPE, so use MIN() to limit the match to the most + -- "relevant", according to bm25() + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_refs + WHERE cpe_fts_refs MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/dbstore/sql/cpe-search-title.sql b/dbstore/sql/cpe-search-title.sql new file mode 100644 index 0000000..7421ffc --- /dev/null +++ b/dbstore/sql/cpe-search-title.sql @@ -0,0 +1,44 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + -- find matching cpes + FROM cpes a + JOIN ( + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_titles + WHERE cpe_fts_titles MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank; diff --git a/dbstore/sql/fill-fts.sql b/dbstore/sql/fill-fts.sql new file mode 100644 index 0000000..5329ca7 --- /dev/null +++ b/dbstore/sql/fill-fts.sql @@ -0,0 +1,10 @@ +-- populate fts table +INSERT INTO cpe_fts(rowid, title) + SELECT a.cpe_id, + COALESCE(b.val, '') + + FROM cpes a + LEFT JOIN cpe_titles b + ON (b.cpe_id = a.cpe_id) + + WHERE b.lang = 'en-US'; diff --git a/dbstore/sql/init.sql b/dbstore/sql/init.sql new file mode 100644 index 0000000..37eef54 --- /dev/null +++ b/dbstore/sql/init.sql @@ -0,0 +1,180 @@ +-- enable foreign keys +PRAGMA foreign_keys = true; + +-- create cpes table +CREATE TABLE cpes ( + -- cpe unique ID + cpe_id INTEGER PRIMARY KEY, + + -- cpe uri + cpe_uri TEXT UNIQUE NOT NULL, + + -- cpe 2.3 formatting string + cpe23 TEXT UNIQUE NOT NULL +); + +-- create all fts table (cross join across all +-- titles and references, populated by triggers) +CREATE VIRTUAL TABLE cpe_fts_all USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- title + title, + + -- reference URL + href, + + -- reference name + val, + + -- use porter stemming + tokenize = 'porter' +); + +-- create titles table +CREATE TABLE cpe_titles ( + -- title primary key + cpe_title_id INTEGER PRIMARY KEY, + + -- external CPE id + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), + + -- language code + lang TEXT NOT NULL + CHECK (LENGTH(lang) > 0), + + -- text value + val TEXT NOT NULL + CHECK (LENGTH(val) > 0), + + -- prevent duplicate titles of the same name + UNIQUE (cpe_id, lang) +); + +-- create titles fts table +CREATE VIRTUAL TABLE cpe_fts_titles USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- title + title, + + -- use porter stemming + tokenize = 'porter' +); + +-- create titles all fts insert trigger +CREATE TRIGGER tr_cpe_titles_insert_fts_all AFTER INSERT ON cpe_titles + FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN + INSERT INTO cpe_fts_all(cpe_id, title, href, val) + SELECT NEW.cpe_id, + NEW.val, + b.href, + b.val + + FROM cpes a + JOIN cpe_refs b + ON (b.cpe_id = a.cpe_id) + + WHERE a.cpe_id = NEW.cpe_id; + END; + +-- create titles delete all fts trigger +CREATE TRIGGER tr_cpe_titles_delete_fts_all BEFORE DELETE ON cpe_titles + FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN + DELETE FROM cpe_fts_all + WHERE cpe_id = OLD.cpe_id + AND title = OLD.val; + END; + +-- create titles all fts insert trigger +CREATE TRIGGER tr_cpe_titles_insert_fts_titles AFTER INSERT ON cpe_titles + FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN + INSERT INTO cpe_fts_titles(cpe_id, title) VALUES (NEW.cpe_id, NEW.val); + END; + +-- create titles delete titles fts trigger +CREATE TRIGGER tr_cpe_titles_delete_fts_titles BEFORE DELETE ON cpe_titles + FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN + DELETE FROM cpe_fts_titles + WHERE cpe_id = OLD.cpe_id + AND title = OLD.val; + END; + +-- create refs table +CREATE TABLE cpe_refs ( + -- reference unique id + cpe_ref_id INTEGER PRIMARY KEY, + + -- external CPE id + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), + + -- url + href TEXT NOT NULL + CHECK (LENGTH(href) > 0), + + -- text description + val TEXT NOT NULL + CHECK (LENGTH(val) > 0) +); + +-- create refs fts table +CREATE VIRTUAL TABLE cpe_fts_refs USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- reference URL + href, + + -- reference name + val, + + -- use porter stemming + tokenize = 'porter' +); + +-- create refs insert trigger for all fts +CREATE TRIGGER tr_cpe_refs_insert_fts_all AFTER INSERT ON cpe_refs + FOR EACH ROW BEGIN + INSERT INTO cpe_fts_all(cpe_id, title, href, val) + SELECT NEW.cpe_id, + b.val, + NEW.href, + NEW.val + + FROM cpes a + JOIN cpe_titles b + ON (b.cpe_id = a.cpe_id) + + WHERE a.cpe_id = NEW.cpe_id + AND b.lang = 'en-US'; + END; + +-- create refs delete trigger for all fts +CREATE TRIGGER tr_cpe_refs_delete_fts_all BEFORE DELETE ON cpe_refs + FOR EACH ROW BEGIN + DELETE FROM cpe_fts_all + WHERE cpe_id = OLD.cpe_id + AND href = OLD.lang + AND val = OLD.val; + END; + +-- create refs insert trigger for refs fts +CREATE TRIGGER tr_cpe_refs_insert_fts_refs AFTER INSERT ON cpe_refs + FOR EACH ROW BEGIN + INSERT INTO cpe_fts_refs(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val); + END; + +-- create refs delete trigger for refs fts +CREATE TRIGGER tr_cpe_refs_delete_fts_refs BEFORE DELETE ON cpe_refs + FOR EACH ROW BEGIN + DELETE FROM cpe_fts_refs + WHERE cpe_id = OLD.cpe_id + AND href = OLD.lang + AND val = OLD.val; + END; + +PRAGMA user_version = 314159; diff --git a/dbstore/sql/insert-cpe.sql b/dbstore/sql/insert-cpe.sql new file mode 100644 index 0000000..dca1c2b --- /dev/null +++ b/dbstore/sql/insert-cpe.sql @@ -0,0 +1 @@ +INSERT INTO cpes(cpe_uri, cpe23) VALUES (?, ?); diff --git a/dbstore/sql/insert-ref.sql b/dbstore/sql/insert-ref.sql new file mode 100644 index 0000000..a39ab1c --- /dev/null +++ b/dbstore/sql/insert-ref.sql @@ -0,0 +1 @@ +INSERT INTO cpe_refs(cpe_id, href, val) VALUES (?, ?, ?); diff --git a/dbstore/sql/insert-title.sql b/dbstore/sql/insert-title.sql new file mode 100644 index 0000000..5045d95 --- /dev/null +++ b/dbstore/sql/insert-title.sql @@ -0,0 +1 @@ +INSERT INTO cpe_titles(cpe_id, lang, val) VALUES (?, ?, ?); diff --git a/dbstore/sql/old-cpe-search-all.sql b/dbstore/sql/old-cpe-search-all.sql new file mode 100644 index 0000000..18e286f --- /dev/null +++ b/dbstore/sql/old-cpe-search-all.sql @@ -0,0 +1,69 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + FROM cpes a + JOIN ( + -- limit to "best" match (e.g., lowest rank) for given CPE + -- NOTE: it's not clear to me whether cross-index bm25() + -- comparisons are valid + SELECT a.cpe_id, + MIN(a.rank) AS rank + + FROM ( + -- find title matches + SELECT b.cpe_id, + a.rank AS rank + + FROM cpe_titles_fts a + JOIN cpe_titles b + ON (b.cpe_title_id = a.rowid) + + WHERE cpe_titles_fts MATCH :q + + UNION ALL + + -- find reference matches + SELECT b.cpe_id, + a.rank + + FROM cpe_refs_fts a + JOIN cpe_refs b + ON (b.cpe_ref_id = a.rowid) + + WHERE cpe_refs_fts MATCH :q + ) a + + GROUP BY a.cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/dbstore/sql/table-exists.sql b/dbstore/sql/table-exists.sql new file mode 100644 index 0000000..09d32a4 --- /dev/null +++ b/dbstore/sql/table-exists.sql @@ -0,0 +1,6 @@ +SELECT EXISTS( + SELECT 1 + FROM sqlite_schema + WHERE type = 'table' + AND name = ? +) |