From 425d2a2aa3e3d4ebb1cbf60982ef7a699dd79d97 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Sat, 5 Feb 2022 02:34:41 -0500 Subject: internal/dbstore/sql: populate cpe search sql --- internal/dbstore/sql/cpe-search-all.sql | 46 ++++++++ internal/dbstore/sql/cpe-search-ref.sql | 47 ++++++++ internal/dbstore/sql/cpe-search-title.sql | 44 ++++++++ internal/dbstore/sql/init.sql | 159 +++++++++++++++++++++++----- internal/dbstore/sql/old-cpe-search-all.sql | 69 ++++++++++++ internal/dbstore/sql/table-exists.sql | 6 ++ 6 files changed, 345 insertions(+), 26 deletions(-) create mode 100644 internal/dbstore/sql/cpe-search-all.sql create mode 100644 internal/dbstore/sql/cpe-search-ref.sql create mode 100644 internal/dbstore/sql/cpe-search-title.sql create mode 100644 internal/dbstore/sql/old-cpe-search-all.sql create mode 100644 internal/dbstore/sql/table-exists.sql (limited to 'internal/dbstore') diff --git a/internal/dbstore/sql/cpe-search-all.sql b/internal/dbstore/sql/cpe-search-all.sql new file mode 100644 index 0000000..3260902 --- /dev/null +++ b/internal/dbstore/sql/cpe-search-all.sql @@ -0,0 +1,46 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + FROM cpes a + JOIN ( + -- limit to "best" match (e.g., lowest rank) for given CPE + -- NOTE: it's not clear to me whether cross-index bm25() + -- comparisons are valid + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_all a + WHERE cpe_fts_all MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/internal/dbstore/sql/cpe-search-ref.sql b/internal/dbstore/sql/cpe-search-ref.sql new file mode 100644 index 0000000..a2d3be6 --- /dev/null +++ b/internal/dbstore/sql/cpe-search-ref.sql @@ -0,0 +1,47 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + -- find matching cpes + FROM cpes a + JOIN ( + -- there can be multiple matching references for a given + -- CPE, so use MIN() to limit the match to the most + -- "relevant", according to bm25() + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_refs + WHERE cpe_fts_refs MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/internal/dbstore/sql/cpe-search-title.sql b/internal/dbstore/sql/cpe-search-title.sql new file mode 100644 index 0000000..7421ffc --- /dev/null +++ b/internal/dbstore/sql/cpe-search-title.sql @@ -0,0 +1,44 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + -- find matching cpes + FROM cpes a + JOIN ( + SELECT cpe_id, + MIN(rank) AS rank + FROM cpe_fts_titles + WHERE cpe_fts_titles MATCH :q + GROUP BY cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank; diff --git a/internal/dbstore/sql/init.sql b/internal/dbstore/sql/init.sql index a08b2f8..37eef54 100644 --- a/internal/dbstore/sql/init.sql +++ b/internal/dbstore/sql/init.sql @@ -3,71 +3,178 @@ PRAGMA foreign_keys = true; -- create cpes table CREATE TABLE cpes ( - cpe_id INTEGER PRIMARY KEY, + -- cpe unique ID + cpe_id INTEGER PRIMARY KEY, -- cpe uri - cpe_uri TEXT UNIQUE NOT NULL, + cpe_uri TEXT UNIQUE NOT NULL, -- cpe 2.3 formatting string - cpe23 TEXT UNIQUE NOT NULL + cpe23 TEXT UNIQUE NOT NULL +); + +-- create all fts table (cross join across all +-- titles and references, populated by triggers) +CREATE VIRTUAL TABLE cpe_fts_all USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- title + title, + + -- reference URL + href, + + -- reference name + val, + + -- use porter stemming + tokenize = 'porter' ); -- create titles table CREATE TABLE cpe_titles ( + -- title primary key + cpe_title_id INTEGER PRIMARY KEY, + -- external CPE id - cpe_id INT NOT NULL - REFERENCES cpes(cpe_id), + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), -- language code - lang TEXT NOT NULL, + lang TEXT NOT NULL + CHECK (LENGTH(lang) > 0), -- text value - val TEXT NOT NULL, + val TEXT NOT NULL + CHECK (LENGTH(val) > 0), + -- prevent duplicate titles of the same name UNIQUE (cpe_id, lang) ); -- create titles fts table -CREATE VIRTUAL TABLE cpe_titles_fts USING fts5(title, tokenize = 'porter'); +CREATE VIRTUAL TABLE cpe_fts_titles USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- title + title, --- create titles insert trigger -CREATE TRIGGER tr_cpe_titles_insert AFTER INSERT ON cpe_titles + -- use porter stemming + tokenize = 'porter' +); + +-- create titles all fts insert trigger +CREATE TRIGGER tr_cpe_titles_insert_fts_all AFTER INSERT ON cpe_titles FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN - INSERT INTO cpe_titles_fts(rowid, title) VALUES (NEW.cpe_id, NEW.val); + INSERT INTO cpe_fts_all(cpe_id, title, href, val) + SELECT NEW.cpe_id, + NEW.val, + b.href, + b.val + + FROM cpes a + JOIN cpe_refs b + ON (b.cpe_id = a.cpe_id) + + WHERE a.cpe_id = NEW.cpe_id; END; --- create titles delete trigger -CREATE TRIGGER tr_cpe_titles_delete BEFORE DELETE ON cpe_titles +-- create titles delete all fts trigger +CREATE TRIGGER tr_cpe_titles_delete_fts_all BEFORE DELETE ON cpe_titles FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN - DELETE FROM cpe_titles_fts WHERE rowid = OLD.cpe_id; + DELETE FROM cpe_fts_all + WHERE cpe_id = OLD.cpe_id + AND title = OLD.val; + END; + +-- create titles all fts insert trigger +CREATE TRIGGER tr_cpe_titles_insert_fts_titles AFTER INSERT ON cpe_titles + FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN + INSERT INTO cpe_fts_titles(cpe_id, title) VALUES (NEW.cpe_id, NEW.val); + END; + +-- create titles delete titles fts trigger +CREATE TRIGGER tr_cpe_titles_delete_fts_titles BEFORE DELETE ON cpe_titles + FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN + DELETE FROM cpe_fts_titles + WHERE cpe_id = OLD.cpe_id + AND title = OLD.val; END; -- create refs table CREATE TABLE cpe_refs ( + -- reference unique id + cpe_ref_id INTEGER PRIMARY KEY, + -- external CPE id - cpe_id INT NOT NULL - REFERENCES cpes(cpe_id), + cpe_id INT NOT NULL + REFERENCES cpes(cpe_id), -- url - href TEXT NOT NULL, + href TEXT NOT NULL + CHECK (LENGTH(href) > 0), -- text description - val TEXT NOT NULL + val TEXT NOT NULL + CHECK (LENGTH(val) > 0) +); + +-- create refs fts table +CREATE VIRTUAL TABLE cpe_fts_refs USING fts5( + -- parent CPE id + cpe_id UNINDEXED, + + -- reference URL + href, + + -- reference name + val, + + -- use porter stemming + tokenize = 'porter' ); --- create refs fts -CREATE VIRTUAL TABLE cpe_refs_fts USING fts5(cpe_id UNINDEXED, href, val); +-- create refs insert trigger for all fts +CREATE TRIGGER tr_cpe_refs_insert_fts_all AFTER INSERT ON cpe_refs + FOR EACH ROW BEGIN + INSERT INTO cpe_fts_all(cpe_id, title, href, val) + SELECT NEW.cpe_id, + b.val, + NEW.href, + NEW.val + + FROM cpes a + JOIN cpe_titles b + ON (b.cpe_id = a.cpe_id) + + WHERE a.cpe_id = NEW.cpe_id + AND b.lang = 'en-US'; + END; + +-- create refs delete trigger for all fts +CREATE TRIGGER tr_cpe_refs_delete_fts_all BEFORE DELETE ON cpe_refs + FOR EACH ROW BEGIN + DELETE FROM cpe_fts_all + WHERE cpe_id = OLD.cpe_id + AND href = OLD.lang + AND val = OLD.val; + END; --- create refs insert trigger -CREATE TRIGGER tr_cpe_refs_insert AFTER INSERT ON cpe_refs +-- create refs insert trigger for refs fts +CREATE TRIGGER tr_cpe_refs_insert_fts_refs AFTER INSERT ON cpe_refs FOR EACH ROW BEGIN - INSERT INTO cpe_refs_fts(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val); + INSERT INTO cpe_fts_refs(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val); END; --- create refs delete trigger -CREATE TRIGGER tr_cpe_refs_delete BEFORE DELETE ON cpe_refs +-- create refs delete trigger for refs fts +CREATE TRIGGER tr_cpe_refs_delete_fts_refs BEFORE DELETE ON cpe_refs FOR EACH ROW BEGIN - DELETE FROM cpe_refs_fts WHERE rowid = OLD.cpe_id; + DELETE FROM cpe_fts_refs + WHERE cpe_id = OLD.cpe_id + AND href = OLD.lang + AND val = OLD.val; END; PRAGMA user_version = 314159; diff --git a/internal/dbstore/sql/old-cpe-search-all.sql b/internal/dbstore/sql/old-cpe-search-all.sql new file mode 100644 index 0000000..18e286f --- /dev/null +++ b/internal/dbstore/sql/old-cpe-search-all.sql @@ -0,0 +1,69 @@ +SELECT a.cpe_id, + a.cpe23, + COALESCE(c.titles, '[]') AS titles, + COALESCE(d.refs, '[]') AS refs, + b.rank + + FROM cpes a + JOIN ( + -- limit to "best" match (e.g., lowest rank) for given CPE + -- NOTE: it's not clear to me whether cross-index bm25() + -- comparisons are valid + SELECT a.cpe_id, + MIN(a.rank) AS rank + + FROM ( + -- find title matches + SELECT b.cpe_id, + a.rank AS rank + + FROM cpe_titles_fts a + JOIN cpe_titles b + ON (b.cpe_title_id = a.rowid) + + WHERE cpe_titles_fts MATCH :q + + UNION ALL + + -- find reference matches + SELECT b.cpe_id, + a.rank + + FROM cpe_refs_fts a + JOIN cpe_refs b + ON (b.cpe_ref_id = a.rowid) + + WHERE cpe_refs_fts MATCH :q + ) a + + GROUP BY a.cpe_id + ) b ON (b.cpe_id = a.cpe_id) + + -- get all titles, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'lang', lang, + 'text', val + )) AS titles + + FROM cpe_titles + + GROUP BY cpe_id + ) c ON (c.cpe_id = b.cpe_id) + + -- get all refs, grouped by cpe + LEFT JOIN ( + SELECT cpe_id, + json_group_array(json_object( + 'href', href, + 'text', val + )) AS refs + + FROM cpe_refs + + GROUP BY cpe_id + ) d ON (d.cpe_id = b.cpe_id) + + -- order by rank + ORDER BY b.rank diff --git a/internal/dbstore/sql/table-exists.sql b/internal/dbstore/sql/table-exists.sql new file mode 100644 index 0000000..09d32a4 --- /dev/null +++ b/internal/dbstore/sql/table-exists.sql @@ -0,0 +1,6 @@ +SELECT EXISTS( + SELECT 1 + FROM sqlite_schema + WHERE type = 'table' + AND name = ? +) -- cgit v1.2.3