aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2022-02-05 02:34:41 -0500
committerPaul Duncan <pabs@pablotron.org>2022-02-05 02:34:41 -0500
commit425d2a2aa3e3d4ebb1cbf60982ef7a699dd79d97 (patch)
tree68cf04eaf99ae83f678399971a44458ad295d6fe
parent4f21e2b639f7d66062f0340793626db962ebb9f6 (diff)
downloadcvez-425d2a2aa3e3d4ebb1cbf60982ef7a699dd79d97.tar.bz2
cvez-425d2a2aa3e3d4ebb1cbf60982ef7a699dd79d97.zip
internal/dbstore/sql: populate cpe search sql
-rw-r--r--internal/dbstore/sql/cpe-search-all.sql46
-rw-r--r--internal/dbstore/sql/cpe-search-ref.sql47
-rw-r--r--internal/dbstore/sql/cpe-search-title.sql44
-rw-r--r--internal/dbstore/sql/init.sql159
-rw-r--r--internal/dbstore/sql/old-cpe-search-all.sql69
-rw-r--r--internal/dbstore/sql/table-exists.sql6
6 files changed, 345 insertions, 26 deletions
diff --git a/internal/dbstore/sql/cpe-search-all.sql b/internal/dbstore/sql/cpe-search-all.sql
new file mode 100644
index 0000000..3260902
--- /dev/null
+++ b/internal/dbstore/sql/cpe-search-all.sql
@@ -0,0 +1,46 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ FROM cpes a
+ JOIN (
+ -- limit to "best" match (e.g., lowest rank) for given CPE
+ -- NOTE: it's not clear to me whether cross-index bm25()
+ -- comparisons are valid
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_all a
+ WHERE cpe_fts_all MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/internal/dbstore/sql/cpe-search-ref.sql b/internal/dbstore/sql/cpe-search-ref.sql
new file mode 100644
index 0000000..a2d3be6
--- /dev/null
+++ b/internal/dbstore/sql/cpe-search-ref.sql
@@ -0,0 +1,47 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ -- find matching cpes
+ FROM cpes a
+ JOIN (
+ -- there can be multiple matching references for a given
+ -- CPE, so use MIN() to limit the match to the most
+ -- "relevant", according to bm25()
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_refs
+ WHERE cpe_fts_refs MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/internal/dbstore/sql/cpe-search-title.sql b/internal/dbstore/sql/cpe-search-title.sql
new file mode 100644
index 0000000..7421ffc
--- /dev/null
+++ b/internal/dbstore/sql/cpe-search-title.sql
@@ -0,0 +1,44 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ -- find matching cpes
+ FROM cpes a
+ JOIN (
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_titles
+ WHERE cpe_fts_titles MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank;
diff --git a/internal/dbstore/sql/init.sql b/internal/dbstore/sql/init.sql
index a08b2f8..37eef54 100644
--- a/internal/dbstore/sql/init.sql
+++ b/internal/dbstore/sql/init.sql
@@ -3,71 +3,178 @@ PRAGMA foreign_keys = true;
-- create cpes table
CREATE TABLE cpes (
- cpe_id INTEGER PRIMARY KEY,
+ -- cpe unique ID
+ cpe_id INTEGER PRIMARY KEY,
-- cpe uri
- cpe_uri TEXT UNIQUE NOT NULL,
+ cpe_uri TEXT UNIQUE NOT NULL,
-- cpe 2.3 formatting string
- cpe23 TEXT UNIQUE NOT NULL
+ cpe23 TEXT UNIQUE NOT NULL
+);
+
+-- create all fts table (cross join across all
+-- titles and references, populated by triggers)
+CREATE VIRTUAL TABLE cpe_fts_all USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- title
+ title,
+
+ -- reference URL
+ href,
+
+ -- reference name
+ val,
+
+ -- use porter stemming
+ tokenize = 'porter'
);
-- create titles table
CREATE TABLE cpe_titles (
+ -- title primary key
+ cpe_title_id INTEGER PRIMARY KEY,
+
-- external CPE id
- cpe_id INT NOT NULL
- REFERENCES cpes(cpe_id),
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
-- language code
- lang TEXT NOT NULL,
+ lang TEXT NOT NULL
+ CHECK (LENGTH(lang) > 0),
-- text value
- val TEXT NOT NULL,
+ val TEXT NOT NULL
+ CHECK (LENGTH(val) > 0),
+ -- prevent duplicate titles of the same name
UNIQUE (cpe_id, lang)
);
-- create titles fts table
-CREATE VIRTUAL TABLE cpe_titles_fts USING fts5(title, tokenize = 'porter');
+CREATE VIRTUAL TABLE cpe_fts_titles USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- title
+ title,
--- create titles insert trigger
-CREATE TRIGGER tr_cpe_titles_insert AFTER INSERT ON cpe_titles
+ -- use porter stemming
+ tokenize = 'porter'
+);
+
+-- create titles all fts insert trigger
+CREATE TRIGGER tr_cpe_titles_insert_fts_all AFTER INSERT ON cpe_titles
FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN
- INSERT INTO cpe_titles_fts(rowid, title) VALUES (NEW.cpe_id, NEW.val);
+ INSERT INTO cpe_fts_all(cpe_id, title, href, val)
+ SELECT NEW.cpe_id,
+ NEW.val,
+ b.href,
+ b.val
+
+ FROM cpes a
+ JOIN cpe_refs b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE a.cpe_id = NEW.cpe_id;
END;
--- create titles delete trigger
-CREATE TRIGGER tr_cpe_titles_delete BEFORE DELETE ON cpe_titles
+-- create titles delete all fts trigger
+CREATE TRIGGER tr_cpe_titles_delete_fts_all BEFORE DELETE ON cpe_titles
FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN
- DELETE FROM cpe_titles_fts WHERE rowid = OLD.cpe_id;
+ DELETE FROM cpe_fts_all
+ WHERE cpe_id = OLD.cpe_id
+ AND title = OLD.val;
+ END;
+
+-- create titles all fts insert trigger
+CREATE TRIGGER tr_cpe_titles_insert_fts_titles AFTER INSERT ON cpe_titles
+ FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN
+ INSERT INTO cpe_fts_titles(cpe_id, title) VALUES (NEW.cpe_id, NEW.val);
+ END;
+
+-- create titles delete titles fts trigger
+CREATE TRIGGER tr_cpe_titles_delete_fts_titles BEFORE DELETE ON cpe_titles
+ FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN
+ DELETE FROM cpe_fts_titles
+ WHERE cpe_id = OLD.cpe_id
+ AND title = OLD.val;
END;
-- create refs table
CREATE TABLE cpe_refs (
+ -- reference unique id
+ cpe_ref_id INTEGER PRIMARY KEY,
+
-- external CPE id
- cpe_id INT NOT NULL
- REFERENCES cpes(cpe_id),
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
-- url
- href TEXT NOT NULL,
+ href TEXT NOT NULL
+ CHECK (LENGTH(href) > 0),
-- text description
- val TEXT NOT NULL
+ val TEXT NOT NULL
+ CHECK (LENGTH(val) > 0)
+);
+
+-- create refs fts table
+CREATE VIRTUAL TABLE cpe_fts_refs USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- reference URL
+ href,
+
+ -- reference name
+ val,
+
+ -- use porter stemming
+ tokenize = 'porter'
);
--- create refs fts
-CREATE VIRTUAL TABLE cpe_refs_fts USING fts5(cpe_id UNINDEXED, href, val);
+-- create refs insert trigger for all fts
+CREATE TRIGGER tr_cpe_refs_insert_fts_all AFTER INSERT ON cpe_refs
+ FOR EACH ROW BEGIN
+ INSERT INTO cpe_fts_all(cpe_id, title, href, val)
+ SELECT NEW.cpe_id,
+ b.val,
+ NEW.href,
+ NEW.val
+
+ FROM cpes a
+ JOIN cpe_titles b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE a.cpe_id = NEW.cpe_id
+ AND b.lang = 'en-US';
+ END;
+
+-- create refs delete trigger for all fts
+CREATE TRIGGER tr_cpe_refs_delete_fts_all BEFORE DELETE ON cpe_refs
+ FOR EACH ROW BEGIN
+ DELETE FROM cpe_fts_all
+ WHERE cpe_id = OLD.cpe_id
+ AND href = OLD.lang
+ AND val = OLD.val;
+ END;
--- create refs insert trigger
-CREATE TRIGGER tr_cpe_refs_insert AFTER INSERT ON cpe_refs
+-- create refs insert trigger for refs fts
+CREATE TRIGGER tr_cpe_refs_insert_fts_refs AFTER INSERT ON cpe_refs
FOR EACH ROW BEGIN
- INSERT INTO cpe_refs_fts(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val);
+ INSERT INTO cpe_fts_refs(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val);
END;
--- create refs delete trigger
-CREATE TRIGGER tr_cpe_refs_delete BEFORE DELETE ON cpe_refs
+-- create refs delete trigger for refs fts
+CREATE TRIGGER tr_cpe_refs_delete_fts_refs BEFORE DELETE ON cpe_refs
FOR EACH ROW BEGIN
- DELETE FROM cpe_refs_fts WHERE rowid = OLD.cpe_id;
+ DELETE FROM cpe_fts_refs
+ WHERE cpe_id = OLD.cpe_id
+ AND href = OLD.lang
+ AND val = OLD.val;
END;
PRAGMA user_version = 314159;
diff --git a/internal/dbstore/sql/old-cpe-search-all.sql b/internal/dbstore/sql/old-cpe-search-all.sql
new file mode 100644
index 0000000..18e286f
--- /dev/null
+++ b/internal/dbstore/sql/old-cpe-search-all.sql
@@ -0,0 +1,69 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ FROM cpes a
+ JOIN (
+ -- limit to "best" match (e.g., lowest rank) for given CPE
+ -- NOTE: it's not clear to me whether cross-index bm25()
+ -- comparisons are valid
+ SELECT a.cpe_id,
+ MIN(a.rank) AS rank
+
+ FROM (
+ -- find title matches
+ SELECT b.cpe_id,
+ a.rank AS rank
+
+ FROM cpe_titles_fts a
+ JOIN cpe_titles b
+ ON (b.cpe_title_id = a.rowid)
+
+ WHERE cpe_titles_fts MATCH :q
+
+ UNION ALL
+
+ -- find reference matches
+ SELECT b.cpe_id,
+ a.rank
+
+ FROM cpe_refs_fts a
+ JOIN cpe_refs b
+ ON (b.cpe_ref_id = a.rowid)
+
+ WHERE cpe_refs_fts MATCH :q
+ ) a
+
+ GROUP BY a.cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/internal/dbstore/sql/table-exists.sql b/internal/dbstore/sql/table-exists.sql
new file mode 100644
index 0000000..09d32a4
--- /dev/null
+++ b/internal/dbstore/sql/table-exists.sql
@@ -0,0 +1,6 @@
+SELECT EXISTS(
+ SELECT 1
+ FROM sqlite_schema
+ WHERE type = 'table'
+ AND name = ?
+)