aboutsummaryrefslogtreecommitdiff
path: root/dbstore/sql
diff options
context:
space:
mode:
Diffstat (limited to 'dbstore/sql')
-rw-r--r--dbstore/sql/cpe-search-all.sql46
-rw-r--r--dbstore/sql/cpe-search-ref.sql47
-rw-r--r--dbstore/sql/cpe-search-title.sql44
-rw-r--r--dbstore/sql/fill-fts.sql10
-rw-r--r--dbstore/sql/init.sql180
-rw-r--r--dbstore/sql/insert-cpe.sql1
-rw-r--r--dbstore/sql/insert-ref.sql1
-rw-r--r--dbstore/sql/insert-title.sql1
-rw-r--r--dbstore/sql/old-cpe-search-all.sql69
-rw-r--r--dbstore/sql/table-exists.sql6
10 files changed, 405 insertions, 0 deletions
diff --git a/dbstore/sql/cpe-search-all.sql b/dbstore/sql/cpe-search-all.sql
new file mode 100644
index 0000000..3260902
--- /dev/null
+++ b/dbstore/sql/cpe-search-all.sql
@@ -0,0 +1,46 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ FROM cpes a
+ JOIN (
+ -- limit to "best" match (e.g., lowest rank) for given CPE
+ -- NOTE: it's not clear to me whether cross-index bm25()
+ -- comparisons are valid
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_all a
+ WHERE cpe_fts_all MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/dbstore/sql/cpe-search-ref.sql b/dbstore/sql/cpe-search-ref.sql
new file mode 100644
index 0000000..a2d3be6
--- /dev/null
+++ b/dbstore/sql/cpe-search-ref.sql
@@ -0,0 +1,47 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ -- find matching cpes
+ FROM cpes a
+ JOIN (
+ -- there can be multiple matching references for a given
+ -- CPE, so use MIN() to limit the match to the most
+ -- "relevant", according to bm25()
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_refs
+ WHERE cpe_fts_refs MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/dbstore/sql/cpe-search-title.sql b/dbstore/sql/cpe-search-title.sql
new file mode 100644
index 0000000..7421ffc
--- /dev/null
+++ b/dbstore/sql/cpe-search-title.sql
@@ -0,0 +1,44 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ -- find matching cpes
+ FROM cpes a
+ JOIN (
+ SELECT cpe_id,
+ MIN(rank) AS rank
+ FROM cpe_fts_titles
+ WHERE cpe_fts_titles MATCH :q
+ GROUP BY cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank;
diff --git a/dbstore/sql/fill-fts.sql b/dbstore/sql/fill-fts.sql
new file mode 100644
index 0000000..5329ca7
--- /dev/null
+++ b/dbstore/sql/fill-fts.sql
@@ -0,0 +1,10 @@
+-- populate fts table
+INSERT INTO cpe_fts(rowid, title)
+ SELECT a.cpe_id,
+ COALESCE(b.val, '')
+
+ FROM cpes a
+ LEFT JOIN cpe_titles b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE b.lang = 'en-US';
diff --git a/dbstore/sql/init.sql b/dbstore/sql/init.sql
new file mode 100644
index 0000000..37eef54
--- /dev/null
+++ b/dbstore/sql/init.sql
@@ -0,0 +1,180 @@
+-- enable foreign keys
+PRAGMA foreign_keys = true;
+
+-- create cpes table
+CREATE TABLE cpes (
+ -- cpe unique ID
+ cpe_id INTEGER PRIMARY KEY,
+
+ -- cpe uri
+ cpe_uri TEXT UNIQUE NOT NULL,
+
+ -- cpe 2.3 formatting string
+ cpe23 TEXT UNIQUE NOT NULL
+);
+
+-- create all fts table (cross join across all
+-- titles and references, populated by triggers)
+CREATE VIRTUAL TABLE cpe_fts_all USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- title
+ title,
+
+ -- reference URL
+ href,
+
+ -- reference name
+ val,
+
+ -- use porter stemming
+ tokenize = 'porter'
+);
+
+-- create titles table
+CREATE TABLE cpe_titles (
+ -- title primary key
+ cpe_title_id INTEGER PRIMARY KEY,
+
+ -- external CPE id
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
+
+ -- language code
+ lang TEXT NOT NULL
+ CHECK (LENGTH(lang) > 0),
+
+ -- text value
+ val TEXT NOT NULL
+ CHECK (LENGTH(val) > 0),
+
+ -- prevent duplicate titles of the same name
+ UNIQUE (cpe_id, lang)
+);
+
+-- create titles fts table
+CREATE VIRTUAL TABLE cpe_fts_titles USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- title
+ title,
+
+ -- use porter stemming
+ tokenize = 'porter'
+);
+
+-- create titles all fts insert trigger
+CREATE TRIGGER tr_cpe_titles_insert_fts_all AFTER INSERT ON cpe_titles
+ FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN
+ INSERT INTO cpe_fts_all(cpe_id, title, href, val)
+ SELECT NEW.cpe_id,
+ NEW.val,
+ b.href,
+ b.val
+
+ FROM cpes a
+ JOIN cpe_refs b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE a.cpe_id = NEW.cpe_id;
+ END;
+
+-- create titles delete all fts trigger
+CREATE TRIGGER tr_cpe_titles_delete_fts_all BEFORE DELETE ON cpe_titles
+ FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN
+ DELETE FROM cpe_fts_all
+ WHERE cpe_id = OLD.cpe_id
+ AND title = OLD.val;
+ END;
+
+-- create titles all fts insert trigger
+CREATE TRIGGER tr_cpe_titles_insert_fts_titles AFTER INSERT ON cpe_titles
+ FOR EACH ROW WHEN (NEW.lang = 'en-US') BEGIN
+ INSERT INTO cpe_fts_titles(cpe_id, title) VALUES (NEW.cpe_id, NEW.val);
+ END;
+
+-- create titles delete titles fts trigger
+CREATE TRIGGER tr_cpe_titles_delete_fts_titles BEFORE DELETE ON cpe_titles
+ FOR EACH ROW WHEN (OLD.lang = 'en-US') BEGIN
+ DELETE FROM cpe_fts_titles
+ WHERE cpe_id = OLD.cpe_id
+ AND title = OLD.val;
+ END;
+
+-- create refs table
+CREATE TABLE cpe_refs (
+ -- reference unique id
+ cpe_ref_id INTEGER PRIMARY KEY,
+
+ -- external CPE id
+ cpe_id INT NOT NULL
+ REFERENCES cpes(cpe_id),
+
+ -- url
+ href TEXT NOT NULL
+ CHECK (LENGTH(href) > 0),
+
+ -- text description
+ val TEXT NOT NULL
+ CHECK (LENGTH(val) > 0)
+);
+
+-- create refs fts table
+CREATE VIRTUAL TABLE cpe_fts_refs USING fts5(
+ -- parent CPE id
+ cpe_id UNINDEXED,
+
+ -- reference URL
+ href,
+
+ -- reference name
+ val,
+
+ -- use porter stemming
+ tokenize = 'porter'
+);
+
+-- create refs insert trigger for all fts
+CREATE TRIGGER tr_cpe_refs_insert_fts_all AFTER INSERT ON cpe_refs
+ FOR EACH ROW BEGIN
+ INSERT INTO cpe_fts_all(cpe_id, title, href, val)
+ SELECT NEW.cpe_id,
+ b.val,
+ NEW.href,
+ NEW.val
+
+ FROM cpes a
+ JOIN cpe_titles b
+ ON (b.cpe_id = a.cpe_id)
+
+ WHERE a.cpe_id = NEW.cpe_id
+ AND b.lang = 'en-US';
+ END;
+
+-- create refs delete trigger for all fts
+CREATE TRIGGER tr_cpe_refs_delete_fts_all BEFORE DELETE ON cpe_refs
+ FOR EACH ROW BEGIN
+ DELETE FROM cpe_fts_all
+ WHERE cpe_id = OLD.cpe_id
+ AND href = OLD.lang
+ AND val = OLD.val;
+ END;
+
+-- create refs insert trigger for refs fts
+CREATE TRIGGER tr_cpe_refs_insert_fts_refs AFTER INSERT ON cpe_refs
+ FOR EACH ROW BEGIN
+ INSERT INTO cpe_fts_refs(cpe_id, href, val) VALUES (NEW.cpe_id, NEW.href, NEW.val);
+ END;
+
+-- create refs delete trigger for refs fts
+CREATE TRIGGER tr_cpe_refs_delete_fts_refs BEFORE DELETE ON cpe_refs
+ FOR EACH ROW BEGIN
+ DELETE FROM cpe_fts_refs
+ WHERE cpe_id = OLD.cpe_id
+ AND href = OLD.lang
+ AND val = OLD.val;
+ END;
+
+PRAGMA user_version = 314159;
diff --git a/dbstore/sql/insert-cpe.sql b/dbstore/sql/insert-cpe.sql
new file mode 100644
index 0000000..dca1c2b
--- /dev/null
+++ b/dbstore/sql/insert-cpe.sql
@@ -0,0 +1 @@
+INSERT INTO cpes(cpe_uri, cpe23) VALUES (?, ?);
diff --git a/dbstore/sql/insert-ref.sql b/dbstore/sql/insert-ref.sql
new file mode 100644
index 0000000..a39ab1c
--- /dev/null
+++ b/dbstore/sql/insert-ref.sql
@@ -0,0 +1 @@
+INSERT INTO cpe_refs(cpe_id, href, val) VALUES (?, ?, ?);
diff --git a/dbstore/sql/insert-title.sql b/dbstore/sql/insert-title.sql
new file mode 100644
index 0000000..5045d95
--- /dev/null
+++ b/dbstore/sql/insert-title.sql
@@ -0,0 +1 @@
+INSERT INTO cpe_titles(cpe_id, lang, val) VALUES (?, ?, ?);
diff --git a/dbstore/sql/old-cpe-search-all.sql b/dbstore/sql/old-cpe-search-all.sql
new file mode 100644
index 0000000..18e286f
--- /dev/null
+++ b/dbstore/sql/old-cpe-search-all.sql
@@ -0,0 +1,69 @@
+SELECT a.cpe_id,
+ a.cpe23,
+ COALESCE(c.titles, '[]') AS titles,
+ COALESCE(d.refs, '[]') AS refs,
+ b.rank
+
+ FROM cpes a
+ JOIN (
+ -- limit to "best" match (e.g., lowest rank) for given CPE
+ -- NOTE: it's not clear to me whether cross-index bm25()
+ -- comparisons are valid
+ SELECT a.cpe_id,
+ MIN(a.rank) AS rank
+
+ FROM (
+ -- find title matches
+ SELECT b.cpe_id,
+ a.rank AS rank
+
+ FROM cpe_titles_fts a
+ JOIN cpe_titles b
+ ON (b.cpe_title_id = a.rowid)
+
+ WHERE cpe_titles_fts MATCH :q
+
+ UNION ALL
+
+ -- find reference matches
+ SELECT b.cpe_id,
+ a.rank
+
+ FROM cpe_refs_fts a
+ JOIN cpe_refs b
+ ON (b.cpe_ref_id = a.rowid)
+
+ WHERE cpe_refs_fts MATCH :q
+ ) a
+
+ GROUP BY a.cpe_id
+ ) b ON (b.cpe_id = a.cpe_id)
+
+ -- get all titles, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'lang', lang,
+ 'text', val
+ )) AS titles
+
+ FROM cpe_titles
+
+ GROUP BY cpe_id
+ ) c ON (c.cpe_id = b.cpe_id)
+
+ -- get all refs, grouped by cpe
+ LEFT JOIN (
+ SELECT cpe_id,
+ json_group_array(json_object(
+ 'href', href,
+ 'text', val
+ )) AS refs
+
+ FROM cpe_refs
+
+ GROUP BY cpe_id
+ ) d ON (d.cpe_id = b.cpe_id)
+
+ -- order by rank
+ ORDER BY b.rank
diff --git a/dbstore/sql/table-exists.sql b/dbstore/sql/table-exists.sql
new file mode 100644
index 0000000..09d32a4
--- /dev/null
+++ b/dbstore/sql/table-exists.sql
@@ -0,0 +1,6 @@
+SELECT EXISTS(
+ SELECT 1
+ FROM sqlite_schema
+ WHERE type = 'table'
+ AND name = ?
+)