diff options
author | Paul Duncan <pabs@pablotron.org> | 2022-02-20 12:06:43 -0500 |
---|---|---|
committer | Paul Duncan <pabs@pablotron.org> | 2022-02-20 12:06:43 -0500 |
commit | 87c0c8d7ed4eb334dbd084d0ab0040589071b111 (patch) | |
tree | 269849b568fa5e27847af26c2ddf12e6bc36a815 /dbstore/sql | |
parent | 14dc55d984a9791aeb7d242427a3d54f879f678c (diff) | |
download | cvez-87c0c8d7ed4eb334dbd084d0ab0040589071b111.tar.bz2 cvez-87c0c8d7ed4eb334dbd084d0ab0040589071b111.zip |
dbstore: shrink CVE FTS index (test index from 120M to 86M) by removing all IDs except cve_id, concatenating reference tags, and removing severities
Diffstat (limited to 'dbstore/sql')
-rw-r--r-- | dbstore/sql/feed/search.sql | 26 | ||||
-rw-r--r-- | dbstore/sql/init.sql | 160 |
2 files changed, 84 insertions, 102 deletions
diff --git a/dbstore/sql/feed/search.sql b/dbstore/sql/feed/search.sql index cb80470..bc3f946 100644 --- a/dbstore/sql/feed/search.sql +++ b/dbstore/sql/feed/search.sql @@ -2,10 +2,10 @@ -- FIXME: should this be moved to cve/search SELECT COALESCE(json_group_array(json_object( 'cve_id', b.cve_id, - 'nvd_cve_id', printf('CVE-%4d-%04d', b.cve_year, b.cve_num), - 'description', d.value, - 'v3_base_score', e.v3_base_score / 10.0, - 'v3_severity', f.name, + 'nvd_id', c.nvd_id, + 'description', e.value, + 'v3_base_score', f.v3_base_score / 10.0, + 'v3_severity', g.name, 'rank', a.min_rank )), '[]') @@ -18,15 +18,17 @@ SELECT COALESCE(json_group_array(json_object( ) a JOIN cves b ON (b.cve_id = a.cve_id) - JOIN cve_descriptions c + JOIN cve_nvd_ids c ON (c.cve_id = b.cve_id) - JOIN descriptions d - ON (d.desc_id = c.desc_id) - JOIN item_cvss_v3_impacts e - ON (e.item_id = b.item_id) - JOIN severities f - ON (f.severity_id = e.v3_severity_id) + JOIN cve_descriptions d + ON (d.cve_id = b.cve_id) + JOIN descriptions e + ON (e.desc_id = d.desc_id) + JOIN item_cvss_v3_impacts f + ON (f.item_id = b.item_id) + JOIN severities g + ON (g.severity_id = f.v3_severity_id) - WHERE d.lang = 'en' + WHERE e.lang LIKE 'en%' ORDER BY a.min_rank; diff --git a/dbstore/sql/init.sql b/dbstore/sql/init.sql index be5e4fb..cf9d82a 100644 --- a/dbstore/sql/init.sql +++ b/dbstore/sql/init.sql @@ -638,38 +638,20 @@ CREATE INDEX in_item_cvss_v3_impacts_v3_severity_id ON item_cvss_v3_impacts(v3_s -- corresponding cve tables (see triggers below). -- CREATE VIRTUAL TABLE cve_fts_all USING fts5( - -- parent item ID - item_id UNINDEXED, - -- parent CVE id cve_id UNINDEXED, - -- cve description ID - cve_desc_id UNINDEXED, - - -- problem type ID - pt_desc_id UNINDEXED, - - -- cve ref ID - cve_ref_id UNINDEXED, - - -- v2 severity ID - v2_severity_id UNINDEXED, - - -- v3 severity ID - v3_severity_id UNINDEXED, - -- NVD CVE ID (e.g. CVE-XXXX-YYYY) - nvd_cve_id, + nvd_id, -- CVE assigner assigner, - -- CVE description - cve_desc, + -- CVE descriptions + cve_descs, - -- problem type description - pt_desc, + -- problem type descriptions + pt_descs, -- reference URL ref_url, @@ -680,14 +662,8 @@ CREATE VIRTUAL TABLE cve_fts_all USING fts5( -- reference source ref_source, - -- reference tag - ref_tag, - - -- cvss v2 severity - v2_severity, - - -- cvss v3 severity - v3_severity, + -- reference tags + ref_tags, -- use porter stemming tokenize = 'porter' @@ -708,6 +684,7 @@ CREATE TRIGGER tr_cve_fts_refresh_before_insert DELETE FROM cve_fts_all; END; +-- -- create insert trigger on item_cvss_v3_impacts -- -- Notes: @@ -715,78 +692,81 @@ CREATE TRIGGER tr_cve_fts_refresh_before_insert -- - no index: ~31M -- - per-table triggers: ~320M -- - full refresh trigger: ~120M +-- - refresh trigger, group_concat(), IDs/severities removed: 86M -- -- TODO: --- * use group_concat() to concatenate tags --- * remove all IDs except cve_id --- * remove severities +-- - [x] use group_concat() to concatenate tags +-- - [x] remove all IDs except cve_id +-- - [x] remove severities -- CREATE TRIGGER tr_cve_fts_refresh_after_insert AFTER INSERT ON cve_fts_refresh FOR EACH ROW BEGIN INSERT INTO cve_fts_all( - item_id, cve_id, - cve_desc_id, - pt_desc_id, - cve_ref_id, - v2_severity_id, - v3_severity_id, - - nvd_cve_id, + nvd_id, assigner, - cve_desc, - pt_desc, + cve_descs, + pt_descs, ref_url, ref_name, ref_source, - ref_tag, - v2_severity, - v3_severity + ref_tags ) - SELECT a.item_id, -- item_id - a.cve_id, -- cve_id - d.desc_id, -- cve_desc_id - g.desc_id, -- pt_desc_id - h.cve_ref_id, -- cve_ref_id - j.v2_severity_id, -- v2_severity_id - l.v3_severity_id, -- v3_severity_id - n.nvd_id AS nvd_cve_id, -- nvd_cve_id - b.assigner, -- assigner - COALESCE(d.value, ''), -- cve_desc - COALESCE(g.value, ''), -- pt_desc - COALESCE(h.url, ''), -- ref_url - COALESCE(h.name, ''), -- ref_name - COALESCE(h.source, ''), -- ref_source - COALESCE(i.tag, ''), -- ref_tag - COALESCE(k.name, ''), -- v2_severity - COALESCE(m.name, '') -- v3_severity + SELECT a.cve_id, -- cve_id + b.nvd_id, -- nvd_id + c.assigner, -- assigner + COALESCE(d.descriptions, ''), -- cve_descs + COALESCE(e.descriptions, ''), -- pt_descs + COALESCE(f.url, ''), -- ref_url + COALESCE(f.name, ''), -- ref_name + COALESCE(f.source, ''), -- ref_source + COALESCE(g.tags, '') -- ref_tags FROM cves a - JOIN assigners b - ON (b.assigner_id = a.assigner_id) - LEFT JOIN cve_descriptions c - ON (c.cve_id = a.cve_id) - LEFT JOIN descriptions d - ON (d.desc_id = c.desc_id) - LEFT JOIN cve_problems e - ON (e.cve_id = a.cve_id) - LEFT JOIN cve_problem_descriptions f - ON (f.pt_id = e.pt_id) - LEFT JOIN descriptions g - ON (g.desc_id = f.desc_id) - LEFT JOIN cve_refs h - ON (h.cve_id = a.cve_id) - LEFT JOIN cve_ref_tags i - ON (i.cve_ref_id = h.cve_ref_id) - LEFT JOIN item_cvss_v2_impacts j - ON (j.item_id = a.item_id) - LEFT JOIN severities k - ON (k.severity_id = j.v2_severity_id) - LEFT JOIN item_cvss_v3_impacts l - ON (l.item_id = a.item_id) - LEFT JOIN severities m - ON (m.severity_id = l.v3_severity_id) - JOIN cve_nvd_ids n - ON (n.cve_id = a.cve_id); + JOIN cve_nvd_ids b + ON (b.cve_id = a.cve_id) + JOIN assigners c + ON (c.assigner_id = a.assigner_id) + + LEFT JOIN ( + -- concatenate english cve descriptions + SELECT a.cve_id, + group_concat(b.value) AS descriptions + + FROM cve_descriptions a + JOIN descriptions b + ON (b.desc_id = a.desc_id) + + WHERE b.lang LIKE 'en%' + + GROUP BY a.cve_id + ) d ON (d.cve_id = a.cve_id) + + LEFT JOIN ( + -- concatenate english problem type descriptions + SELECT a.cve_id, + group_concat(c.value) AS descriptions + + FROM cve_problems a + JOIN cve_problem_descriptions b + ON (b.pt_id = a.pt_id) + JOIN descriptions c + ON (c.desc_id = b.desc_id) + + WHERE c.lang LIKE 'en%' + + GROUP BY a.cve_id + ) e ON (e.cve_id = a.cve_id) + + LEFT JOIN cve_refs f + ON (f.cve_id = a.cve_id) + + LEFT JOIN ( + -- concatenate reference tags + SELECT cve_ref_id, + group_concat(tag) AS tags + FROM cve_ref_tags + GROUP BY cve_ref_id + ) g ON (g.cve_ref_id = f.cve_ref_id); END; |