From 87c0c8d7ed4eb334dbd084d0ab0040589071b111 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Sun, 20 Feb 2022 12:06:43 -0500 Subject: dbstore: shrink CVE FTS index (test index from 120M to 86M) by removing all IDs except cve_id, concatenating reference tags, and removing severities --- dbstore/cvesearchrow.go | 4 +- dbstore/dbstore_test.go | 8 +-- dbstore/sql/feed/search.sql | 26 +++---- dbstore/sql/init.sql | 160 +++++++++++++++++++------------------------- 4 files changed, 90 insertions(+), 108 deletions(-) diff --git a/dbstore/cvesearchrow.go b/dbstore/cvesearchrow.go index 4128347..91920b9 100644 --- a/dbstore/cvesearchrow.go +++ b/dbstore/cvesearchrow.go @@ -6,8 +6,8 @@ import ( // CveSearch() result type CveSearchRow struct { - Id int64 `json:"cve_id"` // internal database CVE ID - CveId feed.CveId `json:"nvd_cve_id"` // NVD CVE ID + Id int64 `json:"cve_id"` // CVE database ID + NvdId feed.CveId `json:"nvd_id"` // NVD CVE ID Description string `json:"description"` // CVE description (english) V3BaseScore feed.Score `json:"v3_base_score"` // CVSSv3 base score V3Severity feed.Severity `json:"v3_severity"` // CVSSv3 severity diff --git a/dbstore/dbstore_test.go b/dbstore/dbstore_test.go index 5ec2103..69327d9 100644 --- a/dbstore/dbstore_test.go +++ b/dbstore/dbstore_test.go @@ -1027,20 +1027,20 @@ func TestCveSearch(t *testing.T) { exp: []CveSearchRow { CveSearchRow { Id: 9315, - CveId: getTestCveId(t, "CVE-2021-1366"), + NvdId: getTestCveId(t, "CVE-2021-1366"), Description: "A vulnerability in the interprocess communication (IPC) channel of Cisco AnyConnect Secure Mobility Client for Windows could allow an authenticated, local attacker to perform a DLL hijacking attack on an affected device if the VPN Posture (HostScan) Module is installed on the AnyConnect client. This vulnerability is due to insufficient validation of resources that are loaded by the application at run time. An attacker could exploit this vulnerability by sending a crafted IPC message to the AnyConnect process. A successful exploit could allow the attacker to execute arbitrary code on the affected machine with SYSTEM privileges. To exploit this vulnerability, the attacker needs valid credentials on the Windows system.", V3BaseScore: nvd_feed.Score(uint8(78)), V3Severity: nvd_feed.SeverityHigh, - Rank: -36.487476, + Rank: -37.51353, }, CveSearchRow { Id: 9495, - CveId: getTestCveId(t, "CVE-2021-1567"), + NvdId: getTestCveId(t, "CVE-2021-1567"), Description: "A vulnerability in the DLL loading mechanism of Cisco AnyConnect Secure Mobility Client for Windows could allow an authenticated, local attacker to perform a DLL hijacking attack on an affected device if the VPN Posture (HostScan) Module is installed on the AnyConnect client. This vulnerability is due to a race condition in the signature verification process for DLL files that are loaded on an affected device. An attacker could exploit this vulnerability by sending a series of crafted interprocess communication (IPC) messages to the AnyConnect process. A successful exploit could allow the attacker to execute arbitrary code on the affected device with SYSTEM privileges. To exploit this vulnerability, the attacker must have valid credentials on the Windows system.", V3BaseScore: nvd_feed.Score(uint8(67)), V3Severity: nvd_feed.SeverityMedium, - Rank: -34.62201, + Rank: -35.5376, }, }, }} diff --git a/dbstore/sql/feed/search.sql b/dbstore/sql/feed/search.sql index cb80470..bc3f946 100644 --- a/dbstore/sql/feed/search.sql +++ b/dbstore/sql/feed/search.sql @@ -2,10 +2,10 @@ -- FIXME: should this be moved to cve/search SELECT COALESCE(json_group_array(json_object( 'cve_id', b.cve_id, - 'nvd_cve_id', printf('CVE-%4d-%04d', b.cve_year, b.cve_num), - 'description', d.value, - 'v3_base_score', e.v3_base_score / 10.0, - 'v3_severity', f.name, + 'nvd_id', c.nvd_id, + 'description', e.value, + 'v3_base_score', f.v3_base_score / 10.0, + 'v3_severity', g.name, 'rank', a.min_rank )), '[]') @@ -18,15 +18,17 @@ SELECT COALESCE(json_group_array(json_object( ) a JOIN cves b ON (b.cve_id = a.cve_id) - JOIN cve_descriptions c + JOIN cve_nvd_ids c ON (c.cve_id = b.cve_id) - JOIN descriptions d - ON (d.desc_id = c.desc_id) - JOIN item_cvss_v3_impacts e - ON (e.item_id = b.item_id) - JOIN severities f - ON (f.severity_id = e.v3_severity_id) + JOIN cve_descriptions d + ON (d.cve_id = b.cve_id) + JOIN descriptions e + ON (e.desc_id = d.desc_id) + JOIN item_cvss_v3_impacts f + ON (f.item_id = b.item_id) + JOIN severities g + ON (g.severity_id = f.v3_severity_id) - WHERE d.lang = 'en' + WHERE e.lang LIKE 'en%' ORDER BY a.min_rank; diff --git a/dbstore/sql/init.sql b/dbstore/sql/init.sql index be5e4fb..cf9d82a 100644 --- a/dbstore/sql/init.sql +++ b/dbstore/sql/init.sql @@ -638,38 +638,20 @@ CREATE INDEX in_item_cvss_v3_impacts_v3_severity_id ON item_cvss_v3_impacts(v3_s -- corresponding cve tables (see triggers below). -- CREATE VIRTUAL TABLE cve_fts_all USING fts5( - -- parent item ID - item_id UNINDEXED, - -- parent CVE id cve_id UNINDEXED, - -- cve description ID - cve_desc_id UNINDEXED, - - -- problem type ID - pt_desc_id UNINDEXED, - - -- cve ref ID - cve_ref_id UNINDEXED, - - -- v2 severity ID - v2_severity_id UNINDEXED, - - -- v3 severity ID - v3_severity_id UNINDEXED, - -- NVD CVE ID (e.g. CVE-XXXX-YYYY) - nvd_cve_id, + nvd_id, -- CVE assigner assigner, - -- CVE description - cve_desc, + -- CVE descriptions + cve_descs, - -- problem type description - pt_desc, + -- problem type descriptions + pt_descs, -- reference URL ref_url, @@ -680,14 +662,8 @@ CREATE VIRTUAL TABLE cve_fts_all USING fts5( -- reference source ref_source, - -- reference tag - ref_tag, - - -- cvss v2 severity - v2_severity, - - -- cvss v3 severity - v3_severity, + -- reference tags + ref_tags, -- use porter stemming tokenize = 'porter' @@ -708,6 +684,7 @@ CREATE TRIGGER tr_cve_fts_refresh_before_insert DELETE FROM cve_fts_all; END; +-- -- create insert trigger on item_cvss_v3_impacts -- -- Notes: @@ -715,78 +692,81 @@ CREATE TRIGGER tr_cve_fts_refresh_before_insert -- - no index: ~31M -- - per-table triggers: ~320M -- - full refresh trigger: ~120M +-- - refresh trigger, group_concat(), IDs/severities removed: 86M -- -- TODO: --- * use group_concat() to concatenate tags --- * remove all IDs except cve_id --- * remove severities +-- - [x] use group_concat() to concatenate tags +-- - [x] remove all IDs except cve_id +-- - [x] remove severities -- CREATE TRIGGER tr_cve_fts_refresh_after_insert AFTER INSERT ON cve_fts_refresh FOR EACH ROW BEGIN INSERT INTO cve_fts_all( - item_id, cve_id, - cve_desc_id, - pt_desc_id, - cve_ref_id, - v2_severity_id, - v3_severity_id, - - nvd_cve_id, + nvd_id, assigner, - cve_desc, - pt_desc, + cve_descs, + pt_descs, ref_url, ref_name, ref_source, - ref_tag, - v2_severity, - v3_severity + ref_tags ) - SELECT a.item_id, -- item_id - a.cve_id, -- cve_id - d.desc_id, -- cve_desc_id - g.desc_id, -- pt_desc_id - h.cve_ref_id, -- cve_ref_id - j.v2_severity_id, -- v2_severity_id - l.v3_severity_id, -- v3_severity_id - n.nvd_id AS nvd_cve_id, -- nvd_cve_id - b.assigner, -- assigner - COALESCE(d.value, ''), -- cve_desc - COALESCE(g.value, ''), -- pt_desc - COALESCE(h.url, ''), -- ref_url - COALESCE(h.name, ''), -- ref_name - COALESCE(h.source, ''), -- ref_source - COALESCE(i.tag, ''), -- ref_tag - COALESCE(k.name, ''), -- v2_severity - COALESCE(m.name, '') -- v3_severity + SELECT a.cve_id, -- cve_id + b.nvd_id, -- nvd_id + c.assigner, -- assigner + COALESCE(d.descriptions, ''), -- cve_descs + COALESCE(e.descriptions, ''), -- pt_descs + COALESCE(f.url, ''), -- ref_url + COALESCE(f.name, ''), -- ref_name + COALESCE(f.source, ''), -- ref_source + COALESCE(g.tags, '') -- ref_tags FROM cves a - JOIN assigners b - ON (b.assigner_id = a.assigner_id) - LEFT JOIN cve_descriptions c - ON (c.cve_id = a.cve_id) - LEFT JOIN descriptions d - ON (d.desc_id = c.desc_id) - LEFT JOIN cve_problems e - ON (e.cve_id = a.cve_id) - LEFT JOIN cve_problem_descriptions f - ON (f.pt_id = e.pt_id) - LEFT JOIN descriptions g - ON (g.desc_id = f.desc_id) - LEFT JOIN cve_refs h - ON (h.cve_id = a.cve_id) - LEFT JOIN cve_ref_tags i - ON (i.cve_ref_id = h.cve_ref_id) - LEFT JOIN item_cvss_v2_impacts j - ON (j.item_id = a.item_id) - LEFT JOIN severities k - ON (k.severity_id = j.v2_severity_id) - LEFT JOIN item_cvss_v3_impacts l - ON (l.item_id = a.item_id) - LEFT JOIN severities m - ON (m.severity_id = l.v3_severity_id) - JOIN cve_nvd_ids n - ON (n.cve_id = a.cve_id); + JOIN cve_nvd_ids b + ON (b.cve_id = a.cve_id) + JOIN assigners c + ON (c.assigner_id = a.assigner_id) + + LEFT JOIN ( + -- concatenate english cve descriptions + SELECT a.cve_id, + group_concat(b.value) AS descriptions + + FROM cve_descriptions a + JOIN descriptions b + ON (b.desc_id = a.desc_id) + + WHERE b.lang LIKE 'en%' + + GROUP BY a.cve_id + ) d ON (d.cve_id = a.cve_id) + + LEFT JOIN ( + -- concatenate english problem type descriptions + SELECT a.cve_id, + group_concat(c.value) AS descriptions + + FROM cve_problems a + JOIN cve_problem_descriptions b + ON (b.pt_id = a.pt_id) + JOIN descriptions c + ON (c.desc_id = b.desc_id) + + WHERE c.lang LIKE 'en%' + + GROUP BY a.cve_id + ) e ON (e.cve_id = a.cve_id) + + LEFT JOIN cve_refs f + ON (f.cve_id = a.cve_id) + + LEFT JOIN ( + -- concatenate reference tags + SELECT cve_ref_id, + group_concat(tag) AS tags + FROM cve_ref_tags + GROUP BY cve_ref_id + ) g ON (g.cve_ref_id = f.cve_ref_id); END; -- cgit v1.2.3