From fac967e3a0ac4d999a58d889e795a12fa9ee96e0 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Wed, 23 Feb 2022 22:07:12 -0500 Subject: nvdmirror: full rewrite (still not working correctly) --- nvdmirror/nvdmirror.go | 494 +++++++++++++++++++++++++++++++++++--------- nvdmirror/nvdmirror_test.go | 65 +++--- 2 files changed, 427 insertions(+), 132 deletions(-) diff --git a/nvdmirror/nvdmirror.go b/nvdmirror/nvdmirror.go index cba638b..4faf68e 100644 --- a/nvdmirror/nvdmirror.go +++ b/nvdmirror/nvdmirror.go @@ -2,28 +2,32 @@ package nvdmirror import ( + "bytes" + "crypto/sha256" + "errors" "fmt" "github.com/pablotron/cvez/atomictemp" + "github.com/pablotron/cvez/feed" + "github.com/rs/zerolog" "github.com/rs/zerolog/log" "io" + "io/fs" "net/http" "net/url" + "os" "path/filepath" "time" ) -// default user agent (FIXME: make configurable) -var defaultUserAgent = "cvez/0.1.0" - // Sync() configuration. type SyncConfig struct { // CVE 1.1 Base URL. The full meta and JSON URLs are constructed by // appending the file name to this base. Cve11BaseUrl string - // CPE 1.0 base URL. The full meta and JSON URLs are constructed by - // appending the file name to this base. - Cpe10MatchBaseUrl string + // CPE Match 1.0 base URL. The full meta and JSON URLs are + // constructed by appending the file name to this base. + CpeMatch10BaseUrl string // CPE 2.3 dictionary URL. Cpe23DictUrl string @@ -38,77 +42,197 @@ type SyncConfig struct { IdleConnTimeout time.Duration } +// NVD URLs +var DefaultConfig = SyncConfig { + Cve11BaseUrl: "https://nvd.nist.gov/feeds/json/cve/1.1", + CpeMatch10BaseUrl: "https://nvd.nist.gov/feeds/json/cpematch/1.0", + Cpe23DictUrl: "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz", + UserAgent: "cvez/0.1.0", +} + +// Initial (first) CVE year. +const baseYear = 2002 + +// Additional non-year CVE feeds. +var cveExtraFiles = []string { + "modified", + "recent", +} + // Get user agent string. func (me SyncConfig) GetUserAgent() string { if len(me.UserAgent) > 0 { return me.UserAgent } else { - return defaultUserAgent + return DefaultConfig.UserAgent } } -// NVD URLs -var DefaultConfig = SyncConfig { - Cve11BaseUrl: "https://nvd.nist.gov/feeds/json/cve/1.1", - Cpe10MatchBaseUrl: "https://nvd.nist.gov/feeds/json/cpematch/1.0", - Cpe23DictUrl: "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz", +// Get URL for CVE feed file. +func (me SyncConfig) GetCveUrl(id, ext string) string { + return fmt.Sprintf("%s/nvdcve-1.1-%s.%s", me.Cve11BaseUrl, id, ext) +} + +// Get URL for CVE feed file for given year. +func (me SyncConfig) GetCveYearUrl(year int, ext string) string { + return me.GetCveUrl(fmt.Sprintf("%04d", year), ext) } -var extraFiles = []string { - "nvdcve-1.1-modified", - "nvdcve-1.1-recent", +// Get URL for CPE match file. +func (me SyncConfig) GetCpeMatchUrl(ext string) string { + return fmt.Sprintf("%s/nvdcpematch-1.0.%s", me.CpeMatch10BaseUrl, ext) +} + +// Get CPE dictionary URL. +func (me SyncConfig) GetCpeDictUrl() string { + if len(me.Cpe23DictUrl) > 0 { + return me.Cpe23DictUrl + } else { + return DefaultConfig.Cpe23DictUrl + } +} + +// get meta URL map. +func (me SyncConfig) getMetaUrls() map[string]string { + // calculate total number of years + numYears := time.Now().Year() - baseYear + 1 + + r := make(map[string]string) + + // fetch cve feed metas + for i := 0; i < numYears; i++ { + metaUrl := me.GetCveYearUrl(baseYear + i, "meta") + feedUrl := me.GetCveYearUrl(baseYear + i, "json.gz") + r[metaUrl] = feedUrl + } + + // fetch cve extra file metas + for _, s := range(cveExtraFiles) { + metaUrl := me.GetCveUrl(s, "meta") + feedUrl := me.GetCveUrl(s, "json.gz") + r[metaUrl] = feedUrl + } + + { + // add cpe match + metaUrl := me.GetCpeMatchUrl("meta") + feedUrl := me.GetCpeMatchUrl("json.gz") + r[metaUrl] = feedUrl + } + + // return map + return r } -// fetch result +// Fetch result. type fetchResult struct { - src string // source URL - err error // fetch result - modified bool // was the result modified? - lastModified string // last modified response header - etag string // etag response header + src string // source URL + err error // fetch result + modified bool // Was the result modified? + path string // Destination file. + headers http.Header // response headers } -// base CVE year -const baseYear = 2002 +// Check result. +type checkResult struct { + metaUrl string // meta full url + metaPath string // meta file path + fullPath string // full file path + err error // error + match bool // true if size and hash match +} + +type syncMessage struct { + fetch fetchResult // fetch result + check checkResult // check result +} + +// sync context +type syncContext struct { + config SyncConfig // sync config + client *http.Client // shared HTTP client + cache Cache // cache + dstDir string // destination directory + ch chan syncMessage // sync message channel +} + +// Create sync context. +func newSyncContext(config SyncConfig, cache Cache, dstDir string) syncContext { + // create shared transport and client + tr := &http.Transport { + MaxIdleConns: config.MaxIdleConns, + IdleConnTimeout: config.IdleConnTimeout, + } + + return syncContext { + config: config, + client: &http.Client{Transport: tr}, + cache: cache, + dstDir: dstDir, + ch: make(chan syncMessage), + } +} + +// Build request +func (me syncContext) getRequest(srcUrl string) (*http.Request, error) { + // create HTTP request + req, err := http.NewRequest("GET", srcUrl, nil) + if err != nil { + return nil, err + } -func fetch(ch chan fetchResult, config SyncConfig, cache Cache, client *http.Client, srcUrl string, dstDir string) { + // Add user-agent, if-none-match, and if-modified-since headers. + req.Header.Add("user-agent", me.config.GetUserAgent()) + if headers, ok := me.cache.Get(srcUrl); ok { + for k, v := range(headers) { + req.Header.Add(k, v) + } + } + + // return success + return req, nil +} + +// Fetch URL and write result to destination directory. +// +// Note: This method is called from a goroutine and writes the results +// back via the member channel. +func (me syncContext) fetch(srcUrl string) { // parse source url src, err := url.Parse(srcUrl) if err != nil { - ch <- fetchResult { src: srcUrl, err: err } + me.ch <- syncMessage { + fetch: fetchResult { src: srcUrl, err: err }, + } return } // build destination path - path := filepath.Join(dstDir, filepath.Base(src.Path)) + path := filepath.Join(me.dstDir, filepath.Base(src.Path)) log.Debug().Str("url", srcUrl).Str("path", path).Send() // create request - req, err := http.NewRequest("GET", srcUrl, nil) + req, err := me.getRequest(srcUrl) if err != nil { - ch <- fetchResult { src: srcUrl, err: err } - return - } - - // add request headers - req.Header.Add("user-agent", config.GetUserAgent()) - if headers, ok := cache.Get(srcUrl); ok { - for k, v := range(headers) { - req.Header.Add(k, v) + me.ch <- syncMessage { + fetch: fetchResult { src: srcUrl, err: err }, } + return } // send request - resp, err := client.Do(req) + resp, err := me.client.Do(req) if err != nil { - ch <- fetchResult { src: srcUrl, err: err } + me.ch <- syncMessage { + fetch: fetchResult { src: srcUrl, err: err }, + } return } defer resp.Body.Close() switch resp.StatusCode { - case 200: - // create temporary output file + case 200: // success + // write to output file err := atomictemp.Create(path, func(f io.Writer) error { _, err := io.Copy(f, resp.Body) return err @@ -116,83 +240,263 @@ func fetch(ch chan fetchResult, config SyncConfig, cache Cache, client *http.Cli if err != nil { // write failed - ch <- fetchResult { src: srcUrl, err: err, modified: false } + me.ch <- syncMessage { + fetch: fetchResult { src: srcUrl, err: err }, + } } else { - ch <- fetchResult { - src: srcUrl, - modified: true, - lastModified: resp.Header.Get("last-modified"), - etag: resp.Header.Get("etag"), + me.ch <- syncMessage { + fetch: fetchResult { + src: srcUrl, + modified: true, + path: path, + headers: resp.Header, + }, } } - case 304: - ch <- fetchResult { src: srcUrl, modified: false } - default: + case 304: // not modified + me.ch <- syncMessage { + fetch: fetchResult { src: srcUrl }, + } + default: // error code := resp.StatusCode err := fmt.Errorf("%d: %s", code, http.StatusText(code)) - ch <- fetchResult { src: srcUrl, err: err } + me.ch <- syncMessage { + fetch: fetchResult { src: srcUrl, err: err }, + } } } -func Sync(config SyncConfig, cache Cache, dstDir string) error { - log.Debug().Str("dstDir", dstDir).Msg("Sync") +// read hash from given meta file. +func (me syncContext) getMeta(path string) (*feed.Meta, error) { + // open meta file + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() - // create fetch result channel - ch := make(chan fetchResult) + // parse meta + return feed.NewMeta(f) +} - // create shared transport and client - tr := &http.Transport{ - MaxIdleConns: config.MaxIdleConns, - IdleConnTimeout: config.IdleConnTimeout, +// get hash of file in destination directory. +func (me syncContext) getFileHash(path string) ([32]byte, error) { + var r [32]byte + + // open file + f, err := os.Open(path) + if err != nil { + return r, err } - client := &http.Client{Transport: tr} + defer f.Close() - // calculate total number of years - numYears := time.Now().Year() - baseYear + 1 + // hash file + hash := sha256.New() + if _, err := io.Copy(hash, f); err != nil { + return r, err + } - // fetch cve feed metas - for year := baseYear; year < baseYear + numYears; year++ { - // build url - url := fmt.Sprintf("%s/nvdcve-1.1-%04d.meta", config.Cve11BaseUrl, year) - log.Debug().Int("year", year).Str("url", url).Send() - go fetch(ch, config, cache, client, url, dstDir) + // copy sum to result, return success + hash.Sum(r[:]) + return r, nil +} + +// Get file size, in bytes. +func getFileSize(path string) (uint64, error) { + // verify that full path exists + if st, err := os.Stat(path); err != nil { + return 0, err + } else { + return uint64(st.Size()), err + } +} + +// Check the size and hash in the metadata file against the full file. +// +// Note: This method is called from a goroutine and returns it's value +// via the internal channel. +func (me syncContext) check(metaUrl, fullUrl string) { + // build result + r := syncMessage { + check: checkResult { + metaUrl: metaUrl, + // build paths + metaPath: filepath.Join(me.dstDir, filepath.Base(metaUrl)), + fullPath: filepath.Join(me.dstDir, filepath.Base(fullUrl)), + }, } - // fetch cve extra file metas - for _, s := range(extraFiles) { - url := fmt.Sprintf("%s/%s.meta", config.Cve11BaseUrl, s) - log.Debug().Str("file", s).Str("url", url).Send() - go fetch(ch, config, cache, client, url, dstDir) + // get size of full file + size, err := getFileSize(r.check.fullPath) + if errors.Is(err, fs.ErrNotExist) { + r.check.match = false + me.ch <- r + return + } else if err != nil { + r.check.err = err + me.ch <- r + return + } + + // get meta hash + m, err := me.getMeta(r.check.metaPath) + if err != nil { + r.check.err = err + me.ch <- r + return } - // read results - for i := 0; i < numYears + len(extraFiles); i++ { - if r := <-ch; r.err != nil { - log.Error().Str("url", r.src).Err(r.err).Send() - // FIXME: errs = append(errs, r) + // check for file size match + if size != m.GzSize { + r.check.match = false + me.ch <- r + return + } + + // get full hash + fh, err := me.getFileHash(r.check.fullPath) + if err != nil { + r.check.err = err + me.ch <- r + return + } + + // return result + r.check.match = (bytes.Compare(m.Sha256[:], fh[:]) == 0) + me.ch <- r +} + +// log array of strings +func logArray(key string, strs []string) { + // populate array + a := zerolog.Arr() + for _, v := range(strs) { + a.Str(v) + } + + // log array + log.Info().Array(key, a).Send() +} + +// Sync to destination directory and return an array of updated files. +func Sync(config SyncConfig, cache Cache, dstDir string) []string { + // log.Debug().Str("dstDir", dstDir).Msg("Sync") + + // build sync context + ctx := newSyncContext(config, cache, dstDir) + + // get meta URL to full URL map + metaUrls := config.getMetaUrls() + + // fetch meta URLs + for metaUrl, _ := range(metaUrls) { + log.Debug().Str("url", metaUrl).Msg("init") + go ctx.fetch(metaUrl) + } + + // build list of metas to check + checkUrls := make([]string, 0, len(metaUrls)) + + // read meta results + for range(metaUrls) { + r := <-ctx.ch + sl := log.With().Str("url", r.fetch.src).Logger() + + if r.fetch.err != nil { + // URL error + sl.Error().Err(r.fetch.err).Send() + } else if !r.fetch.modified { + // URL not modified + sl.Debug().Msg("not modified") } else { - log.Info().Str("url", r.src).Str("etag", r.etag).Msg("ok") - - if r.modified { - // build request headers - headers := map[string]string { - "if-none-match": r.etag, - "if-modified-since": r.lastModified, - } - - // save headers to cache - if err := cache.Set(r.src, headers); err != nil { - log.Error().Str("url", r.src).Err(r.err).Msg("Set") - return err - } + // URL updated + sl.Debug().Msg("update") + + // build request headers + headers := map[string]string { + "if-none-match": r.fetch.headers.Get("etag"), + "if-modified-since": r.fetch.headers.Get("last-modified"), + } + + // save headers to cache + if err := cache.Set(r.fetch.src, headers); err != nil { + sl.Error().Err(r.fetch.err).Msg("cache.Set") + } else { + // append to list of check URLs + checkUrls = append(checkUrls, r.fetch.src) + } + } + } + + // check size and hash in updated metas + logArray("checkUrls", checkUrls) + for _, metaUrl := range(checkUrls) { + go ctx.check(metaUrl, metaUrls[metaUrl]) + } + + // build list of non-meta URLs to sync. + syncUrls := make([]string, 0, len(metaUrls)) + syncUrls = append(syncUrls, config.GetCpeDictUrl()) + + for range(checkUrls) { + r := <-ctx.ch + + // create sublogger + sl := log.With(). + Str("metaUrl", r.check.metaUrl). + Str("metaPath", r.check.metaPath). + Str("fullPath", r.check.fullPath). + Logger() + + if r.check.err != nil { + sl.Error().Err(r.check.err).Send() + } else if r.check.match { + sl.Debug().Msg("match") + } else { + syncUrls = append(syncUrls, metaUrls[r.check.metaUrl]) + } + } + + logArray("syncUrls", syncUrls) + for _, fullUrl := range(syncUrls) { + go ctx.fetch(fullUrl) + } + + // build list of changed files + changed := make([]string, 0, len(syncUrls)) + + // read sync results + for range(syncUrls) { + r := <-ctx.ch + // build sublogger + sl := log.With().Str("url", r.fetch.src).Logger() + + if r.fetch.err != nil { + sl.Error().Err(r.fetch.err).Send() + } else if !r.fetch.modified { + sl.Debug().Msg("not modified") + } else { + sl.Debug().Msg("update") + + // build request headers + headers := map[string]string { + "if-none-match": r.fetch.headers.Get("etag"), + "if-modified-since": r.fetch.headers.Get("last-modified"), + } + + // save headers to cache + if err := cache.Set(r.fetch.src, headers); err != nil { + sl.Error().Err(r.fetch.err).Msg("cache.Set") + } else { + // append to list of changed files + changed = append(changed, filepath.Base(r.fetch.src)) } } } - // TODO: fetch cpe dictionary - // TODO: fetch cpematch - // "nvdcpematch-1.0.{meta,json}", + // log changed files + logArray("changed", changed) // return success - return nil + return changed } diff --git a/nvdmirror/nvdmirror_test.go b/nvdmirror/nvdmirror_test.go index 2c65d41..fb9b56b 100644 --- a/nvdmirror/nvdmirror_test.go +++ b/nvdmirror/nvdmirror_test.go @@ -8,6 +8,18 @@ import ( "testing" ) +// get test config +func getTestConfig(port int) SyncConfig { + // build url + url := fmt.Sprintf("http://localhost:%d", port) + + return SyncConfig { + Cve11BaseUrl: url, + CpeMatch10BaseUrl: url, + Cpe23DictUrl: fmt.Sprintf("%s/official-cpe-dictionary_v2.3.xml.gz", url), + } +} + // serve on given port func serve(port int, ch chan bool) { s := http.Server { @@ -59,64 +71,44 @@ func TestSync(t *testing.T) { defer cache.Close() // custom sync config - // FIXME: stand up custom server for this - config := SyncConfig { - Cve11BaseUrl: fmt.Sprintf("http://localhost:%d", port), - } + config := getTestConfig(port) // sync data t.Run("initial", func(t *testing.T) { - if err := Sync(config, &cache, dir); err != nil { - t.Error(err) - } + Sync(config, &cache, dir) }) // sync data again (to test caching) t.Run("caching", func(t *testing.T) { - if err := Sync(config, &cache, dir); err != nil { - t.Error(err) - } + Sync(config, &cache, dir) }) // sync w/ missing dir t.Run("missingDir", func(t *testing.T) { missingDir := filepath.Join(dir, "does/not/exist") - if err := Sync(config, &cache, missingDir); err != nil { - t.Error(err) - } + Sync(config, &cache, missingDir) }) // sync w/ bad cache t.Run("failSetCache", func(t *testing.T) { var cache FailSetCache - if err := Sync(config, &cache, dir); err == nil { - t.Error(err) - } + Sync(config, &cache, dir) }) t.Run("customUserAgent", func(t *testing.T) { // custom sync config - // FIXME: stand up custom server for this - config := SyncConfig { - Cve11BaseUrl: fmt.Sprintf("http://localhost:%d", port), - UserAgent: "custom-user-agent/0.0.0", - } - - if err := Sync(config, &cache, dir); err != nil { - t.Error(err) - } + config := getTestConfig(port) + config.UserAgent = "custom-user-agent/0.0.0" + + Sync(config, &cache, dir) }) t.Run("clientFail", func(t *testing.T) { // custom sync config - // FIXME: stand up custom server for this - config := SyncConfig { - Cve11BaseUrl: "http://localhost:0", - } - - if err := Sync(config, &cache, dir); err != nil { - t.Error(err) - } + config := getTestConfig(port) + config.Cve11BaseUrl = "http://localhost:0" + + Sync(config, &cache, dir) }) } @@ -145,14 +137,13 @@ func TestBadUrls(t *testing.T) { for _, test := range(failTests) { t.Run(test, func(t *testing.T) { // custom sync config - config := SyncConfig { Cve11BaseUrl: test } + config := getTestConfig(0) + config.Cve11BaseUrl = test // sync data; note: even with an invalid base URL we still expect // this call to succeed; it's just that all of the URLs will be // nonsensical - if err := Sync(config, &cache, dir); err != nil { - t.Error(err) - } + Sync(config, &cache, dir) }) } } -- cgit v1.2.3