From 5fa2cbc1cd4c2ccbdf8897fcf9b5263ab050fbf6 Mon Sep 17 00:00:00 2001 From: Paul Duncan Date: Thu, 24 Feb 2022 00:52:19 -0500 Subject: nvdmirror: break Sync() into logical chunks, move into syncContext --- nvdmirror/sync.go | 148 ++++++++++------------------------------------- nvdmirror/synccontext.go | 116 +++++++++++++++++++++++++++++++++++++ nvdmirror/util.go | 11 +++- 3 files changed, 156 insertions(+), 119 deletions(-) diff --git a/nvdmirror/sync.go b/nvdmirror/sync.go index 6f0b4d4..8cdd4c7 100644 --- a/nvdmirror/sync.go +++ b/nvdmirror/sync.go @@ -1,127 +1,39 @@ package nvdmirror -import ( - "github.com/rs/zerolog/log" - "path/filepath" -) - -// Sync to destination directory and return an array of updated files. +// Sync to destination directory based on given sync configuration and +// cache. Returns an array of file names which have been updated in the +// destination directory. +// +// This function does the following: +// +// 1. Fetch the contents of the source meta URLs for CVE feeds and +// CPE matches. All source URLs are fetched concurrently. +// +// 2. Check the size and hash from meta files against the existing +// CVE feed and CPE match files in the destination directory. All +// file sizes and hashes are checked concurrently. +// +// 3. Fetch the contents of the changed CVE feeds, CPE match files, and +// the CPE dictionary. +// +// All HTTP requests are made with the following request headers: +// +// * if-modified-since (if the URL was queried previously and the +// previous successful response had a "last-modified" header). +// * if-none-match (if the URL was queried previously and the +// previous successful response had an "etag" header). +// * user-agent +// func Sync(config SyncConfig, cache Cache, dstDir string) []string { // build sync context ctx := newSyncContext(config, cache, dstDir) - // get meta URL to full URL map - metaUrls := config.getMetaUrls() - - // fetch meta URLs - for metaUrl, _ := range(metaUrls) { - log.Debug().Str("url", metaUrl).Msg("init") - go ctx.fetch(metaUrl) - } - - // build list of metas to check - checkUrls := make([]string, 0, len(metaUrls)) - - // read meta results - for range(metaUrls) { - r := <-ctx.ch - sl := log.With().Str("url", r.fetch.src).Logger() - - if r.fetch.err != nil { - // URL error - sl.Error().Err(r.fetch.err).Send() - } else if !r.fetch.modified { - // URL not modified - sl.Debug().Msg("not modified") - } else { - // URL updated - sl.Debug().Msg("update") - - // build request headers - headers := map[string]string { - "if-none-match": r.fetch.headers.Get("etag"), - "if-modified-since": r.fetch.headers.Get("last-modified"), - } - - // save headers to cache - if err := cache.Set(r.fetch.src, headers); err != nil { - sl.Error().Err(r.fetch.err).Msg("cache.Set") - } else { - // append to list of check URLs - checkUrls = append(checkUrls, r.fetch.src) - } - } - } - - // check size and hash in updated metas - logArray("checkUrls", checkUrls) - for _, metaUrl := range(checkUrls) { - go ctx.check(metaUrl, metaUrls[metaUrl]) - } - - // build list of non-meta URLs to sync. - syncUrls := make([]string, 0, len(metaUrls)) - syncUrls = append(syncUrls, config.GetCpeDictUrl()) - - for range(checkUrls) { - r := <-ctx.ch - - // create sublogger - sl := log.With(). - Str("metaUrl", r.check.metaUrl). - Str("metaPath", r.check.metaPath). - Str("fullPath", r.check.fullPath). - Logger() - - if r.check.err != nil { - sl.Error().Err(r.check.err).Send() - } else if r.check.match { - sl.Debug().Msg("match") - } else { - syncUrls = append(syncUrls, metaUrls[r.check.metaUrl]) - } - } - - logArray("syncUrls", syncUrls) - for _, fullUrl := range(syncUrls) { - go ctx.fetch(fullUrl) - } - - // build list of changed files - changed := make([]string, 0, len(syncUrls)) - - // read sync results - for range(syncUrls) { - r := <-ctx.ch - // build sublogger - sl := log.With().Str("url", r.fetch.src).Logger() - - if r.fetch.err != nil { - sl.Error().Err(r.fetch.err).Send() - } else if !r.fetch.modified { - sl.Debug().Msg("not modified") - } else { - sl.Debug().Msg("update") - - // build request headers - headers := map[string]string { - "if-none-match": r.fetch.headers.Get("etag"), - "if-modified-since": r.fetch.headers.Get("last-modified"), - } - - // save headers to cache - if err := cache.Set(r.fetch.src, headers); err != nil { - sl.Error().Err(r.fetch.err).Msg("cache.Set") - } else { - // append to list of changed files - changed = append(changed, filepath.Base(r.fetch.src)) - } - } - } + // fetch updated meta files + checks := ctx.fetchMetas() - // log changed files - logArray("changed", changed) + // get syncable URLs + urls := append(ctx.checkMetas(checks), config.GetCpeDictUrl()) - // return success - return changed + // sync urls and return changed files + return ctx.syncUrls(urls) } diff --git a/nvdmirror/synccontext.go b/nvdmirror/synccontext.go index efc2f28..a53804e 100644 --- a/nvdmirror/synccontext.go +++ b/nvdmirror/synccontext.go @@ -247,3 +247,119 @@ func (me syncContext) check(metaUrl, fullUrl string) { r.check.match = (bytes.Compare(m.Sha256[:], fh[:]) == 0) me.ch <- r } + +// Fetch updated meta files and get a map of updated meta files to their +// corresponding full content URL. +// +// Note: This function uses the syncContext member channel and +// goroutines to fetch all meta URLS concurrently. +func (me syncContext) fetchMetas() map[string]string { + ret := make(map[string]string) + + // get map of meta URLs to full URLs. + metaUrls := me.config.getMetaUrls() + + // fetch meta URLs + for metaUrl, _ := range(metaUrls) { + log.Debug().Str("url", metaUrl).Msg("init") + go me.fetch(metaUrl) + } + + // read meta results + for range(metaUrls) { + r := <-me.ch + sl := log.With().Str("url", r.fetch.src).Logger() + + if r.fetch.err != nil { + // URL error + sl.Error().Err(r.fetch.err).Send() + } else if !r.fetch.modified { + // URL not modified + sl.Debug().Msg("not modified") + } else if err := saveHeaders(me.cache, r.fetch); err != nil { + sl.Error().Err(err).Msg("saveHeaders") + } else { + // add to result + ret[r.fetch.src] = metaUrls[r.fetch.src] + } + } + + // return result + return ret +} + +// Check compare file size and hash in updated metadata files. Returns +// an array of URLs that should be updated. +// +// Note: This function uses the syncContext member channel and +// goroutines to check the size and hash of all files concurrently. +func (me syncContext) checkMetas(checks map[string]string) []string { + // build list of URLs to sync + // (include one extra slot for cpedict) + syncUrls := make([]string, 0, len(checks) + 1) + + // check size and hash in updated metas concurrently + for metaUrl, fullUrl := range(checks) { + go me.check(metaUrl, fullUrl) + } + + for range(checks) { + r := <-me.ch + + // create sublogger + sl := log.With(). + Str("metaUrl", r.check.metaUrl). + Str("metaPath", r.check.metaPath). + Str("fullPath", r.check.fullPath). + Logger() + + if r.check.err != nil { + sl.Error().Err(r.check.err).Send() + } else if r.check.match { + sl.Debug().Msg("match") + } else { + // append list of full URLs to sync + syncUrls = append(syncUrls, checks[r.check.metaUrl]) + } + } + + // return results + return syncUrls +} + +// Fetch full URLs. Returns an array of files in destination directory +// that have changed. +// +// Note: This function uses the syncContext member channel and +// goroutines to fetch URLs concurrently. +func (me syncContext) syncUrls(urls []string) []string { + // build list of changed files + changed := make([]string, 0, len(urls)) + + // fetch URLs concurrently + logArray("syncUrls", urls) + for _, url := range(urls) { + go me.fetch(url) + } + + // read sync results + for range(urls) { + r := <-me.ch + // build sublogger + sl := log.With().Str("url", r.fetch.src).Logger() + + if r.fetch.err != nil { + sl.Error().Err(r.fetch.err).Send() + } else if !r.fetch.modified { + sl.Debug().Msg("not modified") + } else if err := saveHeaders(me.cache, r.fetch); err != nil { + sl.Error().Err(err).Msg("cache.Set") + } else { + // append to list of changed files + changed = append(changed, filepath.Base(r.fetch.src)) + } + } + + // return results + return changed +} diff --git a/nvdmirror/util.go b/nvdmirror/util.go index 9cf7d3a..d54e41b 100644 --- a/nvdmirror/util.go +++ b/nvdmirror/util.go @@ -16,7 +16,7 @@ func getFileSize(path string) (uint64, error) { } } -// log array of strings +// Log array of strings. func logArray(key string, strs []string) { // populate array a := zerolog.Arr() @@ -27,3 +27,12 @@ func logArray(key string, strs []string) { // log array log.Info().Array(key, a).Send() } + +// Get source URL, etag response header, and last-modified response +// header from fetchResult and save them in the given cache. +func saveHeaders(cache Cache, fr fetchResult) error { + return cache.Set(fr.src, map[string]string { + "if-none-match": fr.headers.Get("etag"), + "if-modified-since": fr.headers.Get("last-modified"), + }) +} -- cgit v1.2.3