// mirror files from upstream NVD source package nvdmirror import ( "bytes" "crypto/sha256" "errors" "fmt" "github.com/pablotron/cvez/atomictemp" "github.com/pablotron/cvez/feed" "github.com/rs/zerolog" "github.com/rs/zerolog/log" "io" "io/fs" "net/http" "net/url" "os" "path/filepath" "time" ) // Sync() configuration. type SyncConfig struct { // CVE 1.1 Base URL. The full meta and JSON URLs are constructed by // appending the file name to this base. Cve11BaseUrl string // CPE Match 1.0 base URL. The full meta and JSON URLs are // constructed by appending the file name to this base. CpeMatch10BaseUrl string // CPE 2.3 dictionary URL. Cpe23DictUrl string // User agent string. Set to "" for default user agent string. UserAgent string // Maximum number of idle connections. MaxIdleConns int // Idle connection timeout. IdleConnTimeout time.Duration } // NVD URLs var DefaultConfig = SyncConfig { Cve11BaseUrl: "https://nvd.nist.gov/feeds/json/cve/1.1", CpeMatch10BaseUrl: "https://nvd.nist.gov/feeds/json/cpematch/1.0", Cpe23DictUrl: "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz", UserAgent: "cvez/0.1.0", } // Initial (first) CVE year. const baseYear = 2002 // Additional non-year CVE feeds. var cveExtraFiles = []string { "modified", "recent", } // Get user agent string. func (me SyncConfig) GetUserAgent() string { if len(me.UserAgent) > 0 { return me.UserAgent } else { return DefaultConfig.UserAgent } } // Get URL for CVE feed file. func (me SyncConfig) GetCveUrl(id, ext string) string { return fmt.Sprintf("%s/nvdcve-1.1-%s.%s", me.Cve11BaseUrl, id, ext) } // Get URL for CVE feed file for given year. func (me SyncConfig) GetCveYearUrl(year int, ext string) string { return me.GetCveUrl(fmt.Sprintf("%04d", year), ext) } // Get URL for CPE match file. func (me SyncConfig) GetCpeMatchUrl(ext string) string { return fmt.Sprintf("%s/nvdcpematch-1.0.%s", me.CpeMatch10BaseUrl, ext) } // Get CPE dictionary URL. 
// GetCpeDictUrl returns the configured CPE 2.3 dictionary URL, or the
// default dictionary URL when none is configured.
func (me SyncConfig) GetCpeDictUrl() string {
	if me.Cpe23DictUrl != "" {
		return me.Cpe23DictUrl
	}
	return DefaultConfig.Cpe23DictUrl
}

// getMetaUrls builds a map from meta URL to the matching feed URL for
// every CVE year feed, the extra CVE feeds, and the CPE match file.
func (me SyncConfig) getMetaUrls() map[string]string {
	r := make(map[string]string)

	// per-year CVE feeds, from baseYear through the current year
	lastYear := time.Now().Year()
	for year := baseYear; year <= lastYear; year++ {
		r[me.GetCveYearUrl(year, "meta")] = me.GetCveYearUrl(year, "json.gz")
	}

	// extra non-year CVE feeds ("modified" and "recent")
	for _, id := range cveExtraFiles {
		r[me.GetCveUrl(id, "meta")] = me.GetCveUrl(id, "json.gz")
	}

	// CPE match file
	r[me.GetCpeMatchUrl("meta")] = me.GetCpeMatchUrl("json.gz")

	return r
}

// fetchResult is the outcome of a single fetch() call.
type fetchResult struct {
	src      string      // source URL
	err      error       // fetch error, if any
	modified bool        // was the result modified?
	path     string      // destination file
	headers  http.Header // response headers
}

// checkResult is the outcome of a single check() call.
type checkResult struct {
	metaUrl  string // meta full url
	metaPath string // meta file path
	fullPath string // full file path
	err      error  // error, if any
	match    bool   // true if size and hash both match
}

// syncMessage is sent back over the sync channel by worker goroutines.
type syncMessage struct {
	fetch fetchResult // fetch result
	check checkResult // check result
}

// syncContext carries the shared state for a single Sync() run.
type syncContext struct {
	config SyncConfig       // sync config
	client *http.Client     // shared HTTP client
	cache  Cache            // cache
	dstDir string           // destination directory
	ch     chan syncMessage // sync message channel
}

// Create sync context.
func newSyncContext(config SyncConfig, cache Cache, dstDir string) syncContext { // create shared transport and client tr := &http.Transport { MaxIdleConns: config.MaxIdleConns, IdleConnTimeout: config.IdleConnTimeout, } return syncContext { config: config, client: &http.Client{Transport: tr}, cache: cache, dstDir: dstDir, ch: make(chan syncMessage), } } // Build request func (me syncContext) getRequest(srcUrl string) (*http.Request, error) { // create HTTP request req, err := http.NewRequest("GET", srcUrl, nil) if err != nil { return nil, err } // Add user-agent, if-none-match, and if-modified-since headers. req.Header.Add("user-agent", me.config.GetUserAgent()) if headers, ok := me.cache.Get(srcUrl); ok { for k, v := range(headers) { req.Header.Add(k, v) } } // return success return req, nil } // Fetch URL and write result to destination directory. // // Note: This method is called from a goroutine and writes the results // back via the member channel. func (me syncContext) fetch(srcUrl string) { // parse source url src, err := url.Parse(srcUrl) if err != nil { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } return } // build destination path path := filepath.Join(me.dstDir, filepath.Base(src.Path)) log.Debug().Str("url", srcUrl).Str("path", path).Send() // create request req, err := me.getRequest(srcUrl) if err != nil { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } return } // send request resp, err := me.client.Do(req) if err != nil { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } return } defer resp.Body.Close() switch resp.StatusCode { case 200: // success // write to output file err := atomictemp.Create(path, func(f io.Writer) error { _, err := io.Copy(f, resp.Body) return err }) if err != nil { // write failed me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } } else { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, modified: true, path: path, headers: 
resp.Header, }, } } case 304: // not modified me.ch <- syncMessage { fetch: fetchResult { src: srcUrl }, } default: // error code := resp.StatusCode err := fmt.Errorf("%d: %s", code, http.StatusText(code)) me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } } } // read hash from given meta file. func (me syncContext) getMeta(path string) (*feed.Meta, error) { // open meta file f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() // parse meta return feed.NewMeta(f) } // get hash of file in destination directory. func (me syncContext) getFileHash(path string) ([32]byte, error) { var r [32]byte // open file f, err := os.Open(path) if err != nil { return r, err } defer f.Close() // hash file hash := sha256.New() if _, err := io.Copy(hash, f); err != nil { return r, err } // copy sum to result, return success hash.Sum(r[:]) return r, nil } // Get file size, in bytes. func getFileSize(path string) (uint64, error) { // verify that full path exists if st, err := os.Stat(path); err != nil { return 0, err } else { return uint64(st.Size()), err } } // Check the size and hash in the metadata file against the full file. // // Note: This method is called from a goroutine and returns it's value // via the internal channel. 
func (me syncContext) check(metaUrl, fullUrl string) { // build result r := syncMessage { check: checkResult { metaUrl: metaUrl, // build paths metaPath: filepath.Join(me.dstDir, filepath.Base(metaUrl)), fullPath: filepath.Join(me.dstDir, filepath.Base(fullUrl)), }, } // get size of full file size, err := getFileSize(r.check.fullPath) if errors.Is(err, fs.ErrNotExist) { r.check.match = false me.ch <- r return } else if err != nil { r.check.err = err me.ch <- r return } // get meta hash m, err := me.getMeta(r.check.metaPath) if err != nil { r.check.err = err me.ch <- r return } // check for file size match if size != m.GzSize { r.check.match = false me.ch <- r return } // get full hash fh, err := me.getFileHash(r.check.fullPath) if err != nil { r.check.err = err me.ch <- r return } // return result r.check.match = (bytes.Compare(m.Sha256[:], fh[:]) == 0) me.ch <- r } // log array of strings func logArray(key string, strs []string) { // populate array a := zerolog.Arr() for _, v := range(strs) { a.Str(v) } // log array log.Info().Array(key, a).Send() } // Sync to destination directory and return an array of updated files. 
func Sync(config SyncConfig, cache Cache, dstDir string) []string { // log.Debug().Str("dstDir", dstDir).Msg("Sync") // build sync context ctx := newSyncContext(config, cache, dstDir) // get meta URL to full URL map metaUrls := config.getMetaUrls() // fetch meta URLs for metaUrl, _ := range(metaUrls) { log.Debug().Str("url", metaUrl).Msg("init") go ctx.fetch(metaUrl) } // build list of metas to check checkUrls := make([]string, 0, len(metaUrls)) // read meta results for range(metaUrls) { r := <-ctx.ch sl := log.With().Str("url", r.fetch.src).Logger() if r.fetch.err != nil { // URL error sl.Error().Err(r.fetch.err).Send() } else if !r.fetch.modified { // URL not modified sl.Debug().Msg("not modified") } else { // URL updated sl.Debug().Msg("update") // build request headers headers := map[string]string { "if-none-match": r.fetch.headers.Get("etag"), "if-modified-since": r.fetch.headers.Get("last-modified"), } // save headers to cache if err := cache.Set(r.fetch.src, headers); err != nil { sl.Error().Err(r.fetch.err).Msg("cache.Set") } else { // append to list of check URLs checkUrls = append(checkUrls, r.fetch.src) } } } // check size and hash in updated metas logArray("checkUrls", checkUrls) for _, metaUrl := range(checkUrls) { go ctx.check(metaUrl, metaUrls[metaUrl]) } // build list of non-meta URLs to sync. syncUrls := make([]string, 0, len(metaUrls)) syncUrls = append(syncUrls, config.GetCpeDictUrl()) for range(checkUrls) { r := <-ctx.ch // create sublogger sl := log.With(). Str("metaUrl", r.check.metaUrl). Str("metaPath", r.check.metaPath). Str("fullPath", r.check.fullPath). 
Logger() if r.check.err != nil { sl.Error().Err(r.check.err).Send() } else if r.check.match { sl.Debug().Msg("match") } else { syncUrls = append(syncUrls, metaUrls[r.check.metaUrl]) } } logArray("syncUrls", syncUrls) for _, fullUrl := range(syncUrls) { go ctx.fetch(fullUrl) } // build list of changed files changed := make([]string, 0, len(syncUrls)) // read sync results for range(syncUrls) { r := <-ctx.ch // build sublogger sl := log.With().Str("url", r.fetch.src).Logger() if r.fetch.err != nil { sl.Error().Err(r.fetch.err).Send() } else if !r.fetch.modified { sl.Debug().Msg("not modified") } else { sl.Debug().Msg("update") // build request headers headers := map[string]string { "if-none-match": r.fetch.headers.Get("etag"), "if-modified-since": r.fetch.headers.Get("last-modified"), } // save headers to cache if err := cache.Set(r.fetch.src, headers); err != nil { sl.Error().Err(r.fetch.err).Msg("cache.Set") } else { // append to list of changed files changed = append(changed, filepath.Base(r.fetch.src)) } } } // log changed files logArray("changed", changed) // return success return changed }