aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Duncan <pabs@pablotron.org>2022-02-23 22:07:12 -0500
committerPaul Duncan <pabs@pablotron.org>2022-02-23 22:07:12 -0500
commitfac967e3a0ac4d999a58d889e795a12fa9ee96e0 (patch)
tree9dfddfcfbedf19855c5984a28199f6cdb345a501
parent281f6eff5acfd38cae0bd3217c36622c27389b75 (diff)
downloadcvez-fac967e3a0ac4d999a58d889e795a12fa9ee96e0.tar.bz2
cvez-fac967e3a0ac4d999a58d889e795a12fa9ee96e0.zip
nvdmirror: full rewrite (still not working correctly)
-rw-r--r--nvdmirror/nvdmirror.go494
-rw-r--r--nvdmirror/nvdmirror_test.go65
2 files changed, 427 insertions, 132 deletions
diff --git a/nvdmirror/nvdmirror.go b/nvdmirror/nvdmirror.go
index cba638b..4faf68e 100644
--- a/nvdmirror/nvdmirror.go
+++ b/nvdmirror/nvdmirror.go
@@ -2,28 +2,32 @@
package nvdmirror
import (
+ "bytes"
+ "crypto/sha256"
+ "errors"
"fmt"
"github.com/pablotron/cvez/atomictemp"
+ "github.com/pablotron/cvez/feed"
+ "github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"io"
+ "io/fs"
"net/http"
"net/url"
+ "os"
"path/filepath"
"time"
)
-// default user agent (FIXME: make configurable)
-var defaultUserAgent = "cvez/0.1.0"
-
// Sync() configuration.
type SyncConfig struct {
// CVE 1.1 Base URL. The full meta and JSON URLs are constructed by
// appending the file name to this base.
Cve11BaseUrl string
- // CPE 1.0 base URL. The full meta and JSON URLs are constructed by
- // appending the file name to this base.
- Cpe10MatchBaseUrl string
+ // CPE Match 1.0 base URL. The full meta and JSON URLs are
+ // constructed by appending the file name to this base.
+ CpeMatch10BaseUrl string
// CPE 2.3 dictionary URL.
Cpe23DictUrl string
@@ -38,77 +42,197 @@ type SyncConfig struct {
IdleConnTimeout time.Duration
}
+// NVD URLs
+var DefaultConfig = SyncConfig {
+ Cve11BaseUrl: "https://nvd.nist.gov/feeds/json/cve/1.1",
+ CpeMatch10BaseUrl: "https://nvd.nist.gov/feeds/json/cpematch/1.0",
+ Cpe23DictUrl: "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz",
+ UserAgent: "cvez/0.1.0",
+}
+
+// Initial (first) CVE year.
+const baseYear = 2002
+
+// Additional non-year CVE feeds.
+var cveExtraFiles = []string {
+ "modified",
+ "recent",
+}
+
// Get user agent string.
func (me SyncConfig) GetUserAgent() string {
if len(me.UserAgent) > 0 {
return me.UserAgent
} else {
- return defaultUserAgent
+ return DefaultConfig.UserAgent
}
}
-// NVD URLs
-var DefaultConfig = SyncConfig {
- Cve11BaseUrl: "https://nvd.nist.gov/feeds/json/cve/1.1",
- Cpe10MatchBaseUrl: "https://nvd.nist.gov/feeds/json/cpematch/1.0",
- Cpe23DictUrl: "https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz",
+// Get URL for CVE feed file.
+func (me SyncConfig) GetCveUrl(id, ext string) string {
+ return fmt.Sprintf("%s/nvdcve-1.1-%s.%s", me.Cve11BaseUrl, id, ext)
+}
+
+// Get URL for CVE feed file for given year.
+func (me SyncConfig) GetCveYearUrl(year int, ext string) string {
+ return me.GetCveUrl(fmt.Sprintf("%04d", year), ext)
}
-var extraFiles = []string {
- "nvdcve-1.1-modified",
- "nvdcve-1.1-recent",
+// Get URL for CPE match file.
+func (me SyncConfig) GetCpeMatchUrl(ext string) string {
+ return fmt.Sprintf("%s/nvdcpematch-1.0.%s", me.CpeMatch10BaseUrl, ext)
+}
+
+// Get CPE dictionary URL.
+func (me SyncConfig) GetCpeDictUrl() string {
+ if len(me.Cpe23DictUrl) > 0 {
+ return me.Cpe23DictUrl
+ } else {
+ return DefaultConfig.Cpe23DictUrl
+ }
+}
+
+// get meta URL map.
+func (me SyncConfig) getMetaUrls() map[string]string {
+ // calculate total number of years
+ numYears := time.Now().Year() - baseYear + 1
+
+ r := make(map[string]string)
+
+ // fetch cve feed metas
+ for i := 0; i < numYears; i++ {
+ metaUrl := me.GetCveYearUrl(baseYear + i, "meta")
+ feedUrl := me.GetCveYearUrl(baseYear + i, "json.gz")
+ r[metaUrl] = feedUrl
+ }
+
+ // fetch cve extra file metas
+ for _, s := range(cveExtraFiles) {
+ metaUrl := me.GetCveUrl(s, "meta")
+ feedUrl := me.GetCveUrl(s, "json.gz")
+ r[metaUrl] = feedUrl
+ }
+
+ {
+ // add cpe match
+ metaUrl := me.GetCpeMatchUrl("meta")
+ feedUrl := me.GetCpeMatchUrl("json.gz")
+ r[metaUrl] = feedUrl
+ }
+
+ // return map
+ return r
}
-// fetch result
+// Fetch result.
type fetchResult struct {
- src string // source URL
- err error // fetch result
- modified bool // was the result modified?
- lastModified string // last modified response header
- etag string // etag response header
+ src string // source URL
+ err error // fetch result
+ modified bool // Was the result modified?
+ path string // Destination file.
+ headers http.Header // response headers
}
-// base CVE year
-const baseYear = 2002
+// Check result.
+type checkResult struct {
+ metaUrl string // meta full url
+ metaPath string // meta file path
+ fullPath string // full file path
+ err error // error
+ match bool // true if size and hash match
+}
+
+type syncMessage struct {
+ fetch fetchResult // fetch result
+ check checkResult // check result
+}
+
+// sync context
+type syncContext struct {
+ config SyncConfig // sync config
+ client *http.Client // shared HTTP client
+ cache Cache // cache
+ dstDir string // destination directory
+ ch chan syncMessage // sync message channel
+}
+
+// Create sync context.
+func newSyncContext(config SyncConfig, cache Cache, dstDir string) syncContext {
+ // create shared transport and client
+ tr := &http.Transport {
+ MaxIdleConns: config.MaxIdleConns,
+ IdleConnTimeout: config.IdleConnTimeout,
+ }
+
+ return syncContext {
+ config: config,
+ client: &http.Client{Transport: tr},
+ cache: cache,
+ dstDir: dstDir,
+ ch: make(chan syncMessage),
+ }
+}
+
+// Build request
+func (me syncContext) getRequest(srcUrl string) (*http.Request, error) {
+ // create HTTP request
+ req, err := http.NewRequest("GET", srcUrl, nil)
+ if err != nil {
+ return nil, err
+ }
-func fetch(ch chan fetchResult, config SyncConfig, cache Cache, client *http.Client, srcUrl string, dstDir string) {
+ // Add user-agent, if-none-match, and if-modified-since headers.
+ req.Header.Add("user-agent", me.config.GetUserAgent())
+ if headers, ok := me.cache.Get(srcUrl); ok {
+ for k, v := range(headers) {
+ req.Header.Add(k, v)
+ }
+ }
+
+ // return success
+ return req, nil
+}
+
+// Fetch URL and write result to destination directory.
+//
+// Note: This method is called from a goroutine and writes the results
+// back via the member channel.
+func (me syncContext) fetch(srcUrl string) {
// parse source url
src, err := url.Parse(srcUrl)
if err != nil {
- ch <- fetchResult { src: srcUrl, err: err }
+ me.ch <- syncMessage {
+ fetch: fetchResult { src: srcUrl, err: err },
+ }
return
}
// build destination path
- path := filepath.Join(dstDir, filepath.Base(src.Path))
+ path := filepath.Join(me.dstDir, filepath.Base(src.Path))
log.Debug().Str("url", srcUrl).Str("path", path).Send()
// create request
- req, err := http.NewRequest("GET", srcUrl, nil)
+ req, err := me.getRequest(srcUrl)
if err != nil {
- ch <- fetchResult { src: srcUrl, err: err }
- return
- }
-
- // add request headers
- req.Header.Add("user-agent", config.GetUserAgent())
- if headers, ok := cache.Get(srcUrl); ok {
- for k, v := range(headers) {
- req.Header.Add(k, v)
+ me.ch <- syncMessage {
+ fetch: fetchResult { src: srcUrl, err: err },
}
+ return
}
// send request
- resp, err := client.Do(req)
+ resp, err := me.client.Do(req)
if err != nil {
- ch <- fetchResult { src: srcUrl, err: err }
+ me.ch <- syncMessage {
+ fetch: fetchResult { src: srcUrl, err: err },
+ }
return
}
defer resp.Body.Close()
switch resp.StatusCode {
- case 200:
- // create temporary output file
+ case 200: // success
+ // write to output file
err := atomictemp.Create(path, func(f io.Writer) error {
_, err := io.Copy(f, resp.Body)
return err
@@ -116,83 +240,263 @@ func fetch(ch chan fetchResult, config SyncConfig, cache Cache, client *http.Cli
if err != nil {
// write failed
- ch <- fetchResult { src: srcUrl, err: err, modified: false }
+ me.ch <- syncMessage {
+ fetch: fetchResult { src: srcUrl, err: err },
+ }
} else {
- ch <- fetchResult {
- src: srcUrl,
- modified: true,
- lastModified: resp.Header.Get("last-modified"),
- etag: resp.Header.Get("etag"),
+ me.ch <- syncMessage {
+ fetch: fetchResult {
+ src: srcUrl,
+ modified: true,
+ path: path,
+ headers: resp.Header,
+ },
}
}
- case 304:
- ch <- fetchResult { src: srcUrl, modified: false }
- default:
+ case 304: // not modified
+ me.ch <- syncMessage {
+ fetch: fetchResult { src: srcUrl },
+ }
+ default: // error
code := resp.StatusCode
err := fmt.Errorf("%d: %s", code, http.StatusText(code))
- ch <- fetchResult { src: srcUrl, err: err }
+ me.ch <- syncMessage {
+ fetch: fetchResult { src: srcUrl, err: err },
+ }
}
}
-func Sync(config SyncConfig, cache Cache, dstDir string) error {
- log.Debug().Str("dstDir", dstDir).Msg("Sync")
+// read hash from given meta file.
+func (me syncContext) getMeta(path string) (*feed.Meta, error) {
+ // open meta file
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
- // create fetch result channel
- ch := make(chan fetchResult)
+ // parse meta
+ return feed.NewMeta(f)
+}
- // create shared transport and client
- tr := &http.Transport{
- MaxIdleConns: config.MaxIdleConns,
- IdleConnTimeout: config.IdleConnTimeout,
+// get hash of file in destination directory.
+func (me syncContext) getFileHash(path string) ([32]byte, error) {
+ var r [32]byte
+
+ // open file
+ f, err := os.Open(path)
+ if err != nil {
+ return r, err
}
- client := &http.Client{Transport: tr}
+ defer f.Close()
- // calculate total number of years
- numYears := time.Now().Year() - baseYear + 1
+ // hash file
+ hash := sha256.New()
+ if _, err := io.Copy(hash, f); err != nil {
+ return r, err
+ }
- // fetch cve feed metas
- for year := baseYear; year < baseYear + numYears; year++ {
- // build url
- url := fmt.Sprintf("%s/nvdcve-1.1-%04d.meta", config.Cve11BaseUrl, year)
- log.Debug().Int("year", year).Str("url", url).Send()
- go fetch(ch, config, cache, client, url, dstDir)
+ // copy sum to result, return success
+ hash.Sum(r[:])
+ return r, nil
+}
+
+// Get file size, in bytes.
+func getFileSize(path string) (uint64, error) {
+ // verify that full path exists
+ if st, err := os.Stat(path); err != nil {
+ return 0, err
+ } else {
+ return uint64(st.Size()), err
+ }
+}
+
+// Check the size and hash in the metadata file against the full file.
+//
+// Note: This method is called from a goroutine and returns its value
+// via the internal channel.
+func (me syncContext) check(metaUrl, fullUrl string) {
+ // build result
+ r := syncMessage {
+ check: checkResult {
+ metaUrl: metaUrl,
+ // build paths
+ metaPath: filepath.Join(me.dstDir, filepath.Base(metaUrl)),
+ fullPath: filepath.Join(me.dstDir, filepath.Base(fullUrl)),
+ },
}
- // fetch cve extra file metas
- for _, s := range(extraFiles) {
- url := fmt.Sprintf("%s/%s.meta", config.Cve11BaseUrl, s)
- log.Debug().Str("file", s).Str("url", url).Send()
- go fetch(ch, config, cache, client, url, dstDir)
+ // get size of full file
+ size, err := getFileSize(r.check.fullPath)
+ if errors.Is(err, fs.ErrNotExist) {
+ r.check.match = false
+ me.ch <- r
+ return
+ } else if err != nil {
+ r.check.err = err
+ me.ch <- r
+ return
+ }
+
+ // get meta hash
+ m, err := me.getMeta(r.check.metaPath)
+ if err != nil {
+ r.check.err = err
+ me.ch <- r
+ return
}
- // read results
- for i := 0; i < numYears + len(extraFiles); i++ {
- if r := <-ch; r.err != nil {
- log.Error().Str("url", r.src).Err(r.err).Send()
- // FIXME: errs = append(errs, r)
+ // check for file size match
+ if size != m.GzSize {
+ r.check.match = false
+ me.ch <- r
+ return
+ }
+
+ // get full hash
+ fh, err := me.getFileHash(r.check.fullPath)
+ if err != nil {
+ r.check.err = err
+ me.ch <- r
+ return
+ }
+
+ // return result
+ r.check.match = (bytes.Compare(m.Sha256[:], fh[:]) == 0)
+ me.ch <- r
+}
+
+// log array of strings
+func logArray(key string, strs []string) {
+ // populate array
+ a := zerolog.Arr()
+ for _, v := range(strs) {
+ a.Str(v)
+ }
+
+ // log array
+ log.Info().Array(key, a).Send()
+}
+
+// Sync to destination directory and return an array of updated files.
+func Sync(config SyncConfig, cache Cache, dstDir string) []string {
+ // log.Debug().Str("dstDir", dstDir).Msg("Sync")
+
+ // build sync context
+ ctx := newSyncContext(config, cache, dstDir)
+
+ // get meta URL to full URL map
+ metaUrls := config.getMetaUrls()
+
+ // fetch meta URLs
+ for metaUrl, _ := range(metaUrls) {
+ log.Debug().Str("url", metaUrl).Msg("init")
+ go ctx.fetch(metaUrl)
+ }
+
+ // build list of metas to check
+ checkUrls := make([]string, 0, len(metaUrls))
+
+ // read meta results
+ for range(metaUrls) {
+ r := <-ctx.ch
+ sl := log.With().Str("url", r.fetch.src).Logger()
+
+ if r.fetch.err != nil {
+ // URL error
+ sl.Error().Err(r.fetch.err).Send()
+ } else if !r.fetch.modified {
+ // URL not modified
+ sl.Debug().Msg("not modified")
} else {
- log.Info().Str("url", r.src).Str("etag", r.etag).Msg("ok")
-
- if r.modified {
- // build request headers
- headers := map[string]string {
- "if-none-match": r.etag,
- "if-modified-since": r.lastModified,
- }
-
- // save headers to cache
- if err := cache.Set(r.src, headers); err != nil {
- log.Error().Str("url", r.src).Err(r.err).Msg("Set")
- return err
- }
+ // URL updated
+ sl.Debug().Msg("update")
+
+ // build request headers
+ headers := map[string]string {
+ "if-none-match": r.fetch.headers.Get("etag"),
+ "if-modified-since": r.fetch.headers.Get("last-modified"),
+ }
+
+ // save headers to cache
+ if err := cache.Set(r.fetch.src, headers); err != nil {
+ sl.Error().Err(r.fetch.err).Msg("cache.Set")
+ } else {
+ // append to list of check URLs
+ checkUrls = append(checkUrls, r.fetch.src)
+ }
+ }
+ }
+
+ // check size and hash in updated metas
+ logArray("checkUrls", checkUrls)
+ for _, metaUrl := range(checkUrls) {
+ go ctx.check(metaUrl, metaUrls[metaUrl])
+ }
+
+ // build list of non-meta URLs to sync.
+ syncUrls := make([]string, 0, len(metaUrls))
+ syncUrls = append(syncUrls, config.GetCpeDictUrl())
+
+ for range(checkUrls) {
+ r := <-ctx.ch
+
+ // create sublogger
+ sl := log.With().
+ Str("metaUrl", r.check.metaUrl).
+ Str("metaPath", r.check.metaPath).
+ Str("fullPath", r.check.fullPath).
+ Logger()
+
+ if r.check.err != nil {
+ sl.Error().Err(r.check.err).Send()
+ } else if r.check.match {
+ sl.Debug().Msg("match")
+ } else {
+ syncUrls = append(syncUrls, metaUrls[r.check.metaUrl])
+ }
+ }
+
+ logArray("syncUrls", syncUrls)
+ for _, fullUrl := range(syncUrls) {
+ go ctx.fetch(fullUrl)
+ }
+
+ // build list of changed files
+ changed := make([]string, 0, len(syncUrls))
+
+ // read sync results
+ for range(syncUrls) {
+ r := <-ctx.ch
+ // build sublogger
+ sl := log.With().Str("url", r.fetch.src).Logger()
+
+ if r.fetch.err != nil {
+ sl.Error().Err(r.fetch.err).Send()
+ } else if !r.fetch.modified {
+ sl.Debug().Msg("not modified")
+ } else {
+ sl.Debug().Msg("update")
+
+ // build request headers
+ headers := map[string]string {
+ "if-none-match": r.fetch.headers.Get("etag"),
+ "if-modified-since": r.fetch.headers.Get("last-modified"),
+ }
+
+ // save headers to cache
+ if err := cache.Set(r.fetch.src, headers); err != nil {
+ sl.Error().Err(r.fetch.err).Msg("cache.Set")
+ } else {
+ // append to list of changed files
+ changed = append(changed, filepath.Base(r.fetch.src))
}
}
}
- // TODO: fetch cpe dictionary
- // TODO: fetch cpematch
- // "nvdcpematch-1.0.{meta,json}",
+ // log changed files
+ logArray("changed", changed)
// return success
- return nil
+ return changed
}
diff --git a/nvdmirror/nvdmirror_test.go b/nvdmirror/nvdmirror_test.go
index 2c65d41..fb9b56b 100644
--- a/nvdmirror/nvdmirror_test.go
+++ b/nvdmirror/nvdmirror_test.go
@@ -8,6 +8,18 @@ import (
"testing"
)
+// get test config
+func getTestConfig(port int) SyncConfig {
+ // build url
+ url := fmt.Sprintf("http://localhost:%d", port)
+
+ return SyncConfig {
+ Cve11BaseUrl: url,
+ CpeMatch10BaseUrl: url,
+ Cpe23DictUrl: fmt.Sprintf("%s/official-cpe-dictionary_v2.3.xml.gz", url),
+ }
+}
+
// serve on given port
func serve(port int, ch chan bool) {
s := http.Server {
@@ -59,64 +71,44 @@ func TestSync(t *testing.T) {
defer cache.Close()
// custom sync config
- // FIXME: stand up custom server for this
- config := SyncConfig {
- Cve11BaseUrl: fmt.Sprintf("http://localhost:%d", port),
- }
+ config := getTestConfig(port)
// sync data
t.Run("initial", func(t *testing.T) {
- if err := Sync(config, &cache, dir); err != nil {
- t.Error(err)
- }
+ Sync(config, &cache, dir)
})
// sync data again (to test caching)
t.Run("caching", func(t *testing.T) {
- if err := Sync(config, &cache, dir); err != nil {
- t.Error(err)
- }
+ Sync(config, &cache, dir)
})
// sync w/ missing dir
t.Run("missingDir", func(t *testing.T) {
missingDir := filepath.Join(dir, "does/not/exist")
- if err := Sync(config, &cache, missingDir); err != nil {
- t.Error(err)
- }
+ Sync(config, &cache, missingDir)
})
// sync w/ bad cache
t.Run("failSetCache", func(t *testing.T) {
var cache FailSetCache
- if err := Sync(config, &cache, dir); err == nil {
- t.Error(err)
- }
+ Sync(config, &cache, dir)
})
t.Run("customUserAgent", func(t *testing.T) {
// custom sync config
- // FIXME: stand up custom server for this
- config := SyncConfig {
- Cve11BaseUrl: fmt.Sprintf("http://localhost:%d", port),
- UserAgent: "custom-user-agent/0.0.0",
- }
-
- if err := Sync(config, &cache, dir); err != nil {
- t.Error(err)
- }
+ config := getTestConfig(port)
+ config.UserAgent = "custom-user-agent/0.0.0"
+
+ Sync(config, &cache, dir)
})
t.Run("clientFail", func(t *testing.T) {
// custom sync config
- // FIXME: stand up custom server for this
- config := SyncConfig {
- Cve11BaseUrl: "http://localhost:0",
- }
-
- if err := Sync(config, &cache, dir); err != nil {
- t.Error(err)
- }
+ config := getTestConfig(port)
+ config.Cve11BaseUrl = "http://localhost:0"
+
+ Sync(config, &cache, dir)
})
}
@@ -145,14 +137,13 @@ func TestBadUrls(t *testing.T) {
for _, test := range(failTests) {
t.Run(test, func(t *testing.T) {
// custom sync config
- config := SyncConfig { Cve11BaseUrl: test }
+ config := getTestConfig(0)
+ config.Cve11BaseUrl = test
// sync data; note: even with an invalid base URL we still expect
// this call to succeed; it's just that all of the URLs will be
// nonsensical
- if err := Sync(config, &cache, dir); err != nil {
- t.Error(err)
- }
+ Sync(config, &cache, dir)
})
}
}