// mirror files from upstream NVD source package nvdmirror import ( "bytes" "crypto/sha256" "errors" "fmt" "github.com/pablotron/cvez/atomictemp" "github.com/pablotron/cvez/feed" "github.com/rs/zerolog/log" "io" "io/fs" "net/http" "net/url" "os" "path/filepath" ) // Fetch result. type fetchResult struct { src string // source URL err error // fetch result modified bool // Was the result modified? path string // Destination file. headers http.Header // response headers } // Check result. type checkResult struct { metaUrl string // meta full url metaPath string // meta file path fullPath string // full file path err error // error match bool // true if size and hash match } type syncMessage struct { fetch fetchResult // fetch result check checkResult // check result } // sync context type syncContext struct { config SyncConfig // sync config client *http.Client // shared HTTP client cache Cache // cache dstDir string // destination directory ch chan syncMessage // sync message channel } // Create sync context. func newSyncContext(config SyncConfig, cache Cache, dstDir string) syncContext { // create shared transport and client tr := &http.Transport { MaxIdleConns: config.MaxIdleConns, IdleConnTimeout: config.IdleConnTimeout, } return syncContext { config: config, client: &http.Client{Transport: tr}, cache: cache, dstDir: dstDir, ch: make(chan syncMessage), } } // Build request func (me syncContext) getRequest(srcUrl string) (*http.Request, error) { // create HTTP request req, err := http.NewRequest("GET", srcUrl, nil) if err != nil { return nil, err } // Add user-agent, if-none-match, and if-modified-since headers. req.Header.Add("user-agent", me.config.GetUserAgent()) if headers, ok := me.cache.Get(srcUrl); ok { for k, v := range(headers) { req.Header.Add(k, v) } } // return success return req, nil } // Fetch URL and write result to destination directory. // // Note: This method is called from a goroutine and writes the results // back via the member channel. func (me syncContext) fetch(srcUrl string) { // parse source url src, err := url.Parse(srcUrl) if err != nil { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } return } // build destination path path := filepath.Join(me.dstDir, filepath.Base(src.Path)) log.Debug().Str("url", srcUrl).Str("path", path).Send() // create request req, err := me.getRequest(srcUrl) if err != nil { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } return } // send request resp, err := me.client.Do(req) if err != nil { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } return } defer resp.Body.Close() switch resp.StatusCode { case 200: // success // write to output file err := atomictemp.Create(path, func(f io.Writer) error { _, err := io.Copy(f, resp.Body) return err }) if err != nil { // write failed me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } } else { me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, modified: true, path: path, headers: resp.Header, }, } } case 304: // not modified me.ch <- syncMessage { fetch: fetchResult { src: srcUrl }, } default: // error code := resp.StatusCode err := fmt.Errorf("%d: %s", code, http.StatusText(code)) me.ch <- syncMessage { fetch: fetchResult { src: srcUrl, err: err }, } } } // read hash from given meta file. func (me syncContext) getMeta(path string) (*feed.Meta, error) { // open meta file f, err := os.Open(path) if err != nil { return nil, err } defer f.Close() // parse meta return feed.NewMeta(f) } // get hash of file in destination directory. func (me syncContext) getFileHash(path string) ([32]byte, error) { var r [32]byte // open file f, err := os.Open(path) if err != nil { return r, err } defer f.Close() // hash file hash := sha256.New() if _, err := io.Copy(hash, f); err != nil { return r, err } // copy sum to result, return success hash.Sum(r[:]) return r, nil } // Check the size and hash in the metadata file against the full file. // // Note: This method is called from a goroutine and returns it's value // via the internal channel. func (me syncContext) check(metaUrl, fullUrl string) { // build result r := syncMessage { check: checkResult { metaUrl: metaUrl, // build paths metaPath: filepath.Join(me.dstDir, filepath.Base(metaUrl)), fullPath: filepath.Join(me.dstDir, filepath.Base(fullUrl)), }, } // get size of full file size, err := getFileSize(r.check.fullPath) if errors.Is(err, fs.ErrNotExist) { r.check.match = false me.ch <- r return } else if err != nil { r.check.err = err me.ch <- r return } // get meta hash m, err := me.getMeta(r.check.metaPath) if err != nil { r.check.err = err me.ch <- r return } // check for file size match if size != m.GzSize { r.check.match = false me.ch <- r return } // get full hash fh, err := me.getFileHash(r.check.fullPath) if err != nil { r.check.err = err me.ch <- r return } // return result r.check.match = (bytes.Compare(m.Sha256[:], fh[:]) == 0) me.ch <- r } // Sync to destination directory and return an array of updated files. func Sync(config SyncConfig, cache Cache, dstDir string) []string { // log.Debug().Str("dstDir", dstDir).Msg("Sync") // build sync context ctx := newSyncContext(config, cache, dstDir) // get meta URL to full URL map metaUrls := config.getMetaUrls() // fetch meta URLs for metaUrl, _ := range(metaUrls) { log.Debug().Str("url", metaUrl).Msg("init") go ctx.fetch(metaUrl) } // build list of metas to check checkUrls := make([]string, 0, len(metaUrls)) // read meta results for range(metaUrls) { r := <-ctx.ch sl := log.With().Str("url", r.fetch.src).Logger() if r.fetch.err != nil { // URL error sl.Error().Err(r.fetch.err).Send() } else if !r.fetch.modified { // URL not modified sl.Debug().Msg("not modified") } else { // URL updated sl.Debug().Msg("update") // build request headers headers := map[string]string { "if-none-match": r.fetch.headers.Get("etag"), "if-modified-since": r.fetch.headers.Get("last-modified"), } // save headers to cache if err := cache.Set(r.fetch.src, headers); err != nil { sl.Error().Err(r.fetch.err).Msg("cache.Set") } else { // append to list of check URLs checkUrls = append(checkUrls, r.fetch.src) } } } // check size and hash in updated metas logArray("checkUrls", checkUrls) for _, metaUrl := range(checkUrls) { go ctx.check(metaUrl, metaUrls[metaUrl]) } // build list of non-meta URLs to sync. syncUrls := make([]string, 0, len(metaUrls)) syncUrls = append(syncUrls, config.GetCpeDictUrl()) for range(checkUrls) { r := <-ctx.ch // create sublogger sl := log.With(). Str("metaUrl", r.check.metaUrl). Str("metaPath", r.check.metaPath). Str("fullPath", r.check.fullPath). Logger() if r.check.err != nil { sl.Error().Err(r.check.err).Send() } else if r.check.match { sl.Debug().Msg("match") } else { syncUrls = append(syncUrls, metaUrls[r.check.metaUrl]) } } logArray("syncUrls", syncUrls) for _, fullUrl := range(syncUrls) { go ctx.fetch(fullUrl) } // build list of changed files changed := make([]string, 0, len(syncUrls)) // read sync results for range(syncUrls) { r := <-ctx.ch // build sublogger sl := log.With().Str("url", r.fetch.src).Logger() if r.fetch.err != nil { sl.Error().Err(r.fetch.err).Send() } else if !r.fetch.modified { sl.Debug().Msg("not modified") } else { sl.Debug().Msg("update") // build request headers headers := map[string]string { "if-none-match": r.fetch.headers.Get("etag"), "if-modified-since": r.fetch.headers.Get("last-modified"), } // save headers to cache if err := cache.Set(r.fetch.src, headers); err != nil { sl.Error().Err(r.fetch.err).Msg("cache.Set") } else { // append to list of changed files changed = append(changed, filepath.Base(r.fetch.src)) } } } // log changed files logArray("changed", changed) // return success return changed }