#!/usr/bin/env ruby
# frozen_string_literal: true

#
# mirror.rb: Mirror current data from all upstream sources for
# testing.
#
# Usage:
#
#   # create mirror in ./20220318
#   bin/mirror.rb ./20220318
#
# Environment Variables:
#
# * MIRROR_DRY_RUN: Set to do a dry run (default: false)
# * MIRROR_LOG_LEVEL: log level (default: "info")
# * MIRROR_THREADED: Set to download in threads (default: false)
#

require 'fileutils'
require 'json'
require 'logger'
require 'net/http'
require 'open-uri'
require 'uri'

# check command-line args
raise "Usage: #$0 dir" unless ARGV.size == 1

# NVD 1.1 JSON feeds are published per year, from 2002 through the
# current year (inclusive).
CVE_YEARS = (2002..Time.now.year).to_a

# format strings
F = {
  cve: 'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-%{id}.%{ext}',
  cpematch: 'https://nvd.nist.gov/feeds/json/cpematch/1.0/nvdcpematch-1.0.%{ext}',
}

# data sources
SOURCES = [{
  dst: 'nvd/cve',
  urls: %w{modified recent}.concat(CVE_YEARS).each.with_object([]) do |s, r|
    %w{meta json.gz}.each do |ext|
      r << F[:cve] % { id: s, ext: ext }
    end
  end,
}, {
  dst: 'nvd/cpedict',
  urls: %w{https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz},
}, {
  dst: 'nvd/cpematch',
  urls: %w{meta json.gz}.map do |ext|
    F[:cpematch] % { ext: ext }
  end,
}, {
  dst: 'cisa',
  urls: %w{https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json},
}, {
  dst: 'cwe',
  urls: %w{https://cwe.mitre.org/data/xml/cwec_latest.xml.zip},
}]

#
# Download the contents of +uri+ to the path +dst+, logging progress
# along the way.
#
def get(log, dst, uri)
  sum, len = 0, nil

  uri.open({
    content_length_proc: proc do |val|
      len = val
      log.debug(uri) { JSON({ type: 'size', len: len }) }
    end,

    progress_proc: proc do |val|
      sum += val
      log.debug(uri) { JSON({ type: 'read', val: val, sum: sum, len: len }) }
    end,
  }) { |src| IO.copy_stream(src, dst) }

  log.info(uri) { JSON({ type: 'done', sum: sum }) }
end

log = Logger.new(STDOUT)
log.level = ENV.fetch('MIRROR_LOG_LEVEL', 'info')

# get base directory
DST = ARGV.shift
log.debug('dst') { DST }

# build list of directories
DIRS = SOURCES.map { |s| File.join(DST, s[:dst]) }.tap do |dirs|
  log.debug('dirs') { JSON(dirs) }
end

# build list of urls
URIS = SOURCES.each.with_object([]) do |src, r|
  r.concat(src[:urls].map { |url| URI.parse(url) }.map do |uri|
    { uri: uri, dst: File.join(DST, src[:dst], File.basename(uri.path)) }
  end)
end.tap { |uris| log.debug('uris') { JSON(uris) } }

unless ENV.key?('MIRROR_DRY_RUN')
  # create dirs
  FileUtils.mkdir_p(DIRS)

  if ENV.key?('MIRROR_THREADED')
    # fetch URLs in threads, join threads
    URIS.map do |row|
      Thread.new(log, row) { |log, row| get(log, row[:dst], row[:uri]) }
    end.each { |t| t.join }
  else
    # fetch URLs in sequence
    URIS.each { |row| get(log, row[:dst], row[:uri]) }
  end
end
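
#
# Example invocations (a sketch; the ./20220318 destination is the
# illustrative path from the header above).  MIRROR_DRY_RUN and
# MIRROR_THREADED are presence-checked with ENV.key?, so setting them
# to any value enables them:
#
#   # dry run: log the directories and URLs that would be fetched,
#   # without creating files
#   MIRROR_DRY_RUN=1 MIRROR_LOG_LEVEL=debug bin/mirror.rb ./20220318
#
#   # threaded mirror with default logging
#   MIRROR_THREADED=1 bin/mirror.rb ./20220318
#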