aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xbin/mirror.rb111
1 files changed, 111 insertions, 0 deletions
diff --git a/bin/mirror.rb b/bin/mirror.rb
new file mode 100755
index 0000000..7a66180
--- /dev/null
+++ b/bin/mirror.rb
@@ -0,0 +1,111 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+#
+# mirror.rb: Mirror current data from all upstream sources for
+# testing.
+#
+# Usage:
+# # create mirror in ./20220318
+# bin/mirror.rb ./20220318
+#
+# Environment Variables:
+#
+# * MIRROR_DRY_RUN: Set to do a dry run (default: false)
+# * MIRROR_LOG_LEVEL: log level (default: "info")
+# * MIRROR_THREADED: Set to download in threads (default: false)
+#
+
+require 'fileutils'
+require 'json'
+require 'logger'
+require 'net/http'
+require 'open-uri'
+require 'uri'
+
+# check command-line args
+raise "Usage #$0 dir" unless ARGV.size == 1
+
+CVE_YEARS = (2002...Time.now.year).to_a
+
+# format strings
+F = {
+ cve: 'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-%<id>s.%<ext>s',
+ cpematch: 'https://nvd.nist.gov/feeds/json/cpematch/1.0/nvdcpematch-1.0.%<ext>s',
+}
+
+# data sources
+SOURCES = [{
+ dst: 'nvd/cve',
+ urls: %w{modified recent}.concat(CVE_YEARS).each.with_object([]) do |s, r|
+ %w{meta json.gz}.each do |ext|
+ r << F[:cve] % { id: s, ext: ext }
+ end
+ end,
+}, {
+ dst: 'nvd/cpedict',
+ urls: %w{https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz},
+}, {
+ dst: 'nvd/cpematch',
+ urls: %w{meta json.gz}.map do |ext|
+ F[:cpematch] % { ext: ext }
+ end,
+}, {
+ dst: 'cisa',
+ urls: %w{https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json},
+}, {
+ dst: 'cwe',
+ urls: %w{https://cwe.mitre.org/data/xml/cwec_latest.xml.zip},
+}]
+
+def get(log, dst, uri)
+ sum, len = 0, nil
+
+ uri.open({
+ content_length_proc: proc do |val|
+ len = val
+ log.debug(uri) { JSON({ type: 'size', len: len }) }
+ end,
+
+ progress_proc: proc do |val|
+ sum += val
+ log.debug(uri) { JSON({ type: 'read', val: val, sum: sum, len: len }) }
+ end,
+ }) { |src| IO.copy_stream(src, dst) }
+
+ log.info(uri) { JSON({ type: 'done', sum: sum })
+end
+
+log = Logger.new(STDOUT)
+log.level = ENV.fetch('MIRROR_LOG_LEVEL', 'info')
+
+# get base directory
+DST = ARGV.shift
+log.debug('dst') { DST }
+
+# build list of directories
+DIRS = SOURCES.map { |s| File.join(DST, s[:dst]) }.tap do |dirs|
+ log.debug('dirs') { JSON(dirs) }
+end
+
+# build list of urls
+URIS = SOURCES.each.with_object([]) do |src, r|
+ r.concat(src[:urls].map { |url| URI.parse(url) }.map do |uri|
+ { uri: uri, dst: File.join(DST, src[:dst], File.basename(uri.path)) }
+ end)
+end.tap { |uris| log.debug('uris') { JSON(uris) } }
+
+unless ENV.key?('MIRROR_DRY_RUN')
+ # create dirs
+ FileUtils.mkdir_p(DIRS)
+
+ if ENV.key?('MIRROR_THREADED')
+ # fetch URLs in threads, join threads
+ URIS.map do |row|
+ Thread.new(log, row) { |log, row| get(log, row[:dst], row[:uri]) }
+ end.each { |t| t.join }
+ else
+ # fetch URLs in sequence
+ URIS.each { |row| get(log, row[:dst], row[:uri]) }
+ end
+end