#!/usr/bin/env ruby
# frozen_string_literal: true

#
# mirror.rb: Mirror current data from all upstream sources for
# testing.
#
# Usage:
#   # create mirror in ./20220318
#   bin/mirror.rb ./20220318
#
# Environment Variables:
#
# * MIRROR_DRY_RUN: Set to do a dry run (default: false)
# * MIRROR_LOG_LEVEL: log level (default: "info")
# * MIRROR_THREADED: Set to download in threads (default: false)
#

require 'fileutils'
require 'json'
require 'logger'
require 'net/http'
require 'open-uri'
require 'uri'

# check command-line args
raise "Usage #$0 dir" unless ARGV.size == 1

# CVE yearly feed IDs: 2002 through the current year, inclusive.
# (The original exclusive range `...` silently skipped the current
# year's feed.)
CVE_YEARS = (2002..Time.now.year).to_a.freeze

# URL format strings.
#
# NOTE: these use *named* references (%<id>s, %<ext>s) because they are
# filled in with a Hash below; positional %s specifiers raise
# ArgumentError when formatted against a Hash.
F = {
  cve: 'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-%<id>s.%<ext>s',
  cpematch: 'https://nvd.nist.gov/feeds/json/cpematch/1.0/nvdcpematch-1.0.%<ext>s',
}.freeze

# data sources: each entry maps a destination subdirectory to the list
# of upstream URLs mirrored into it
SOURCES = [{
  dst: 'nvd/cve',
  # every CVE feed ID ("modified", "recent", and each year) in both
  # .meta and .json.gz flavors
  urls: (%w{modified recent} + CVE_YEARS).flat_map do |id|
    %w{meta json.gz}.map { |ext| F[:cve] % { id: id, ext: ext } }
  end,
}, {
  dst: 'nvd/cpedict',
  urls: %w{https://nvd.nist.gov/feeds/xml/cpe/dictionary/official-cpe-dictionary_v2.3.xml.gz},
}, {
  dst: 'nvd/cpematch',
  urls: %w{meta json.gz}.map { |ext| F[:cpematch] % { ext: ext } },
}, {
  dst: 'cisa',
  urls: %w{https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json},
}, {
  dst: 'cwe',
  urls: %w{https://cwe.mitre.org/data/xml/cwec_latest.xml.zip},
}].freeze

#
# Fetch +uri+ and stream the response body to the path +dst+, logging
# size/progress/completion events as JSON to +log+.
#
def get(log, dst, uri)
  sum, len = 0, nil

  uri.open(
    # total size announced by the server (may be absent)
    content_length_proc: proc do |val|
      len = val
      log.debug(uri) { JSON({ type: 'size', len: len }) }
    end,

    # cumulative bytes read so far
    progress_proc: proc do |val|
      sum += val
      log.debug(uri) { JSON({ type: 'read', val: val, sum: sum, len: len }) }
    end
  ) { |src| IO.copy_stream(src, dst) }

  log.info(uri) { JSON({ type: 'done', sum: sum }) }
end

log = Logger.new(STDOUT)
log.level = ENV.fetch('MIRROR_LOG_LEVEL', 'info')

# get base directory
DST = ARGV.shift
log.debug('dst') { DST }

# build list of destination directories
DIRS = SOURCES.map { |s| File.join(DST, s[:dst]) }.tap do |dirs|
  log.debug('dirs') { JSON(dirs) }
end

# build flat list of { uri:, dst: } download jobs
URIS = SOURCES.flat_map do |src|
  src[:urls].map do |url|
    uri = URI.parse(url)
    { uri: uri, dst: File.join(DST, src[:dst], File.basename(uri.path)) }
  end
end.tap { |uris| log.debug('uris') { JSON(uris) } }

unless ENV.key?('MIRROR_DRY_RUN')
  # create dirs
  FileUtils.mkdir_p(DIRS)

  if ENV.key?('MIRROR_THREADED')
    # fetch URLs in threads, join threads
    URIS.map do |row|
      Thread.new(log, row) { |lg, r| get(lg, r[:dst], r[:uri]) }
    end.each(&:join)
  else
    # fetch URLs in sequence
    URIS.each { |row| get(log, row[:dst], row[:uri]) }
  end
end