moduleify color logger, use rspec

2023-03-06 09:58:32 -08:00
parent ea261b228e
commit 406080cef6
39 changed files with 803 additions and 273 deletions

.rspec (new file, +1 line)
View File

@@ -0,0 +1 @@
--require spec_helper

View File

@@ -80,7 +80,7 @@ group :test do
gem "capybara"
gem "selenium-webdriver"
gem "webdrivers"
# gem "minitest-rspec_mocks"
gem "rspec-rails"
end
gem "xdiff", path: "../xdiff-rb"
@@ -104,6 +104,7 @@ gem "colorize"
gem "daemons"
gem "delayed_job_worker_pool"
gem "ripcord"
gem "influxdb-client"
# gem 'cli-ui'
# gem "paper_trail"
# gem "paper_trail-hashdiff"

View File

@@ -121,6 +121,7 @@ GEM
sinatra (>= 1.4.4)
delayed_job_worker_pool (1.0.0)
delayed_job (>= 3.0, < 4.2)
diff-lcs (1.5.0)
diffy (3.4.2)
domain_name (0.5.20190701)
unf (>= 0.0.5, < 1.0.0)
@@ -135,6 +136,7 @@ GEM
importmap-rails (1.1.5)
actionpack (>= 6.0.0)
railties (>= 6.0.0)
influxdb-client (2.9.0)
io-console (0.6.0)
irb (1.6.2)
reline (>= 0.3.0)
@@ -239,6 +241,23 @@ GEM
io-console (~> 0.5)
rexml (3.2.5)
ripcord (2.0.0)
rspec-core (3.12.1)
rspec-support (~> 3.12.0)
rspec-expectations (3.12.2)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.12.0)
rspec-mocks (3.12.3)
diff-lcs (>= 1.2.0, < 2.0)
rspec-support (~> 3.12.0)
rspec-rails (6.0.1)
actionpack (>= 6.1)
activesupport (>= 6.1)
railties (>= 6.1)
rspec-core (~> 3.11)
rspec-expectations (~> 3.11)
rspec-mocks (~> 3.11)
rspec-support (~> 3.11)
rspec-support (3.12.0)
ruby-prof (1.4.5)
ruby-prof-speedscope (0.3.0)
ruby-prof (~> 1.0)
@@ -314,6 +333,7 @@ DEPENDENCIES
diffy
http-cookie
importmap-rails
influxdb-client
jbuilder
kaminari
listen
@@ -326,6 +346,7 @@ DEPENDENCIES
rails (~> 7.0.4, >= 7.0.4.2)
rb-bsdiff!
ripcord
rspec-rails
ruby-prof
ruby-prof-speedscope
selenium-webdriver

View File

@@ -9,7 +9,7 @@ $LOAD_PATH << Rails.root.join("rake")
Rake.application.rake_require "sst"
Rake.application.rake_require "log_entry"
Rake.application.rake_require "worker"
Rake.application.rake_require "metrics"
Rake.application.rake_require "fa"
Rake.application.rake_require "e621"

View File

@@ -11,6 +11,11 @@ class Domain::Fa::Job::FaJobBase < Scraper::JobBase
end
end
def self.reset_http_client
@@fa_base_http_client.close!
@@fa_base_http_client = nil
end
protected
def find_or_intitialize_user_from_args(args)

View File

@@ -18,7 +18,7 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::FaJobBase
return
end
logger_prefix! "[fa_id #{@post.fa_id.to_s.bold} / #{@post.state.bold}]"
logger.prefix = "[fa_id #{@post.fa_id.to_s.bold} / #{@post.state.bold}]"
if @post.state == "removed" && @post.file_uri.nil?
logger.error "removed and has no file, skipping"
@@ -36,12 +36,20 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::FaJobBase
return
end
if @post.file_uri.host == "d9.facdn.net"
logger.warn("host is d9.facdn.net, which will not resolve")
@post.state = :file_error
@post.state_detail = { file_error: "d9.facdn.net is unresolvable" }
@post.save!
return
file_uri_host = @post.file_uri&.host
if file_uri_host
is_unresolvable_host = false
is_unresolvable_host ||= file_uri_host == "d9.facdn.net"
uri_tld = file_uri_host.split(".").last
is_unresolvable_host ||= uri_tld.length >= 6 && file_uri_host.start_with?("d.facdn.net")
if is_unresolvable_host
logger.error("host is #{file_uri_host}, which will not resolve")
@post.state = :file_error
@post.state_detail = { file_error: "#{file_uri_host} is unresolvable" }
@post.save!
return
end
end
if @post.state == "file_error" && !@force_scan

View File

@@ -8,7 +8,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::FaJobBase
end
@caused_by_entry = args[:caused_by_entry]
@force_scan = !!args[:force_scan]
logger_prefix! "[fa_id #{@post.fa_id.to_s.bold} / #{@post.state.bold}]"
logger.prefix = "[fa_id #{@post.fa_id.to_s.bold} / #{@post.state.bold}]"
logger.info "start scan, triggered by #{@caused_by_entry&.id}"
@@ -74,7 +74,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::FaJobBase
@post.title = submission.title
@post.creator = Domain::Fa::User.find_or_build_from_submission_parser(submission)
@post.category = submission.category
@post.description = submission.description_html
@post.description = submission.description_html.encode("UTF-8", :invalid => :replace, :undef => :replace)
@post.keywords = submission.keywords_array
@post.file_uri = submission.full_res_img
@post.theme = submission.theme

View File

@@ -7,7 +7,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::FaJobBase
def perform(args)
@force_scan = !!args[:force_scan]
@user = find_or_intitialize_user_from_args(args)
logger_prefix! "[user #{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
logger.prefix = "[user #{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
if @user.state != "ok" && @user.scanned_gallery_at
logger.warn("state == #{@user.state} and already scanned, skipping")

View File

@@ -6,7 +6,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::FaJobBase
@user = find_or_intitialize_user_from_args(args)
@caused_by_entry = args[:caused_by_entry]
@force_scan = !!args[:force_scan]
logger_prefix! "[#{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
logger.prefix = "[#{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
# buggy (sentinel) user
return if @user.id == 117552 && @user.url_name == "click here"

View File

@@ -1,5 +1,5 @@
class Scraper::JobBase < ApplicationJob
attr_reader :logger_prefix
include HasColorLogger
def self.ignore_signature_args(args = nil)
@ignore_signature_args ||= []
@@ -9,16 +9,43 @@ class Scraper::JobBase < ApplicationJob
def initialize(http_client = nil)
@http_client = http_client || self.class.build_http_client
@logger_prefix = ""
super
end
def logger
@logger ||= ColorLogger.make($stdout, self)
def write_point(name, tags: {}, fields: {})
Metrics::Reporter.singleton.write_point(
name,
tags: tags.merge({
class_name: self.class.name,
}),
fields: fields,
)
end
def logger_prefix!(prefix)
@logger_prefix = prefix
around_perform do |job, block|
error = nil
start = Time.now
block.call
rescue Net::ReadTimeout => e
logger.error "Net::ReadTimeout - resetting http client"
error = e
job.class.reset_http_client
raise
rescue Exception => e
error = e
raise
ensure
duration_ms = (Time.now - start) * 1000
job.write_point(
"delayed_job_performed",
tags: {
success: error.nil?,
error_class: error&.class&.name,
},
fields: {
duration_ms: duration_ms,
},
)
end
def enqueue_job(job_class, args, set_args = {})
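
A sketch (not part of the diff) of how a job subclass can use the new write_point helper; the job class, queue tag, and field values below are hypothetical:

class Domain::Fa::Job::ExampleMetricJob < Scraper::JobBase
  def perform(args)
    # write_point merges class_name into the tags before forwarding the point to
    # Metrics::Reporter.singleton; the around_perform hook above additionally
    # emits a "delayed_job_performed" point with duration_ms for every run.
    write_point(
      "example_job_progress",
      tags: { queue: "manual" },
      fields: { items_processed: 42 },
    )
  end
end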

View File

@@ -15,17 +15,22 @@ class ColorLogger
klass_name = "Fa::#{klass_name}"
end
klass_name = "[#{klass_name.light_blue}]".ljust(32)
Logger.new(sink).tap do |logger|
logger.formatter = proc do |severity, datetime, progname, msg|
prefix = if instance.respond_to?(:logger_prefix)
instance.logger_prefix || ""
else
""
end.strip
prefix = ""
[klass_name, prefix, msg].reject(&:blank?).join(" ") + "\n"
def logger.prefix=(p)
prefix = p
end
logger.formatter = proc do |severity, datetime, progname, msg|
color = case severity
when "ERROR" then :red
when "WARN" then :yellow
else :light_blue
end
klass_name_str = "[#{klass_name.send(color)}]".ljust(32)
[klass_name_str, prefix, msg].reject(&:blank?).join(" ") + "\n"
end
end
end

View File

@@ -0,0 +1,11 @@
require "active_support/concern"
module HasColorLogger
extend ActiveSupport::Concern
included do
def logger
@logger ||= ColorLogger.make($stdout, self)
end
end
end
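
A minimal usage sketch of the new concern; the class name and prefix here are illustrative only:

class Scraper::ExampleClient
  include HasColorLogger

  def fetch
    # `logger` is memoized per instance via ColorLogger.make($stdout, self);
    # the prefix= setter is added in the ColorLogger diff above and replaces
    # the old logger_prefix! mechanism.
    logger.prefix = "[example]"
    logger.info "fetching"   # prints roughly: [ExampleClient] [example] fetching
  end
end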

View File

@@ -1,88 +0,0 @@
require "csv"
class LegacyImport::E621CsvImporter < LegacyImport::BulkImportJob
def initialize(
posts_csv_path:,
batch_size:,
forks:,
start_at:
)
@posts_csv_file = CSV.new(File.open(posts_csv_path, "r+"), headers: true)
@forks = forks || 16
@batch_size = batch_size || @forks * 32
@start_at = start_at || 0
@start_time = Time.now
logger.info "forks=#{@forks} batch_size=#{@batch_size} start_at=#{@start_at}"
end
def name
"e621_csv_post_importer"
end
def profile?
false
end
def run
start_profiling!
progress = 0
query = ::Legacy::E621::Post.includes(:blob_entry, :description_ref)
query.find_in_batches(start: @start_at, batch_size: @batch_size) do |batch|
last_id = batch.last&.id
if @forks <= 1
progress += import_e621_posts(batch)
else
progress += ForkFuture.parallel_map_slice(@forks, batch) do |fork_batch|
import_e621_posts(fork_batch)
end.sum
end
rate = progress.to_f / (Time.now - @start_time)
puts "finish batch, last id #{last_id} - #{progress} - #{rate.round(1)} / second"
write_progress last_id
end
stop_profiling!
end
private
def import_e621_posts(legacy_posts)
progress = 0
skip_posts_ids = Set.new(
::Domain::E621::Post.select(:e621_id).
where(e621_id: legacy_posts.map(&:e621_id)).
pluck(:e621_id)
)
legacy_posts.reject! do |legacy_post|
skip_posts_ids.include?(legacy_post.e621_id)
end
legacy_posts.each do |legacy_post|
retries = 0
begin
ReduxApplicationRecord.transaction do
post = ::Domain::E621::Post.find_or_build_from_legacy(legacy_post)
unless post.valid?
raise("error building post #{post.id} / #{post.e621_id}: #{post.errors.full_messages}")
end
post.save!
progress += 1
end
rescue
retries += 1
sleep 0.1 and retry if retries < 3
raise
end
end
ReduxApplicationRecord.clear_active_connections!
LegacyApplicationRecord.clear_active_connections!
progress
end
end

View File

@@ -0,0 +1,87 @@
require "csv"
class LegacyImport::E621CsvPostImporter < LegacyImport::BulkImportJob
def initialize(
csv_path:,
batch_size:,
forks:,
start_at:
)
@csv_file = CSV.new(File.open(csv_path, "r+"), headers: true)
@forks = forks || 16
@batch_size = batch_size || @forks * 32
@start_at = start_at || 0
@start_time = Time.now
logger.info "forks=#{@forks} batch_size=#{@batch_size} start_at=#{@start_at}"
end
def name
"e621_csv_post_importer"
end
def profile?
false
end
def run_impl
progress = 0
while row = @csv_file.shift&.to_h
e621_id = row["id"].to_i
if @start_at && e621_id < @start_at
next
end
progress += import_e621_post(row)
rate = progress.to_f / (Time.now - @start_time)
puts "finish batch, last id #{e621_id} - #{progress} - #{rate.round(1)} / second"
write_last_id e621_id
end
progress
end
private
def import_e621_post(row)
e621_id = row["id"].to_i
post = Domain::E621::Post.find_by(e621_id: e621_id)
md5 = row["md5"]
if post
unless post.md5 == md5
post.file = nil
post.md5 = md5
end
else
post = Domain::E621::Post.new({
e621_id: e621_id,
md5: md5,
})
end
file_ext = row["file_ext"]
post.file_url_str = "https://static1.e621.net/data/#{md5[0...2]}/#{md5[2...4]}/#{md5}.#{file_ext}"
post.sources_array = row["source"].split("\n")
post.rating = row["rating"]
post.tags_array = row["tag_string"].split(" ").sort
post.num_favorites = row["fav_count"].to_i
post.num_comments = row["comment_count"].to_i
post.description = row["description"]
post.score = row["score"].to_i
post.score_up = row["up_score"].to_i
post.score_down = row["down_score"].to_i
post.parent_e621_id = row["parent_id"]&.to_i
flags_array = []
flags_array << "deleted" if row["is_deleted"] == "t"
flags_array << "pending" if row["is_pending"] == "t"
flags_array << "flagged" if row["is_flagged"] == "t"
post.flags_array = flags_array
if post.changed?
post.save!
return 1
else
return 0
end
end
end
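
For clarity, a worked example of the file_url_str template above with a hypothetical md5 and extension:

md5 = "0a1b2c3d4e5f60718293a4b5c6d7e8f9"   # hypothetical value
file_ext = "jpg"
# md5[0...2] => "0a", md5[2...4] => "1b", so the URL becomes:
# https://static1.e621.net/data/0a/1b/0a1b2c3d4e5f60718293a4b5c6d7e8f9.jpg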

View File

@@ -62,7 +62,6 @@ class LegacyImport::E621LegacyPostImporter < LegacyImport::BulkImportJob
unless post.valid?
raise("error building post #{post.id} / #{post.e621_id}: #{post.errors.full_messages}")
end
# binding.pry
post.save!
progress += 1
end

View File

@@ -0,0 +1,32 @@
class Metrics::DelayedJobReporter < Metrics::Reporter
def report_impl
total = Delayed::Job.count
by_queue = Delayed::Job.group("queue").count
num_working = Delayed::Job.where("locked_at is not null").count
num_failed = Delayed::Job.where("last_error is not null").count
num_pending = total - num_working - num_failed
by_status = {
"working" => num_working,
"failed" => num_failed,
"pending" => num_pending,
}
write_point(
"delayed_jobs",
tags: { aggregation: "total" },
fields: { "total" => total },
)
write_point(
"delayed_jobs",
tags: { aggregation: "queue" },
fields: by_queue,
)
write_point(
"delayed_jobs",
tags: { aggregation: "status" },
fields: by_status,
)
end
end

View File

@@ -0,0 +1,24 @@
class Metrics::EstimateDbRowsReporter < Metrics::Reporter
def report_impl
extra_tables = [
"http_log_entries",
"http_log_entry_headers",
"versions",
"delayed_jobs",
]
extra_tables_sql = extra_tables.map { |t| "'#{t}'" }.join(",")
row_estimates = ReduxApplicationRecord.connection.exec_query(
[
"SELECT relname, n_live_tup",
"FROM pg_stat_all_tables",
"WHERE relname IN (#{extra_tables_sql})",
"OR relname like 'domain_%'",
].join(" ")
).rows.to_h
write_point(
"estimate_db_rows",
fields: row_estimates,
)
end
end

View File

@@ -0,0 +1,78 @@
class Metrics::Reporter
attr_reader :logger
def self.singleton
@singleton ||= Metrics::Reporter.new
end
def self.singleton=(instance)
@singleton = instance
end
def initialize(host: nil, token: nil, org: nil, bucket: nil, default_tags: {})
host ||= Rails.application.config.x.influxdb.host || raise("no host")
token ||= Rails.application.config.x.influxdb.token || raise("no token")
org ||= Rails.application.config.x.influxdb.org || raise("no org")
bucket ||= Rails.application.config.x.influxdb.bucket || raise("no bucket")
@logger = ColorLogger.make($stdout, self)
@client = InfluxDB2::Client.new(
host, token,
org: org,
bucket: bucket,
use_ssl: false,
precision: InfluxDB2::WritePrecision::MILLISECOND,
)
write_options = InfluxDB2::WriteOptions.new(
write_type: InfluxDB2::WriteType::BATCHING,
batch_size: 100, flush_interval: 5_000,
max_retries: 3, max_retry_delay: 15_000,
exponential_base: 2,
)
point_settings = InfluxDB2::PointSettings.new(default_tags: default_tags)
@writer = @client.create_write_api(
write_options: write_options,
point_settings: point_settings,
)
end
def report
start_at = Time.now
success = true
begin
self.report_impl
rescue Exception => e
success = false
ex = e
end
duration = Time.now - start_at
write_point(
"metrics_reporter",
tags: { class_name: self.class.name },
fields: { success: success, duration: duration },
)
close!
color = success ? :light_blue : :red
if success
logger.info "reporter finished (#{duration.round(2)} sec)"
else
logger.error "reporter failed (#{duration.round(2)} sec) - #{ex.message}"
end
raise ex unless success
end
def close!
@client.close!
end
def write_point(name, tags: {}, fields: {})
@writer.write(data: { name: name, tags: tags, fields: fields })
end
def report_impl
raise NotImplementedError, "need to implement #report on #{self.class.name}"
end
end
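
A usage sketch, assuming config/influxdb.yml below has been loaded into config.x.influxdb; the subclass and measurement names are made up:

class Metrics::WorkerCountReporter < Metrics::Reporter
  def report_impl
    # subclasses only implement report_impl; #report wraps it with timing,
    # a success/failure "metrics_reporter" point, and client shutdown.
    write_point("worker_count", fields: { "workers" => 3 })
  end
end

# mirrors the tasks in rake/metrics.rake further down
Metrics::WorkerCountReporter.new.report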

View File

@@ -1,12 +1,12 @@
class Scraper::BaseHttpClient
include HasColorLogger
Response = Struct.new(
:status_code,
:body,
:log_entry,
)
attr_accessor :logger
class InvalidURLError < ArgumentError; end
def initialize(http_performer_or_proxy)
@@ -27,8 +27,11 @@ class Scraper::BaseHttpClient
@http_performer = http_performer_or_proxy
end
@domain_last_requested_at = {}
@logger ||= ColorLogger.make($stdout, self)
@logger.level = :info
logger.level = :info
end
def close!
@http_performer.close!
end
def cookies
@@ -53,6 +56,10 @@ class Scraper::BaseHttpClient
get_impl(url, caused_by_entry)
end
def reporter
Metrics::Reporter.singleton
end
private
def get_impl(url, caused_by_entry)
@@ -152,12 +159,12 @@ class Scraper::BaseHttpClient
response_code.to_s.yellow.bold
end
actual_time_ms = ((Time.now - requested_at) * 1000).round(0).to_s
total_time_ms = ((Time.now - requested_at) * 1000).round(0)
logger.info([
"[entry #{log_entry.id.to_s.bold} /",
"GET #{response_code_colorized} /",
"#{HexUtil.humansize(response_blob_entry.bytes_stored).bold} / #{HexUtil.humansize(response_blob_entry.size).bold}]",
"[#{response_time_ms.to_s.bold} ms / #{actual_time_ms.bold} ms]",
"[#{response_time_ms.to_s.bold} ms / #{total_time_ms.to_s.bold} ms]",
uri.to_s.black,
].reject(&:nil?).join(" "))
@@ -166,6 +173,22 @@ class Scraper::BaseHttpClient
sleep 15
end
reporter.write_point(
"http_client_response",
tags: {
method: "GET",
host: uri.host,
status_code: response_code,
performed_by: @http_performer.name,
},
fields: {
response_time_ms: response_time_ms,
total_time_ms: total_time_ms,
content_size: response_blob_entry.size,
content_stored: response_blob_entry.bytes_stored,
},
)
Response.new(
response_code,
response_body,

View File

@@ -9,7 +9,7 @@ class Scraper::CurlHttpPerformer < Scraper::HttpPerformer
def get(url, request_headers)
start_at = Time.now
@curl.url = url
@curl.url = Addressable::URI.encode url
@curl.headers = {}
request_headers.each do |key, value|
@curl.headers[key.to_s] = value
@@ -28,6 +28,10 @@ class Scraper::CurlHttpPerformer < Scraper::HttpPerformer
)
end
def close!
# no-op - nothing to close for the curl performer
end
private
def self.parse_header_str(header_str)

View File

@@ -1,4 +1,6 @@
class Scraper::HttpPerformer
include HasColorLogger
Response = Struct.new(
:response_code,
:response_headers,
@@ -9,4 +11,12 @@ class Scraper::HttpPerformer
def get(url, request_headers)
raise NotImplementedError, "implement in subclass"
end
def close!
raise NotImplementedError, "implement in subclass"
end
def name
raise NotImplementedError, "implement in subclass"
end
end

View File

@@ -1,9 +1,9 @@
require "base64"
class Scraper::ProxyHttpPerformer < Scraper::HttpPerformer
def initialize(proxy)
@name = proxy
proxy_url = Rails.application.config.x.proxies[proxy]
def initialize(proxy_name)
@name = proxy_name
proxy_url = Rails.application.config.x.proxies[proxy_name]
@client = Ripcord::Client.new(proxy_url)
end
@@ -21,4 +21,12 @@ class Scraper::ProxyHttpPerformer < Scraper::HttpPerformer
end
Scraper::HttpPerformer::Response.new(code, headers, time_ms, body)
end
def close!
begin
@client.instance_variable_get("@http_client").finish
rescue IOError => e
Rails.logger.error("Failed to close http client: #{e.inspect}")
end
end
end

View File

@@ -1,5 +1,6 @@
module ImmutableModel
extend ActiveSupport::Concern
included do
def readonly?
!new_record?

View File

@@ -4,7 +4,7 @@ class Domain::E621::Post < ReduxApplicationRecord
# see state_detail for scan_error/file_error
enum state: %i[ok scan_error file_error]
enum rating: %i[safe questionable explicit]
enum rating: %i[s q e]
validates_presence_of(
:e621_id,
@@ -33,8 +33,14 @@ class Domain::E621::Post < ReduxApplicationRecord
optional: true,
autosave: true
belongs_to :parent_e621,
class_name: "Domain::E621::Post",
foreign_key: :e621_id,
optional: true
SKIP_MISMATCH_LEGACY_IDS = Set.new([
836414,
1070178,
])
def self.find_or_build_from_legacy(legacy_model)
@@ -43,7 +49,7 @@ class Domain::E621::Post < ReduxApplicationRecord
model = self.new({
state: :ok,
file_url_str: legacy_model.file_url,
rating: legacy_model.class.ratings[legacy_model.rating],
rating: legacy_model.rating,
sources_array: legacy_model.sources,
tags_array: legacy_model.tags.map(&:value),
artists_array: legacy_model.artists || [],
@@ -94,18 +100,28 @@ class Domain::E621::Post < ReduxApplicationRecord
http_log_entry.response ||= ::BlobEntry.find_or_build_from_legacy(legacy_model.blob_entry)
blob_entry = http_log_entry.response
http_log_entry.content_type ||= blob_entry.content_type
raise("#{http_log_entry.content_type} != #{blob_entry.content_type}") unless blob_entry.content_type.start_with?(http_log_entry.content_type)
if blob_entry && http_log_entry
http_log_entry.content_type ||= blob_entry.content_type
else
# unable to construct http & blob entries, skip
File.write(
Rails.root.join("tmp/e621_legacy_post_importer_failures"),
"#{model.e621_id} - (no hle) - unable to reconstruct http / blob entry\n"
)
http_log_entry = nil
end
end
if http_log_entry
blob_entry = http_log_entry.response
if model.md5 != Digest::MD5.hexdigest(blob_entry.contents)
if http_log_entry.status_code != 404
raise("#{model.md5} != #{Digest::MD5.hexdigest(blob_entry.contents)}")
end
File.write(
Rails.root.join("tmp/e621_legacy_post_importer_failures"),
"#{model.e621_id} - #{http_log_entry.status_code} - expected #{model.md5} != actual #{Digest::MD5.hexdigest(blob_entry.contents)}\n"
)
http_log_entry = nil
end
end

View File

@@ -27,5 +27,7 @@ module ReduxScraper
"proxy-1" => "http://proxy-1.local:9292",
"dedipath-1" => "http://10.200.0.6:9292",
}
config.x.influxdb = ReduxScraper::Application.config_for("influxdb")
end
end

View File

@@ -31,6 +31,24 @@ vipvillageworker: &vipvillageworker
value: 1905x976
path: /
blazeandwish: &blazeandwish
- domain: .furaffinity.net
cookies:
- name: a
value: 343f3180-1ba4-4e20-af45-b0ce39201a78
- name: b
value: 05784dcd-1fe7-482a-8191-4470e68b776a
- domain: rv.furaffinity.net
cookies:
- name: OAID
value: a0196c5a7bdc3fa4865b86dab9be9ce4
path: /
- domain: www.furaffinity.net
cookies:
- name: sz
value: 1600x944
path: /
testcookies: &testcookies
- domain: .furaffinity.net
cookies:
@@ -49,14 +67,14 @@ testcookies: &testcookies
development:
direct: *ddwhatnow
proxy-1: *vipvillageworker
dedipath-1: []
dedipath-1: *blazeandwish
production:
direct: *ddwhatnow
proxy-1: *vipvillageworker
dedipath-1: []
dedipath-1: *blazeandwish
test:
direct: *testcookies
proxy-1: *testcookies
dedipath-1: []
dedipath-1: *testcookies

config/influxdb.yml (new file, +17 lines)
View File

@@ -0,0 +1,17 @@
development:
host: "http://grafana.local:8086"
token: "W2ikhmpQxQHZAStgSEK6s6aJxnOqeD4Zz2MI1m_lnD1JX57V9Esqm0zXb3DWbN7Gnj2GdmF_YrcvE8cy6NbIqQ=="
org: "primary"
bucket: "redux-scraper"
production:
host: "http://grafana.local:8086"
token: "W2ikhmpQxQHZAStgSEK6s6aJxnOqeD4Zz2MI1m_lnD1JX57V9Esqm0zXb3DWbN7Gnj2GdmF_YrcvE8cy6NbIqQ=="
org: "primary"
bucket: "redux-scraper"
test:
host: "http://grafana.local:8086"
token: "P0hzfg-vKiBTmpE-wPMJKkU8wj8VjUN1_OMxNVFhKRqxSdq3msQZ0ZWpVCT9MPAzsvoR40Caaxd2kq_LEIJpxQ=="
org: "primary"
bucket: "redux-scraper-test"

pool_combined.rb (new file, +52 lines)
View File

@@ -0,0 +1,52 @@
# This runs in the master process after it preloads the app
after_preload_app do
puts "Master #{Process.pid} preloaded app"
# Don't hang on to database connections from the master after we've
# completed initialization
ActiveRecord::Base.connection_pool.disconnect!
end
# This runs in the worker processes after they have been forked
on_worker_boot do |worker_info|
ActiveJob::Base.logger = Logger.new("/dev/null")
Delayed::Worker.logger = Logger.new(STDOUT)
Delayed::Worker.logger.level = :error
Rails.logger = Logger.new(STDOUT)
puts "Worker #{Process.pid} started"
sleep rand(1..5)
# Reconnect to the database
ActiveRecord::Base.establish_connection
end
# This runs in the master process after a worker starts
after_worker_boot do |worker_info|
puts "Master #{Process.pid} booted worker #{worker_info.name} with " \
"process id #{worker_info.process_id}"
end
# This runs in the master process after a worker shuts down
after_worker_shutdown do |worker_info|
puts "Master #{Process.pid} detected dead worker #{worker_info.name} " \
"with process id #{worker_info.process_id}"
end
{
default: 1,
fa_user_page: 1,
fa_user_gallery: 1,
fa_post: 3,
manual: 4,
static_file: 6,
}.each do |queue, workers|
worker_group(queue) do |g|
g.read_ahead = 8
g.sleep_delay = 5
g.workers = workers
g.queues = [queue.to_s]
end
end
preload_app

View File

@@ -15,10 +15,13 @@ namespace :e621 do
batch_size = ENV["batch_size"]&.to_i
forks = ENV["forks"]&.to_i
start_at = ENV["start_at"]&.to_i
posts_csv_path = ENV["posts_csv_path"] || raise("must supply `posts_csv_path`")
csv_path = ENV["csv"] || raise("must supply `csv`")
LegacyImport::E621CsvPostImporter.
new(batch_size: batch_size, forks: forks, start_at: start_at).
run
LegacyImport::E621CsvPostImporter.new(
csv_path: csv_path,
batch_size: batch_size,
forks: forks,
start_at: start_at,
).run
end
end

View File

@@ -115,106 +115,4 @@ namespace :fa do
})
end
end
# treat this like a bomb
# desc "delete / undo changes made by buggy proxy responses"
# task :fix_buggy_posts => [:set_logger_stdout, :environment] do
# raise "Already ran, would destroy everything if it ran again"
# # log_entries = HttpLogEntry.where("id > ?", 54040076).to_a
# earliest_created_at = log_entries.map(&:created_at).min
# puts "earliest: #{earliest_created_at}"
# users_to_revert = Set.new
# posts_to_revert = Set.new
# files_to_revert = Set.new
# log_entries.each do |log_entry|
# uri = log_entry.uri
# # puts "[id #{log_entry.id} / uri #{uri}]"
# case uri.host
# when "ipinfo.io"
# puts "ipinfo, do nothing"
# when "d.furaffinity.net"
# # puts "fix static file #{uri.path.bold}"
# files_to_revert.add(uri.to_s)
# post = Domain::Fa::Post.find_by(file_id: log_entry.id)
# posts_to_revert.add(post) if post
# when "www.furaffinity.net"
# if uri.path.start_with?("/user/")
# url_name = uri.path.split("/")[2]
# user = Domain::Fa::User.find_by(url_name: url_name) || raise
# # puts "fix fa user #{url_name.bold}"
# users_to_revert.add(user)
# elsif uri.path.start_with?("/view/")
# fa_id = uri.path.split("/")[2]
# post = Domain::Fa::Post.find_by(fa_id: fa_id) || raise
# # puts "fix fa post #{fa_id.to_s.bold}"
# posts_to_revert.add(post)
# elsif uri.path.start_with?("/gallery/") || uri.path.start_with?("/scraps/")
# url_name = uri.path.split("/")[2]
# user = Domain::Fa::User.find_by(url_name: url_name) || raise
# # puts "fix fa user gallery #{url_name.bold}"
# users_to_revert.add(user)
# elsif uri.path == "/"
# next
# else
# raise("unsupported path: #{uri.path}")
# end
# else raise("unsupported host: #{uri.host}")
# end
# end
# puts "entries: #{log_entries.size}"
# puts "users: #{users_to_revert.size}"
# puts "posts: #{posts_to_revert.size}"
# puts "files: #{files_to_revert.size}"
# # puts "----- users -----"
# # users_to_revert.each do |user|
# # puts user.to_json
# # end
# # puts "----- posts -----"
# # posts_to_revert.each do |post|
# # puts post.to_json
# # end
# reset_detail = ({ :info => "reverted on #{Time.now}" })
# job_params = ({ :queue => "manual", :priority => -20 })
# ReduxApplicationRecord.transaction do
# users_to_revert.each do |user|
# user.state = :ok
# user.state_detail = reset_detail
# user.log_entry_detail = reset_detail
# user.scanned_page_at = nil
# user.scanned_gallery_at = nil
# user.save!
# Domain::Fa::Job::UserGalleryJob.set(job_params).perform_later({
# user: user,
# })
# Domain::Fa::Job::UserPageJob.set(job_params).perform_later({
# user: user,
# })
# end
# posts_to_revert.each do |post|
# post.state = :ok
# post.state_detail = reset_detail
# post.log_entry_detail = reset_detail
# post.file = nil
# post.file_url_str = nil
# post.save!
# Domain::Fa::Job::ScanPostJob.set(job_params).perform_later({
# post: post,
# })
# end
# log_entries.reverse.each(&:destroy!)
# end
# end
end

rake/metrics.rake (new file, +37 lines)
View File

@@ -0,0 +1,37 @@
namespace :metrics do
desc "run reporters periodically"
task :report_all => [:environment, :set_logger_stdout] do
schedule = {
Rake::Task["metrics:delayed_job"] => 10.seconds,
Rake::Task["metrics:estimate_db_rows"] => 10.seconds,
}
last_ran = {}
while true
schedule.each do |task, run_every|
task_name = task.name
if last_ran[task_name].nil? || last_ran[task_name] < run_every.ago
begin
task.execute
rescue NotImplementedError, StandardError => e
Rails.logger.error("error invoking #{task_name}: #{e.message.red}")
end
last_ran[task_name] = Time.now
end
end
sleep_amt = schedule.values.min
puts "sleeping #{sleep_amt}"
sleep sleep_amt
end
end
desc "Report DelayedJob queue metrics"
task :delayed_job => :environment do
Metrics::DelayedJobReporter.new.report
end
desc "Report estimated db row metrics"
task :estimate_db_rows => :environment do
Metrics::EstimateDbRowsReporter.new.report
end
end

View File

@@ -1,6 +1,8 @@
class Domain::Fa::Scraper::HttpClientTest < ActiveSupport::TestCase
test "creates an http log entry" do
client = Domain::Fa::Scraper::HttpClient.new(TestUtil.mock_http_performer(
require "rails_helper"
describe Domain::Fa::Scraper::HttpClient do
it "creates an http log entry" do
client = Domain::Fa::Scraper::HttpClient.new(SpecUtil.mock_http_performer(
"https://www.furaffinity.net/",
request_headers: Hash,
response_code: 200,

View File

@@ -1,4 +1,6 @@
class Scraper::BaseHttpClientTest < ActiveSupport::TestCase
require "rails_helper"
describe Scraper::BaseHttpClient do
class TestHttpClient < Scraper::BaseHttpClient
def cookies
[]
@@ -9,21 +11,21 @@ class Scraper::BaseHttpClientTest < ActiveSupport::TestCase
end
end
test "throws on unallowed domain" do
client = TestHttpClient.new(TestUtil.mock_http_performer(""))
it "throws on unallowed domain" do
client = TestHttpClient.new(SpecUtil.mock_http_performer(""))
assert_raises(Scraper::BaseHttpClient::InvalidURLError) do
client.get("https://foobar.com")
end
end
test "creates an http log entry" do
client = TestHttpClient.new(TestUtil.mock_http_performer(
it "creates an http log entry" do
client = TestHttpClient.new(SpecUtil.mock_http_performer(
"https://www.example.com/",
request_headers: { "cookie" => "" },
response_code: 200,
response_time_ms: 15,
response_headers: { "content-type" => "text/plain" },
response_body: "the response " + TestUtil.random_string(16),
response_body: "the response " + SpecUtil.random_string(16),
))
client.logger.level = :error

View File

@@ -0,0 +1,35 @@
describe Scraper::CurlHttpPerformer do
it "can parse header string" do
headers = Scraper::CurlHttpPerformer.parse_header_str(
"HTTP/2 200 \r\n" +
"date: Mon, 20 Feb 2023 00:57:22 GMT\r\n" +
"content-type: text/html; charset=UTF-8\r\n" +
"cache-control: no-cache\r\n" +
"expires: Thu, 01 Jan 1970 00:00:01 GMT\r\n" +
"referrer-policy: strict-origin-when-cross-origin\r\n" +
"x-frame-options: SAMEORIGIN\r\n" +
"content-security-policy: frame-ancestors 'self';\r\n" +
"fa-upstream: mainsite-01\r\n" +
"content-encoding: gzip\r\n" +
"cf-cache-status: DYNAMIC\r\n" +
"server: cloudflare\r\n" +
"cf-ray: 79c349abd918abd2-CPH\r\n" +
"\r\n"
)
assert_equal ["date", "Mon, 20 Feb 2023 00:57:22 GMT"], headers[0]
assert_equal ["content-encoding", "gzip"], headers[8]
end
it "correctly wraps curl" do
body_str = "the response body " + SpecUtil.random_string(16)
client = Scraper::CurlHttpPerformer.new(
SpecUtil.mock_curl_easy("https://www.example.com/", body_str: body_str)
)
response = client.get("https://www.example.com/", {})
assert_equal 200, response.response_code
assert_equal ({ "content-type" => "text/plain", "resp" => "respheader" }),
response.response_headers
assert_equal 0, response.response_time_ms
end
end

spec/rails_helper.rb (new file, +65 lines)
View File

@@ -0,0 +1,65 @@
# This file is copied to spec/ when you run 'rails generate rspec:install'
require "spec_helper"
ENV["RAILS_ENV"] ||= "test"
require_relative "../config/environment"
# Prevent database truncation if the environment is production
abort("The Rails environment is running in production mode!") if Rails.env.production?
require "rspec/rails"
# Add additional requires below this line. Rails is not loaded until this point!
# Requires supporting ruby files with custom matchers and macros, etc, in
# spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are
# run as spec files by default. This means that files in spec/support that end
# in _spec.rb will both be required and run as specs, causing the specs to be
# run twice. It is recommended that you do not name files matching this glob to
# end with _spec.rb. You can configure this pattern with the --pattern
# option on the command line or in ~/.rspec, .rspec or `.rspec-local`.
#
# The following line is provided for convenience purposes. It has the downside
# of increasing the boot-up time by auto-requiring all files in the support
# directory. Alternatively, in the individual `*_spec.rb` files, manually
# require only the support files necessary.
#
# Dir[Rails.root.join('spec', 'support', '**', '*.rb')].sort.each { |f| require f }
# Checks for pending migrations and applies them before tests are run.
# If you are not using ActiveRecord, you can remove these lines.
begin
ActiveRecord::Migration.maintain_test_schema!
rescue ActiveRecord::PendingMigrationError => e
abort e.to_s.strip
end
RSpec.configure do |config|
# Remove this line if you're not using ActiveRecord or ActiveRecord fixtures
config.fixture_path = "#{::Rails.root}/spec/fixtures"
# If you're not using ActiveRecord, or you'd prefer not to run each of your
# examples within a transaction, remove the following line or assign false
# instead of true.
config.use_transactional_fixtures = true
# You can uncomment this line to turn off ActiveRecord support entirely.
# config.use_active_record = false
# RSpec Rails can automatically mix in different behaviours to your tests
# based on their file location, for example enabling you to call `get` and
# `post` in specs under `spec/controllers`.
#
# You can disable this behaviour by removing the line below, and instead
# explicitly tag your specs with their type, e.g.:
#
# RSpec.describe UsersController, type: :controller do
# # ...
# end
#
# The different available types are documented in the features, such as in
# https://relishapp.com/rspec/rspec-rails/docs
config.infer_spec_type_from_file_location!
# Filter lines from Rails gems in backtraces.
config.filter_rails_from_backtrace!
# arbitrary gems may also be filtered via:
# config.filter_gems_from_backtrace("gem name")
end
require "spec_util"

spec/spec_helper.rb (new file, +94 lines)
View File

@@ -0,0 +1,94 @@
# This file was generated by the `rails generate rspec:install` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause
# this file to always be loaded, without a need to explicitly require it in any
# files.
#
# Given that it is always loaded, you are encouraged to keep this file as
# light-weight as possible. Requiring heavyweight dependencies from this file
# will add to the boot time of your test suite on EVERY test run, even for an
# individual file that may not need all of that loaded. Instead, consider making
# a separate helper file that requires the additional dependencies and performs
# the additional setup, and require it from the spec files that actually need
# it.
#
# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
# rspec-expectations config goes here. You can use an alternate
# assertion/expectation library such as wrong or the stdlib/minitest
# assertions if you prefer.
config.expect_with :rspec do |expectations|
# This option will default to `true` in RSpec 4. It makes the `description`
# and `failure_message` of custom matchers include text for helper methods
# defined using `chain`, e.g.:
# be_bigger_than(2).and_smaller_than(4).description
# # => "be bigger than 2 and smaller than 4"
# ...rather than:
# # => "be bigger than 2"
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
end
# rspec-mocks config goes here. You can use an alternate test double
# library (such as bogus or mocha) by changing the `mock_with` option here.
config.mock_with :rspec do |mocks|
# Prevents you from mocking or stubbing a method that does not exist on
# a real object. This is generally recommended, and will default to
# `true` in RSpec 4.
mocks.verify_partial_doubles = true
end
# This option will default to `:apply_to_host_groups` in RSpec 4 (and will
# have no way to turn it off -- the option exists only for backwards
# compatibility in RSpec 3). It causes shared context metadata to be
# inherited by the metadata hash of host groups and examples, rather than
# triggering implicit auto-inclusion in groups with matching metadata.
config.shared_context_metadata_behavior = :apply_to_host_groups
# The settings below are suggested to provide a good initial experience
# with RSpec, but feel free to customize to your heart's content.
=begin
# This allows you to limit a spec run to individual examples or groups
# you care about by tagging them with `:focus` metadata. When nothing
# is tagged with `:focus`, all examples get run. RSpec also provides
# aliases for `it`, `describe`, and `context` that include `:focus`
# metadata: `fit`, `fdescribe` and `fcontext`, respectively.
config.filter_run_when_matching :focus
# Allows RSpec to persist some state between runs in order to support
# the `--only-failures` and `--next-failure` CLI options. We recommend
# you configure your source control system to ignore this file.
config.example_status_persistence_file_path = "spec/examples.txt"
# Limits the available syntax to the non-monkey patched syntax that is
# recommended. For more details, see:
# https://relishapp.com/rspec/rspec-core/docs/configuration/zero-monkey-patching-mode
config.disable_monkey_patching!
# Many RSpec users commonly either run the entire suite or an individual
# file, and it's useful to allow more verbose output when running an
# individual spec file.
if config.files_to_run.one?
# Use the documentation formatter for detailed output,
# unless a formatter has already been configured
# (e.g. via a command-line flag).
config.default_formatter = "doc"
end
# Print the 10 slowest examples and example groups at the
# end of the spec run, to help surface which specs are running
# particularly slow.
config.profile_examples = 10
# Run specs in random order to surface order dependencies. If you find an
# order dependency and want to debug it, you can fix the order by providing
# the seed, which is printed after each run.
# --seed 1234
config.order = :random
# Seed global randomization in this process using the `--seed` CLI option.
# Setting this allows you to use `--seed` to deterministically reproduce
# test failures related to randomization by passing the same `--seed` value
# as the one that triggered the failure.
Kernel.srand config.seed
=end
end

spec/spec_util.rb (new file, +52 lines)
View File

@@ -0,0 +1,52 @@
require "rspec/mocks"
class SpecUtil
extend RSpec::Mocks::ExampleMethods
def self.random_string(length)
(0...length).map { (65 + rand(26)).chr }.join
end
def self.mock_http_performer(
expected_url,
request_headers: {},
response_code: 200,
response_time_ms: 15,
response_headers: { "content-type" => "text/plain" },
response_body: "http body"
)
mock = instance_double("Scraper::HttpPerformer")
allow(mock).to receive(:is_a?).with(String).and_return(false)
allow(mock).to receive(:name).and_return("direct")
allow(mock).to receive(:get).
with(expected_url, request_headers).
and_return(Scraper::HttpPerformer::Response.new(
response_code,
response_headers,
response_time_ms,
response_body
))
mock
end
def self.mock_curl_easy(
expected_url,
response_code: 200,
body_str: "http body",
request_headers: {},
response_headers_str: "HTTP 200\r\nresp: respheader\r\ncontent-type: text/plain\r\n\r\n"
)
mock = instance_double("Curl::Easy")
allow(mock).to receive(:url=).with(expected_url)
allow(mock).to receive(:headers=).with(Hash)
allow(mock).to receive(:perform)
allow(mock).to receive(:response_code).and_return(response_code)
allow(mock).to receive(:body_str).and_return(body_str)
allow(mock).to receive(:header_str).and_return(response_headers_str)
allow(mock).to receive(:headers).and_return(request_headers)
mock
end
end

View File

@@ -1,20 +0,0 @@
class Scraper::CurlHttpPerformerTest < ActiveSupport::TestCase
test "can parse header string" do
headers = Scraper::CurlHttpPerformer.parse_header_str("HTTP/2 200 \r\ndate: Mon, 20 Feb 2023 00:57:22 GMT\r\ncontent-type: text/html; charset=UTF-8\r\ncache-control: no-cache\r\nexpires: Thu, 01 Jan 1970 00:00:01 GMT\r\nreferrer-policy: strict-origin-when-cross-origin\r\nx-frame-options: SAMEORIGIN\r\ncontent-security-policy: frame-ancestors 'self';\r\nfa-upstream: mainsite-01\r\ncontent-encoding: gzip\r\ncf-cache-status: DYNAMIC\r\nserver: cloudflare\r\ncf-ray: 79c349abd918abd2-CPH\r\n\r\n")
assert_equal ["date", "Mon, 20 Feb 2023 00:57:22 GMT"], headers[0]
assert_equal ["content-encoding", "gzip"], headers[8]
end
test "correctly wraps curl" do
body_str = "the response body " + TestUtil.random_string(16)
client = Scraper::CurlHttpPerformer.new(
TestUtil.mock_curl_easy("https://www.example.com/", body_str: body_str)
)
response = client.get("https://www.example.com/", {})
assert_equal 200, response.response_code
assert_equal ({ "content-type" => "text/plain", "resp" => "respheader" }),
response.response_headers
assert_equal 0, response.response_time_ms
end
end

View File

@@ -28,7 +28,7 @@ class Domain::E621::PostTest < ActiveSupport::TestCase
assert_equal post.updated_at, updated_rating_version.created_at
}
post.rating = "safe"
post.rating = "s"
assert post.save
check_update.call