Enhance strict typing and refactor API and job classes

- Updated `ApiController` in the FA domain to enforce strict typing with Sorbet, including the addition of type signatures and improved method parameters.
- Refactored `users_for_name` method to accept a limit parameter for better control over user search results.
- Enhanced error handling in the `UserTimelineTweetsJob` to ensure proper logging and response management.
- Updated `GalleryDlClient` to include strict typing and improved method signatures for better clarity and maintainability.
- Refactored Prometheus metrics configuration to improve naming consistency and clarity.

These changes aim to improve type safety, maintainability, and robustness across the application.
This commit is contained in:
Dylan Knutson
2025-01-04 18:41:29 +00:00
parent 4d6c67b5a1
commit f0502f500d
14 changed files with 186 additions and 80 deletions

View File

@@ -1,4 +1,4 @@
# typed: false
# typed: true
class Domain::Fa::ApiController < ApplicationController
skip_before_action :authenticate_user!
before_action :validate_api_token!
@@ -11,7 +11,7 @@ class Domain::Fa::ApiController < ApplicationController
def search_user_names
name = params[:name]
limit = (params[:limit] || 5).to_i.clamp(0, 15)
users = users_for_name(name)
users = users_for_name(name, limit: limit)
if !Rails.env.production? && name == "error"
render status: 500, json: { error: "an error!" }
else
@@ -50,12 +50,16 @@ class Domain::Fa::ApiController < ApplicationController
fa_ids.each do |fa_id|
post = fa_id_to_post[fa_id]
post_response = {
terminal_state: false,
seen_at: time_ago_or_never(post&.created_at),
scanned_at: "never",
downloaded_at: "never",
}
post_response =
T.let(
{
terminal_state: false,
seen_at: time_ago_or_never(post&.created_at),
scanned_at: "never",
downloaded_at: "never",
},
T::Hash[Symbol, T.untyped],
)
if post
post_response[:info_url] = domain_fa_post_url(fa_id: post.fa_id)
@@ -63,7 +67,7 @@ class Domain::Fa::ApiController < ApplicationController
if post.file.present?
post_response[:downloaded_at] = time_ago_or_never(
post.file.created_at,
post.file&.created_at,
)
post_response[:state] = "have_file"
post_response[:terminal_state] = true
@@ -331,7 +335,7 @@ class Domain::Fa::ApiController < ApplicationController
end
end
def users_for_name(name)
def users_for_name(name, limit: 10)
users =
Domain::Fa::User
.where(
@@ -346,7 +350,7 @@ class Domain::Fa::ApiController < ApplicationController
"(SELECT COUNT(*) FROM domain_fa_posts WHERE creator_id = domain_fa_users.id) as num_posts",
)
.order(name: :asc)
.limit(10)
.limit(limit)
users.map do |user|
{
@@ -355,7 +359,9 @@ class Domain::Fa::ApiController < ApplicationController
url_name: user.url_name,
thumb: helpers.fa_user_avatar_path(user, thumb: "64-avatar"),
show_path: domain_fa_user_path(user.url_name),
num_posts: user.num_posts,
# `num_posts` is a manually added column, so we need to use T.unsafe to
# access it
num_posts: T.unsafe(user).num_posts,
}
end
end

View File

@@ -22,26 +22,26 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
if @user.state == "error" &&
@user.tweets_protected_error_proxies.include?(@proxy_name)
fatal_error(
"abort scan, this proxy (#{@proxy_name}) is in error proxies: #{@user.tweets_protected_error_proxies}"
"abort scan, this proxy (#{@proxy_name}) is in error proxies: #{@user.tweets_protected_error_proxies}",
)
end
if !@force_scan && !@user.due_for_timeline_tweets_scan?
logger.warn(
"scanned #{time_ago_in_words(@user.scanned_timeline_at)}, skipping"
"scanned #{time_ago_in_words(@user.scanned_timeline_at)}, skipping",
)
return
end
gallery_dl_client.start_twitter_user(
@name || @user.name,
caused_by_entry: @caused_by_entry
caused_by_entry: @caused_by_entry,
)
while true
event =
gallery_dl_client.next_message(
caused_by_entry: @first_twitter_caused_by || @caused_by_entry
caused_by_entry: @first_twitter_caused_by || @caused_by_entry,
)
fatal_error("nil event from gallery_dl_client") if event.nil?
@@ -68,8 +68,8 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
[
"created #{@num_created_tweets.to_s.bold} tweets",
"scanned #{@num_scanned_tweets.to_s.bold} tweets",
"created #{@num_created_medias.to_s.bold} medias"
].join(", ")
"created #{@num_created_medias.to_s.bold} medias",
].join(", "),
)
@user.scanned_timeline_at = Time.now
@@ -100,7 +100,7 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
def maybe_extract_user_info(http_event)
return unless http_event.response_code == 200
unless http_event.response_headers[:"content-type"].starts_with?(
"application/json"
"application/json",
)
return
end
@@ -146,7 +146,7 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
end
logger.info(
"+ tweet (#{@num_created_tweets.to_s.bold}) #{tweet_hash[:id].to_s.bold}"
"+ tweet (#{@num_created_tweets.to_s.bold}) #{tweet_hash[:id].to_s.bold}",
)
Domain::Twitter::Tweet.new(
@@ -155,8 +155,8 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
author: @user,
content: tweet_hash[:content],
reply_to_tweet_id: tweet_hash[:reply_to],
tweeted_at: Time.at(tweet_hash[:date])
}
tweeted_at: Time.at(tweet_hash[:date]),
},
).save!
@num_created_tweets += 1
@@ -171,7 +171,7 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
end
logger.info(
"+ media (#{@num_created_medias.to_s.bold}) #{media_event.filename.bold}"
"+ media (#{@num_created_medias.to_s.bold}) #{media_event.filename.bold}",
)
media =
@@ -179,8 +179,8 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
{
id: media_event.filename,
tweet_id: media_event.tweet_id,
url_str: media_event.file_url
}
url_str: media_event.file_url,
},
)
media.save!
@num_created_medias += 1
@@ -191,7 +191,7 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
defer_job(
Domain::Twitter::Job::MediaJob,
{ media: media || raise, caused_by_entry: @first_twitter_caused_by },
{ priority: self.priority }
{ priority: self.priority },
)
end

View File

@@ -1,6 +1,6 @@
# typed: false
# typed: strict
class Scraper::GalleryDlClient
extend T::Sig
include HasColorLogger
StartEvent = Struct.new(:url, :extractor)
@@ -31,25 +31,54 @@ class Scraper::GalleryDlClient
:width,
)
sig { params(name: String, host: String).void }
def initialize(name, host)
name || raise("no name provided")
host || raise("no host provided")
logger.info("build #{name.to_s.green.bold} - #{host.green}")
@performed_by = name
@client = Ripcord::Client.new(host)
@performed_by = T.let(name, String)
@client = T.let(Ripcord::Client.new(host), Ripcord::Client)
@token = T.let(Concurrent::ThreadLocalVar.new, Concurrent::ThreadLocalVar)
end
sig do
params(username: String, caused_by_entry: T.nilable(HttpLogEntry)).returns(
T.nilable(
T.any(
StartEvent,
FinishEvent,
HttpRequestEvent,
TweetEvent,
TweetMediaEvent,
),
),
)
end
def start_twitter_user(username, caused_by_entry: nil)
@token = SecureRandom.uuid
@token.value = SecureRandom.uuid
rpc =
@client.call(
"start_user",
[@token, "https://twitter.com/#{username}/tweets"],
T.let(
@client.call(
"start_user",
[@token, "https://twitter.com/#{username}/tweets"],
),
Ripcord::JsonRPC::Response,
)
raise rpc_error_str(rpc) unless rpc.successful?
decode_message(rpc.result, caused_by_entry)
end
sig do
params(caused_by_entry: T.nilable(HttpLogEntry)).returns(
T.nilable(
T.any(
StartEvent,
FinishEvent,
HttpRequestEvent,
TweetEvent,
TweetMediaEvent,
),
),
)
end
def next_message(caused_by_entry: nil)
rpc = @client.call("next_message", [@token])
raise rpc_error_str(rpc) unless rpc.successful?
@@ -58,10 +87,27 @@ class Scraper::GalleryDlClient
private
sig { params(rpc: Ripcord::JsonRPC::Response).returns(String) }
def rpc_error_str(rpc)
"#{rpc.error.message}: #{rpc.error.data}"
end
sig do
params(
response: T::Hash[Symbol, T.untyped],
caused_by_entry: T.nilable(HttpLogEntry),
).returns(
T.nilable(
T.any(
StartEvent,
FinishEvent,
HttpRequestEvent,
TweetEvent,
TweetMediaEvent,
),
),
)
end
def decode_message(response, caused_by_entry)
token = response[:token]
raise("token mismatch: #{token} != #{@token}") if token != @token
@@ -104,6 +150,12 @@ class Scraper::GalleryDlClient
end
end
sig do
params(
http_event: HttpRequestEvent,
caused_by_entry: T.nilable(HttpLogEntry),
).void
end
def log_and_set_http_request_event(http_event, caused_by_entry)
request_headers = http_event.request_headers
response_headers = http_event.response_headers

View File

@@ -1,5 +1,4 @@
# typed: strict
module Scraper::HttpClient::Metrics
extend T::Sig
@@ -65,7 +64,7 @@ module Scraper::HttpClient::Metrics
T.let(
PrometheusExporter::Client.default.register(
:counter,
"http_client_request_finish_size_counter",
"http_client_request_finish_response_size_counter",
"bytes of http client response size, labeled by their status code and host",
),
PrometheusExporter::Client::RemoteMetric,

View File

@@ -30,7 +30,11 @@ class Scraper::Metrics::GoodJobMetricsWithQueues < PrometheusExporter::Instrumen
.each do |(queue_name, job_class), count|
gauge.observe(
count,
{ queue_name: queue_name, job_class: job_class },
{
queue_name: queue_name,
job_class: job_class,
scope: scope.to_s,
},
)
end
end

View File

@@ -3,13 +3,6 @@ require_relative "boot"
require "rails/all"
require "sorbet-runtime"
require "prometheus_exporter"
require "prometheus_exporter/client"
require "prometheus_exporter/metric"
require "prometheus_exporter/metric/counter"
require "prometheus_exporter/instrumentation"
require "prometheus_exporter/middleware"
module ReduxScraper
class Application < Rails::Application
config.active_support.deprecation = :raise
@@ -39,7 +32,5 @@ module ReduxScraper
# in config/environments, which are processed later.
#
config.time_zone = "Pacific Time (US & Canada)"
config.x.prometheus_exporter =
Rails.application.config_for("prometheus_exporter")
end
end

View File

@@ -1,4 +1,4 @@
# typed: false
# typed: true
require "active_support/core_ext/integer/time"
Rails.application.configure do
@@ -99,12 +99,12 @@ Rails.application.configure do
user_name: ->(request) do
api_token = request.params[:api_token]
if api_token
user = ApplicationController::API_TOKENS[api_token]
user = Domain::Fa::ApiController::API_TOKENS[api_token]
user || "(nil user)"
else
"(nil api_token)"
end
end
end,
}
# Do not dump schema after migrations.

View File

@@ -1,6 +1,13 @@
# typed: strict
if Rails.configuration.x.prometheus_exporter[:enabled]
unless Rails.env.test?
require "prometheus_exporter"
require "prometheus_exporter/client"
require "prometheus_exporter/metric"
require "prometheus_exporter/metric/counter"
require "prometheus_exporter/instrumentation"
require "prometheus_exporter/middleware"
# This reports stats per request like HTTP status and timings
Rails.application.middleware.unshift PrometheusExporter::Middleware
PrometheusExporter::Instrumentation::ActiveRecord.start(
@@ -9,4 +16,59 @@ if Rails.configuration.x.prometheus_exporter[:enabled]
}, #optional params
config_labels: %i[database host], #optional params
)
else
# this mocks out the PrometheusExporter::Client for testing
#
# this is a bit of a hack, but it's a way to get around the fact that
# PrometheusExporter::Client is a singleton and we can't easily mock it out
# in a test environment, and simply constructing a new instance attempts to
# connect to a Prometheus server, which we can't rely on in a test environment.
module PrometheusExporter
class Client
class RemoteMetric
end
extend T::Sig
class NullClient < PrometheusExporter::Client
extend T::Sig
sig { void }
def initialize
end
sig { params(args: T.untyped).returns(NullMetric) }
def register(*args)
NullMetric.new
end
end
@default = T.let(NullClient.new, NullClient)
sig { returns(NullClient) }
def self.default
@default
end
class NullMetric < PrometheusExporter::Client::RemoteMetric
extend T::Sig
sig { void }
def initialize
end
sig { params(args: T.untyped).void }
def increment(*args)
end
sig { params(args: T.untyped).void }
def decrement(*args)
end
sig { params(args: T.untyped).void }
def observe(*args)
end
end
end
end
end

View File

@@ -1,14 +0,0 @@
test:
enabled: true
development:
enabled: true
staging:
enabled: true
production:
enabled: true
worker:
enabled: true

View File

@@ -4,9 +4,6 @@
# typed: false
module ApplicationController::API_TOKENS; end
module SpecHelpers::GDLClient::FinishEvent; end
module SpecHelpers::GDLClient::HttpRequestEvent; end
module SyntaxTree::Haml; end
module SyntaxTree::Haml::Format::Formatter; end
module SyntaxTree::RBS; end

View File

@@ -1,7 +1,7 @@
# typed: true
module DebugHelpers
# add `debug_sql: true` to tags
def debug_sql
def self.debug_sql
logger = ActiveRecord::Base.logger
ActiveRecord::Base.logger = Logger.new($stdout)
yield
@@ -10,7 +10,7 @@ module DebugHelpers
end
# add `quiet: false` to show ColorLogger output
def quiet_color_logger(&block)
def self.quiet_color_logger(&block)
ColorLogger.quiet(&block)
end
end

View File

@@ -1,5 +1,5 @@
# typed: false
module SpecHelpers
module PerformJobHelpers
# TODO - migrate all the calls to perform_now to use this
def perform_now(params, should_raise: false)
ret = described_class.perform_now(params)
@@ -54,7 +54,7 @@ module SpecHelpers
receive: :next_message,
with: [{ caused_by_entry: nil }],
return:
GDLClient::HttpRequestEvent.new(
Scraper::GalleryDlClient::HttpRequestEvent.new(
log_entry: instance_double("::HttpLogEntry"),
response_code: 200,
response_headers: {
@@ -78,7 +78,7 @@ module SpecHelpers
proc do |sequence|
[{ caused_by_entry: sequence[1][:return].log_entry }]
end,
return: GDLClient::FinishEvent.new,
return: Scraper::GalleryDlClient::FinishEvent.new,
},
],
)

View File

@@ -1,6 +1,12 @@
# typed: strict
# This file is copied to spec/ when you run 'rails generate rspec:install'
ENV["RAILS_ENV"] = "test"
module PrometheusExporter
class Client
end
end
require_relative "../config/environment"
require "spec_helper"
require "rspec/rails"

View File

@@ -1,4 +1,4 @@
# typed: false
# typed: strict
# This file was generated by the `rails generate rspec:install` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause
@@ -14,7 +14,7 @@
# it.
#
# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
require "./spec/helpers/spec_helpers"
require "./spec/helpers/perform_job_helpers"
require "./spec/helpers/debug_helpers"
require "./spec/helpers/http_client_mock_helpers"
require "./spec/support/matchers/html_matchers"
@@ -23,20 +23,23 @@ require "rspec/sorbet"
RSpec::Sorbet.allow_doubles!
RSpec.configure do |config|
config.include SpecHelpers
config.include DebugHelpers
config.include PerformJobHelpers
# can tag classes with `quiet: false` to make ColorLogger loud
config.around(:each) do |example|
if example.example.metadata[:quiet].is_a?(FalseClass)
example.call
else
quiet_color_logger(&example)
DebugHelpers.quiet_color_logger(&example)
end
end
config.around(:each) do |example|
example.example.metadata[:debug_sql] ? debug_sql(&example) : example.call
if example.example.metadata[:debug_sql]
DebugHelpers.debug_sql(&example)
else
example.call
end
end
# rspec-expectations config goes here. You can use an alternate