refactor fa jobs to accept old model types

This commit is contained in:
Dylan Knutson
2025-02-12 20:13:36 +00:00
parent 54a8cd2fff
commit 34337f6f57
11 changed files with 136 additions and 161 deletions

View File

@@ -10,9 +10,6 @@ class Domain::Fa::Job::Base < Scraper::JobBase
sig { params(args: T.untyped).void }
def initialize(*args)
super(*T.unsafe(args))
@user_old = T.let(nil, T.nilable(Domain::Fa::User))
@user = T.let(nil, T.nilable(Domain::User::FaUser))
@created_user = T.let(false, T::Boolean)
@posts_enqueued_for_scan = T.let(Set.new, T::Set[Integer])
@force_scan = T.let(false, T::Boolean)
end
@@ -24,87 +21,89 @@ class Domain::Fa::Job::Base < Scraper::JobBase
!!arguments[0][:force_scan]
end
sig do
params(
args: T.untyped,
set_user: T::Boolean,
build_user: T::Boolean,
require_user_exists: T::Boolean,
).returns(T.nilable(Domain::User::FaUser))
end
def init_from_args!(
args,
set_user: true,
build_user: true,
require_user_exists: false
)
@force_scan = !!args[:force_scan]
if set_user
if build_user
@user = find_or_build_user_from_args(args)
sig { params(build_post: T::Boolean).returns(Domain::Post::FaPost) }
def post_from_args!(build_post: false)
args = arguments[0]
post = args[:post]
if post.is_a?(Domain::Post::FaPost)
return post
elsif post.is_a?(Domain::Fa::Post)
return Domain::Post::FaPost.find_by!(fa_id: post.fa_id)
elsif fa_id = args[:fa_id]
if build_post
Domain::Post::FaPost.find_or_initialize_by(fa_id: fa_id)
else
@user = find_user_from_args(args)
Domain::Post::FaPost.find_by!(fa_id: fa_id)
end
@user_old = Domain::Fa::User.find_by(url_name: @user.url_name) if @user
else
fatal_error("arg 'post' must be a Domain::Post::FaPost or an Integer")
end
logger.prefix =
"[user #{(@user&.url_name || @user&.name || args[:url_name])&.bold} / #{@user&.state&.bold}]"
return nil unless @user
if @user.new_record?
if require_user_exists
fatal_error("user must already exist")
else
@user.save!
@created_user = true
end
end
@user
end
sig { params(args: T.untyped).returns(Domain::User::FaUser) }
def find_or_build_user_from_args(args)
find_user_from_args(args) ||
begin
url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
user = Domain::User::FaUser.new
user.url_name = url_name
user.name = url_name
user
end
sig { returns(Domain::UserAvatar) }
def avatar_from_args!
args = arguments[0]
avatar = args[:avatar]
user = args[:user]
if avatar.is_a?(Domain::UserAvatar)
return avatar
elsif user.is_a?(Domain::User::FaUser)
return T.must(user.avatar)
elsif user.is_a?(Domain::Fa::User)
user = Domain::User::FaUser.find_by(url_name: user.url_name)
return T.must(user&.avatar)
else
fatal_error(
"arg 'avatar' must be a Domain::UserAvatar or user must be a Domain::Fa::User",
)
end
end
sig { params(args: T.untyped).returns(T.nilable(Domain::User::FaUser)) }
def find_user_from_args(args)
sig { params(create_if_missing: T::Boolean).returns(Domain::User::FaUser) }
def user_from_args!(create_if_missing: true)
args = arguments[0]
user = args[:user]
if user.is_a?(Domain::User::FaUser)
return user
user
elsif user.is_a?(Domain::Fa::User)
return Domain::User::FaUser.find_by(url_name: user.url_name)
end
Domain::User::FaUser.find_by!(url_name: user.url_name)
elsif url_name = args[:url_name]
if create_if_missing
user =
Domain::User::FaUser.find_or_initialize_by(url_name:) do |user|
user.name = url_name
end
if args[:url_name].blank?
fatal_error("arg 'url_name' is required if arg 'user' is nil")
if user.new_record?
user.save!
defer_job(
Domain::Fa::Job::UserPageJob,
{ user:, caused_by_entry: causing_log_entry },
)
end
user
else
Domain::User::FaUser.find_by!(url_name:)
end
else
fatal_error(
"arg 'user' must be a Domain::User::FaUser or Domain::Fa::User, or url_name must be provided",
)
end
url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
Domain::User::FaUser.find_by(url_name: url_name)
end
sig { returns(T::Boolean) }
def user_due_for_favs_scan?
raise("user is nil") unless @user
unless @user.due_for_favs_scan?
if @force_scan
sig { params(user: Domain::User::FaUser).returns(T::Boolean) }
def user_due_for_favs_scan?(user)
unless user.due_for_favs_scan?
if force_scan?
logger.warn(
"scanned #{DateHelper.time_ago_in_words(@user.scanned_favs_at).bold} ago - force scanning",
"scanned #{DateHelper.time_ago_in_words(user.scanned_favs_at).bold} ago - force scanning",
)
return true
else
logger.warn(
"scanned #{DateHelper.time_ago_in_words(@user.scanned_favs_at).bold} ago - skipping",
"scanned #{DateHelper.time_ago_in_words(user.scanned_favs_at).bold} ago - skipping",
)
return false
end

View File

@@ -12,28 +12,18 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
@page_id = T.let(nil, T.nilable(String))
@page_number = T.let(0, Integer)
@total_items_seen = T.let(0, Integer)
@first_job_entry = T.let(nil, T.nilable(HttpLogEntry))
@full_scan = T.let(false, T::Boolean)
@force_scan = T.let(false, T::Boolean)
@last_page_post_ids = T.let(Set.new, T::Set[Integer])
@use_http_cache = T.let(false, T::Boolean)
end
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
@first_job_entry = nil
user = init_from_args!(args, build_user: false)
@full_scan = !!args[:full_scan]
user = user_from_args!(create_if_missing: true)
full_scan = !!args[:full_scan]
@use_http_cache = !!args[:use_http_cache]
user ||
begin
defer_job(Domain::Fa::Job::UserPageJob, { url_name: args[:url_name] })
fatal_error("user does not exist: #{args}")
end
user = T.must(user)
logger.prefix = "[#{user.url_name&.bold} / #{user.state&.bold}]"
return unless user_due_for_favs_scan?
return unless user_due_for_favs_scan?(user)
max_page_number =
T.let([((user.num_favorites || 0) + 1) / 48, 100].max, Integer)
@@ -49,7 +39,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
break if ret == :break
return if ret == :stop
if !@full_scan
unless full_scan
new_favs = @last_page_post_ids - existing_faved_ids
if new_favs.empty?
user.scanned_favs_at = Time.zone.now
@@ -91,6 +81,8 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
user.save!
end
logger.info "[updated favs list] [posts: #{user.user_post_favs.count.to_s.bold}]"
ensure
user.save! if user
end
private

View File

@@ -26,7 +26,6 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
fatal_error(
"invalid post model: #{post.class}, expected Domain::Fa::Post or Domain::Post::FaPost",
)
raise
end
post.file
end
@@ -35,7 +34,7 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
logger.info "scanning file: #{file.id}, state=#{file.state}, retry_count=#{file.retry_count}"
if file.state == "terminal_error" && !@force_scan
if file.state == "terminal_error" && !force_scan?
logger.warn("state == terminal_error, abort without retrying")
return
end
@@ -45,16 +44,13 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
return
end
if file.state == "ok" && !@force_scan
if file.state == "ok" && !force_scan?
logger.warn("already have file, skipping")
return
end
file_url_str = file.url_str
if file_url_str.nil?
fatal_error("file has no url")
raise
end
fatal_error("file has no url") if file_url_str.nil?
response = http_client.get(file_url_str)
file.log_entry = response.log_entry

View File

@@ -4,14 +4,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
post =
T.cast(
args[:post] ||
begin
Domain::Post::FaPost.find_or_initialize_by(fa_id: args[:fa_id])
end,
Domain::Post::FaPost,
)
post = post_from_args!(build_post: true)
logger.prefix =
proc { "[fa_id #{post.fa_id.to_s.bold} / #{post.state&.bold}]" }
@@ -42,6 +35,8 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
)
end
logger.info "finished post scan"
ensure
post.save! if post
end
private
@@ -52,7 +47,6 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
http_client.get("https://www.furaffinity.net/view/#{post.fa_id}/")
if response.status_code == 404
post.state = "scan_error"
post.save!
return
end
@@ -66,7 +60,6 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
if page.submission_not_found?
logger.error("post was removed")
post.state = "removed"
post.save!
return
end
@@ -77,7 +70,6 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
/The page you are trying to reach is currently pending deletion/
logger.error("post is pending deletion")
post.state = "removed"
post.save!
return
else
fatal_error("not a submission page")
@@ -90,6 +82,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
raise("id mismatch: #{submission.id} != #{post.fa_id}")
end
# save before any changes so post has an id for any files
post.save!
post.last_submission_log_entry = first_log_entry
post.title = submission.title
@@ -116,6 +109,5 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
post.num_views = submission.num_views
post.posted_at = submission.posted_date
post.scanned_at = DateTime.current
post.save!
end
end

View File

@@ -5,14 +5,9 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
init_from_args!(args, build_user: false, set_user: false)
avatar =
args[:avatar] ||
begin
user = @user || raise("user must exist")
user.avatar || raise("user must have an avatar")
end
user = avatar.user
avatar = avatar_from_args!
user = T.cast(avatar.user, Domain::User::FaUser)
logger.prefix =
proc do
"[avatar #{avatar.id.to_s.bold} / user #{user.url_name.to_s.bold}]"

View File

@@ -21,9 +21,9 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
init_from_args!(args)
user = T.must(@user)
if !user.due_for_follows_scan? && !@force_scan
user = user_from_args!
if !user.due_for_follows_scan? && !force_scan?
logger.warn(
"scanned #{time_ago_in_words(user.scanned_follows_at)}, skipping",
)
@@ -33,6 +33,9 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
# buggy (sentinal) user
return if user.url_name == "click here"
# ensure user has an ID before we start scanning
user.save! if user.new_record?
while true
break if scan_follows_page(user) == :break
# bail out at 100,000 users
@@ -55,11 +58,6 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
end
logger.info "[updated following users list] [users: #{user.followed_users.count.to_s.bold}]"
if @created_user
logger.info("user was new record, enqueue page scan job")
defer_job(Domain::Fa::Job::UserPageJob, { user: user })
end
end
private

View File

@@ -20,8 +20,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
init_from_args!(args)
user = T.must(@user)
user = user_from_args!
if user.state != "ok" && user.scanned_gallery_at
logger.warn("state == #{user.state} and already scanned, skipping")
@@ -38,7 +37,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
@max_page_number = (num_submissions * 72) + 3
end
if !user.due_for_gallery_scan? && !@force_scan
if !user.due_for_gallery_scan? && !force_scan?
logger.warn(
"gallery scanned #{time_ago_in_words(user.scanned_page_at)}, skipping",
)
@@ -131,7 +130,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
total_num_new_posts_seen += listing_page_stats.new_seen
total_num_posts_seen += listing_page_stats.total_seen
page.submission_folders.each { |sf| @folders.add?(sf) } if @force_scan
page.submission_folders.each { |sf| @folders.add?(sf) } if force_scan?
page_number += 1
break if listing_page_stats.new_seen == 0 && !@go_until_end

View File

@@ -5,8 +5,7 @@ module Domain::Fa::Job
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
init_from_args!(args)
user = T.must(@user)
user = user_from_args!
# buggy user
return if user.id == 117_552 && user.url_name == "click here"
@@ -21,7 +20,7 @@ module Domain::Fa::Job
# - follows / following: look at the 'watchers' / 'watching' section,
# and add new follows.
if !user.due_for_incremental_scan? && !@force_scan
if !user.due_for_incremental_scan? && !force_scan?
logger.warn(
"scanned #{time_ago_in_words(user.scanned_incremental_at).bold}, skipping",
)
@@ -45,7 +44,6 @@ module Domain::Fa::Job
return
when ScanUserUtils::DisabledOrNotFoundResult::Fatal
fatal_error(ret.message)
return
end
update_user_fields_from_page(user, page, response)

View File

@@ -5,13 +5,12 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
init_from_args!(args)
user = T.must(@user)
user = user_from_args!
# buggy (sentinal) user
return if user.id == 117_552 && user.url_name == "click here"
if !user.due_for_page_scan? && !@force_scan
if !user.due_for_page_scan? && !force_scan?
logger.warn(
"scanned #{time_ago_in_words(user.scanned_page_at)}, skipping",
)
@@ -31,7 +30,6 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
return
when ScanUserUtils::DisabledOrNotFoundResult::Fatal
fatal_error(ret.message)
return
end
update_user_fields_from_page(user, page, response)

View File

@@ -375,7 +375,7 @@ class Scraper::JobBase < ApplicationJob
end
end
sig { params(msg: T.untyped).void }
sig { params(msg: T.untyped).returns(T.noreturn) }
def fatal_error(msg)
logger.error(msg)
raise JobError, msg.uncolorize

View File

@@ -3,14 +3,14 @@ require "rails_helper"
describe Domain::Fa::Job::FavsJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
let(:client_mock_config) { [] }
before do
Scraper::ClientFactory.http_client_mock = http_client_mock
@log_entries =
HttpClientMockHelpers.init_http_client_mock(
http_client_mock,
client_mock_config,
)
let!(:log_entries) do
HttpClientMockHelpers.init_http_client_mock(
http_client_mock,
client_mock_config,
)
end
shared_context "user exists" do
@@ -19,28 +19,7 @@ describe Domain::Fa::Job::FavsJob do
end
end
context "the user does not yet exist" do
it "fails the job" do
perform_now({ url_name: "zzreg" }, should_raise: /user does not exist/)
expect(Domain::User::FaUser.find_by url_name: "zzreg").to be_nil
end
it "enqueues a page scan job" do
perform_now({ url_name: "zzreg" }, should_raise: true)
expect(SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob)).to match(
[including({ url_name: "zzreg" })],
)
end
it "does not create any new posts" do
expect do
perform_now({ url_name: "zzreg" }, should_raise: true)
end.not_to change(Domain::Post::FaPost, :count)
end
end
context "site indicates no favs" do
include_context "user exists"
shared_context "user has no favs" do
let(:client_mock_config) do
[
{
@@ -52,6 +31,35 @@ describe Domain::Fa::Job::FavsJob do
},
]
end
end
context "the user does not yet exist" do
include_context "user has no favs"
it "creates the user" do
perform_now({ url_name: "zzreg" })
expect(Domain::User::FaUser.find_by(url_name: "zzreg")).not_to be_nil
end
it "enqueues a page scan job" do
perform_now({ url_name: "zzreg" })
user = Domain::User::FaUser.find_by(url_name: "zzreg")
expect(SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob)).to match(
[including({ user:, caused_by_entry: log_entries[0] })],
)
end
it "does not create any new posts" do
expect do perform_now({ url_name: "zzreg" }) end.not_to change(
Domain::Post::FaPost,
:count,
)
end
end
context "site indicates no favs" do
include_context "user exists"
include_context "user has no favs"
it "records no favs for the user" do
perform_now({ url_name: "zzreg" })
@@ -145,23 +153,23 @@ describe Domain::Fa::Job::FavsJob do
array_including(
{
post: Domain::Post::FaPost.find_by(fa_id: 52_106_426),
caused_by_entry: @log_entries[0],
caused_by_entry: log_entries[0],
},
{
post: Domain::Post::FaPost.find_by(fa_id: 36_755_337),
caused_by_entry: @log_entries[0],
caused_by_entry: log_entries[0],
},
{
post: Domain::Post::FaPost.find_by(fa_id: 40_769_488),
caused_by_entry: @log_entries[0],
caused_by_entry: log_entries[0],
},
{
post: Domain::Post::FaPost.find_by(fa_id: 20_808_448),
caused_by_entry: @log_entries[0],
caused_by_entry: log_entries[0],
},
{
post: Domain::Post::FaPost.find_by(fa_id: 20_585_829),
caused_by_entry: @log_entries[0],
caused_by_entry: log_entries[0],
},
),
)