more precise fa job priorities

Dylan Knutson
2025-02-25 02:57:44 +00:00
parent eb5ecb956d
commit 41324f019f
14 changed files with 63 additions and 36 deletions

Binary file not shown (new image, 114 KiB).

@@ -217,10 +217,6 @@ class Domain::Fa::Job::Base < Scraper::JobBase
sig { params(user: Domain::User::FaUser).void }
def enqueue_user_scan(user)
users_enqueued_for_page_scan ||= Set.new
users_enqueued_for_gallery_scan ||= Set.new
users_enqueued_for_favs_scan ||= Set.new
logger.tagged(make_arg_tag(user)) do
args =
if user.persisted?
@@ -259,6 +255,19 @@ class Domain::Fa::Job::Base < Scraper::JobBase
)
end
if user.due_for_follows_scan? &&
defer_job(Domain::Fa::Job::UserFollowsJob, args)
logger.info(
format_tags(
"enqueue user follows job",
make_tag(
"last scanned",
time_ago_in_words(user.scanned_follows_at),
),
),
)
end
if user.due_for_favs_scan? && defer_job(Domain::Fa::Job::FavsJob, args)
logger.info(
format_tags(

@@ -2,6 +2,10 @@
class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
include HasBulkEnqueueJobs
queue_as :fa_user_favs
queue_with_priority do
T.bind(self, Domain::Fa::Job::FavsJob)
user_from_args!.scanned_favs_at.nil? ? -11 : -1
end
FAVED_POSTS_PER_PAGE_THRESHOLD = T.let(Rails.env.test? ? 2 : 36, Integer)
MAX_PAGE_NUMBER = 2000
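For context, queue_with_priority also accepts a block (mirroring the block form of queue_as in newer Rails releases), and that block is evaluated against the job instance at enqueue time, which is why the Sorbet T.bind is needed before calling instance helpers. The user_from_args! helper itself is not part of this diff; a rough sketch of what it could look like, assuming it lives in Domain::Fa::Job::Base and that the enqueue arguments carry the user under a :user key:

# Hypothetical fragment of Domain::Fa::Job::Base, not the repo's actual code.
sig { returns(Domain::User::FaUser) }
def user_from_args!
  user = (arguments.first || {})[:user]
  unless user.is_a?(Domain::User::FaUser)
    raise ArgumentError, "expected job arguments to include a Domain::User::FaUser under :user"
  end
  user
end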

@@ -1,6 +1,7 @@
# typed: strict
class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
queue_as :fa_post
queue_with_priority(-15)
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
@@ -20,17 +21,9 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
if force_scan? ||
(file.present? && (file.state == "pending") && file.url_str.present?)
logger.info(
format_tags(
"enqueue file job",
make_tag("url", file&.url_str),
make_tag("priority", self.priority),
),
)
defer_job(
Domain::Fa::Job::ScanFileJob,
{ file: post.file },
{ priority: self.priority },
format_tags("enqueue file job", make_tag("url", file&.url_str)),
)
defer_job(Domain::Fa::Job::ScanFileJob, { file: post.file })
end
if creator = post.creator
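With the explicit { priority: self.priority } option dropped, the deferred ScanFileJob presumably falls back to whatever priority its own class declares. As the Scraper::JobBase hunk further down shows, defer_job still carries an optional hash of set-style options for callers that do need an override; illustrative call shapes (not code added by this commit):

# Uses ScanFileJob's own class-level priority:
defer_job(Domain::Fa::Job::ScanFileJob, { file: post.file })
# Explicit per-enqueue override, purely for illustration:
defer_job(Domain::Fa::Job::ScanFileJob, { file: post.file }, { priority: -20 })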

@@ -6,9 +6,13 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
include HasMeasureDuration
include HasBulkEnqueueJobs
USERS_PER_FULL_PAGE = T.let(Rails.env.test? ? 9 : 190, Integer)
queue_as :fa_user_follows
queue_with_priority do
T.bind(self, Domain::Fa::Job::UserFollowsJob)
user_from_args!.scanned_follows_at.nil? ? -12 : -2
end
USERS_PER_FULL_PAGE = T.let(Rails.env.test? ? 9 : 190, Integer)
sig { params(args: T.untyped).void }
def initialize(*args)

@@ -1,6 +1,10 @@
# typed: strict
class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
queue_as :fa_user_gallery
queue_with_priority do
T.bind(self, Domain::Fa::Job::UserGalleryJob)
user_from_args!.scanned_gallery_at.nil? ? -13 : -3
end
MAX_PAGE_NUMBER = 350
LISTINGS_PER_PAGE_THRESHOLD = 72

@@ -2,6 +2,10 @@
class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
ScanUserUtils = Domain::Fa::Job::ScanUserUtils
queue_as :fa_user_page
queue_with_priority do
T.bind(self, Domain::Fa::Job::UserPageJob)
user_from_args!.scanned_page_at.nil? ? -14 : -4
end
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
def perform(args)
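Taken together, the priorities introduced across these job classes form two tiers: targets that have never been scanned run at -11 through -15, and rescans run at -1 through -4, so initial scans win over rescans assuming the queue backend treats a smaller number as higher priority (the ActiveJob convention, and GoodJob's default in recent versions). A summary of the values, inferred from the hunks above rather than existing anywhere in the repo as a single table:

# Inferred priority map; illustration only, not repo code.
PRIORITIES = {
  "Domain::Fa::Job::ScanPostJob"    => { first_scan: -15, rescan: -15 }, # fixed, no split
  "Domain::Fa::Job::UserPageJob"    => { first_scan: -14, rescan: -4 },
  "Domain::Fa::Job::UserGalleryJob" => { first_scan: -13, rescan: -3 },
  "Domain::Fa::Job::UserFollowsJob" => { first_scan: -12, rescan: -2 },
  "Domain::Fa::Job::FavsJob"        => { first_scan: -11, rescan: -1 },
}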

@@ -371,8 +371,8 @@ class Scraper::JobBase < ApplicationJob
GoodJob::Bulk.enqueue do
@deferred_jobs.each do |deferred_job|
args = deferred_job.params.merge({ caused_by_entry: causing_log_entry })
job =
deferred_job.job_class.set(deferred_job.set_args).perform_later(args)
set_args = deferred_job.set_args
job = deferred_job.job_class.set(set_args).perform_later(args)
Scraper::Metrics::JobBaseMetrics.observe_job_enqueued(
source_class: self.class,
enqueued_class: deferred_job.job_class,
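GoodJob::Bulk.enqueue collects the jobs enqueued inside its block and writes them to the database in bulk, rather than one round trip per perform_later call, which keeps the deferred-job pattern above cheap even when a single scan fans out into many downstream jobs. A minimal illustration of the API (the jobs and arguments are placeholders, not code from this commit):

# Both jobs are persisted together in a bulk write when the block returns.
GoodJob::Bulk.enqueue do
  Domain::Fa::Job::UserPageJob.perform_later({ user: user })                  # user is a placeholder record
  Domain::Fa::Job::FavsJob.set(priority: -20).perform_later({ user: user })   # per-enqueue override, for illustration
end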

@@ -18,15 +18,16 @@ class Domain::Fa::PostEnqueuer
Enumerator.new do |e|
if reverse_scan_holes
while start_at > 0
if !Domain::Fa::Post.exists?(fa_id: start_at)
if !Domain::Post::FaPost.exists?(fa_id: start_at)
e << [nil, start_at, nil]
end
start_at -= 1
end
else
Domain::Fa::Post
Domain::Post::FaPost
.where("id >= ?", start_at)
.where("file_id is null")
.where
.missing(:file)
.where(state: "ok")
.find_each { |p| e << [p.id, p.fa_id, p.file_url_str] }
end
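where.missing(:file) (available since Rails 6.1) expresses the old raw "file_id is null" predicate through the association instead: it adds a LEFT OUTER JOIN on the association and keeps only the rows with no matching record, which also catches posts whose file_id points at a row that no longer exists. Roughly what the new scope expands to, assuming a belongs_to :file association backed by a files table:

# Approximate expansion of .where.missing(:file); table name assumed.
Domain::Post::FaPost
  .left_outer_joins(:file)
  .where(files: { id: nil })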

@@ -31,7 +31,7 @@ class Scraper::FaHttpClientConfig < Scraper::HttpClientConfig
def ratelimit
# number represents minimum delay in seconds between requests to the same domain
[["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]]
[["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]]
end
def allowed_domains
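The last entry in the list is a catch-all, so this change raises the default minimum delay between requests to the same domain from 1 to 1.5 seconds while leaving the CDN hosts unthrottled. The code that consumes this table is not part of the diff; a hypothetical lookup against it might look like:

# Hypothetical consumer of the [pattern, delay] pairs above; first matching
# glob wins and :none is treated as no delay.
def min_delay_for(host, rules)
  _, delay = rules.find { |pattern, _| File.fnmatch?(pattern, host) }
  delay == :none ? 0 : delay
end

min_delay_for("www.furaffinity.net",
              [["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]])
# => 1.5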

@@ -4,13 +4,13 @@
<meta property="og:url" content="<%= furecs_user_script_url %>">
<meta property="og:title" content="FurAffinity User Recommender - Find Similar Artists">
<meta property="og:description" content="Discover new artists on FurAffinity based on who you already follow. Uses collaborative filtering to find users with similar interests and content.">
<meta property="og:image" content="<%= image_url('furecs/furecs-screenshot.png') %>">
<meta property="og:image" content="<%= image_url('furecs/furecs-screenshot-2.png') %>">
<%# Twitter %>
<meta name="twitter:card" content="summary_large_image">
<meta name="twitter:url" content="<%= furecs_user_script_url %>">
<meta name="twitter:title" content="FurAffinity User Recommender - Find Similar Artists">
<meta name="twitter:description" content="Discover new artists on FurAffinity based on who you already follow. Uses collaborative filtering to find users with similar interests and content.">
<meta name="twitter:image" content="<%= image_url('furecs/furecs-screenshot.png') %>">
<meta name="twitter:image" content="<%= image_url('furecs/furecs-screenshot-2.png') %>">
<% end %>
<div class="mx-auto mt-2 max-w-2xl p-4">
<h1 class="mb-4 text-2xl font-bold">FurAffinity User Recommender</h1>
@@ -41,7 +41,7 @@
</p>
<div class="mt-6 mb-8">
<p class="mb-2 text-sm text-slate-600 italic">Screenshot of the Similar Users section added to a user profile page:</p>
<%= image_tag "furecs/furecs-screenshot.png", alt: "Screenshot showing the Similar Users section added by the script", class: "rounded-lg border border-slate-300 shadow-lg" %>
<%= image_tag "furecs/furecs-screenshot-2.png", alt: "Screenshot showing the Similar Users section added by the script", class: "rounded-lg border border-slate-300 shadow-lg" %>
</div>
<h2 class="mt-6 mb-2 text-xl font-semibold">How it Works</h2>
<p class="mb-4">

@@ -14,7 +14,7 @@ RSpec.describe Domain::Fa::ApiController, type: :controller do
it "returns status of posts and users" do
post :object_statuses, params: { fa_ids: [123], url_names: ["test"] }
expect(response).to be_successful
expect(JSON.parse(response.body)).to include("posts", "users", "queues")
expect(JSON.parse(response.body)).to include("posts", "users")
end
end

@@ -95,10 +95,10 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do
html =
'<a href="https://www.furaffinity.net/view/123456/"><b>Bold</b> <script>bad</script> <span style="color: red; background: blue;">Text</span></a>'
sanitized = sanitize_description_html(html)
expect(sanitized).to eq_html(
'<b>Bold</b> <span style="color: red; ">Text</span>',
)
expect(sanitized).to include("<a")
expect(sanitized).to include("Bold bad Text")
expect(sanitized).to include("https://www.furaffinity.net/view/123456/")
expect(sanitized).to include("_blank")
end
describe "post title lookup" do
@@ -125,12 +125,15 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do
)
end
it "removes the link when the post doesn't exist" do
it "links to the external post when the post doesn't exist" do
html =
'<a href="https://www.furaffinity.net/view/789/">Original Text</a>'
sanitized = sanitize_description_html(html)
expect(sanitized).to eq_html("Original Text")
expect(sanitized).to include("Original Text")
expect(sanitized).to include("<a")
expect(sanitized).to include("https://www.furaffinity.net/view/789/")
expect(sanitized).to include("_blank")
end
it "replaces nested elements when replacing titles" do
@@ -188,7 +191,7 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do
}.each do |input, expected|
it "processes '#{input}' correctly" do
sanitized = sanitize_description_html(input)
expect(sanitized).to eq_html(expected)
expect(sanitized).to include(expected)
end
end
end
@@ -289,12 +292,17 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do
)
end
it "Removes the link when the user doesn't exist" do
it "links to the external user when the user doesn't exist" do
html =
'<a href="https://www.furaffinity.net/user/nonexistent/">Original Text</a>'
sanitized = sanitize_description_html(html)
expect(sanitized).to eq_html("Original Text")
expect(sanitized).to include("Original Text")
expect(sanitized).to include("<a")
expect(sanitized).to include(
"https://www.furaffinity.net/user/nonexistent/",
)
expect(sanitized).to include("_blank")
end
it "replaces nested elements when replacing names" do

@@ -109,7 +109,7 @@ RSpec.describe Scraper::FaHttpClientConfig do
it "returns the configured rate limits" do
config = described_class.new
expect(config.ratelimit).to eq(
[["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1]],
[["d.furaffinity.net", :none], ["*.facdn.net", :none], ["*", 1.5]],
)
end
end