user page job improvement for skipping gallery scans
This commit is contained in:
1
TODO.md
1
TODO.md
@@ -35,3 +35,4 @@
|
||||
- [ ] tumblr domain icon
|
||||
- [ ] Do PCA on user factors table to display a 2D plot of users
|
||||
- [ ] Use links found in descriptions to indicate re-scanning a post? (e.g. for comic next/prev links)
|
||||
- [ ] fix for IDs that have a dot in them - e.g. https://refurrer.com/users/fa@jakke.
|
||||
|
||||
@@ -11,6 +11,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
|
||||
logger.info("scanning post")
|
||||
|
||||
if force_scan? || (post.state_ok? && !post.scanned_at.present?) ||
|
||||
# check for !state_ok? on the file maybe?
|
||||
post.file&.state_file_error?
|
||||
ReduxApplicationRecord.transaction { scan_post(post) }
|
||||
end
|
||||
@@ -90,6 +91,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
|
||||
end
|
||||
|
||||
# save before any changes so post has an id for any files
|
||||
post.state_ok!
|
||||
post.save!
|
||||
|
||||
post.title = submission.title
|
||||
|
||||
@@ -28,7 +28,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
|
||||
end
|
||||
|
||||
if user.state_ok? && user_page
|
||||
check_skip_gallery_scan(user)
|
||||
check_skip_gallery_scan(user, user_page)
|
||||
check_skip_favs_scan(user, user_page)
|
||||
check_skip_followed_users_scan(user, user_page)
|
||||
check_skip_followed_by_users_scan(user, user_page)
|
||||
@@ -46,13 +46,87 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
|
||||
|
||||
private
|
||||
|
||||
sig { params(user: Domain::User::FaUser).void }
|
||||
def check_skip_gallery_scan(user)
|
||||
# Uses the freshly parsed user page to decide whether a full gallery scan of
# the user can be skipped, and queues scans for the submissions listed in the
# page's recent gallery section.
#
# - Logs a warning and returns early when the page reports no submission count.
# - Creates a post (with a ScanPostJob enqueued after save) for every recent
#   gallery id that is not yet among the user's posts.
# - Defers a forced ScanPostJob / a ScanFileJob for already-known recent posts
#   whose post or file looks incomplete.
# - Sets `user.scanned_gallery_at` when the page reports zero submissions, or
#   when at least as many posts are known as the page reports. The user is
#   not saved in this method.
sig do
  params(
    user: Domain::User::FaUser,
    user_page: Domain::Fa::Parser::UserPageHelper,
  ).void
end
def check_skip_gallery_scan(user, user_page)
  user_page_num_submissions = user_page.num_submissions
  if user_page_num_submissions.nil?
    logger.warn(format_tags("user page num_submissions is nil"))
    return
  end

  # if the user has no submissions, we don't need to scan their gallery
  if user_page_num_submissions == 0
    logger.info(format_tags("skipping gallery scan, 0 submissions"))
    user.scanned_gallery_at = Time.current
  end

  # create any submissions on the recent gallery page that are not yet known
  recent_gallery_fa_ids = user_page.recent_gallery_fa_ids
  # the query is already restricted to the recent gallery ids, so its result
  # is exactly the set of known recent gallery posts
  known_recent_gallery_posts =
    user.posts.where(fa_id: recent_gallery_fa_ids).to_a
  unknown_recent_gallery_fa_ids =
    recent_gallery_fa_ids - known_recent_gallery_posts.map(&:fa_id)

  unknown_recent_gallery_fa_ids.each do |fa_id|
    post = Domain::Post::FaPost.find_or_initialize_by(fa_id:)
    post.creator = user
    post.enqueue_job_after_save(
      Domain::Fa::Job::ScanPostJob,
      { post:, caused_by_entry: causing_log_entry },
    )
    post.save!
  end

  # recheck any known posts to see if they should be rescanned
  known_recent_gallery_posts.each do |post|
    post_file = post.file
    if !post.state_ok? || post_file.blank? || !post_file.state_ok?
      # we just saw it, so it must be ok to enqueue
      logger.tagged(make_arg_tag(post)) do
        logger.info(
          "post is in recent gallery section, force enqueue ScanPostJob",
        )
      end
      post.state_ok!
      post.save!
      defer_job(Domain::Fa::Job::ScanPostJob, { post:, force_scan: true })
    end

    # NOTE(review): as written, `!state_terminal_error?` alone makes this true
    # for every file that has a url and is not in the terminal error state
    # (presumably including files already in the ok state) - confirm that
    # re-enqueueing those is intended.
    if post_file && post_file.url_str.present? &&
         (
           post_file.state_pending? || post_file.log_entry.blank? ||
             !post_file.state_terminal_error?
         )
      logger.tagged(make_arg_tag(post_file)) do
        logger.info(
          "post is in recent gallery section, force enqueue ScanFileJob",
        )
      end
      defer_job(Domain::Fa::Job::ScanFileJob, { post_file: })
    end
  end

  # if we know at least as many submissions as are indicated on the user page,
  # then we can skip the gallery scan. Count once so the logged value is the
  # same one the decision was made on.
  known_submissions = user.posts.count
  if user_page_num_submissions <= known_submissions
    logger.info(
      format_tags(
        make_tag("user_page_submissions", user_page_num_submissions),
        make_tag("known_submissions", known_submissions),
        "skipping gallery scan, all submissions known",
      ),
    )

    user.scanned_gallery_at = Time.current
  end
end
|
||||
|
||||
sig do
|
||||
|
||||
@@ -23,6 +23,7 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
@num_journals = T.let(nil, T.nilable(Integer))
|
||||
@num_favorites = T.let(nil, T.nilable(Integer))
|
||||
@recent_favs = T.let(nil, T.nilable(T::Array[Integer]))
|
||||
@recent_gallery_fa_ids = T.let(nil, T.nilable(T::Array[Integer]))
|
||||
@recent_watchers = T.let(nil, T.nilable(T::Array[RecentUser]))
|
||||
@recent_watching = T.let(nil, T.nilable(T::Array[RecentUser]))
|
||||
@statistics = T.let(nil, T.nilable(Nokogiri::XML::Element))
|
||||
@@ -232,6 +233,25 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
end
|
||||
end
|
||||
|
||||
# FA ids of the submissions linked from the "#gallery-latest-submissions"
# section of the page, memoized after the first call. Yields an empty array
# when that section is absent; raises when a link in the section does not
# contain a "/view/<digits>" path.
sig { returns(T::Array[Integer]) }
def recent_gallery_fa_ids
  @recent_gallery_fa_ids ||=
    case @page_version
    when VERSION_2
      section = @elem.css("#gallery-latest-submissions").first
      links = section&.css("figure a")
      ids =
        links&.map do |link|
          href = link["href"]
          matched = %r{/view/(\d+)}.match(href)
          matched&.[](1)&.to_i || raise("invalid url: #{href}")
        end
      ids || []
    else
      unimplemented_version!
    end
end
|
||||
|
||||
class JSONSubmissionData < T::ImmutableStruct
|
||||
include T::Struct::ActsAsComparable
|
||||
|
||||
|
||||
@@ -7,13 +7,7 @@ class ApplicationPolicy
|
||||
sig { returns(T.nilable(User)) }
|
||||
attr_reader :user
|
||||
|
||||
sig do
|
||||
returns(
|
||||
T.nilable(
|
||||
T.any(ReduxApplicationRecord, T.class_of(ReduxApplicationRecord)),
|
||||
),
|
||||
)
|
||||
end
|
||||
sig { returns(T.untyped) }
|
||||
attr_reader :record
|
||||
|
||||
sig { returns(String) }
|
||||
@@ -33,15 +27,7 @@ class ApplicationPolicy
|
||||
"#{self.class.name}::#{Digest::SHA256.hexdigest(method_values_string)[0..16]}"
|
||||
end
|
||||
|
||||
sig do
|
||||
params(
|
||||
user: T.nilable(User),
|
||||
record:
|
||||
T.nilable(
|
||||
T.any(ReduxApplicationRecord, T.class_of(ReduxApplicationRecord)),
|
||||
),
|
||||
).void
|
||||
end
|
||||
sig { params(user: T.nilable(User), record: T.untyped).void }
|
||||
def initialize(user, record)
|
||||
@user = user
|
||||
@record = record
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
<span class="badge bg-primary">
|
||||
<i class="fa-solid fa-file me-1"></i><%= post_file.class.name %> #<%= post_file.id %>
|
||||
</span>
|
||||
<%= link_to domain_post_path(post),
|
||||
<%= link_to Rails.application.routes.url_helpers.domain_post_path(post),
|
||||
class: "badge bg-primary",
|
||||
target: "_blank" do %>
|
||||
<i class="fa-solid fa-image me-1"></i><%= post.class.name %> #<%= post.id %>
|
||||
|
||||
@@ -410,6 +410,299 @@ describe Domain::Fa::Job::UserPageJob do
|
||||
end
|
||||
end
|
||||
|
||||
context "a user with recent gallery submissions" do
|
||||
let(:user) { create(:domain_user_fa_user, url_name: "kutua") }
|
||||
|
||||
context "has one recent gallery submission, and user page indicates three total submissions" do
|
||||
let(:client_mock_config) do
|
||||
[
|
||||
{
|
||||
uri: "https://www.furaffinity.net/user/kutua/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents:
|
||||
SpecUtil.read_fixture_file(
|
||||
# one recent gallery submission, and three submissions indicated on user page
|
||||
"domain/fa/user_page/user_page_kutua_one_recent_three_total_gallery.html",
|
||||
),
|
||||
},
|
||||
]
|
||||
end
|
||||
|
||||
it "updates the num_submissions count" do
|
||||
expect do
|
||||
perform_now({ user: })
|
||||
user.reload
|
||||
end.to change { user.num_submissions }.from(nil).to(3)
|
||||
end
|
||||
|
||||
context "no submissions are yet known" do
|
||||
it "creates the recent submission" do
|
||||
perform_now({ user: })
|
||||
expect(user.posts.count).to eq(1)
|
||||
post = user.posts.first
|
||||
expect(post.fa_id).to eq(60_073_062)
|
||||
end
|
||||
|
||||
it "does not mark the gallery as scanned" do
|
||||
expect do
|
||||
perform_now({ user: })
|
||||
user.reload
|
||||
end.not_to change { user.scanned_gallery_at }
|
||||
end
|
||||
|
||||
it "enqueues a ScanPostJob for the recent submission" do
|
||||
perform_now({ user: })
|
||||
expect(
|
||||
SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
|
||||
).to match(
|
||||
[
|
||||
hash_including(
|
||||
post: user.posts.first,
|
||||
caused_by_entry: @log_entries[0],
|
||||
),
|
||||
],
|
||||
)
|
||||
end
|
||||
|
||||
it "enqueues a UserGalleryJob for the user" do
|
||||
perform_now({ user: })
|
||||
expect(
|
||||
SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob),
|
||||
).to match([hash_including(user:, caused_by_entry: @log_entries[0])])
|
||||
end
|
||||
end
|
||||
|
||||
context "one unlisted submission is known" do
|
||||
let!(:unseen_post_1) do
|
||||
create(
|
||||
:domain_post_fa_post,
|
||||
fa_id: 12_345,
|
||||
creator: user,
|
||||
title: "Not In The Gallery",
|
||||
)
|
||||
end
|
||||
|
||||
shared_examples "unlisted submission in state" do |state|
|
||||
context "and is in the '#{state}' state" do
|
||||
before do
|
||||
unseen_post_1.state = state
|
||||
unseen_post_1.save!
|
||||
end
|
||||
|
||||
it "enqueues a ScanPostJob only for the new gallery submission" do
|
||||
perform_now({ user: })
|
||||
expect(
|
||||
SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
|
||||
).to match(
|
||||
[
|
||||
hash_including(
|
||||
post: user.posts.find_by(fa_id: 60_073_062),
|
||||
caused_by_entry: @log_entries[0],
|
||||
),
|
||||
],
|
||||
)
|
||||
end
|
||||
|
||||
it "enqueues a UserGalleryJob for the user" do
|
||||
perform_now({ user: })
|
||||
expect(
|
||||
SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob),
|
||||
).to match(
|
||||
[hash_including(user:, caused_by_entry: @log_entries[0])],
|
||||
)
|
||||
end
|
||||
|
||||
it "does not mark the gallery as scanned" do
|
||||
perform_now({ user: })
|
||||
user.reload
|
||||
expect(user.scanned_gallery_at).to be_nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# we do not know anything about the unseen submissions, so do
|
||||
# not re-enqueue them - only enqueue those in recent gallery section
|
||||
Domain::Post::FaPost.states.keys.each do |state|
|
||||
include_examples "unlisted submission in state", state
|
||||
end
|
||||
end
|
||||
|
||||
context "both unlisted submissions are known" do
  # Two posts owned by the user that are not the recent gallery submission.
  # Together with the one recent submission created by the job they make up
  # the three submissions the enclosing context's fixture page indicates.
  let!(:unseen_post_1) do
    create(
      :domain_post_fa_post,
      fa_id: 12_345,
      creator: user,
      title: "Not In The Gallery 1",
    )
  end

  let!(:unseen_post_2) do
    create(
      :domain_post_fa_post,
      fa_id: 12_346,
      creator: user,
      title: "Not In The Gallery 2",
    )
  end

  it "enqueues a ScanPostJob for the recent submission" do
    perform_now({ user: })
    expect(
      SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
    ).to match(
      [hash_including(post: user.posts.find_by(fa_id: 60_073_062))],
    )
  end

  it "marks the gallery as scanned" do
    expect do
      perform_now({ user: })
      user.reload
    end.to change { user.scanned_gallery_at }.to be_within(3.seconds).of(
      Time.current,
    )
  end
end
|
||||
end
|
||||
|
||||
context "has one recent gallery submission, and user page indicates one total submission" do
|
||||
let(:client_mock_config) do
|
||||
[
|
||||
{
|
||||
# one recent gallery submission, and one submission indicated on user page
|
||||
uri: "https://www.furaffinity.net/user/kutua/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents:
|
||||
SpecUtil.read_fixture_file(
|
||||
"domain/fa/user_page/user_page_kutua_one_recent_one_total_gallery.html",
|
||||
),
|
||||
},
|
||||
]
|
||||
end
|
||||
|
||||
context "the submission is not yet known" do
|
||||
it "updates the num_submissions count" do
|
||||
expect do
|
||||
perform_now({ user: })
|
||||
user.reload
|
||||
end.to change { user.num_submissions }.from(nil).to(1)
|
||||
end
|
||||
|
||||
it "creates the submission" do
|
||||
expect do
|
||||
perform_now({ user: })
|
||||
user.reload
|
||||
end.to change { user.posts.count }.from(0).to(1)
|
||||
post = user.posts.first
|
||||
# only know the creator and fa_id when seen from the user page
|
||||
expect(post.fa_id).to eq(60_073_062)
|
||||
expect(post.title).to be_nil
|
||||
expect(post.description).to be_nil
|
||||
expect(post.file).to be_nil
|
||||
end
|
||||
|
||||
it "marks the gallery as scanned" do
  perform_now({ user: })
  # reload so the assertion checks the persisted value rather than only the
  # in-memory record that was handed to the job
  user.reload
  expect(user.scanned_gallery_at).to be_within(3.seconds).of(
    Time.current,
  )
end
|
||||
|
||||
it "enqueues a ScanPostJob" do
|
||||
perform_now({ user: })
|
||||
expect(
|
||||
SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
|
||||
).to match(
|
||||
[
|
||||
hash_including(
|
||||
post: user.posts.first,
|
||||
caused_by_entry: @log_entries[0],
|
||||
),
|
||||
],
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
context "and recent gallery submissions are known" do
  # The post shown in the fixture page's recent gallery section, already
  # present in the database and owned by the user.
  let!(:post) do
    create(
      :domain_post_fa_post,
      fa_id: 60_073_062,
      state: "ok",
      creator: user,
    )
  end

  # A known recent post that is not in the ok state gets a forced rescan.
  shared_examples "force enqueues a ScanPostJob when post is in state" do |state|
    it "force enqueues a ScanPostJob when a post is in the '#{state}' state" do
      post.state = state
      post.save!
      perform_now({ user: })
      expect(
        SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
      ).to match([hash_including(post:, force_scan: true)])
    end
  end

  # A known recent post whose file is not in the ok state also gets a forced
  # ScanPostJob (the assertion is on ScanPostJob, not ScanFileJob).
  shared_examples "force enqueues a ScanPostJob when post file is in state" do |state|
    it "force enqueues a ScanPostJob when a post's file is in the '#{state}' state" do
      file = create(:domain_post_file, post:)
      file.state = state
      file.save!

      perform_now({ user: })
      expect(
        SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
      ).to match([hash_including(post:, force_scan: true)])
    end
  end

  %w[removed scan_error file_error].each do |state|
    include_examples(
      "force enqueues a ScanPostJob when post is in state",
      state,
    )
  end

  %w[file_error retryable_error terminal_error removed].each do |state|
    include_examples(
      "force enqueues a ScanPostJob when post file is in state",
      state,
    )
  end

  it "force enqueues a ScanFileJob when a post's file is in the 'pending' state and has a url" do
    post.state_ok!
    post.save!

    file =
      create(
        :domain_post_file,
        post:,
        url_str: "https://example.com/file.png",
      )
    file.state_pending!
    file.save!

    perform_now({ user: })
    expect(
      SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanFileJob),
    ).to match([hash_including(post_file: file)])
  end

  it "marks the gallery as scanned" do
    perform_now({ user: })
    # reload so the assertion checks the persisted value rather than only
    # the in-memory record that was handed to the job
    user.reload
    expect(user.scanned_gallery_at).to be_within(3.seconds).of(
      Time.current,
    )
  end
end
|
||||
end
|
||||
end
|
||||
|
||||
context "the user has no recent favories" do
|
||||
let(:client_mock_config) do
|
||||
[
|
||||
|
||||
@@ -513,6 +513,32 @@ describe Domain::Fa::Parser::Page do
|
||||
assert_equal 14, up.num_watching
|
||||
end
|
||||
|
||||
context "recent gallery submission parsing" do
  # Parses the fixture at +relative_path+ (resolved against Rails.root),
  # asserts the page type via :probably_user_page?, and returns the parser's
  # user page object.
  def user_page_from_fixture(relative_path)
    page = get_parser_at(Rails.root.join(relative_path))
    assert_page_type page, :probably_user_page?
    page.user_page
  end

  it "works when the user has recent gallery submissions" do
    user_page =
      user_page_from_fixture(
        "test/fixtures/files/domain/fa/user_page/user_page_kutua_one_recent_one_total_gallery.html",
      )
    expect(user_page.recent_gallery_fa_ids).to eq([60_073_062])
  end

  it "works when the user has no submissions" do
    user_page =
      user_page_from_fixture(
        "test/fixtures/files/domain/fa/user_page/user_page_angelpawqt.html",
      )
    expect(user_page.recent_gallery_fa_ids).to eq([])
  end
end
|
||||
|
||||
def get_parser(file, require_logged_in: true)
|
||||
path = File.join("domain/fa/parser/redux", file)
|
||||
get_parser_at(path, require_logged_in:)
|
||||
|
||||
913
test/fixtures/files/domain/fa/user_page/user_page_kutua_one_recent_one_total_gallery.html
vendored
Normal file
913
test/fixtures/files/domain/fa/user_page/user_page_kutua_one_recent_one_total_gallery.html
vendored
Normal file
File diff suppressed because one or more lines are too long
913
test/fixtures/files/domain/fa/user_page/user_page_kutua_one_recent_three_total_gallery.html
vendored
Normal file
913
test/fixtures/files/domain/fa/user_page/user_page_kutua_one_recent_three_total_gallery.html
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user