fill gaps in fa browse page jobs
This commit is contained in:
@@ -40,11 +40,24 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
enqueue_posts_pri:,
|
||||
enqueue_page_scan: true,
|
||||
enqueue_gallery_scan: true,
|
||||
page_desc: nil
|
||||
page_desc: nil,
|
||||
fill_id_gaps: false
|
||||
)
|
||||
fatal_error("not a listings page") unless page.probably_listings_page?
|
||||
submissions = page.submissions_parsed
|
||||
|
||||
# Accumulates ids that lie strictly inside the id range of the parsed
# submissions but are not themselves among those submissions.
fa_ids_to_manually_enqueue = []
|
||||
# Gap detection is opt-in via fill_id_gaps and needs at least one parsed
# submission to establish a min/max id range.
if fill_id_gaps && submissions.any?
|
||||
fa_ids = submissions.map(&:id)
|
||||
max_fa_id, min_fa_id = fa_ids.max, fa_ids.min
|
||||
# sanity check so we don't enqueue too many post jobs
|
||||
# When the span between the largest and smallest id exceeds 250, nothing is
# collected at all (the whole page is skipped, not just truncated).
if max_fa_id - min_fa_id <= 250
|
||||
(min_fa_id..max_fa_id).each do |fa_id|
|
||||
# Keep only ids that were not present among the parsed submissions.
fa_ids_to_manually_enqueue << fa_id unless fa_ids.include?(fa_id)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
page_desc = if page_desc
|
||||
"page #{page_desc.to_s.bold}"
|
||||
else
|
||||
@@ -80,6 +93,15 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
end
|
||||
end
|
||||
|
||||
# For every id in fa_ids_to_manually_enqueue, create a post row if none exists
# yet and enqueue a post scan for it at the enqueue_posts_pri priority.
fa_ids_to_manually_enqueue.each do |fa_id|
|
||||
# when filling gaps, only enqueue if the post wasn't found
|
||||
post = Domain::Fa::Post.find_or_initialize_by(fa_id: fa_id)
|
||||
# Posts that already exist are left untouched and no scan is enqueued here.
# NOTE(review): the lookup and the save! below are separate steps; if two
# jobs can process overlapping id ranges concurrently, both may see a new
# record — confirm a uniqueness constraint on fa_id covers this.
if post.new_record?
|
||||
# The record is saved before being handed to enqueue_post_scan.
post.save!
|
||||
enqueue_post_scan(post, caused_by_entry, enqueue_posts_pri)
|
||||
end
|
||||
end
|
||||
|
||||
logger.info "#{page_desc} has #{submissions.count.to_s.bold} posts, " +
|
||||
"#{listing_page_stats.new_seen.to_s.bold} new"
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
|
||||
:browse_page, page, log_entry,
|
||||
enqueue_posts_pri: :high,
|
||||
page_desc: "Browse@#{@page_number}",
|
||||
fill_id_gaps: true,
|
||||
)
|
||||
|
||||
@total_num_new_posts_seen += listing_page_stats.new_seen
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Gather and record all the users that follow a user (not who this user follows)
|
||||
# Gather and record all the users that this user follows
|
||||
# This will be used to create an index of follower -> followed
|
||||
# of a specific user, for recommender training
|
||||
class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
|
||||
@@ -90,7 +90,8 @@ describe Domain::Fa::Job::BrowsePageJob do
|
||||
|
||||
it "enqueues one" do
|
||||
expect do
|
||||
described_class.perform_later({})
|
||||
ret = described_class.perform_later({})
|
||||
expect(ret).not_to be(Exception)
|
||||
end.to change { GoodJob::Job.count }.by(1)
|
||||
end
|
||||
|
||||
@@ -155,7 +156,8 @@ describe Domain::Fa::Job::BrowsePageJob do
|
||||
|
||||
it "creates a new post" do
|
||||
expect {
|
||||
described_class.perform_now({})
|
||||
ret = described_class.perform_now({})
|
||||
expect(ret).not_to be(Exception)
|
||||
}.to change {
|
||||
Domain::Fa::Post.count
|
||||
}.by(1)
|
||||
@@ -165,7 +167,7 @@ describe Domain::Fa::Job::BrowsePageJob do
|
||||
expect {
|
||||
described_class.perform_now({})
|
||||
}.to change {
|
||||
Domain::Fa::Post.count
|
||||
Domain::Fa::User.count
|
||||
}.by(1)
|
||||
end
|
||||
|
||||
@@ -182,6 +184,65 @@ describe Domain::Fa::Job::BrowsePageJob do
|
||||
end
|
||||
end
|
||||
|
||||
# Exercises the gap-filling path: the first browse page fixture is named
# "two submissions" and the ids asserted below (…268 and …266) leave a single
# id (…267) between them.
context "with gaps between posts" do
|
||||
include_context "user and post getters"
|
||||
# Mocked HTTP responses, in request order; the value is referenced below as
# log_entries[0] for the first browse page request.
let! :log_entries do
|
||||
SpecUtil.init_http_client_mock(
|
||||
http_client_mock, [
|
||||
# First browse page, served from the two-submission fixture.
{
|
||||
uri: "https://www.furaffinity.net/browse/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_two_submissions.html"),
|
||||
caused_by_entry_idx: nil,
|
||||
},
|
||||
# Second browse page, served from the no-submissions fixture;
# caused_by_entry_idx points at the first entry in this list.
{
|
||||
uri: "https://www.furaffinity.net/browse/2/",
|
||||
status_code: 200,
|
||||
content_type: "text/html",
|
||||
contents: SpecUtil.read_fixture_file("domain/fa/job/browse_page_no_submissions.html"),
|
||||
caused_by_entry_idx: 0,
|
||||
},
|
||||
]
|
||||
)
|
||||
end
|
||||
|
||||
it "enqueues scan post jobs" do
|
||||
# NOTE(review): `be(Exception)` is RSpec's identity matcher, so this only
# fails if the return value is the Exception class object itself; an
# exception *instance* would still pass. `be_a(Exception)` may be what was
# intended — confirm.
expect(described_class.perform_now({})).to_not be(Exception)
|
||||
|
||||
# post1 and post3 are expected to come from the listing page; post2 is the
# id between them.
post1 = Domain::Fa::Post.find_by(fa_id: 51509268)
|
||||
post2 = Domain::Fa::Post.find_by(fa_id: 51509267)
|
||||
post3 = Domain::Fa::Post.find_by(fa_id: 51509266)
|
||||
|
||||
expect(post1).to_not be_nil
|
||||
expect(post1.creator).to eq(user.call)
|
||||
|
||||
# The in-between post must exist but carry no creator or title yet.
expect(post2).to_not be_nil
|
||||
expect(post2.creator).to be_nil
|
||||
expect(post2.title).to be_nil
|
||||
|
||||
expect(post3).to_not be_nil
|
||||
expect(post3.creator).to eq(user.call)
|
||||
|
||||
# Expected enqueue order: post1, post3, then post2 last, each caused by the
# first browse page log entry.
expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::ScanPostJob)).to match(
|
||||
[
|
||||
including(args: [{
|
||||
post: post1,
|
||||
caused_by_entry: log_entries[0],
|
||||
}]),
|
||||
including(args: [{
|
||||
post: post3,
|
||||
caused_by_entry: log_entries[0],
|
||||
}]),
|
||||
including(args: [{
|
||||
post: post2,
|
||||
caused_by_entry: log_entries[0],
|
||||
}]),
|
||||
]
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
context "with one seen post" do
|
||||
include_context "user and post getters"
|
||||
include_context "create user and post"
|
||||
@@ -200,7 +261,9 @@ describe Domain::Fa::Job::BrowsePageJob do
|
||||
end
|
||||
|
||||
context "and nothing yet scanned" do
|
||||
before { described_class.perform_now({}) }
|
||||
before {
|
||||
expect(described_class.perform_now({})).to_not be(Exception)
|
||||
}
|
||||
include_examples "enqueue post scan", true
|
||||
include_examples "enqueue file scan", false
|
||||
include_examples "enqueue user page scan", true
|
||||
|
||||
1317
test/fixtures/files/domain/fa/job/browse_page_two_submissions.html
vendored
Normal file
1317
test/fixtures/files/domain/fa/job/browse_page_two_submissions.html
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user