stop scanning favs pages if no new favs are found

This commit is contained in:
Dylan Knutson
2023-08-18 18:10:29 -07:00
parent 4ce8c197c3
commit b7a14b7c58
2 changed files with 47 additions and 13 deletions

View File

@@ -10,6 +10,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
def perform(args)
@first_job_entry = nil
init_from_args!(args, build_user: false)
@full_scan = !!args[:full_scan]
@user || begin
Domain::Fa::Job::UserPageJob.perform_later({
url_name: args[:url_name],
@@ -25,10 +26,22 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
@page_id = nil
@total_items_seen = 0
@seen_post_ids = Set.new
existing_faved_ids = Set.new(@user.fav_post_joins.pluck(:post_id))
while true
break if scan_page == :break
# bail out at 100,000 users
if !@full_scan
new_favs = @last_page_post_ids - existing_faved_ids
if new_favs.empty?
@user.scanned_favs_at = Time.now
@user.save!
logger.info "no new favs, stopping favs scan early"
return
end
end
# bail out at 100,000 items
break if @page_number > 500
@page_number += 1
end
@@ -39,9 +52,8 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
"add #{to_add.size.to_s.bold} favs, " +
"remove #{to_remove.size.to_s.bold} favs"
}) do
existing_ids = Set.new(@user.fav_post_joins.pluck(:post_id))
to_remove = existing_ids - @seen_post_ids
to_add = @seen_post_ids - existing_ids
to_remove = existing_faved_ids - @seen_post_ids
to_add = @seen_post_ids - existing_faved_ids
end
measure(proc {
@@ -68,14 +80,6 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
@user.save!
end
end
if @created_user
logger.info("user was new record, enqueue page scan job")
Domain::Fa::Job::UserPageJob.perform_later({
user: @user,
caused_by_entry: best_caused_by_entry,
})
end
end
private
@@ -105,7 +109,7 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
@total_items_seen += submissions.length
posts_to_create_hashes = []
followed_user_ids = measure(proc {
measure(proc {
"page #{@page_number.to_s.bold} - " +
"#{submissions.length.to_s.bold} posts on page, " +
"created #{posts_to_create_hashes.size.to_s.bold}"
@@ -135,11 +139,14 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
hash[:fa_id]
end)
@last_page_post_ids = Set.new
created_post_ids.each do |id|
@seen_post_ids.add(id)
@last_page_post_ids.add(id)
end
existing_fa_id_to_post_id.values.each do |id|
@seen_post_ids.add(id)
@last_page_post_ids.add(id)
end
end

View File

@@ -150,5 +150,32 @@ describe Domain::Fa::Job::FavsJob do
expect(p1.faved_by).to eq([user])
end
end
# Covers a repeat run of the job for a user that an earlier run has already processed.
context "the user has previously been scanned" do
include_context "user exists"
before do
# First run, executed before each example, so the example's own run is a re-scan.
perform_now({ url_name: "zzreg" })
# Backdate the timestamp so the examples can detect that the second run refreshed it.
user.update_attribute(:scanned_favs_at, 100.days.ago)
end
it "stops scanning after the first page" do
# Re-initialise the HTTP mock with only the first entry of client_mock_config.
# NOTE(review): presumably a request beyond that entry would be unmocked and
# fail the example, which is what proves the early stop — confirm against SpecUtil.
SpecUtil.init_http_client_mock(
http_client_mock, client_mock_config[0...1]
)
perform_now({ url_name: "zzreg" })
user.reload
# The fav count is expected to be 5 after the shortened run.
expect(user.fav_posts.count).to eq(5)
# The shortened run must still update scanned_favs_at to (approximately) now.
expect(user.scanned_favs_at).to be_within(1.second).of(Time.now)
end
it "scans all pages if full_scan is true" do
# Re-initialise the HTTP mock with every entry of client_mock_config for the full-scan run.
SpecUtil.init_http_client_mock(http_client_mock, client_mock_config)
perform_now({ url_name: "zzreg", full_scan: true })
user.reload
# Same expectations as the shortened run: 5 favs and a refreshed timestamp.
expect(user.fav_posts.count).to eq(5)
expect(user.scanned_favs_at).to be_within(1.second).of(Time.now)
end
end
end
end