Files
redux-scraper/spec/jobs/domain/fa/job/user_page_job_spec.rb
2025-06-17 05:56:11 +00:00

867 lines
26 KiB
Ruby

# typed: false
require "rails_helper"
describe Domain::Fa::Job::UserPageJob do
# Test double that stands in for the scraper's HTTP client in every example.
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }

# Before each example: hand the double to the client factory, then prime it
# from the enclosing context's `client_mock_config`. Whatever the helper
# returns is kept in @log_entries; examples use @log_entries[0] as the
# expected `caused_by_entry` of enqueued jobs.
before do
  Scraper::ClientFactory.http_client_mock = http_client_mock
  mock_setup_args = [http_client_mock, client_mock_config]
  @log_entries = HttpClientMockHelpers.init_http_client_mock(*mock_setup_args)
end
context "scanning a normal user" do
  # The mocked client serves the meesh user-page fixture for a single URL.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/meesh/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file("domain/fa/job/user_page_meesh.html"),
      },
    ]
  end

  # Lazy lookup: resolved on first use, which in these examples is after the
  # job has run (the "user exists" context below overrides it with let!).
  let(:user) { Domain::User::FaUser.find_by(url_name: "meesh") }

  it "records the right stats" do
    perform_now({ url_name: "meesh" })
    expect(user).not_to be_nil

    # Attribute => value expected after scanning the fixture page.
    expected_stats = {
      num_pageviews: 3_061_083,
      num_submissions: 1590,
      num_favorites: 1_422_886,
      num_comments_recieved: 47_931,
      num_comments_given: 17_741,
      num_journals: 5,
      account_status: "active",
    }
    expected_stats.each do |attribute, value|
      expect(user.public_send(attribute)).to eq(value)
    end
  end

  it "enqueues a favs job scan" do
    perform_now({ url_name: "meesh" })
    favs_job_args = SpecUtil.enqueued_job_args(Domain::Fa::Job::FavsJob)
    expect(favs_job_args).to match(
      [hash_including(user:, caused_by_entry: @log_entries[0])],
    )
  end

  context "the user does not yet exist" do
    it "the user is created" do
      expect { perform_now({ url_name: "meesh" }) }.to change {
        Domain::User::FaUser.find_by(url_name: "meesh")
      }.from(nil).to(be_present)
    end

    it "enqueues a user avatar job" do
      perform_now({ url_name: "meesh" })
      expect(user).not_to be_nil

      user_avatar = user.avatar
      expect(user_avatar).not_to be_nil
      expect(user_avatar.url_str).to eq(
        "https://a.furaffinity.net/1635789297/meesh.gif",
      )
      expect(user_avatar.state).to eq("pending")

      avatar_job_args =
        SpecUtil.enqueued_job_args(Domain::Fa::Job::UserAvatarJob)
      expect(avatar_job_args).to match(
        [hash_including(avatar: user_avatar, caused_by_entry: @log_entries[0])],
      )
    end

    it "enqueues a gallery job" do
      perform_now({ url_name: "meesh" })
      expect(user).not_to be_nil

      gallery_job_args =
        SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob)
      expect(gallery_job_args).to match(
        [hash_including(user:, caused_by_entry: @log_entries[0])],
      )
    end
  end

  context "the user exists" do
    # Eagerly created so the record is present before the job runs.
    let!(:user) { create(:domain_user_fa_user, url_name: "meesh") }

    context "gallery scan was recently performed" do
      before { user.update!(scanned_gallery_at: 1.day.ago) }

      it "does not enqueue a gallery job" do
        perform_now({ url_name: "meesh" })
        gallery_job_args =
          SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob)
        expect(gallery_job_args).to be_empty
      end
    end

    context "the gallery scan was not recently performed" do
      before { user.update!(scanned_gallery_at: 10.years.ago) }

      it "enqueues a gallery job" do
        perform_now({ url_name: "meesh" })
        gallery_job_args =
          SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob)
        expect(gallery_job_args).to match(
          [hash_including(user:, caused_by_entry: @log_entries[0])],
        )
      end
    end
  end
end
context "all watched users fit in the recently watched section" do
  # The mocked client serves the llllvi "few watched users" fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/llllvi/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(
          "domain/fa/user_page/user_page_llllvi_few_watched_users.html",
        ),
      },
    ]
  end

  # Lazy lookup of the record for the scanned url_name.
  let(:user) { Domain::User::FaUser.find_by(url_name: "llllvi") }

  it "does not enqueue a follows job" do
    perform_now({ url_name: "llllvi" })
    follows_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::UserFollowsJob)
    expect(follows_job_args).to be_empty
  end

  it "adds watched users to the user's followed_users" do
    perform_now({ url_name: "llllvi" })
    expect(user.followed_users.count).to eq(6)

    expected_url_names = %w[
      koul
      artii
      aquadragon35
      incredibleediblecalico
      nummynumz
      fidchellvore
    ]
    actual_url_names = user.followed_users.map(&:url_name)
    expect(actual_url_names).to match_array(expected_url_names)
  end

  it "marks scanned_follows_at as recent" do
    perform_now({ url_name: "llllvi" })
    expect(user.scanned_follows_at).to be_within(3.seconds).of(Time.current)
  end

  it "does not add any users to followed_by_users" do
    perform_now({ url_name: "llllvi" })
    expect(user.followed_by_users.count).to eq(0)
  end

  it "works when the user already has some followed users" do
    # Pre-existing record with one follow already stored before the scan.
    existing_user = create(:domain_user_fa_user, url_name: "llllvi")
    existing_user.followed_users << create(:domain_user_fa_user, url_name: "koul")

    perform_now({ url_name: "llllvi" })

    expect(existing_user.followed_users.count).to eq(6)
    expect(existing_user.followed_by_users.count).to eq(0)
  end
end
context "the user has no submissions" do
  # The mocked client serves the sealingthedeal "no submissions" fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/sealingthedeal/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(
          "domain/fa/user_page/user_page_sealingthedeal_no_submissions.html",
        ),
      },
    ]
  end

  it "records the right number of submissions" do
    perform_now({ url_name: "sealingthedeal" })
    scanned_user = Domain::User::FaUser.find_by(url_name: "sealingthedeal")
    expect(scanned_user).not_to be_nil
    expect(scanned_user.num_submissions).to eq(0)
  end

  it "does not enqueue a gallery job" do
    perform_now({ url_name: "sealingthedeal" })
    gallery_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob)
    expect(gallery_job_args).to be_empty
  end
end
context "the user has a single scrap submission and few watchers" do
  # The mocked client serves the zzreg "one scrap submission" fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/zzreg/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(
          "domain/fa/user_page/user_page_zzreg_one_scrap_submission.html",
        ),
      },
    ]
  end

  # Lazy lookup of the record for the scanned url_name.
  let(:user) { Domain::User::FaUser.find_by(url_name: "zzreg") }

  it "records the right number of submissions" do
    perform_now({ url_name: "zzreg" })
    expect(user.num_submissions).to eq(1)
  end

  it "enqueues a gallery job" do
    perform_now({ url_name: "zzreg" })
    gallery_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob)
    expect(gallery_job_args).to match(
      [hash_including(user:, caused_by_entry: @log_entries[0])],
    )
  end

  it "adds watchers to followed_by_users" do
    perform_now({ url_name: "zzreg" })
    expect(user.followed_by_users.count).to eq(5)

    expected_watchers = %w[noneedtothankme karenpls azureparagon zarmir iginger]
    actual_watchers = user.followed_by_users.map(&:url_name)
    expect(actual_watchers).to match_array(expected_watchers)
  end

  it "works when the user already has some followed_by_users" do
    # Pre-existing record with one watcher already stored before the scan.
    existing_user = create(:domain_user_fa_user, url_name: "zzreg")
    existing_user.followed_by_users <<
      create(:domain_user_fa_user, url_name: "noneedtothankme")

    perform_now({ url_name: "zzreg" })

    expect(existing_user.followed_by_users.count).to eq(5)
    expect(existing_user.followed_users.count).to eq(0)
  end

  it "marks scanned_followed_by_at as recent" do
    perform_now({ url_name: "zzreg" })
    expect(user.scanned_followed_by_at).to be_within(3.seconds).of(Time.current)
  end

  it "does not mark scanned_follows_at as recent" do
    perform_now({ url_name: "zzreg" })
    expect(user.scanned_follows_at).to be_nil
  end
end
context "the user has more than threshold watched users and they are all known" do
  # Fixture notes - this user page indicates:
  # - 14 watched
  # - 25 watchers
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/koul/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(
          "domain/fa/user_page/user_page_koul_over_threshold_watchers.html",
        ),
      },
    ]
  end

  # Created lazily on first reference inside each example.
  let(:user) { create(:domain_user_fa_user, url_name: "koul") }

  # The 12 url_names the examples below pre-register as already followed.
  let(:recent_watched_user_url_names) do
    %w[
      spiritinchoco
      syrrvya
      knyazkolosok
      waspsalad
      moth-sprout
      suzamuri
      2d10
      lemithecat
      fr95
      floki.midnight
      dizzy.
      kilobytefox
    ]
  end

  it "does not mark scanned_follows_at if no followed users are known" do
    expect {
      perform_now({ user: })
      user.reload
    }.not_to change { user.scanned_follows_at }

    follows_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::UserFollowsJob)
    expect(follows_job_args).to match(
      [hash_including(user:, caused_by_entry: @log_entries[0])],
    )
  end

  it "does not mark scanned_follows_at if page indicates more followed than are recorded" do
    # all in recent are marked as followed
    recent_watched_user_url_names.each do |watched_name|
      user.followed_users <<
        create(:domain_user_fa_user, url_name: watched_name, name: watched_name)
    end
    expect(user.followed_users.count).to eq(12)

    # but the page indicates user watching 14
    expect {
      perform_now({ user: })
      user.reload
    }.not_to change { user.scanned_follows_at }
    expect(user.followed_users.count).to eq(12)

    follows_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::UserFollowsJob)
    expect(follows_job_args).to match(
      [hash_including(user:, caused_by_entry: @log_entries[0])],
    )
  end

  it "marks scanned_follows_at as recent if all followed users are known and number of followed matches page" do
    # The 12 recent names plus two extras bring the recorded total to 14.
    all_followed_names = recent_watched_user_url_names + %w[test1 test2]
    all_followed_names.each do |followed_name|
      user.followed_users << create(:domain_user_fa_user, url_name: followed_name)
    end
    expect(user.followed_users.count).to eq(14)

    expect {
      perform_now({ user: })
      user.reload
    }.to change { user.scanned_follows_at }.to(
      be_within(3.seconds).of(Time.current),
    )

    follows_job_args =
      SpecUtil.enqueued_job_args(Domain::Fa::Job::UserFollowsJob)
    expect(follows_job_args).to be_empty
  end
end
context "the user is watched by more users than the threshold" do
  # The mocked client serves the koul "over threshold watchers" fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/koul/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(
          "domain/fa/user_page/user_page_koul_over_threshold_watchers.html",
        ),
      },
    ]
  end

  # Created lazily on first reference inside each example.
  let(:user) { create(:domain_user_fa_user, url_name: "koul") }

  # The 12 url_names the second example pre-registers as watchers.
  let(:watched_by_user_url_names) do
    %w[
      skifmutt
      k92
      erri49
      spaghetti779
      nonuri
      mkyosh
      mystpaww
      retrorinx
      aurorethedire
      rasssss
      coyotesolo
      commissarisador
    ]
  end

  it "does not mark scanned_followed_by_at as recent if over threshold" do
    expect {
      perform_now({ user: })
      user.reload
    }.not_to change { user.scanned_followed_by_at }
  end

  it "marks scanned_followed_by_at as recent if all watched by users are known and number of watched by matches page" do
    # 25 recorded, so make 25-12 more on top of the 12 named watchers
    filler_names = Array.new(25 - 12) { |idx| "test#{idx}" }
    (watched_by_user_url_names + filler_names).each do |watcher_name|
      user.followed_by_users << create(:domain_user_fa_user, url_name: watcher_name)
    end

    expect {
      perform_now({ user: })
      user.reload
    }.to change { user.scanned_followed_by_at }.to(
      be_within(3.seconds).of(Time.current),
    )
  end
end
# Examples covering how the job treats the submissions shown on the user page:
# which posts get created, which scan jobs get enqueued, and when
# scanned_gallery_at is set.
context "a user with recent gallery submissions" do
  # Created lazily on first reference inside each example.
  let(:user) { create(:domain_user_fa_user, url_name: "kutua") }

  context "has one recent gallery submission, and user page indicates three total submissions" do
    let(:client_mock_config) do
      [
        {
          uri: "https://www.furaffinity.net/user/kutua/",
          status_code: 200,
          content_type: "text/html",
          contents:
            SpecUtil.read_fixture_file(
              # one recent gallery submission, and three submissions indicated on user page
              "domain/fa/user_page/user_page_kutua_one_recent_three_total_gallery.html",
            ),
        },
      ]
    end

    it "updates the num_submissions count" do
      expect do
        perform_now({ user: })
        user.reload
      end.to change { user.num_submissions }.from(nil).to(3)
    end

    context "no submissions are yet known" do
      it "creates the recent submission" do
        perform_now({ user: })
        expect(user.posts.count).to eq(1)
        post = user.posts.first
        expect(post.fa_id).to eq(60_073_062)
      end

      it "does not mark the gallery as scanned" do
        expect do
          perform_now({ user: })
          user.reload
        end.not_to change { user.scanned_gallery_at }
      end

      it "enqueues a ScanPostJob for the recent submission" do
        perform_now({ user: })
        expect(
          SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
        ).to match(
          [
            hash_including(
              post: user.posts.first,
              caused_by_entry: @log_entries[0],
            ),
          ],
        )
      end

      it "enqueues a UserGalleryJob for the user" do
        perform_now({ user: })
        expect(
          SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob),
        ).to match([hash_including(user:, caused_by_entry: @log_entries[0])])
      end
    end

    context "one unlisted submission is known" do
      # A post by this user whose fa_id differs from the one asserted for the
      # fixture's recent submission (60_073_062).
      let!(:unseen_post_1) do
        create(
          :domain_post_fa_post,
          fa_id: 12_345,
          creator: user,
          title: "Not In The Gallery",
        )
      end

      # Each inclusion wraps its examples in its own nested context, so the
      # `before` that sets the state does not leak between states.
      shared_examples "unlisted submission in state" do |state|
        context "and is in the '#{state}' state" do
          before do
            unseen_post_1.state = state
            unseen_post_1.save!
          end

          it "enqueues a ScanPostJob only for the new gallery submission" do
            perform_now({ user: })
            expect(
              SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
            ).to match(
              [
                hash_including(
                  post: user.posts.find_by(fa_id: 60_073_062),
                  caused_by_entry: @log_entries[0],
                ),
              ],
            )
          end

          it "enqueues a UserGalleryJob for the user" do
            perform_now({ user: })
            expect(
              SpecUtil.enqueued_job_args(Domain::Fa::Job::UserGalleryJob),
            ).to match(
              [hash_including(user:, caused_by_entry: @log_entries[0])],
            )
          end

          it "does not mark the gallery as scanned" do
            perform_now({ user: })
            user.reload
            expect(user.scanned_gallery_at).to be_nil
          end
        end
      end

      # we do not know anything about the unseen submissions, so do
      # not re-enqueue them - only enqueue those in recent gallery section
      Domain::Post::FaPost.states.keys.each do |state|
        include_examples "unlisted submission in state", state
      end
    end

    context "all two unlisted submissions are known" do
      # Two known posts plus the one recent submission add up to the three
      # submissions the page indicates.
      let!(:unseen_post_1) do
        create(
          :domain_post_fa_post,
          fa_id: 12_345,
          creator: user,
          title: "Not In The Gallery 1",
        )
      end
      let!(:unseen_post_2) do
        create(
          :domain_post_fa_post,
          fa_id: 12_346,
          creator: user,
          title: "Not In The Gallery 2",
        )
      end

      it "enqueues a ScanPostJob for the recent submission" do
        perform_now({ user: })
        expect(
          SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
        ).to match(
          [hash_including(post: user.posts.find_by(fa_id: 60_073_062))],
        )
      end

      it "marks the gallery as scanned" do
        expect do
          perform_now({ user: })
          user.reload
        end.to change { user.scanned_gallery_at }.to be_within(3.seconds).of(
          Time.current,
        )
      end
    end
  end

  context "has one recent gallery submission, and user page indicates one total submission" do
    let(:client_mock_config) do
      [
        {
          # one recent gallery submission, and one submission indicated on user page
          uri: "https://www.furaffinity.net/user/kutua/",
          status_code: 200,
          content_type: "text/html",
          contents:
            SpecUtil.read_fixture_file(
              "domain/fa/user_page/user_page_kutua_one_recent_one_total_gallery.html",
            ),
        },
      ]
    end

    context "the submission is not yet known" do
      it "updates the num_submissions count" do
        expect do
          perform_now({ user: })
          user.reload
        end.to change { user.num_submissions }.from(nil).to(1)
      end

      it "creates the submission" do
        expect do
          perform_now({ user: })
          user.reload
        end.to change { user.posts.count }.from(0).to(1)
        post = user.posts.first
        # only know the creator and fa_id when seen from the user page
        expect(post.fa_id).to eq(60_073_062)
        expect(post.title).to be_nil
        expect(post.description).to be_nil
        expect(post.file).to be_nil
      end

      it "marks the gallery as scanned" do
        perform_now({ user: })
        expect(user.scanned_gallery_at).to be_within(3.seconds).of(
          Time.current,
        )
      end

      it "enqueues a ScanPostJob" do
        perform_now({ user: })
        expect(
          SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
        ).to match(
          [
            hash_including(
              post: user.posts.first,
              caused_by_entry: @log_entries[0],
            ),
          ],
        )
      end
    end

    context "and recent gallery submissions are known" do
      # The fixture's recent submission, already recorded in the "ok" state.
      let!(:post) do
        create(
          :domain_post_fa_post,
          fa_id: 60_073_062,
          state: "ok",
          creator: user,
        )
      end

      shared_examples "force enqueues a ScanPostJob when post is in state" do |state|
        it "force enqueues a ScanPostJob when a post is in the '#{state}' state" do
          post.state = state
          post.save!
          perform_now({ user: })
          expect(
            SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
          ).to match([hash_including(post:, force_scan: true)])
        end
      end

      # Named after the job actually asserted on: a post file in one of these
      # states results in a forced ScanPostJob (not a ScanFileJob).
      shared_examples "force enqueues a ScanPostJob when post file is in state" do |state|
        it "force enqueues a ScanPostJob when a post's file is in the '#{state}' state" do
          file = create(:domain_post_file, post:)
          file.state = state
          file.save!
          perform_now({ user: })
          expect(
            SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanPostJob),
          ).to match([hash_including(post:, force_scan: true)])
        end
      end

      %w[removed scan_error file_error].each do |state|
        include_examples(
          "force enqueues a ScanPostJob when post is in state",
          state,
        )
      end

      %w[file_error retryable_error terminal_error removed].each do |state|
        include_examples(
          "force enqueues a ScanPostJob when post file is in state",
          state,
        )
      end

      it "force enqueues a ScanFileJob when a post's file is in the 'pending' state and has a url" do
        post.state_ok!
        post.save!
        file =
          create(
            :domain_post_file,
            post:,
            url_str: "https://example.com/file.png",
          )
        file.state_pending!
        file.save!
        perform_now({ user: })
        expect(
          SpecUtil.enqueued_job_args(Domain::Fa::Job::ScanFileJob),
        ).to match([hash_including(post_file: file)])
      end

      it "marks the gallery as scanned" do
        perform_now({ user: })
        expect(user.scanned_gallery_at).to be_within(3.seconds).of(
          Time.current,
        )
      end
    end
  end
end
# A user page with no recent favorites: no FavsJob is needed and the favs
# scan is recorded as done.
context "the user has no recent favorites" do
  # The mocked client serves the angu "no recent favorites" fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/angu/",
        status_code: 200,
        content_type: "text/html",
        contents:
          SpecUtil.read_fixture_file(
            "domain/fa/user_page/user_page_angu_no_recent_favorites.html",
          ),
      },
    ]
  end

  # Lazy lookup of the record for the scanned url_name.
  let(:user) { Domain::User::FaUser.find_by(url_name: "angu") }

  it "does not enqueue a favs job" do
    perform_now({ url_name: "angu" })
    expect(SpecUtil.enqueued_job_args(Domain::Fa::Job::FavsJob)).to be_empty
  end

  it "marks scanned_favs_at as recent" do
    perform_now({ url_name: "angu" })
    expect(user.scanned_favs_at).to be_within(3.seconds).of(Time.current)
  end
end
context "all favorites fit in the recently faved section" do
  # The mocked client serves the lleaued "few recent favorites" fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/lleaued/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(
          "domain/fa/user_page/user_page_lleaued_few_recent_favorites.html",
        ),
      },
    ]
  end

  # Lazy lookup of the record for the scanned url_name.
  let(:user) { Domain::User::FaUser.find_by(url_name: "lleaued") }

  it "does not enqueue a favs job" do
    perform_now({ url_name: "lleaued" })
    favs_job_args = SpecUtil.enqueued_job_args(Domain::Fa::Job::FavsJob)
    expect(favs_job_args).to be_empty
  end

  it "marks scanned_favs_at as recent" do
    perform_now({ url_name: "lleaued" })
    expect(user.scanned_favs_at).to be_within(3.seconds).of(Time.current)
  end

  it "adds posts to the user's favorites" do
    perform_now({ url_name: "lleaued" })
    expect(user.faved_posts.count).to eq(1)
    faved_fa_ids = user.faved_posts.map(&:fa_id)
    expect(faved_fa_ids).to eq([51_355_154])
  end

  it "works when the user already has some favorites" do
    # Pre-existing record that already favorited the post the page lists.
    existing_user = create(:domain_user_fa_user, url_name: "lleaued")
    faved_post = create(:domain_post_fa_post, fa_id: 51_355_154)
    existing_user.user_post_favs.create!(post_id: faved_post.id)

    perform_now({ url_name: "lleaued" })

    expect(existing_user.faved_posts.count).to eq(1)
    faved_fa_ids = existing_user.faved_posts.map(&:fa_id)
    expect(faved_fa_ids).to eq([51_355_154])
  end
end
context "with a user with buggy favcount" do
  # The mocked client serves the marsdust fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/marsdust/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file("domain/fa/job/user_page_marsdust.html"),
      },
    ]
  end

  it "records the right fav count" do
    perform_now({ url_name: "marsdust" })

    scanned_user = Domain::User::FaUser.find_by(url_name: "marsdust")
    expect(scanned_user).not_to be_nil

    avatar_url = scanned_user.avatar.url_str
    expect(avatar_url).to eq("https://a.furaffinity.net/1424255659/marsdust.gif")
    expect(scanned_user.num_favorites).to eq(0)
  end
end
context "user with page that links to unseen users" do
  # The mocked client serves the angelpawqt fixture page.
  let(:client_mock_config) do
    [
      {
        uri: "https://www.furaffinity.net/user/angelpawqt/",
        status_code: 200,
        content_type: "text/html",
        contents: SpecUtil.read_fixture_file(
          "domain/fa/user_page/user_page_angelpawqt.html",
        ),
      },
    ]
  end

  it "enqueues jobs for the unseen users" do
    # Link enqueueing is explicitly switched on for this run.
    perform_now({ url_name: "angelpawqt", skip_enqueue_found_links: false })

    page_job_args = SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob)
    linked_user = Domain::User::FaUser.find_by(url_name: "8bitstarshon1")
    expect(page_job_args).to include(hash_including(user: linked_user))
  end
end
# Shared behaviour for a "not found" page; the including context chooses the
# HTTP status code the mocked response carries.
shared_examples "user not found" do |status_code|
  let(:client_mock_config) do
    not_found_page =
      SpecUtil.read_fixture_file(
        "domain/fa/user_page/user_page_onefatpokemon_not_found.html",
      )
    [
      {
        uri: "https://www.furaffinity.net/user/onefatpokemon/",
        status_code: status_code,
        content_type: "text/html",
        contents: not_found_page,
      },
    ]
  end

  it "does not enqueue a user page job" do
    perform_now({ url_name: "onefatpokemon" })
    page_job_args = SpecUtil.enqueued_job_args(Domain::Fa::Job::UserPageJob)
    expect(page_job_args).to be_empty
  end

  it "marks the user as error" do
    perform_now({ url_name: "onefatpokemon" })
    missing_user = Domain::User::FaUser.find_by(url_name: "onefatpokemon")
    expect(missing_user).not_to be_nil
    expect(missing_user.state).to eq("error")
  end
end
# Run the "user not found" examples once per response status code; each
# status gets its own context so the shared `let` is not redefined in place.
[200, 400].each do |status_code|
  context "user not found with #{status_code} status code" do
    include_examples "user not found", status_code
  end
end
end