Handle buggy FA favorites count, add tests, and clarify the follows associations

This commit is contained in:
Dylan Knutson
2023-05-02 12:49:22 -07:00
parent 3f0d845472
commit 32fe41ff04
7 changed files with 962 additions and 32 deletions

View File

@@ -1,4 +1,4 @@
# Gather and record all the follows for a user
# Gather and record all the users that this user follows (not the users who follow this user)
# This will be used to create an index of follower -> followed
# of a specific user, for recommender training
class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
@@ -43,7 +43,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
"add #{to_add.size.to_s.bold} follows, " +
"remove #{to_remove.size.to_s.bold} follows"
}) do
existing_followed_ids = Set.new(@user.follows.pluck(:followed_id))
existing_followed_ids = Set.new(@user.follower_joins.pluck(:followed_id))
to_remove = existing_followed_ids - @scanned_followed_ids
to_add = @scanned_followed_ids - existing_followed_ids
end
@@ -52,8 +52,8 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
"updated follows list to #{@user.follows.count.to_s.bold} users"
}) do
ReduxApplicationRecord.transaction do
@user.follows.where(followed_id: to_remove).delete_all
@user.follows.insert_all!(to_add.map do |id|
@user.follower_joins.where(followed_id: to_remove).delete_all
@user.follower_joins.insert_all!(to_add.map do |id|
{ followed_id: id }
end) unless to_add.empty?
@user.scanned_follows_at = Time.now

View File

@@ -115,12 +115,19 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
}
end
case @page_version
when VERSION_0, VERSION_1
statistics.children[legacy_map[legacy_name] || raise].text.strip.to_i
when VERSION_2
statistics.css(".highlight")[redux_idx]&.next_sibling&.text&.strip&.to_i
else unimplemented_version!
value = case @page_version
when VERSION_0, VERSION_1
statistics.children[legacy_map[legacy_name] || raise].text.strip.to_i
when VERSION_2
statistics.css(".highlight")[redux_idx]&.next_sibling&.text&.strip&.to_i
else unimplemented_version!
end
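# FA sometimes reports a bogus, overflowed-looking counter (>= 2**32 - 1); record 0 instead.
# NOTE(review): on VERSION_2 pages the safe-navigation chain above may yield nil, which
# would raise NoMethodError on the comparison below - confirm and guard if needed.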
if value >= (2 ** 32 - 1)
0
else
value
end
end

View File

@@ -27,20 +27,26 @@ class Domain::Fa::User < ReduxApplicationRecord
has_many :follower_joins,
class_name: "::Domain::Fa::Follow",
foreign_key: :follower_id,
inverse_of: :follower,
dependent: :destroy
has_many :followed_joins,
class_name: "::Domain::Fa::Follow",
foreign_key: :followed_id,
inverse_of: :followed,
dependent: :destroy
# Domain::Fa::User
has_many :followers,
through: :follower_joins
# who this user follows
has_many :follows,
through: :follower_joins,
source: :followed
# Domain::Fa::User
has_many :followeds,
through: :followed_joins
# who follows this user
has_many :followed_by,
through: :followed_joins,
source: :follower
validates_presence_of(:name, :url_name)
validate do

View File

@@ -18,8 +18,8 @@ describe Domain::Fa::Job::BrowsePageJob do
shared_context "create user and post" do
before do
creator = Domain::Fa::User.create!({
name: "Ruby_69r",
url_name: "ruby69r",
name: "Ruby_68r",
})
Domain::Fa::Post.create!({
fa_id: 51509268,

View File

@@ -167,8 +167,8 @@ describe Domain::Fa::Job::UserFollowsJob do
user.reload
expect(user.follows.length).to eq(FOLLOWS_ON_ZZREG_PAGE)
expect(user.follows.where(followed: smaz_user).first).to be_nil
expect(user.follows.where(followed: agi_type01_user).first).to eq(follow_2)
expect(user.follows).to_not include(smaz_user)
expect(user.follows).to include(agi_type01_user)
# correct user page jobs should be enqueued
expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserPageJob).find do |job|

View File

@@ -2,11 +2,16 @@ require "rails_helper"
describe Domain::Fa::Job::UserPageJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before do
Scraper::ClientFactory.http_client_mock = http_client_mock
@log_entries = SpecUtil.init_http_client_mock(
http_client_mock, [
http_client_mock, client_mock_config
)
end
context "scanning a normal user" do
let(:client_mock_config) do
[
{
uri: "https://www.furaffinity.net/user/meesh/",
status_code: 200,
@@ -14,22 +19,44 @@ describe Domain::Fa::Job::UserPageJob do
contents: SpecUtil.read_fixture_file("domain/fa/job/user_page_meesh.html"),
},
]
)
end
it "succeeds" do
ret = described_class.perform_now({ url_name: "meesh" })
expect(ret).to_not be_a(Exception)
user = Domain::Fa::User.find_by(url_name: "meesh")
expect(user).to_not be_nil
expect(user.avatar.file_uri.to_s).to eq("https://a.furaffinity.net/1635789297/meesh.gif")
expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserAvatarJob)).to match(
[
including(args: [{
user: user,
caused_by_entry: @log_entries[0],
}]),
]
)
end
end
it "succeeds" do
ret = described_class.perform_now({ url_name: "meesh" })
expect(ret).to_not be_a(Exception)
user = Domain::Fa::User.find_by(url_name: "meesh")
expect(user).to_not be_nil
expect(user.avatar.file_uri.to_s).to eq("https://a.furaffinity.net/1635789297/meesh.gif")
expect(SpecUtil.enqueued_jobs(Domain::Fa::Job::UserAvatarJob)).to match(
context "with a user with buggy favcount" do
let(:client_mock_config) do
[
including(args: [{
user: user,
caused_by_entry: @log_entries[0],
}]),
{
uri: "https://www.furaffinity.net/user/marsdust/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/user_page_marsdust.html"),
},
]
)
end
it "records the right fav count" do
ret = described_class.perform_now({ url_name: "marsdust" })
expect(ret).to_not be_a(Exception)
user = Domain::Fa::User.find_by(url_name: "marsdust")
expect(user).to_not be_nil
expect(user.avatar.file_uri.to_s).to eq("https://a.furaffinity.net/1424255659/marsdust.gif")
expect(user.num_favorites).to eq(0)
end
end
end

File diff suppressed because one or more lines are too long