user avatar job
This commit is contained in:
43
app/jobs/domain/fa/job/user_avatar_job.rb
Normal file
43
app/jobs/domain/fa/job/user_avatar_job.rb
Normal file
@@ -0,0 +1,43 @@
|
||||
# Downloads the avatar image for a Domain::Fa::User and records the outcome on
# the user's Domain::Fa::UserAvatar model.
#
# The avatar URL is taken from the avatar model when it is already known;
# otherwise it is parsed out of a previously logged user page (located via
# UserAvatar#guess_user_page_log_entry).
class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
  queue_as :fa_user_avatar
  ignore_signature_args :caused_by_entry

  # @param args [Hash] job arguments:
  #   - :user            (required) the Domain::Fa::User whose avatar to fetch
  #   - :caused_by_entry (optional) log entry forwarded to the http client
  #   - :force_scan      (optional) truthy to download even if a file is
  #                      already recorded on the avatar
  # @return [Domain::Fa::UserAvatar, nil] the avatar after a download attempt,
  #   or nil when the download was skipped
  # @raise [RuntimeError] when :user is missing, no user page log entry can be
  #   found, or the user page yields no avatar URL
  def perform(args)
    @caused_by_entry = args[:caused_by_entry]
    @user = args[:user] || raise("user arg is required")
    @force_scan = !!args[:force_scan]
    logger.prefix = "[#{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
    @avatar = @user.avatar_or_create

    # a recorded file hash means the avatar was already downloaded
    if @avatar.file_sha256 && !@force_scan
      logger.warn("downloaded #{time_ago_in_words(@avatar.downloaded_file_at)}, skipping")
      return
    end

    unless @avatar.file_uri
      # try to find a corresponding log entry
      log_entry = @avatar.guess_user_page_log_entry || raise("no user page log entry found")
      @caused_by_entry ||= log_entry
      parser = Domain::Fa::Parser::Page.new(log_entry.response.contents, require_logged_in: false)
      thumb_url = parser.user_page.profile_thumb_url
      # fail with a clear message rather than issuing a request for an empty url
      raise("no avatar url found on user page, log entry #{log_entry.id}") if thumb_url.blank?
      @avatar.file_uri = thumb_url
    end

    response = http_client.get(
      @avatar.file_uri.to_s,
      caused_by_entry: @caused_by_entry,
    )
    @avatar.log_entry = response.log_entry

    if response.status_code != 200
      @avatar.state = :download_error
      @avatar.state_detail["download_error"] = "http status #{response.status_code}"
      # NOTE(review): fatal_error is presumably expected to raise and stop the
      # job here - confirm against Domain::Fa::Job::Base
      fatal_error("http #{response.status_code}, log entry #{response.log_entry.id}")
    end

    # a successful download supersedes any failure recorded by an earlier run
    @avatar.state = :ok
    @avatar.state_detail.delete("download_error")
    @avatar.downloaded_file_at = response.log_entry.created_at
    @avatar.file = response.log_entry.response
    @avatar
  ensure
    # persist whatever was recorded (including error state) on every exit path
    @avatar.save! if @avatar
  end
end
|
||||
@@ -13,6 +13,10 @@ class Domain::Fa::User < ReduxApplicationRecord
|
||||
inverse_of: :user,
|
||||
foreign_key: :user_id
|
||||
|
||||
# the user's avatar record, if one has been created
has_one :avatar, class_name: "::Domain::Fa::UserAvatar", inverse_of: :user
|
||||
|
||||
enum :state, [
|
||||
:ok, # so far so good, user may not yet be scanned
|
||||
:scan_error, # user has been removed or otherwise, see state_detail
|
||||
@@ -32,6 +36,10 @@ class Domain::Fa::User < ReduxApplicationRecord
|
||||
self.log_entry_detail ||= {}
|
||||
end
|
||||
|
||||
# Returns the associated avatar record, creating (and saving) one via
# create_avatar! when none is associated yet.
def avatar_or_create
  existing = avatar
  return existing if existing

  create_avatar!
end
|
||||
|
||||
# True when the user page has never been scanned, or was last scanned more
# than a month ago.
def due_for_page_scan?
  return true if scanned_page_at.nil?

  scanned_page_at < 1.month.ago
end
|
||||
|
||||
60
app/models/domain/fa/user_avatar.rb
Normal file
60
app/models/domain/fa/user_avatar.rb
Normal file
@@ -0,0 +1,60 @@
|
||||
# Avatar image record belonging to a Domain::Fa::User.
#
# Stores the avatar's url (file_url_str), the HttpLogEntry of the download
# request, and a reference to the downloaded BlobEntry (via file_sha256).
class Domain::Fa::UserAvatar < ReduxApplicationRecord
  self.table_name = "domain_fa_user_avatars"
  has_lite_trail(
    schema_version: 1,
    separate_versions_table: true,
    map_attribute: {
      file_sha256: ::Sha256AttributeMapper,
    },
  )

  enum :state, [:ok, :download_error]
  after_initialize do
    self.state ||= :ok
    self.state_detail ||= {}
  end

  belongs_to :user, class_name: "::Domain::Fa::User"
  belongs_to :file, foreign_key: :file_sha256, class_name: "::BlobEntry", optional: true
  belongs_to :log_entry, class_name: "::HttpLogEntry", optional: true

  before_validation do
    # Route the stored url through the setter so it is normalized. The explicit
    # `self.` receiver is required: a bare `file_uri = ...` only assigns a
    # block-local variable and leaves the record untouched.
    self.file_uri = file_url_str unless file_url_str.blank?
  end

  # @return [Addressable::URI, nil] parsed file_url_str, or nil when it is blank
  def file_uri
    Addressable::URI.parse(file_url_str) unless file_url_str.blank?
  end

  # Stores the given url in file_url_str, defaulting a missing scheme to https.
  # A nil / blank value clears file_url_str.
  #
  # @param uri [Addressable::URI, String, nil]
  def file_uri=(uri)
    # Addressable::URI.parse(nil) returns nil, so guard before touching #scheme
    if uri.blank?
      self.file_url_str = nil
      return
    end

    uri = Addressable::URI.parse(uri) unless uri.is_a?(Addressable::URI)
    uri.scheme = "https" if uri.scheme.blank?
    self.file_url_str = uri.to_s
  end

  # Looks for a logged request of the owning user's FA user page.
  #
  # @return [HttpLogEntry, nil] the first match found, or nil when none exists
  def guess_user_page_log_entry
    # most recent logged https request to www.furaffinity.net for the given path
    for_path = lambda { |uri_path|
      HttpLogEntry.where(
        uri_scheme: "https",
        uri_host: "www.furaffinity.net",
        uri_path: uri_path,
      ).order(created_at: :desc).first
    }

    for_hle_id = lambda { |hle_id|
      hle_id && HttpLogEntry.find_by(id: hle_id)
    }

    hle_id = user.log_entry_detail && user.log_entry_detail["last_user_page_id"]

    # first try the last scanned user page (present on most fa user models)
    for_hle_id.call(hle_id) ||
      # if that's missing, see if there's an existing request logged to the user page
      for_path.call("/user/#{user.url_name}/") ||
      # and try the non-trailing-slash version as well
      # (older versions don't end in a trailing slash)
      for_path.call("/user/#{user.url_name}")

    # TODO - maybe can look for posts as well, those might list an avatar
  end
end
|
||||
@@ -0,0 +1,22 @@
|
||||
# Creates the domain_fa_user_avatars table (one row per FA user, enforced by a
# unique index on user_id), its foreign keys, and its versions table.
class CreateDomainFaUserAvatars < ActiveRecord::Migration[7.0]
  def change
    create_table :domain_fa_user_avatars do |t|
      t.references :user, null: false, index: { unique: true }
      t.integer :state, null: false
      t.jsonb :state_detail
      # url of the file
      t.string :file_url_str
      t.references :log_entry, index: false
      # direct access to log_entry.file if request succeeded
      t.binary :file_sha256
      t.datetime :downloaded_file_at
      t.timestamps
    end

    # [referenced table, local column, referenced primary key]
    [
      [:domain_fa_users, :user_id, :id],
      [:http_log_entries, :log_entry_id, :id],
      [:blob_entries, :file_sha256, :sha256],
    ].each do |to_table, column, primary_key|
      add_foreign_key :domain_fa_user_avatars, to_table, column: column, primary_key: primary_key
    end

    create_versions_table :domain_fa_user_avatars
  end
end
|
||||
28
db/schema.rb
generated
28
db/schema.rb
generated
@@ -10,7 +10,7 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_04_07_162751) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_stat_statements"
|
||||
enable_extension "pg_trgm"
|
||||
@@ -137,6 +137,28 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
|
||||
t.index ["file_id"], name: "index_domain_fa_posts_on_file_id"
|
||||
end
|
||||
|
||||
create_table "domain_fa_user_avatar_versions", force: :cascade do |t|
|
||||
t.bigint "item_id"
|
||||
t.integer "schema_version"
|
||||
t.string "event", null: false
|
||||
t.jsonb "diff"
|
||||
t.datetime "created_at", null: false
|
||||
t.index ["item_id"], name: "index_domain_fa_user_avatar_versions_on_item_id"
|
||||
end
|
||||
|
||||
create_table "domain_fa_user_avatars", force: :cascade do |t|
|
||||
t.bigint "user_id", null: false
|
||||
t.integer "state", null: false
|
||||
t.jsonb "state_detail"
|
||||
t.string "file_url_str"
|
||||
t.bigint "log_entry_id"
|
||||
t.binary "file_sha256"
|
||||
t.datetime "downloaded_file_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["user_id"], name: "index_domain_fa_user_avatars_on_user_id", unique: true
|
||||
end
|
||||
|
||||
create_table "domain_fa_user_factors", force: :cascade do |t|
|
||||
t.bigint "user_id", null: false
|
||||
t.vector "for_follower", limit: 32
|
||||
@@ -353,6 +375,10 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
|
||||
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "follower_id"
|
||||
add_foreign_key "domain_fa_posts", "domain_fa_users", column: "creator_id"
|
||||
add_foreign_key "domain_fa_posts", "http_log_entries", column: "file_id"
|
||||
add_foreign_key "domain_fa_user_avatar_versions", "domain_fa_user_avatars", column: "item_id"
|
||||
add_foreign_key "domain_fa_user_avatars", "blob_entries", column: "file_sha256", primary_key: "sha256"
|
||||
add_foreign_key "domain_fa_user_avatars", "domain_fa_users", column: "user_id"
|
||||
add_foreign_key "domain_fa_user_avatars", "http_log_entries", column: "log_entry_id"
|
||||
add_foreign_key "domain_fa_user_factors", "domain_fa_users", column: "user_id"
|
||||
add_foreign_key "domain_twitter_medias", "domain_twitter_tweets", column: "tweet_id"
|
||||
add_foreign_key "domain_twitter_medias", "http_log_entries", column: "file_id"
|
||||
|
||||
71
spec/jobs/domain/fa/job/user_avatar_job_spec.rb
Normal file
71
spec/jobs/domain/fa/job/user_avatar_job_spec.rb
Normal file
@@ -0,0 +1,71 @@
|
||||
require "rails_helper"

# Exercises UserAvatarJob against a mocked http client and fixture files.
describe Domain::Fa::Job::UserAvatarJob do
  let(:http_client_mock) { instance_double("::Scraper::HttpClient") }

  before { Scraper::ClientFactory.http_client_mock = http_client_mock }

  shared_context "create meesh user" do
    let(:user) { Domain::Fa::User.find_by(url_name: "meesh") || raise }

    before { Domain::Fa::User.create!(url_name: "meesh", name: "Meesh") }
  end

  # expects exactly one avatar download, caused by the user page log entry
  shared_context "meesh avatar file mock" do
    before do
      avatar_request = {
        uri: "https://a.furaffinity.net/1635789297/meesh.gif",
        status_code: 200,
        content_type: "image/gif",
        contents: SpecUtil.read_fixture_file("domain/fa/job/meesh_avatar_file.gif", mode: "rb"),
        caused_by_entry: meesh_user_page_log_entry,
      }
      @log_entries = SpecUtil.init_http_client_mock(http_client_mock, [avatar_request])
    end
  end

  context "when the avatar model does not yet exist" do
    include_context "create meesh user"

    context "the user model has a last_user_page_id" do
      let(:meesh_user_page_log_entry) do
        SpecUtil.create_http_log_entry(
          uri: "https://www.furaffinity.net/user/meesh/",
          status_code: 200,
          content_type: "text/html",
          contents: SpecUtil.read_fixture_file("domain/fa/job/user_page_meesh.html"),
        )
      end
      include_context "meesh avatar file mock"

      before do
        user.log_entry_detail["last_user_page_id"] = meesh_user_page_log_entry.id
        user.save!
      end

      it "succeeds" do
        first_result = described_class.perform_now({ user: user })
        expect(first_result).to_not be_a(Exception)

        user.reload
        avatar = user.avatar
        expect(avatar).not_to be_nil
        expect(avatar.log_entry).to eq(@log_entries[0])
        expect(HexUtil.bin2hex(avatar.file_sha256)).to eq("ebbafc07555df0a0656a9b32ec9b95723c62c5246937dc8434924d9241d1b570")
        expect(avatar.downloaded_file_at).to be_within(1.seconds).of(Time.now)

        # a second run must not create another avatar record
        second_result = described_class.perform_now({ user: user })
        expect(second_result).to_not be_a(Exception)
        user.reload
        avatar2 = user.avatar
        expect(avatar).to eq(avatar2)
      end
    end
  end
end
|
||||
@@ -62,30 +62,30 @@ describe Domain::Fa::PostEnqueuer do
|
||||
).to_not be_nil
|
||||
|
||||
enqueuer.run_once
|
||||
expect(enqueued_fa_ids.call).to eq(post_fa_ids[1...5])
|
||||
expect(enqueued_fa_jobs.call).to eq(
|
||||
[Domain::Fa::Job::ScanPostJob] * 3 +
|
||||
[Domain::Fa::Job::ScanFileJob] * 1
|
||||
expect(enqueued_fa_ids.call).to contain_exactly(*post_fa_ids[1...5])
|
||||
expect(enqueued_fa_jobs.call).to contain_exactly(
|
||||
*([Domain::Fa::Job::ScanPostJob] * 3 +
|
||||
[Domain::Fa::Job::ScanFileJob] * 1)
|
||||
)
|
||||
SpecUtil.shift_jobs
|
||||
|
||||
enqueuer.run_once
|
||||
expect(enqueued_fa_ids.call).to eq(post_fa_ids[2...7])
|
||||
expect(enqueued_fa_jobs.call).to eq(
|
||||
[Domain::Fa::Job::ScanPostJob] * 2 +
|
||||
[Domain::Fa::Job::ScanFileJob] * 3
|
||||
expect(enqueued_fa_ids.call).to contain_exactly(*post_fa_ids[2...7])
|
||||
expect(enqueued_fa_jobs.call).to contain_exactly(
|
||||
*([Domain::Fa::Job::ScanPostJob] * 2 +
|
||||
[Domain::Fa::Job::ScanFileJob] * 3)
|
||||
)
|
||||
|
||||
SpecUtil.shift_jobs(job_klasses, 3)
|
||||
|
||||
expect { enqueuer.run_once }.to raise_exception(StopIteration)
|
||||
expect(enqueued_fa_ids.call).to eq(post_fa_ids[5...7])
|
||||
expect(enqueued_fa_ids.call).to contain_exactly(*post_fa_ids[5...7])
|
||||
end
|
||||
|
||||
it "does not enqueue posts which are already in the queue" do
|
||||
post_fa_ids = posts.map(&:fa_id)
|
||||
Domain::Fa::Job::ScanPostJob.perform_later({ fa_id: post_fa_ids[1] })
|
||||
expect(enqueued_fa_ids.call).to eq([post_fa_ids[1]])
|
||||
expect(enqueued_fa_ids.call).to contain_exactly(post_fa_ids[1])
|
||||
|
||||
# post [1] should be filtered out
|
||||
enqueuer.run_once
|
||||
|
||||
@@ -75,6 +75,11 @@ class SpecUtil
|
||||
|
||||
entry
|
||||
end
|
||||
# Builds an http log entry via build_http_log_entry (all arguments are
# forwarded), saves it with save!, and returns the saved model.
def self.create_http_log_entry(...)
  build_http_log_entry(...).tap(&:save!)
end
|
||||
|
||||
def self.build_blob_entry(
|
||||
content_type: "text/plain",
|
||||
@@ -108,6 +113,8 @@ class SpecUtil
|
||||
caused_by_entry = nil
|
||||
if request[:caused_by_entry_idx]
|
||||
caused_by_entry = log_entries[request[:caused_by_entry_idx]]
|
||||
elsif request[:caused_by_entry]
|
||||
caused_by_entry = request[:caused_by_entry]
|
||||
end
|
||||
|
||||
expect(http_client_mock).to(receive(:get).
|
||||
@@ -125,8 +132,8 @@ class SpecUtil
|
||||
log_entries
|
||||
end
|
||||
|
||||
def self.read_fixture_file(path)
|
||||
File.read Rails.root.join("test/fixtures/files", path)
|
||||
# Reads a fixture file located under test/fixtures/files.
#
# @param path [String] path relative to test/fixtures/files
# @param mode [String] File.read mode, e.g. "rb" for binary fixtures
# @return [String] the file contents
def self.read_fixture_file(path, mode: "rt")
  fixture_path = Rails.root.join("test/fixtures/files", path)
  File.read(fixture_path, mode: mode)
end
|
||||
|
||||
def self.enqueued_jobs(job_klass = nil)
|
||||
|
||||
BIN
test/fixtures/files/domain/fa/job/meesh_avatar_file.gif
vendored
Normal file
BIN
test/fixtures/files/domain/fa/job/meesh_avatar_file.gif
vendored
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 7.2 KiB |
1423
test/fixtures/files/domain/fa/job/user_page_meesh.html
vendored
Normal file
1423
test/fixtures/files/domain/fa/job/user_page_meesh.html
vendored
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user