user avatar job

This commit is contained in:
Dylan Knutson
2023-04-07 11:54:01 -07:00
parent ab13af43af
commit 35aa025778
10 changed files with 1673 additions and 13 deletions

View File

@@ -0,0 +1,43 @@
# Downloads the avatar image for a Domain::Fa::User and records the outcome
# on the user's Domain::Fa::UserAvatar row.
class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
  queue_as :fa_user_avatar
  ignore_signature_args :caused_by_entry

  # Fetches the avatar file unless it was already downloaded.
  #
  # @param args [Hash]
  #   :user            - required, the user whose avatar is fetched
  #   :caused_by_entry - optional log entry passed through to the HTTP client
  #   :force_scan      - optional, truthy to re-download an existing file
  # @return the avatar record after a download attempt, or nil when the
  #   download was skipped because a file is already present
  # @raise [RuntimeError] when :user is missing, no user page log entry can
  #   be found, or the user page carries no avatar url
  def perform(args)
    @caused_by_entry = args[:caused_by_entry]
    @user = args[:user] || raise("user arg is required")
    @force_scan = !!args[:force_scan]
    logger.prefix = "[#{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"

    @avatar = @user.avatar_or_create

    # a stored sha256 means a file was already fetched; only redo it on request
    if @avatar.file_sha256 && !@force_scan
      logger.warn("downloaded #{time_ago_in_words(@avatar.downloaded_file_at)}, skipping")
      return
    end

    unless @avatar.file_uri
      # try to find a corresponding log entry
      log_entry = @avatar.guess_user_page_log_entry || raise("no user page log entry found")
      @caused_by_entry ||= log_entry
      parser = Domain::Fa::Parser::Page.new(log_entry.response.contents, require_logged_in: false)
      thumb_url = parser.user_page.profile_thumb_url
      # fail with a clear message instead of an opaque NoMethodError on nil
      raise("no avatar url found on user page, log entry #{log_entry.id}") if thumb_url.blank?
      @avatar.file_uri = thumb_url
    end

    response = http_client.get(
      @avatar.file_uri.to_s,
      caused_by_entry: @caused_by_entry,
    )
    @avatar.log_entry = response.log_entry

    if response.status_code == 200
      # clear a :download_error left behind by an earlier failed attempt
      @avatar.state = :ok
    else
      @avatar.state = :download_error
      @avatar.state_detail["download_error"] = "http status #{response.status_code}"
      fatal_error("http #{response.status_code}, log entry #{response.log_entry.id}")
    end

    @avatar.downloaded_file_at = response.log_entry.created_at
    @avatar.file = response.log_entry.response
    @avatar
  ensure
    # persist whatever was recorded, including the error state on failure
    @avatar.save! if @avatar
  end
end

View File

@@ -13,6 +13,10 @@ class Domain::Fa::User < ReduxApplicationRecord
inverse_of: :user,
foreign_key: :user_id
# The user's avatar record; Domain::Fa::UserAvatar#user is the inverse side.
has_one :avatar,
class_name: "::Domain::Fa::UserAvatar",
inverse_of: :user
enum :state, [
:ok, # so far so good, user may not yet be scanned
:scan_error, # user has been removed or otherwise, see state_detail
@@ -32,6 +36,10 @@ class Domain::Fa::User < ReduxApplicationRecord
self.log_entry_detail ||= {}
end
# Returns the associated avatar record, creating (and saving) one via
# create_avatar! when none exists yet.
def avatar_or_create
  existing = avatar
  return existing if existing

  create_avatar!
end
# True when the user page has never been scanned, or was last scanned more
# than one month ago.
def due_for_page_scan?
  last_scan = scanned_page_at
  return true if last_scan.nil?

  last_scan < 1.month.ago
end

View File

@@ -0,0 +1,60 @@
# Avatar image metadata for a Domain::Fa::User: where the image lives
# (file_url_str), the HTTP log entry of the download attempt, and the blob
# that was fetched (file_sha256).
class Domain::Fa::UserAvatar < ReduxApplicationRecord
  self.table_name = "domain_fa_user_avatars"

  has_lite_trail(
    schema_version: 1,
    separate_versions_table: true,
    map_attribute: {
      file_sha256: ::Sha256AttributeMapper,
    },
  )

  enum :state, [:ok, :download_error]
  after_initialize do
    self.state ||= :ok
    self.state_detail ||= {}
  end

  belongs_to :user, class_name: "::Domain::Fa::User"
  belongs_to :file, foreign_key: :file_sha256, class_name: "::BlobEntry", optional: true
  belongs_to :log_entry, class_name: "::HttpLogEntry", optional: true

  # Re-run the stored url through the setter so it is parsed (raising on an
  # invalid URI) and normalized. The explicit `self.` receiver matters: a
  # bare `file_uri = ...` only assigns a block-local variable.
  before_validation do
    self.file_uri = file_url_str unless file_url_str.blank?
  end

  # @return [Addressable::URI, nil] the parsed file url, nil when none is stored
  def file_uri
    Addressable::URI.parse(file_url_str) unless file_url_str.blank?
  end

  # Stores the avatar url, defaulting a missing scheme to https.
  #
  # @param uri [Addressable::URI, String, nil] nil clears the stored url
  def file_uri=(uri)
    # work on a copy so a caller-owned URI object is never mutated
    parsed = uri.is_a?(Addressable::URI) ? uri.dup : Addressable::URI.parse(uri)
    if parsed.nil?
      self.file_url_str = nil
      return
    end

    parsed.scheme = "https" if parsed.scheme.blank?
    self.file_url_str = parsed.to_s
  end

  # Finds a logged request of the user's profile page, which can then be
  # parsed for the avatar url.
  #
  # @return [HttpLogEntry, nil] nil when no candidate entry exists
  def guess_user_page_log_entry
    # most recent logged request to the given path on www.furaffinity.net
    latest_for_path = lambda do |uri_path|
      HttpLogEntry.where(
        uri_scheme: "https",
        uri_host: "www.furaffinity.net",
        uri_path: uri_path,
      ).order(created_at: :desc).first
    end

    hle_id = user.log_entry_detail && user.log_entry_detail["last_user_page_id"]

    # first try the last scanned user page (present on most fa user models)
    (hle_id && HttpLogEntry.find_by(id: hle_id)) ||
      # if that's missing, see if there's an existing request logged to the user page
      latest_for_path.call("/user/#{user.url_name}/") ||
      # older versions don't end in a trailing slash, so try that form as well
      latest_for_path.call("/user/#{user.url_name}")
    # TODO - maybe can look for posts as well, those might list an avatar
  end
end

View File

@@ -0,0 +1,22 @@
# Creates the domain_fa_user_avatars table backing Domain::Fa::UserAvatar,
# along with its foreign keys and its versions table.
class CreateDomainFaUserAvatars < ActiveRecord::Migration[7.0]
def change
create_table :domain_fa_user_avatars do |t|
# one avatar row per user, enforced by the unique index
t.references :user, null: false, index: { unique: true }
# integer-backed enum column (see the model's `enum :state`)
t.integer :state, null: false
t.jsonb :state_detail
# url of the file
t.string :file_url_str
# log entry of the download request; no index is created for this column
t.references :log_entry, index: false
# direct access to log_entry.file if request succeeded
t.binary :file_sha256
t.datetime :downloaded_file_at
t.timestamps
end
# foreign keys are added explicitly; file_sha256 points at blob_entries.sha256
# rather than at an id column
add_foreign_key :domain_fa_user_avatars, :domain_fa_users, column: :user_id, primary_key: :id
add_foreign_key :domain_fa_user_avatars, :http_log_entries, column: :log_entry_id, primary_key: :id
add_foreign_key :domain_fa_user_avatars, :blob_entries, column: :file_sha256, primary_key: :sha256
# NOTE(review): project helper; presumably creates the
# domain_fa_user_avatar_versions table used by has_lite_trail - confirm
create_versions_table :domain_fa_user_avatars
end
end

28
db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
ActiveRecord::Schema[7.0].define(version: 2023_04_07_162751) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_stat_statements"
enable_extension "pg_trgm"
@@ -137,6 +137,28 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
t.index ["file_id"], name: "index_domain_fa_posts_on_file_id"
end
create_table "domain_fa_user_avatar_versions", force: :cascade do |t|
t.bigint "item_id"
t.integer "schema_version"
t.string "event", null: false
t.jsonb "diff"
t.datetime "created_at", null: false
t.index ["item_id"], name: "index_domain_fa_user_avatar_versions_on_item_id"
end
create_table "domain_fa_user_avatars", force: :cascade do |t|
t.bigint "user_id", null: false
t.integer "state", null: false
t.jsonb "state_detail"
t.string "file_url_str"
t.bigint "log_entry_id"
t.binary "file_sha256"
t.datetime "downloaded_file_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["user_id"], name: "index_domain_fa_user_avatars_on_user_id", unique: true
end
create_table "domain_fa_user_factors", force: :cascade do |t|
t.bigint "user_id", null: false
t.vector "for_follower", limit: 32
@@ -353,6 +375,10 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_31_023807) do
add_foreign_key "domain_fa_follows", "domain_fa_users", column: "follower_id"
add_foreign_key "domain_fa_posts", "domain_fa_users", column: "creator_id"
add_foreign_key "domain_fa_posts", "http_log_entries", column: "file_id"
add_foreign_key "domain_fa_user_avatar_versions", "domain_fa_user_avatars", column: "item_id"
add_foreign_key "domain_fa_user_avatars", "blob_entries", column: "file_sha256", primary_key: "sha256"
add_foreign_key "domain_fa_user_avatars", "domain_fa_users", column: "user_id"
add_foreign_key "domain_fa_user_avatars", "http_log_entries", column: "log_entry_id"
add_foreign_key "domain_fa_user_factors", "domain_fa_users", column: "user_id"
add_foreign_key "domain_twitter_medias", "domain_twitter_tweets", column: "tweet_id"
add_foreign_key "domain_twitter_medias", "http_log_entries", column: "file_id"

View File

@@ -0,0 +1,71 @@
require "rails_helper"
# Spec for the avatar download job: runs it against a mocked HTTP client and
# checks what ends up on the user's avatar record.
describe Domain::Fa::Job::UserAvatarJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before do
# hand the mock to the client factory
Scraper::ClientFactory.http_client_mock = http_client_mock
end
# Creates the "meesh" user and exposes it as `user`.
shared_context "create meesh user" do
let(:user) { Domain::Fa::User.find_by(url_name: "meesh") || raise }
before do
Domain::Fa::User.create!({
url_name: "meesh",
name: "Meesh",
})
end
end
# Registers the expected avatar GET on the mock. The including context must
# define `meesh_user_page_log_entry`, which is passed as caused_by_entry.
shared_context "meesh avatar file mock" do
before do
@log_entries = SpecUtil.init_http_client_mock(
http_client_mock, [
{
uri: "https://a.furaffinity.net/1635789297/meesh.gif",
status_code: 200,
content_type: "image/gif",
# the gif fixture is read in binary mode
contents: SpecUtil.read_fixture_file("domain/fa/job/meesh_avatar_file.gif", mode: "rb"),
caused_by_entry: meesh_user_page_log_entry,
},
]
)
end
end
context "when the avatar model does not yet exist" do
include_context "create meesh user"
context "the user model has a last_user_page_id" do
# persisted log entry holding the user page html fixture
let(:meesh_user_page_log_entry) do
SpecUtil.create_http_log_entry(
uri: "https://www.furaffinity.net/user/meesh/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/user_page_meesh.html"),
)
end
include_context "meesh avatar file mock"
before do
# point the user at the logged user page via log_entry_detail
user.log_entry_detail["last_user_page_id"] = meesh_user_page_log_entry.id
user.save!
end
it "succeeds" do
ret = described_class.perform_now({ user: user })
expect(ret).to_not be_a(Exception)
user.reload
avatar = user.avatar
expect(avatar).not_to be_nil
expect(avatar.log_entry).to eq(@log_entries[0])
expect(HexUtil.bin2hex avatar.file_sha256).to eq("ebbafc07555df0a0656a9b32ec9b95723c62c5246937dc8434924d9241d1b570")
expect(avatar.downloaded_file_at).to be_within(1.seconds).of(Time.now)
# second run: the avatar now has a file_sha256, and the user must still
# end up with the same avatar record
ret = described_class.perform_now({ user: user })
expect(ret).to_not be_a(Exception)
user.reload
avatar2 = user.avatar
expect(avatar).to eq(avatar2)
end
end
end
end

View File

@@ -62,30 +62,30 @@ describe Domain::Fa::PostEnqueuer do
).to_not be_nil
enqueuer.run_once
expect(enqueued_fa_ids.call).to eq(post_fa_ids[1...5])
expect(enqueued_fa_jobs.call).to eq(
[Domain::Fa::Job::ScanPostJob] * 3 +
[Domain::Fa::Job::ScanFileJob] * 1
expect(enqueued_fa_ids.call).to contain_exactly(*post_fa_ids[1...5])
expect(enqueued_fa_jobs.call).to contain_exactly(
*([Domain::Fa::Job::ScanPostJob] * 3 +
[Domain::Fa::Job::ScanFileJob] * 1)
)
SpecUtil.shift_jobs
enqueuer.run_once
expect(enqueued_fa_ids.call).to eq(post_fa_ids[2...7])
expect(enqueued_fa_jobs.call).to eq(
[Domain::Fa::Job::ScanPostJob] * 2 +
[Domain::Fa::Job::ScanFileJob] * 3
expect(enqueued_fa_ids.call).to contain_exactly(*post_fa_ids[2...7])
expect(enqueued_fa_jobs.call).to contain_exactly(
*([Domain::Fa::Job::ScanPostJob] * 2 +
[Domain::Fa::Job::ScanFileJob] * 3)
)
SpecUtil.shift_jobs(job_klasses, 3)
expect { enqueuer.run_once }.to raise_exception(StopIteration)
expect(enqueued_fa_ids.call).to eq(post_fa_ids[5...7])
expect(enqueued_fa_ids.call).to contain_exactly(*post_fa_ids[5...7])
end
it "does not enqueue posts which are already in the queue" do
post_fa_ids = posts.map(&:fa_id)
Domain::Fa::Job::ScanPostJob.perform_later({ fa_id: post_fa_ids[1] })
expect(enqueued_fa_ids.call).to eq([post_fa_ids[1]])
expect(enqueued_fa_ids.call).to contain_exactly(post_fa_ids[1])
# post [1] should be filtered out
enqueuer.run_once

View File

@@ -75,6 +75,11 @@ class SpecUtil
entry
end
# Builds a log entry via build_http_log_entry (all arguments are forwarded),
# saves it with save!, and returns the saved model.
def self.create_http_log_entry(...)
  build_http_log_entry(...).tap(&:save!)
end
def self.build_blob_entry(
content_type: "text/plain",
@@ -108,6 +113,8 @@ class SpecUtil
caused_by_entry = nil
if request[:caused_by_entry_idx]
caused_by_entry = log_entries[request[:caused_by_entry_idx]]
elsif request[:caused_by_entry]
caused_by_entry = request[:caused_by_entry]
end
expect(http_client_mock).to(receive(:get).
@@ -125,8 +132,8 @@ class SpecUtil
log_entries
end
def self.read_fixture_file(path)
File.read Rails.root.join("test/fixtures/files", path)
# Reads a fixture from test/fixtures/files under the Rails root.
#
# path - fixture path relative to test/fixtures/files
# mode - file open mode handed to File.read; pass "rb" for binary fixtures
def self.read_fixture_file(path, mode: "rt")
  fixture_path = Rails.root.join("test/fixtures/files", path)
  File.read(fixture_path, mode: mode)
end
def self.enqueued_jobs(job_klass = nil)

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB

File diff suppressed because one or more lines are too long