user avatar fixer job

This commit is contained in:
Dylan Knutson
2023-08-23 08:26:56 -07:00
parent 3ab0fa4fa3
commit 85dec62850
12 changed files with 221 additions and 134 deletions

View File

@@ -0,0 +1,3 @@
<svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-6 h-6">
<path stroke-linecap="round" stroke-linejoin="round" d="M15.75 6a3.75 3.75 0 11-7.5 0 3.75 3.75 0 017.5 0zM4.501 20.118a7.5 7.5 0 0114.998 0A17.933 17.933 0 0112 21.75c-2.676 0-5.216-.584-7.499-1.632z" />
</svg>

After

Width:  |  Height:  |  Size: 344 B

View File

@@ -1,16 +1,6 @@
module Domain::Fa::PostsHelper
def hosted_post_link_url_and_options(post)
if VpnOnlyRouteConstraint.new.matches?(request)
[
domain_fa_post_path(post.fa_id),
{},
]
else
[
"https://www.furaffinity.net/view/#{post.fa_id}",
{ target: "_blank", rel: "noreferrer,nofollow" },
]
end
def can_see_hosted_post?(post)
VpnOnlyRouteConstraint.new.matches?(request)
end
def post_state_string(post)

View File

@@ -6,12 +6,11 @@ module Domain::Fa::UsersHelper
format: "jpg",
thumb: thumb,
)
elsif (path = user.avatar&.guess_file_uri_from_hles)
path
else
# default / 'not found' avatar image
# "/blobs/9080fd4e7e23920eb2dccfe2d86903fc3e748eebb2e5aa8c657bbf6f3d941cdc/contents.jpg"
image_path("user-circle.svg")
end
rescue
# default / 'not found' avatar image
"/blobs/9080fd4e7e23920eb2dccfe2d86903fc3e748eebb2e5aa8c657bbf6f3d941cdc/contents.jpg"
end
def sanitized_fa_user_profile_html(html)

View File

@@ -4,42 +4,31 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
def perform(args)
init_from_args!(args, build_user: false)
@user || begin
raise("user must exist")
end
@user || raise("user must exist")
@avatar = @user.avatar_or_create
logger.prefix = proc { "[avatar #{@avatar.id.to_s.bold} / user #{@user.url_name.to_s.bold}]" }
if @avatar.file_sha256 && !@force_scan
logger.warn("downloaded #{time_ago_in_words(@avatar.downloaded_file_at)}, skipping")
return
end
if @avatar.state != "ok" && !@force_scan
logger.warn("avatar is in error state, skipping")
logger.warn("in state #{@avatar.state.bold}, skipping")
return
end
unless @avatar.file_uri
# try to find a corresponding log entry
log_entry = @avatar.guess_user_page_log_entry || begin
Domain::Fa::Job::UserPageJob.perform_later({
if @avatar.file_uri.blank?
if @user.due_for_page_scan?
defer_job(Domain::Fa::Job::UserPageJob, {
user: @user,
caused_by_entry: @caused_by_entry,
})
logger.error("no user page log entry found, enqueued user page scan")
return
end
@caused_by_entry ||= log_entry
parser = Domain::Fa::Parser::Page.new(log_entry.response.contents, require_logged_in: false)
@avatar.state_detail["guessed_log_entry_id"] = log_entry.id
file_url_str = parser.user_page.profile_thumb_url
if file_url_str
@avatar.file_uri = file_url_str
logger.error("no file uri, scanning user page")
else
@avatar.state = :no_file_on_guessed_user_page_error
logger.error("no user avatar found on page #{log_entry.id}")
return
logger.error("no file uri")
end
return
end
response = http_client.get(
@@ -48,15 +37,21 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
)
@avatar.log_entry = response.log_entry
if response.status_code != 200
if [200, 404].include?(response.status_code)
if response.status_code == 404
@avatar.state = :file_not_found
else
@avatar.state = :ok
end
@avatar.downloaded_file_at = response.log_entry.created_at
@avatar.file = response.log_entry.response
logger.info("downloaded avatar file")
else
@avatar.state = :download_error
@avatar.state_detail["download_error"] = "http status #{response.status_code}"
fatal_error("http #{response.status_code}, log entry #{response.log_entry.id}")
end
@avatar.downloaded_file_at = response.log_entry.created_at
@avatar.file = response.log_entry.response
@avatar
ensure
@avatar.save! if @avatar
end

View File

@@ -0,0 +1,69 @@
# Bulk job that revisits FA user avatars which have no downloaded file and
# tries to get them downloadable again: either by re-using a file URL the
# avatar already has, or by guessing one via
# `avatar.guess_file_uri_from_hles_with_info`. Avatars that end up with a URL
# are reset to state `:ok` and a `Domain::Fa::Job::UserAvatarJob` is enqueued.
class Domain::Fa::UserAvatarFixer < LegacyImport::BulkImportJob
  # @param start_at [Integer] avatar id handed to `find_each(start:)`
  # @param limit [Integer, nil] maximum number of avatars to process; nil means no cap
  # @param url_name [String, nil] when given, only that user's avatar is
  #   processed and `start_at` / `limit` are ignored
  def initialize(start_at:, limit: nil, url_name: nil)
    @start_at = start_at
    @limit = limit
    @url_name = url_name
  end

  def name
    "user_avatar_fixer"
  end

  # Processes either the single avatar selected by `url_name`, or every avatar
  # in state `no_file_on_guessed_user_page_error` starting at `start_at`.
  #
  # @return [Integer] number of avatars actually handed to #process_avatar
  # @raise [RuntimeError] when `url_name` matches no user, or the user has no avatar
  def run_impl
    @processed = 0

    if @url_name
      user = Domain::Fa::User.find_by(url_name: @url_name) || raise("user not found")
      # Fail with a clear message instead of a NoMethodError on nil further down.
      avatar = user.avatar || raise("user has no avatar")
      process_avatar(avatar)
      @processed += 1
    else
      Domain::Fa::UserAvatar.
        where(state: "no_file_on_guessed_user_page_error").
        find_each(start: @start_at, batch_size: 5) do |avatar|
        # Check the cap before counting so the returned total never exceeds it.
        break if @limit && @processed >= @limit
        process_avatar(avatar)
        @processed += 1
      end
    end

    @processed
  end

  private

  # Decides what to do with one avatar:
  # * it already has a file              -> nothing to do
  # * it has a file URL but no file      -> reset to :ok and enqueue a download
  # * otherwise                          -> guess a URL; enqueue when one is found
  def process_avatar(avatar)
    user = avatar.user
    logger.prefix = proc { "[avatar #{avatar.id.to_s.bold}, user #{user.url_name.to_s.bold}]" }
    logger.info("guessing...")

    if avatar.file
      logger.warn("have file, skipping")
      return
    end

    if avatar.file_url_str.present?
      avatar.state = :ok
      avatar.save!
      enqueue_avatar_job(user)
      logger.info("existing file_url_str: #{avatar.file_url_str}")
      logger.warn("have url, but no file, enqueue job")
      return
    end

    # First element is where the guess came from (:not_found when there is
    # none), second element is the guessed URI.
    guessed_avatar_uri = avatar.guess_file_uri_from_hles_with_info
    if guessed_avatar_uri.first == :not_found
      logger.error("did not find avatar url: #{guessed_avatar_uri.to_s.bold}")
      return
    end

    logger.info("found uri, enqueue job: #{guessed_avatar_uri.to_s.bold}")
    avatar.state = :ok
    avatar.file_uri = guessed_avatar_uri[1]
    # Keep the full guess tuple so the origin of the URI can be audited later.
    avatar.state_detail["user_avatar_fixer_job"] = guessed_avatar_uri
    avatar.save!
    enqueue_avatar_job(user)
  end

  # Enqueues the avatar download job for the given user.
  def enqueue_avatar_job(user)
    Domain::Fa::Job::UserAvatarJob.perform_later({ user: user })
  end
end

View File

@@ -1,10 +1,6 @@
class LegacyImport::BulkImportJob
attr_reader :logger_prefix
def logger
@logger_prefix ||= "[last_id (nil)]"
@logger ||= ColorLogger.make($stdout, self)
end
include HasColorLogger
def name
raise NotImplementedError.new("implement #name")
@@ -14,6 +10,10 @@ class LegacyImport::BulkImportJob
raise NotImplementedError.new("implement #run_impl")
end
def profile?
false
end
def run
start_profiling!
start_at = Time.now
@@ -24,7 +24,7 @@ class LegacyImport::BulkImportJob
end
def write_last_id(last_id)
@logger_prefix = "[last_id #{last_id.to_s.bold}]"
logger.prefix = proc { "[last_id #{last_id.to_s.bold}]" }
File.write("tmp/#{name}_progress", last_id.to_s)
end

View File

@@ -8,7 +8,12 @@ class Domain::Fa::UserAvatar < ReduxApplicationRecord
},
)
enum :state, [:ok, :download_error, :no_file_on_guessed_user_page_error]
enum :state, [
:ok, # got the file, no problem
:download_error, # other error processing the file
:no_file_on_guessed_user_page_error,
:file_not_found, # 404 from server
]
after_initialize do
self.state ||= :ok
self.state_detail ||= {}
@@ -40,12 +45,12 @@ class Domain::Fa::UserAvatar < ReduxApplicationRecord
user.guess_user_page_log_entry
end
def guess_file_uri_from_hles
def guess_file_uri_from_hles_with_info
hle = guess_user_page_log_entry
if hle
page = Domain::Fa::Parser::Page.new(hle.response.contents)
page = Domain::Fa::Parser::Page.new(hle.response.contents, require_logged_in: false)
if page.probably_user_page? && (url = page.user_page.profile_thumb_url)
return url
return [:user_page, url]
end
end
@@ -53,18 +58,22 @@ class Domain::Fa::UserAvatar < ReduxApplicationRecord
posts.
where(state: [:ok, nil]).
where("file_url_str IS NOT NULL").
limit(1).
to_a
order(created_at: :desc).
limit(3)
for post in posts
if (hle = post.guess_last_submission_page)
page = Domain::Fa::Parser::Page.new(hle.response.contents)
next unless page.probably_submission?
url = page.submission.artist_avatar_url
return url if url
return [:post_page, url, post.fa_id] if url
end
end
nil
[:not_found, nil]
end
# Returns only the guessed avatar URI, i.e. the second element of the tuple
# produced by #guess_file_uri_from_hles_with_info.
def guess_file_uri_from_hles
  info = guess_file_uri_from_hles_with_info
  info[1]
end
end

View File

@@ -0,0 +1,50 @@
<%# Recent-posts panel: lists up to five of the user's posts, highest fa_id first. %>
<%# Expects a `user` local; renders nothing when the user has no posts. %>
<% posts = user.posts.limit(5).order(fa_id: :desc) %>
<% return unless posts.any? %>
<%# The link options do not depend on the post, so build them once rather than per row. %>
<% post_link_opts = { class: "underline decoration-dashed text-slate-700" } %>
<%# `rel` is a space-separated token list; a comma would fuse both keywords into one unknown token. %>
<% fa_post_link_opts = post_link_opts.merge(target: "_blank", rel: "noreferrer nofollow") %>
<section class='border-2 border-slate-300 rounded-md mb-2'>
  <div class='border-b-2 border-b-slate-300 text-slate-600 p-1 italic flex'>
    <span class='text-lg grow'>
      Recent Posts <span class='text-sm'>(<%= user.posts.count %> total)</span>
    </span>
    <span class='text-sm self-center'>Posted at</span>
  </div>
  <ul>
    <% posts.each do |post| %>
      <li class='p-1 border-b last:border-b-0 border-slate-300 flex'>
        <span class='grow'>
          <% fa_post_link = "https://www.furaffinity.net/view/#{post.fa_id}" %>
          <% if can_see_hosted_post?(post) %>
            <%# Title links to the hosted copy; the icon links out to the original page. %>
            <%= link_to(
              post.title,
              domain_fa_post_path(post.fa_id),
              post_link_opts
            ) %> -
            <%= link_to(fa_post_link, fa_post_link_opts) do %>
              <%# Emit the icon explicitly instead of relying on the block's return value. %>
              <%= image_tag(
                image_path("arrow-top-right-on-square.svg"),
                class: "w-4 inline",
              ) %>
            <% end %>
          <% else %>
            <%= link_to(
              post.title,
              fa_post_link,
              fa_post_link_opts
            ) %>
          <% end %>
        </span>
        <span class='text-sm'>
          <% if post.posted_at %>
            <%= time_ago_in_words(post.posted_at) %> ago
          <% else %>
            <%# No posted_at value: show the post's state instead. %>
            <i><%= post.state %></i>
          <% end %>
        </span>
      </li>
    <% end %>
  </ul>
</section>

View File

@@ -36,7 +36,7 @@
<span><%= number_with_delimiter(user.followed_joins.count, delimiter: ",") %></span>
</div>
</section>
<%= render partial: "user_recent_posts_section", locals: { user: user } %>
<%= render "recent_posts_section", user: user %>
<section class='border-2 border-slate-300 rounded-md mb-2 overflow-clip'>
<% if (profile_html = user.profile_html) %>
<div class='text-lg border-b-2 border-b-slate-300 text-slate-600 p-1 italic'>Profile Description</div>

View File

@@ -1,33 +0,0 @@
<% posts = user.posts.limit(5).order(fa_id: :desc) %>
<% return unless posts.any? %>
<section class='border-2 border-slate-300 rounded-md mb-2'>
<div class='border-b-2 border-b-slate-300 text-slate-600 p-1 italic flex'>
<span class='text-lg grow'>
Recent Posts <span class='text-sm'>(<%= user.posts.count %> total)</span>
</span>
<span class='text-sm self-center'>Posted at</span>
</div>
<ul>
<% posts.each do |post| %>
<li class='p-1 border-b last:border-b-0 border-slate-300 flex'>
<span class='grow'>
<% post_url, opts = hosted_post_link_url_and_options(post) %>
<%= link_to(
post.title,
post_url,
opts.merge({
class: "underline decoration-dashed text-slate-700",
})
) %>
</span>
<span class='text-sm'>
<% if post.posted_at %>
<%= time_ago_in_words(post.posted_at) %> ago
<% else %>
<i><%= post.state %></i>
<% end %>
</span>
</li>
<% end %>
</ul>
</section>

View File

@@ -261,4 +261,21 @@ namespace :fa do
exporter.run
exporter.end_profiling! if profile
end
# Runs Domain::Fa::UserAvatarFixer.
#
# ENV inputs:
#   url_name - fix only this user's avatar (start_at is forced to 0, limit to 1)
#   start_at - avatar id to start from; required when url_name is not given
#   limit    - optional cap on the number of avatars to process
task fix_fa_user_avatars: %i[environment set_logger_stdout] do
  url_name = ENV["url_name"]

  start_at, limit =
    if url_name
      [0, 1]
    else
      [
        ENV["start_at"]&.to_i || raise("need start_at (user avatar id)"),
        ENV["limit"]&.to_i,
      ]
    end

  Domain::Fa::UserAvatarFixer.new(
    start_at: start_at,
    limit: limit,
    url_name: url_name,
  ).run
end
end

View File

@@ -23,49 +23,6 @@ describe Domain::Fa::Job::UserAvatarJob do
context "when the avatar model does not yet exist" do
include_context "create meesh user"
context "the user model has a last_user_page_id" do
let(:meesh_user_page_log_entry) do
SpecUtil.create_http_log_entry(
uri: "https://www.furaffinity.net/user/meesh/",
status_code: 200,
content_type: "text/html",
contents: SpecUtil.read_fixture_file("domain/fa/job/user_page_meesh.html"),
)
end
before do
@log_entries = SpecUtil.init_http_client_mock(
http_client_mock, [
{
uri: "https://a.furaffinity.net/1635789297/meesh.gif",
status_code: 200,
content_type: "image/gif",
contents: SpecUtil.read_fixture_file("domain/fa/job/meesh_avatar_file.gif", mode: "rb"),
caused_by_entry: meesh_user_page_log_entry,
},
]
)
user.log_entry_detail["last_user_page_id"] = meesh_user_page_log_entry.id
user.save!
end
it "succeeds" do
perform_now({ user: user })
user.reload
avatar = user.avatar
expect(avatar).not_to be_nil
expect(avatar.log_entry).to eq(@log_entries[0])
expect(HexUtil.bin2hex avatar.file_sha256).to eq("ebbafc07555df0a0656a9b32ec9b95723c62c5246937dc8434924d9241d1b570")
expect(avatar.downloaded_file_at).to be_within(1.seconds).of(Time.now)
perform_now({ user: user })
user.reload
avatar2 = user.avatar
expect(avatar).to eq(avatar2)
end
end
context "the user has not been page scanned yet" do
it "enqueues a user page scan job" do
@@ -110,4 +67,35 @@ describe Domain::Fa::Job::UserAvatarJob do
expect(avatar.downloaded_file_at).to be_within(1.seconds).of(Time.now)
end
end
# Covers the case where the avatar already has a file URI but the server
# answers the download with HTTP 404.
context "the avatar is 404" do
include_context "create meesh user"
before do
# Give the avatar a file URI up front so the job goes straight to the download.
avatar = user.avatar_or_create
avatar.file_uri = "https://www.furaffinity.net/a/test/uri.gif"
avatar.save!
# The mocked response is a 404 that still carries a body (the gif fixture).
# NOTE(review): presumably init_http_client_mock returns the log entries it
# creates for the listed requests - confirm against SpecUtil.
@log_entries = SpecUtil.init_http_client_mock(
http_client_mock, [
{
uri: "https://www.furaffinity.net/a/test/uri.gif",
status_code: 404,
content_type: "image/gif",
contents: SpecUtil.read_fixture_file("domain/fa/job/meesh_avatar_file.gif", mode: "rb"),
},
]
)
end
it "has a file and the right state" do
perform_now({ user: user })
user.reload
avatar = user.avatar
expect(avatar).not_to be_nil
expect(avatar.log_entry).to eq(@log_entries[0])
# The 404 body is still stored as the avatar file, so its hash must match the fixture.
expect(HexUtil.bin2hex avatar.file_sha256).to eq("ebbafc07555df0a0656a9b32ec9b95723c62c5246937dc8434924d9241d1b570")
expect(avatar.downloaded_file_at).to be_within(1.seconds).of(Time.now)
# A 404 is recorded with its own state rather than as a download error.
expect(avatar.state).to eq("file_not_found")
end
end
end