FurArchiver fallback for FA posts missing media
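At a high level, the commit teaches the pipeline to retry FA post files that are stuck in terminal_error with a 404 by fetching them from furarchiver.net via a new Job::FaPostFurArchiverPostFileJob. A minimal sketch of pushing one such file through the new job (the names are taken from the diff below, the query mirrors the rake task added here, and the rest of the setup is assumed):

    # Pick one FA post file whose CDN download 404'd and retry it via FurArchiver.
    post_file =
      Domain::PostFile
        .joins(:post)
        .for_post_type(Domain::Post::FaPost)
        .where(state: "terminal_error", last_status_code: 404)
        .first
    Job::FaPostFurArchiverPostFileJob.perform_now({ post_file: post_file }) if post_file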
@@ -11,9 +11,12 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
    logger.info("scanning post")

    if force_scan? || (post.state_ok? && !post.scanned_at.present?) ||
         # check for !state_ok? on the file maybe?
         post.file&.state_file_error?
      ReduxApplicationRecord.transaction { scan_post(post) }
      ReduxApplicationRecord.transaction do
        scan_post(post)
        post.save!
        post.reload
      end
    end

    file = post.file

@@ -11,6 +11,10 @@ class Domain::PostFileThumbnailJob < Scraper::JobBase
  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    post_file = T.cast(args[:post_file], Domain::PostFile)
    unless post_file.state_ok?
      logger.warn("post file not in ok state, skipping")
      return
    end
    Domain::PostFile::Thumbnail.create_for_post_file!(post_file)
    Domain::PostFile::BitFingerprint.create_for_post_file!(post_file)
    nil

@@ -10,8 +10,6 @@ module Domain::StaticFileJobHelper

  sig { params(post_file: Domain::PostFile).void }
  def download_post_file(post_file)
    should_enqueue_thumbnail_job = T.let(false, T::Boolean)

    if post_file.state_terminal_error?
      logger.error(format_tags("terminal error state, skipping"))
      return
@@ -51,6 +49,19 @@ module Domain::StaticFileJobHelper
      return
    end

    handle_file_download_response(post_file, response)
  ensure
    post_file.save! if post_file
  end

  sig do
    params(
      post_file: Domain::PostFile,
      response: Scraper::HttpClient::Response,
    ).void
  end
  def handle_file_download_response(post_file, response)
    should_enqueue_thumbnail_job = T.let(false, T::Boolean)
    status_code = response.status_code
    if response.log_entry.response_sha256 == BlobFile::EMPTY_FILE_SHA256 &&
         post_file.retry_count == 0
@@ -61,6 +72,7 @@ module Domain::StaticFileJobHelper
    end

    post_file.log_entry = response.log_entry
    post_file.blob_sha256 = response.log_entry.response_sha256
    post_file.last_status_code = status_code

    logger.tagged(make_arg_tag(response.log_entry)) do

app/jobs/job/fa_post_fur_archiver_post_file_job.rb (new file, 129 lines)
@@ -0,0 +1,129 @@
# typed: strict
class Job::FaPostFurArchiverPostFileJob < Scraper::JobBase
  extend T::Sig
  include Domain::StaticFileJobHelper

  queue_as :static_file
  discard_on Scraper::JobBase::JobError, ActiveJob::DeserializationError

  sig { override.returns(Symbol) }
  def self.http_factory_method
    :get_fur_archiver_http_client
  end

  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    post_file = T.cast(args[:post_file], Domain::PostFile)
    logger.tagged(make_arg_tag(post_file), make_arg_tag(post_file.post)) do
      handle(post_file)
    end
  end

  FA_URL_PATTERN =
    %r{
      https://(d\.facdn\.net|d\.furaffinity\.net)/art/([^\/]+)/(\d+)/([^\/]+)
    }x

  sig { params(post_file: Domain::PostFile).void }
  def handle(post_file)
    post = T.cast(post_file.post, Domain::Post::FaPost)

    if post.file != post_file && post.file&.state_ok?
      logger.info("file already downloaded, deleting old file")
      post_file.destroy
      return
    end

    if post_file.state_ok? && post_file.last_status_code == 200
      logger.info("file already downloaded, skipping")
      return
    end

    if post.tried_from_fur_archiver?
      logger.warn("already tried to download from fur archiver, skipping")
      return
    end

    unless post_file.last_status_code == 404
      logger.warn("last status code is not 404, skipping")
      return
    end

    unless post_file.state_terminal_error?
      logger.warn("post file not in terminal error state, skipping")
      return
    end

    user_url_name = post.creator&.url_name
    fatal_error("no user url name") unless user_url_name

    fa_file_url_str = post_file.url_str
    fatal_error("no fa file url") unless fa_file_url_str

    match = fa_file_url_str.match(FA_URL_PATTERN)
    unless match
      if fa_file_url_str.include?("#{user_url_name}/stories/")
        logger.warn("old stories URL, force rescan")
        post.reload
        Domain::Fa::Job::ScanPostJob.perform_now(
          { post: post, force_scan: true },
        )
        post.reload
        unless post.state_ok?
          fatal_error("post not in ok state after rescan: #{post.state}")
        end
        return if post.file&.state_ok? || post.file&.state_pending?
        match = fa_file_url_str.match(FA_URL_PATTERN)
        unless match
          fatal_error("invalid fa file url after rescan: #{fa_file_url_str}")
        end
      else
        fatal_error("invalid fa file url: #{fa_file_url_str}")
      end
    end

    unless url_user_url_name = match.captures[1]
      fatal_error("no user url name in url: #{fa_file_url_str}")
    end

    unless url_file_name = match.captures[3]
      fatal_error("no file name in url: #{fa_file_url_str}")
    end

    unless user_url_name == url_user_url_name
      logger.tagged(
        make_tag("in_db", user_url_name),
        make_tag("in_url", url_user_url_name),
      ) { fatal_error("user name mismatch") }
    end

    fur_archiver_url_str =
      "https://furarchiver.net/File/View?artist=#{url_user_url_name}&filename=#{url_file_name}"

    post.tried_from_fur_archiver = true
    post_file = post.files.build(url_str: fur_archiver_url_str)

    begin
      response = http_client.get(fur_archiver_url_str)
    rescue Scraper::HttpClient::InvalidURLError,
           Curl::Err::HostResolutionError => e
      post_file.state_terminal_error!
      post_file.error_message = e.message
      logger.error(
        format_tags(
          "invalid fur archiver url, terminal error state",
          make_tag("error_message", e.message),
          make_tag("url", fur_archiver_url_str),
        ),
      )
      return
    ensure
      post.save! if post
    end

    post_file.save!
    post.reload

    handle_file_download_response(post_file, response)
  end
end
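As a quick illustration of the URL rewrite the job performs, here is FA_URL_PATTERN applied to the d.facdn.net fixture URL used in the specs further down (illustrative only, runnable in a plain Ruby console):

    FA_URL_PATTERN =
      %r{https://(d\.facdn\.net|d\.furaffinity\.net)/art/([^\/]+)/(\d+)/([^\/]+)}x

    fa_url = "https://d.facdn.net/art/wolfsparta/1496842943/1496842943.wolfsparta_caught_pt2.png"
    match = fa_url.match(FA_URL_PATTERN)
    artist = match.captures[1]   # => "wolfsparta"
    filename = match.captures[3] # => "1496842943.wolfsparta_caught_pt2.png"
    "https://furarchiver.net/File/View?artist=#{artist}&filename=#{filename}"
    # => "https://furarchiver.net/File/View?artist=wolfsparta&filename=1496842943.wolfsparta_caught_pt2.png"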

@@ -64,6 +64,14 @@ class Scraper::ClientFactory
    end
  end

  def self.get_fur_archiver_http_client
    if Rails.env.test?
      @http_client_mock || raise("no http client mock set")
    else
      _http_client_impl(:fur_archiver, Scraper::FurArchiverHttpClientConfig)
    end
  end

  def self._gallery_dl_client_impl
    @gallery_dl_clients.value ||=
      begin

app/lib/scraper/fur_archiver_http_client_config.rb (new file, 24 lines)
@@ -0,0 +1,24 @@
# typed: strict
class Scraper::FurArchiverHttpClientConfig < Scraper::HttpClientConfig
  extend T::Sig

  sig { override.returns(T::Array[[String, Numeric]]) }
  def ratelimit
    [["furarchiver.net", 1.0]]
  end

  sig { override.returns(T::Array[String]) }
  def allowed_domains
    ["furarchiver.net"]
  end

  sig { override.returns(Integer) }
  def redirect_limit
    2
  end

  sig { override.returns(T.nilable(T::Array[T.untyped])) }
  def cookies
    nil
  end
end
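How the factory method and the config above fit together, sketched under the assumption that _http_client_impl builds a Scraper::HttpClient from the given config class (only the names shown in the diff are real):

    # Outside of tests, the factory returns a client restricted to furarchiver.net,
    # rate limited to roughly one request per second, with at most two redirects
    # and no cookies, per Scraper::FurArchiverHttpClientConfig.
    client = Scraper::ClientFactory.get_fur_archiver_http_client
    response = client.get(
      "https://furarchiver.net/File/View?artist=wolfsparta&filename=1496842943.wolfsparta_caught_pt2.png",
    )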

@@ -171,6 +171,24 @@ class Scraper::HttpClient
        response_headers["Content-Type"] || response_headers["content-type"] ||
          "none/none"

      if method == :get && uri.host == "furarchiver.net" && response_code == 200
        if response_body.include?(
             "Error 404 - File Not Found -- - Fur Affinity [dot] net",
           )
          logger.warn("fixing buggy fur archiver 404 response (Not Found XML)")
          response_code = 404
          content_type = "text/xml"
        elsif response_body.size == 3072 &&
              Digest::SHA256.hexdigest(response_body) ==
                "fbe9b8727e0ae24baacf63b6553d331c84779e40b743380628a5181e0e9fa2ff"
          logger.warn(
            "fixing buggy fur archiver 404 response (image not found gif)",
          )
          response_code = 404
          content_type = "image/gif"
        end
      end

      retries = 0
      total_time_ms = -1
      begin
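The second branch above recognizes FurArchiver's placeholder "image not found" GIF purely by its byte size and SHA-256 (the same digest asserted in the new spec below). Restated in isolation as a standalone sketch, not a helper that exists in the codebase:

    require "digest"

    NOT_FOUND_GIF_SHA256 =
      "fbe9b8727e0ae24baacf63b6553d331c84779e40b743380628a5181e0e9fa2ff"

    # true when a 200 response body is actually FurArchiver's 3072-byte placeholder GIF
    def buggy_fur_archiver_not_found_gif?(body)
      body.size == 3072 && Digest::SHA256.hexdigest(body) == NOT_FOUND_GIF_SHA256
    end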

@@ -64,7 +64,6 @@ class Scraper::InkbunnyHttpClientConfig < Scraper::HttpClientConfig
  end

  sig { override.returns(Integer) }

  def redirect_limit
    2
  end

@@ -21,6 +21,10 @@ class Domain::Post::FaPost < Domain::Post
  attr_json :first_gallery_page_id, :integer
  attr_json :first_seen_entry_id, :integer

  # TODO - convert `file` to Domain::PostFile::FaPostFile and
  # move this to Domain::PostFile::FaPostFile
  attr_json :tried_from_fur_archiver, :boolean, default: false

  belongs_to :last_user_page, class_name: "::HttpLogEntry", optional: true
  belongs_to :first_browse_page, class_name: "::HttpLogEntry", optional: true
  belongs_to :first_gallery_page, class_name: "::HttpLogEntry", optional: true
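Because tried_from_fur_archiver is an attr_json attribute, it is stored in the post's json_attributes JSON column rather than in a dedicated column, which is why the rake task later in this commit filters on a ->> expression. From Ruby it still reads like a normal attribute; a sketch (the lookup itself is assumed):

    post = Domain::Post::FaPost.find(some_fa_post_id) # hypothetical id
    post.tried_from_fur_archiver?                     # => false until the fallback has been attempted
    post.tried_from_fur_archiver = true
    post.save!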

@@ -8,10 +8,11 @@ class Domain::PostFile < ReduxApplicationRecord
             class_name: "::Domain::Post",
             touch: true
  belongs_to :log_entry, class_name: "::HttpLogEntry", optional: true
  belongs_to :blob,
             class_name: "::BlobFile",
             optional: true,
             foreign_key: :blob_sha256

  has_one :blob,
          class_name: "::BlobFile",
          through: :log_entry,
          source: :response

  has_many :bit_fingerprints,
           class_name: "::Domain::PostFile::BitFingerprint",
@@ -53,8 +54,6 @@ class Domain::PostFile < ReduxApplicationRecord
    self.type ||= self.class.name if new_record?
  end

  before_save { self.blob_sha256 ||= self.log_entry&.response&.sha256 }

  sig { returns(T.nilable(String)) }
  def content_type
    return nil unless log_entry = self.log_entry
@@ -68,29 +67,7 @@ class Domain::PostFile < ReduxApplicationRecord

  sig { returns(T.nilable(String)) }
  def sha256
    self.blob_sha256 ||
      begin
        return nil unless log_entry = self.log_entry
        return nil unless response = log_entry.response
        response.sha256
      end
  end

  sig { returns(T.nilable(BlobFile)) }
  def blob
    super ||
      begin
        @blob_file_model = T.let(@blob_file_model, T.nilable(BlobFile))
        @blob_file_model ||=
          ((sha256 = self.blob_sha256) ? BlobFile.migrate_sha256!(sha256) : nil)
        @blob_file_model
      end || log_entry&.response
  end

  sig { params(le: T.nilable(HttpLogEntry)).returns(T.nilable(HttpLogEntry)) }
  def log_entry=(le)
    self.blob_sha256 ||= le.response_sha256 if le.present?
    super(le)
    blob&.sha256
  end

  sig { returns(Integer) }

@@ -1,3 +1,9 @@
# typed: strict
class Domain::Post::FaPostPolicy < Domain::PostPolicy
  extend T::Sig

  sig { returns(T::Boolean) }
  def view_tried_from_fur_archiver?
    is_role_at_least_moderator?
  end
end

@@ -1,3 +1,4 @@
<% post_file = post.primary_file_for_view %>
<section class="sky-section">
  <div class="section-header">Visually Similar Posts</div>
  <div class="grid grid-cols-[auto,auto,1fr,auto] bg-slate-100">
@@ -15,6 +16,8 @@
    %>
    <% if fprint.nil? %>
      <div class="col-span-full p-4 text-center text-slate-500">File not processed</div>
    <% elsif post_file && post_file.state_terminal_error? %>
      <div class="col-span-full p-4 text-center text-slate-500">File not found</div>
    <% elsif fprints.any? %>
      <% num_neighbors = fprints.size %>
      <% fprints.each_with_index do |similar_fingerprint, index| %>

@@ -10,3 +10,9 @@
    <i class="fa-solid fa-calendar-days mr-1"></i>
    Status: <%= post.status_for_view %>
  </span>
  <% if policy(post).view_tried_from_fur_archiver? && post.tried_from_fur_archiver? %>
    <span>
      <i class="fa-solid fa-download mr-1"></i>
      FurArchiver
    </span>
  <% end %>

@@ -40,7 +40,7 @@
  <% end %>
  <% if post_file.blob.present? %>
    <span class="badge bg-light text-dark">
      <i class="fa-solid fa-file-arrow-down me-1"></i>SHA256: <%= HexUtil.bin2hex( post_file.blob_sha256[0..7] )%>
      <i class="fa-solid fa-file-arrow-down me-1"></i>SHA256: <%= HexUtil.bin2hex( post_file.blob.sha256[0..7] )%>
    </span>
  <% end %>
  <% if post_file.error_message.present? %>

rake/fa.rake (35 changed lines)
@@ -302,4 +302,39 @@ namespace :fa do
  task enqueue_pending_user_pages: :environment do
    Domain::Fa::EnqueueDueUserPageScans.new.run
  end

  desc "Get 404 files from FurArchiver"
  task get_404_files_from_fur_archiver: :set_logger_stdout do
    query =
      Domain::PostFile
        .joins(:post)
        .for_post_type(Domain::Post::FaPost)
        .where(state: "terminal_error", last_status_code: 404)
        .where(
          "((\"post\".\"json_attributes\"->>'tried_from_fur_archiver')::bool) IS NULL OR ((\"post\".\"json_attributes\"->>'tried_from_fur_archiver')::bool) != TRUE",
        )

    # query =
    #   Domain::User
    #     .find_by_param("fa@wolfsparta")
    #     .posts
    #     .flat_map do |post|
    #       post.files.where(state: "terminal_error", last_status_code: 404)
    #     end

    puts "counting..."
    total = query.count
    puts "total: #{total}"
    pb = ProgressBar.create(total: total, format: "%t: %c/%C %B %p%% %a %e")

    counter = 0
    query.find_each do |post_file|
      next if post_file.url_str.include?("/stories/")
      Job::FaPostFurArchiverPostFileJob.perform_now({ post_file: })
      pb.progress = [pb.progress + 1, total].min
      post = post_file.post
      puts "processed #{post.to_param} / #{post.title_for_view}".bold
      counter += 1
    end
  end
end
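The backfill would normally be started with "bundle exec rake fa:get_404_files_from_fur_archiver"; the equivalent from Ruby, shown as an assumption rather than something the diff adds:

    require "rake"
    Rails.application.load_tasks
    Rake::Task["fa:get_404_files_from_fur_archiver"].invoke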

sorbet/rbi/dsl/domain/post/fa_post.rbi (generated, 57 changed lines)
@@ -1812,6 +1812,9 @@ class Domain::Post::FaPost
    sig { void }
    def restore_title!; end

    sig { void }
    def restore_tried_from_fur_archiver!; end

    sig { void }
    def restore_type!; end

@@ -1962,6 +1965,12 @@ class Domain::Post::FaPost
    sig { returns(T::Boolean) }
    def saved_change_to_title?; end

    sig { returns(T.nilable([T.nilable(T::Boolean), T.nilable(T::Boolean)])) }
    def saved_change_to_tried_from_fur_archiver; end

    sig { returns(T::Boolean) }
    def saved_change_to_tried_from_fur_archiver?; end

    sig { returns(T.nilable([T.untyped, T.untyped])) }
    def saved_change_to_type; end

@@ -2264,6 +2273,51 @@ class Domain::Post::FaPost
    sig { void }
    def title_will_change!; end

    sig { returns(T.nilable(T::Boolean)) }
    def tried_from_fur_archiver; end

    sig { params(value: T.nilable(T::Boolean)).returns(T.nilable(T::Boolean)) }
    def tried_from_fur_archiver=(value); end

    sig { returns(T::Boolean) }
    def tried_from_fur_archiver?; end

    sig { returns(T.nilable(T::Boolean)) }
    def tried_from_fur_archiver_before_last_save; end

    sig { returns(T.untyped) }
    def tried_from_fur_archiver_before_type_cast; end

    sig { returns(T::Boolean) }
    def tried_from_fur_archiver_came_from_user?; end

    sig { returns(T.nilable([T.nilable(T::Boolean), T.nilable(T::Boolean)])) }
    def tried_from_fur_archiver_change; end

    sig { returns(T.nilable([T.nilable(T::Boolean), T.nilable(T::Boolean)])) }
    def tried_from_fur_archiver_change_to_be_saved; end

    sig { params(from: T.nilable(T::Boolean), to: T.nilable(T::Boolean)).returns(T::Boolean) }
    def tried_from_fur_archiver_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end

    sig { returns(T.nilable(T::Boolean)) }
    def tried_from_fur_archiver_in_database; end

    sig { returns(T.nilable([T.nilable(T::Boolean), T.nilable(T::Boolean)])) }
    def tried_from_fur_archiver_previous_change; end

    sig { params(from: T.nilable(T::Boolean), to: T.nilable(T::Boolean)).returns(T::Boolean) }
    def tried_from_fur_archiver_previously_changed?(from: T.unsafe(nil), to: T.unsafe(nil)); end

    sig { returns(T.nilable(T::Boolean)) }
    def tried_from_fur_archiver_previously_was; end

    sig { returns(T.nilable(T::Boolean)) }
    def tried_from_fur_archiver_was; end

    sig { void }
    def tried_from_fur_archiver_will_change!; end

    sig { returns(T.untyped) }
    def type; end

@@ -2436,6 +2490,9 @@ class Domain::Post::FaPost
    sig { returns(T::Boolean) }
    def will_save_change_to_title?; end

    sig { returns(T::Boolean) }
    def will_save_change_to_tried_from_fur_archiver?; end

    sig { returns(T::Boolean) }
    def will_save_change_to_type?; end

sorbet/rbi/dsl/job/fa_post_fur_archiver_post_file_job.rbi (generated, new file, 27 lines)
@@ -0,0 +1,27 @@
# typed: true

# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Job::FaPostFurArchiverPostFileJob`.
# Please instead update this file by running `bin/tapioca dsl Job::FaPostFurArchiverPostFileJob`.


class Job::FaPostFurArchiverPostFileJob
  sig { returns(ColorLogger) }
  def logger; end

  class << self
    sig { returns(ColorLogger) }
    def logger; end

    sig do
      params(
        args: T::Hash[::Symbol, T.untyped],
        block: T.nilable(T.proc.params(job: Job::FaPostFurArchiverPostFileJob).void)
      ).returns(T.any(Job::FaPostFurArchiverPostFileJob, FalseClass))
    end
    def perform_later(args, &block); end

    sig { params(args: T::Hash[::Symbol, T.untyped]).returns(T.untyped) }
    def perform_now(args); end
  end
end

@@ -70,7 +70,7 @@ RSpec.describe Domain::E621::Job::ScanUserFavsJob do
    it "updates scanned_favs_at timestamp" do
      expect { perform_now({ user: user }) }.to change {
        user.reload.scanned_favs_at
      }.from(nil).to be_within(1.second).of(Time.current)
      }.from(nil).to be_within(3.seconds).of(Time.current)
    end

    context "when API returns error" do
@@ -126,7 +126,7 @@ RSpec.describe Domain::E621::Job::ScanUserFavsJob do
    it "updates scanned_favs_at timestamp" do
      expect { perform_now({ user: user }) }.to change {
        user.reload.scanned_favs_at
      }.from(nil).to(be_within(1.second).of(Time.current))
      }.from(nil).to(be_within(3.seconds).of(Time.current))
    end

    it "sets scanned_favs_status to ok" do

@@ -4,7 +4,6 @@ require "rails_helper"
RSpec.describe Domain::Fa::Job::ScanFileJob do
  include PerformJobHelpers

  let(:fa_post) { create(:domain_post_fa_post) }
  let(:post_file) { create(:domain_post_file, :has_url) }
  let(:http_client_mock) { instance_double("::Scraper::HttpClient") }

spec/jobs/fa_post_fur_archiver_post_file_job_spec.rb (new file, 126 lines)
@@ -0,0 +1,126 @@
# typed: false
require "rails_helper"

RSpec.describe Job::FaPostFurArchiverPostFileJob do
  include PerformJobHelpers

  let(:http_client_mock) { instance_double("::Scraper::HttpClient") }

  let(:post_file) do
    post_file =
      create(
        :domain_post_file,
        url_str: file_url_str,
        state: "terminal_error",
        last_status_code: 404,
        log_entry:
          create(
            :http_log_entry,
            :text_entry,
            status_code: 404,
            uri_str: file_url_str,
          ),
        post:
          create(
            :domain_post_fa_post,
            creator: create(:domain_user_fa_user, url_name: user_url_name),
          ),
      )
    post_file.save!
    post_file
  end
  let(:post) { post_file.post }
  let(:old_log_entry) { post_file.log_entry }

  let(:image_data) do
    File.binread(
      Rails.root.join(
        "test/fixtures/files/1496842943.wolfsparta_caught_pt2.png",
      ),
    )
  end

  let(:not_found_data) do
    File.binread(
      Rails.root.join(
        "test/fixtures/files/1165272531.akirameerkat_meerkat_feet_1.jpg",
      ),
    )
  end

  before do
    Scraper::ClientFactory.http_client_mock = http_client_mock
    @log_entries =
      HttpClientMockHelpers.init_http_client_mock(
        http_client_mock,
        client_mock_config,
      )
  end

  describe "#perform" do
    let(:fur_archiver_url_str) do
      "https://furarchiver.net/File/View?artist=wolfsparta&filename=1496842943.wolfsparta_caught_pt2.png"
    end

    let(:user_url_name) { "wolfsparta" }

    shared_examples "correct behavior" do
      let(:client_mock_config) do
        [
          {
            uri: fur_archiver_url_str,
            status_code: 200,
            content_type: "image/png",
            contents: image_data,
          },
        ]
      end

      it "downloads the file from fur archiver" do
        expect do
          perform_now({ post_file: post_file })
          post.reload
        end.to change { post.file&.log_entry }.from(old_log_entry).to(
          have_attributes(uri: have_attributes(to_s: fur_archiver_url_str)),
        )
      end

      it "updates the post_file blob" do
        expect do
          perform_now({ post_file: post_file })
          post.reload
        end.to change { post.file&.blob }.from(old_log_entry.response).to(
          @log_entries[0].response,
        )
      end

      it "sets the last status code" do
        expect do
          perform_now({ post_file: post_file })
          post.reload
        end.to change { post.file&.last_status_code }.from(404).to(200)
      end

      it "sets the post_file state to ok" do
        expect do
          perform_now({ post_file: post_file })
          post.reload
        end.to change { post.file&.state }.from("terminal_error").to("ok")
      end
    end

    context "with a d.facdn.net url" do
      let(:file_url_str) do
        "https://d.facdn.net/art/wolfsparta/1496842943/1496842943.wolfsparta_caught_pt2.png"
      end
      include_examples "correct behavior"
    end

    context "with a d.furaffinity.net url" do
      let(:file_url_str) do
        "https://d.furaffinity.net/art/wolfsparta/1496842943/1496842943.wolfsparta_caught_pt2.png"
      end
      include_examples "correct behavior"
    end
  end
end

@@ -120,7 +120,8 @@ RSpec.describe Domain::MigrateToDomain do
          md5s: old_file.md5s,
          state: new_state,
          log_entry_id: old_file.log_entry_id,
          blob_sha256: old_file.log_entry&.response_sha256,
          # blob is now through log_entry.response
          # blob_sha256: old_file.log_entry&.response_sha256,
          last_status_code: old_file.log_entry&.status_code,
        )

@@ -8,7 +8,7 @@ describe Scraper::HttpClient do
  end

  def allowed_domains
    ["*.example.com"]
    %w[*.example.com furarchiver.net]
  end

  def ratelimit
@@ -67,4 +67,72 @@ describe Scraper::HttpClient do
    assert_equal("text/plain", log_entry.response.content_type)
    assert_match(/the response/, log_entry.response.content_bytes)
  end

  it "fixes buggy fur archiver 404 response (xml not found)" do
    client =
      Scraper::HttpClient.new(
        TestHttpClientConfig.new,
        SpecUtil.mock_http_performer(
          :get,
          "https://furarchiver.net/File/View?artist=akirameerkat&filename=1165272531.akirameerkat_meerkat_feet_1.jpg",
          request_headers: {
            "cookie" => "",
          },
          response_code: 200,
          response_headers: {
            "content-type" => "image/jpeg",
          },
          response_body:
            File.binread(
              Rails.root.join(
                "test/fixtures/files/1165272531.akirameerkat_meerkat_feet_1.jpg",
              ),
            ),
        ),
      )

    response =
      client.get(
        "https://furarchiver.net/File/View?artist=akirameerkat&filename=1165272531.akirameerkat_meerkat_feet_1.jpg",
      )
    expect(response.status_code).to eq(404)
    expect(response.body).to include("Error 404 - File Not Found")
    expect(response.log_entry.status_code).to eq(404)
    expect(response.log_entry.content_type).to eq("text/xml")
  end

  it "fixes buggy fur archiver 404 response (image not found gif)" do
    client =
      Scraper::HttpClient.new(
        TestHttpClientConfig.new,
        SpecUtil.mock_http_performer(
          :get,
          "https://furarchiver.net/File/View?artist=summercat&filename=1274587736.summercat_koreal.jpg",
          request_headers: {
            "cookie" => "",
          },
          response_code: 200,
          response_headers: {
            "content-type" => "image/jpeg",
          },
          response_body:
            File.binread(
              Rails.root.join(
                "test/fixtures/files/1274587736.summercat_koreal.jpg",
              ),
            ),
        ),
      )

    response =
      client.get(
        "https://furarchiver.net/File/View?artist=summercat&filename=1274587736.summercat_koreal.jpg",
      )
    expect(response.status_code).to eq(404)
    expect(response.log_entry.status_code).to eq(404)
    expect(response.log_entry.content_type).to eq("image/gif")
    expect(HexUtil.bin2hex(response.log_entry.response.sha256)).to eq(
      "fbe9b8727e0ae24baacf63b6553d331c84779e40b743380628a5181e0e9fa2ff",
    )
  end
end

test/fixtures/files/1165272531.akirameerkat_meerkat_feet_1.jpg (vendored, new file, 54 lines)
@@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- -->
<html>
<head>
<title>Error 404 - File Not Found -- - Fur Affinity [dot] net</title>

<meta name="description" content="Art- Fur affinity is a furry art community." />

<meta name="keywords" content="Art,Digital Art,Photography,Poetry,Online Art,Furry,Inflation,Vore,Macro,Yiff,Fat Fur" />
<meta name="classification" content="Art" />
<meta name="copyright" content="Copyright C 2004-2005 Fur Affinity [dot] net" />
<meta http-equiv="Content-Type" content="text/html;">

</head>

<body style="background-color: #919bad; font-family: Verdana; font-size: 16px;">

<h1 style="text-align: center;">Fur Affinity</h1>
<h2 style="text-align: center;">Error 404</h2>
<br>

<div align="center">
  <div style="width:70%; border: 1px solid #d4dce8; padding: 3px; background-color: #919bad;">
    <div style="padding: 3px; background-color: #d4dce8; font-weight: bold; text-align: center;">
      <p>
        File not found.
      </p>
    </div>

    <p style="color: #000000; text-align: left; padding: 10px;">
      The resource you are looking for was not found.
    </p>

    <hr style="width: 70%; background: #d4dce8;" />

    <p style="color: #000000; text-align: left; padding: 10px;">
    </p>
  </div>
</div>

<div style="margin: 0px auto; padding: 10px; text-align: center; font-size: 10px;">
  <p>
    [ <a href="http://www.furaffinity.net/lm/tos/">Terms of Service</a> ]
    [ <a href="http://www.furaffinity.net/lm/submissionpolicy/">Submission Agreement</a> ]
  </p>

  No portions of www.furaffinity.net may be used without expressed, written permission.<br/>
  All artwork is copyrighted to the respective owner.<br/>
</div>

</body>
</html>

test/fixtures/files/1274587736.summercat_koreal.jpg (vendored, new binary file, 3.0 KiB)
test/fixtures/files/1496842943.wolfsparta_caught_pt2.png (vendored, new binary file, 271 KiB)