bmp support, buggy fa user, url decode usernames

This commit is contained in:
Dylan Knutson
2025-08-16 04:44:04 +00:00
parent 1e46e42352
commit a1fab9e645
13 changed files with 1457 additions and 21 deletions

View File

@@ -11,7 +11,8 @@ class Domain::Fa::Job::Base < Scraper::JobBase
protected
BUGGY_USER_URL_NAMES = T.let(["click here", "..", "."], T::Array[String])
BUGGY_USER_URL_NAMES =
T.let(["click here", "..", ".", "<i class="], T::Array[String])
sig { params(user: Domain::User::FaUser).returns(T::Boolean) }
def buggy_user?(user)

View File

@@ -425,6 +425,7 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
href = link_elem["href"]
url_name =
%r{/user/(.+)/}.match(href)&.[](1) || raise("invalid url: #{href}")
url_name = CGI.unescape(url_name)
if @march_2025_update
name =

View File

@@ -29,7 +29,7 @@ module HasColorLogger
sig { params(tags: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
def make_tags(tags)
tags.map { |tag_name, tag_value| make_tag(tag_name.to_s, tag_value) }
self.class.make_tags(tags)
end
sig { params(tags: T.any(String, T::Array[String])).returns(String) }
@@ -52,15 +52,20 @@ module HasColorLogger
module ClassMethods
extend T::Sig
# Converts a hash of tag names to values into an array of formatted tag
# strings, one per entry, by passing each pair to `make_tag`.
#
# @param tags [Hash{Symbol => Object}] tag names mapped to their values
# @return [Array<String>] one formatted string per entry, in hash order
sig { params(tags: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
def make_tags(tags)
# The symbol key is stringified because `make_tag` is declared to take a String name.
tags.map { |tag_name, tag_value| make_tag(tag_name.to_s, tag_value) }
end
# Formats a single tag as "<tag_name>: <value>".
#
# @param tag_name [String] label placed before the colon
# @param tag_value [Object, nil] value to render; converted with `to_s`
# @return [String] the formatted tag
sig { params(tag_name: String, tag_value: T.untyped).returns(String) }
def make_tag(tag_name, tag_value)
# Any falsy value (nil, and also false) is rendered as the "(nil)" placeholder.
# NOTE(review): `bold` / `italic` are String extensions not defined in this
# view — presumably from a terminal-styling gem; confirm.
tag_value_str = tag_value ? tag_value.to_s.bold : "(nil)".italic
"#{tag_name}: #{tag_value_str}"
end
sig { params(tags: String).returns(String) }
sig { params(tags: T.any(String, T::Array[String])).returns(String) }
def format_tags(*tags)
format_tags_arr(tags)
format_tags_arr(T.unsafe([tags].flatten))
end
sig { params(tags: T::Array[String]).returns(String) }

View File

@@ -65,9 +65,7 @@ class Scraper::CurlHttpPerformer
curl = get_curl
start_at = Time.now
# TODO - normalizing the URL breaks URLs with utf-8 characters
# curl.url = request.uri.normalize.to_s
curl.url = request.uri.to_s
curl.url = request.uri.normalize.to_s
curl.follow_location = request.follow_redirects
request.request_headers.each { |key, value| curl.headers[key.to_s] = value }
curl.headers["User-Agent"] = "FurryArchiver/1.0 / telegram: @DeltaNoises"

View File

@@ -35,14 +35,15 @@ module Tasks::Bluesky
),
),
)
cursor = load_cursor
logger.info(format_tags("using cursor", make_tags(cursor:)))
@bluesky_client =
T.let(
Skyfall::Jetstream.new(
"jetstream2.us-east.bsky.network",
{
cursor: nil,
# cursor: load_cursor,
cursor:,
wanted_collections: %w[
app.bsky.feed.post
app.bsky.embed.images

View File

@@ -48,7 +48,7 @@ module Tasks
def run_post_file_descending(start_at)
last_post_file_id = get_progress(start_at)&.to_i
query = Domain::PostFile.where(state: "ok").includes(:blob)
query = Domain::PostFile.where(state: "ok").includes(:blob, :thumbnails)
query = query.where(id: ..last_post_file_id) if last_post_file_id
log("counting post files to process...")

View File

@@ -118,7 +118,8 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
logger.info(format_tags(make_tag("num_frames", num_frames)))
return [] if num_frames.zero?
existing_thumb_types = post_file.thumbnails.to_a.map(&:thumb_type).uniq
existing_thumb_types =
T.cast(post_file.thumbnails.map(&:thumb_type).uniq, T::Array[String])
logger.info(
format_tags(make_tag("existing_thumb_types", existing_thumb_types)),
)
@@ -128,8 +129,10 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
THUMB_TYPE_TO_OPTIONS.each do |thumb_type, options|
thumb_type = thumb_type.to_s
logger.tagged(make_tag("thumb_type", thumb_type)) do
next if existing_thumb_types.include?(thumb_type)
logger.info(format_tags("creating thumbnail"))
if existing_thumb_types.include?(thumb_type)
logger.info(format_tags("thumbnail type already exists"))
next
end
# get the number of frames in the post file
frames_to_thumbnail =
@@ -141,6 +144,8 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
frames_to_thumbnail.each do |frame|
logger.tagged(make_tag("frame", frame)) do
logger.info(format_tags("creating thumbnail"))
thumbnail = post_file.thumbnails.build(thumb_type:, frame:)
unless thumb_file_path = thumbnail.absolute_file_path
logger.info(format_tags("unable to compute thumbnail path"))