bmp support, buggy fa user, url decode usernames

This commit is contained in:
Dylan Knutson
2025-08-16 04:44:04 +00:00
parent 1e46e42352
commit a1fab9e645
13 changed files with 1457 additions and 21 deletions

View File

@@ -69,13 +69,22 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
meson \ meson \
ninja-build ninja-build
# Install imagemagick from source
RUN cd /tmp && \
wget -qO- https://imagemagick.org/archive/releases/ImageMagick-7.1.2-1.tar.xz | tar -xJ && \
cd ImageMagick-7.1.2-1 && \
./configure && \
make -j$(nproc) && \
make install && \
ldconfig && \
cd / && \
rm -rf /tmp/ImageMagick-7.1.2-1*
# Install vips from source # Install vips from source
RUN cd /tmp && \ RUN cd /tmp && \
wget https://github.com/libvips/libvips/releases/download/v8.17.1/vips-8.17.1.tar.xz && \ wget -qO- https://github.com/libvips/libvips/releases/download/v8.17.1/vips-8.17.1.tar.xz | tar -xJ && \
tar -xJf vips-8.17.1.tar.xz && \
ls -la && \
cd vips-8.17.1 && \ cd vips-8.17.1 && \
ls -la && \
meson setup build --prefix=/usr/local -Dcgif=enabled && \ meson setup build --prefix=/usr/local -Dcgif=enabled && \
cd build && \ cd build && \
ninja && \ ninja && \

View File

@@ -70,13 +70,21 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
meson \ meson \
ninja-build ninja-build
# Install imagemagick from source
RUN cd /tmp && \
wget -qO- https://imagemagick.org/archive/releases/ImageMagick-7.1.2-1.tar.xz | tar -xJ && \
cd ImageMagick-7.1.2-1 && \
./configure && \
make -j$(nproc) && \
make install && \
ldconfig && \
cd / && \
rm -rf /tmp/ImageMagick-7.1.2-1*
# Install vips from source # Install vips from source
RUN cd /tmp && \ RUN cd /tmp && \
wget https://github.com/libvips/libvips/releases/download/v8.17.1/vips-8.17.1.tar.xz && \ wget -qO- https://github.com/libvips/libvips/releases/download/v8.17.1/vips-8.17.1.tar.xz | tar -xJ && \
tar -xJf vips-8.17.1.tar.xz && \
ls -la && \
cd vips-8.17.1 && \ cd vips-8.17.1 && \
ls -la && \
meson setup build --prefix=/usr/local -Dcgif=enabled && \ meson setup build --prefix=/usr/local -Dcgif=enabled && \
cd build && \ cd build && \
ninja && \ ninja && \

View File

@@ -11,7 +11,8 @@ class Domain::Fa::Job::Base < Scraper::JobBase
protected protected
BUGGY_USER_URL_NAMES = T.let(["click here", "..", "."], T::Array[String]) BUGGY_USER_URL_NAMES =
T.let(["click here", "..", ".", "<i class="], T::Array[String])
sig { params(user: Domain::User::FaUser).returns(T::Boolean) } sig { params(user: Domain::User::FaUser).returns(T::Boolean) }
def buggy_user?(user) def buggy_user?(user)

View File

@@ -425,6 +425,7 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
href = link_elem["href"] href = link_elem["href"]
url_name = url_name =
%r{/user/(.+)/}.match(href)&.[](1) || raise("invalid url: #{href}") %r{/user/(.+)/}.match(href)&.[](1) || raise("invalid url: #{href}")
url_name = CGI.unescape(url_name)
if @march_2025_update if @march_2025_update
name = name =

View File

@@ -29,7 +29,7 @@ module HasColorLogger
sig { params(tags: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) } sig { params(tags: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
def make_tags(tags) def make_tags(tags)
tags.map { |tag_name, tag_value| make_tag(tag_name.to_s, tag_value) } self.class.make_tags(tags)
end end
sig { params(tags: T.any(String, T::Array[String])).returns(String) } sig { params(tags: T.any(String, T::Array[String])).returns(String) }
@@ -52,15 +52,20 @@ module HasColorLogger
module ClassMethods module ClassMethods
extend T::Sig extend T::Sig
sig { params(tags: T::Hash[Symbol, T.untyped]).returns(T::Array[String]) }
def make_tags(tags)
tags.map { |tag_name, tag_value| make_tag(tag_name.to_s, tag_value) }
end
sig { params(tag_name: String, tag_value: T.untyped).returns(String) } sig { params(tag_name: String, tag_value: T.untyped).returns(String) }
def make_tag(tag_name, tag_value) def make_tag(tag_name, tag_value)
tag_value_str = tag_value ? tag_value.to_s.bold : "(nil)".italic tag_value_str = tag_value ? tag_value.to_s.bold : "(nil)".italic
"#{tag_name}: #{tag_value_str}" "#{tag_name}: #{tag_value_str}"
end end
sig { params(tags: String).returns(String) } sig { params(tags: T.any(String, T::Array[String])).returns(String) }
def format_tags(*tags) def format_tags(*tags)
format_tags_arr(tags) format_tags_arr(T.unsafe([tags].flatten))
end end
sig { params(tags: T::Array[String]).returns(String) } sig { params(tags: T::Array[String]).returns(String) }

View File

@@ -65,9 +65,7 @@ class Scraper::CurlHttpPerformer
curl = get_curl curl = get_curl
start_at = Time.now start_at = Time.now
# TODO - normalizing the URL breaks URLs with utf-8 characters curl.url = request.uri.normalize.to_s
# curl.url = request.uri.normalize.to_s
curl.url = request.uri.to_s
curl.follow_location = request.follow_redirects curl.follow_location = request.follow_redirects
request.request_headers.each { |key, value| curl.headers[key.to_s] = value } request.request_headers.each { |key, value| curl.headers[key.to_s] = value }
curl.headers["User-Agent"] = "FurryArchiver/1.0 / telegram: @DeltaNoises" curl.headers["User-Agent"] = "FurryArchiver/1.0 / telegram: @DeltaNoises"

View File

@@ -35,14 +35,15 @@ module Tasks::Bluesky
), ),
), ),
) )
cursor = load_cursor
logger.info(format_tags("using cursor", make_tags(cursor:)))
@bluesky_client = @bluesky_client =
T.let( T.let(
Skyfall::Jetstream.new( Skyfall::Jetstream.new(
"jetstream2.us-east.bsky.network", "jetstream2.us-east.bsky.network",
{ {
cursor: nil, cursor:,
# cursor: load_cursor,
wanted_collections: %w[ wanted_collections: %w[
app.bsky.feed.post app.bsky.feed.post
app.bsky.embed.images app.bsky.embed.images

View File

@@ -48,7 +48,7 @@ module Tasks
def run_post_file_descending(start_at) def run_post_file_descending(start_at)
last_post_file_id = get_progress(start_at)&.to_i last_post_file_id = get_progress(start_at)&.to_i
query = Domain::PostFile.where(state: "ok").includes(:blob) query = Domain::PostFile.where(state: "ok").includes(:blob, :thumbnails)
query = query.where(id: ..last_post_file_id) if last_post_file_id query = query.where(id: ..last_post_file_id) if last_post_file_id
log("counting post files to process...") log("counting post files to process...")

View File

@@ -118,7 +118,8 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
logger.info(format_tags(make_tag("num_frames", num_frames))) logger.info(format_tags(make_tag("num_frames", num_frames)))
return [] if num_frames.zero? return [] if num_frames.zero?
existing_thumb_types = post_file.thumbnails.to_a.map(&:thumb_type).uniq existing_thumb_types =
T.cast(post_file.thumbnails.map(&:thumb_type).uniq, T::Array[String])
logger.info( logger.info(
format_tags(make_tag("existing_thumb_types", existing_thumb_types)), format_tags(make_tag("existing_thumb_types", existing_thumb_types)),
) )
@@ -128,8 +129,10 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
THUMB_TYPE_TO_OPTIONS.each do |thumb_type, options| THUMB_TYPE_TO_OPTIONS.each do |thumb_type, options|
thumb_type = thumb_type.to_s thumb_type = thumb_type.to_s
logger.tagged(make_tag("thumb_type", thumb_type)) do logger.tagged(make_tag("thumb_type", thumb_type)) do
next if existing_thumb_types.include?(thumb_type) if existing_thumb_types.include?(thumb_type)
logger.info(format_tags("creating thumbnail")) logger.info(format_tags("thumbnail type already exists"))
next
end
# get the number of frames in the post file # get the number of frames in the post file
frames_to_thumbnail = frames_to_thumbnail =
@@ -141,6 +144,8 @@ class Domain::PostFile::Thumbnail < ReduxApplicationRecord
frames_to_thumbnail.each do |frame| frames_to_thumbnail.each do |frame|
logger.tagged(make_tag("frame", frame)) do logger.tagged(make_tag("frame", frame)) do
logger.info(format_tags("creating thumbnail"))
thumbnail = post_file.thumbnails.build(thumb_type:, frame:) thumbnail = post_file.thumbnails.build(thumb_type:, frame:)
unless thumb_file_path = thumbnail.absolute_file_path unless thumb_file_path = thumbnail.absolute_file_path
logger.info(format_tags("unable to compute thumbnail path")) logger.info(format_tags("unable to compute thumbnail path"))

View File

@@ -12,6 +12,39 @@ describe Domain::Fa::Job::UserPageJob do
) )
end end
context "the user page has a user with a url name that has brackets" do
let(:client_mock_config) do
[
{
uri: "https://www.furaffinity.net/user/thesteamlemur/",
status_code: 200,
content_type: "text/html",
contents:
SpecUtil.read_fixture_file(
"domain/fa/user_page/user_page_thesteamlemur.html",
),
},
]
end
it "creates the user" do
expect do perform_now({ url_name: "thesteamlemur" }) end.to change {
Domain::User::FaUser.find_by(url_name: "thesteamlemur")
}.from(nil).to(be_present)
end
it "creates the recent watchers" do
perform_now({ url_name: "thesteamlemur" })
user = Domain::User::FaUser.find_by(url_name: "thesteamlemur")
expect(user.followed_by_users.count).to eq(7)
expect(user.followed_by_users.map(&:url_name)).to include(
"[sic]",
"drakenbyte",
"naynay",
)
end
end
context "the user is disabled" do context "the user is disabled" do
let(:client_mock_config) do let(:client_mock_config) do
[ [

View File

@@ -24,6 +24,14 @@ RSpec.describe LoadedMedia do
let(:mp4_fixture_path) do let(:mp4_fixture_path) do
Rails.root.join("test/fixtures/files/images/bsky-3l6tnjkcgw72y.mp4").to_s Rails.root.join("test/fixtures/files/images/bsky-3l6tnjkcgw72y.mp4").to_s
end end
let(:bmp_fixture_path) do
Rails
.root
.join(
"test/fixtures/files/images/1404612500.nickthefur775_streaminglive.bmp",
)
.to_s
end
let(:thumbnail_options) do let(:thumbnail_options) do
LoadedMedia::ThumbnailOptions.new( LoadedMedia::ThumbnailOptions.new(
@@ -192,5 +200,17 @@ RSpec.describe LoadedMedia do
expect(FileUtils.compare_file(paths[1], paths[2])).to be false expect(FileUtils.compare_file(paths[1], paths[2])).to be false
end end
end end
context "with a bmp file" do
it "can extract a frame and save a thumbnail" do
media = LoadedMedia.from_file("image/bmp", bmp_fixture_path)
output_path = make_output_path(0)
media.write_frame_thumbnail(0, output_path, thumbnail_options)
expect(File.exist?(output_path)).to be true
expect(File.size(output_path)).to be > 0
end
end
end end
end end

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.9 MiB