From 15ea73a350a81fc077391a95b9ec8b3b9400ca82 Mon Sep 17 00:00:00 2001 From: Dylan Knutson Date: Sun, 17 Aug 2025 18:51:55 +0000 Subject: [PATCH] fix bsky user profile link sanitizer --- app/helpers/domain/descriptions_helper.rb | 71 ++++++++++++------- app/helpers/domain/domains_helper.rb | 1 + app/jobs/domain/bluesky/job/scan_posts_job.rb | 2 +- app/lib/bluesky/process_post_helper.rb | 12 +++- rake/bluesky.rake | 22 ++++++ .../domain/descriptions_helper_spec.rb | 49 +++++++++++-- 6 files changed, 126 insertions(+), 31 deletions(-) diff --git a/app/helpers/domain/descriptions_helper.rb b/app/helpers/domain/descriptions_helper.rb index 196909a3..e47f8ab8 100644 --- a/app/helpers/domain/descriptions_helper.rb +++ b/app/helpers/domain/descriptions_helper.rb @@ -57,7 +57,12 @@ module Domain::DescriptionsHelper end WEAK_URL_MATCHER_REGEX = - %r{(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)} + %r{(http(s)?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)} + + sig { params(str: String).returns(T.nilable(String)) } + def extract_weak_url(str) + str.match(WEAK_URL_MATCHER_REGEX)&.[](0) + end ALLOWED_INFERRED_URL_DOMAINS = T.let( @@ -72,6 +77,16 @@ module Domain::DescriptionsHelper html = model.description_html_for_view return nil if html.blank? + is_bsky_description = model.is_a?(Domain::User::BlueskyUser) + visual_style = + ( + if model.is_a?(Domain::User::BlueskyUser) + "description-section-link-light" + else + "description-section-link" + end + ) + case model when Domain::Post::E621Post dtext_result = DText.parse(html) @@ -95,17 +110,23 @@ module Domain::DescriptionsHelper next unless node.text? next unless node.ancestors("a").empty? next unless (node_text = T.cast(node.text, T.nilable(String))) - next unless (match = node_text.match(WEAK_URL_MATCHER_REGEX)) - next unless (url_text = match[0]) + next unless (url_text = extract_weak_url(node_text)) + next if url_text.blank? unless ( uri = try_parse_uri(model.description_html_base_domain, url_text) ) next end - unless ALLOWED_PLAIN_TEXT_URL_DOMAINS.any? { |domain| - url_matches_domain?(domain, uri.host) - } + if is_bsky_description + unless ALLOWED_EXTERNAL_LINK_DOMAINS.any? { |domain| + url_matches_domain?(domain, uri.host) + } + next + end + elsif ALLOWED_PLAIN_TEXT_URL_DOMAINS.none? do |domain| + url_matches_domain?(domain, uri.host) + end next end @@ -157,20 +178,12 @@ module Domain::DescriptionsHelper when Domain::Post [ "domain/has_description_html/inline_link_domain_post", - { - post: found_model, - link_text: node.text, - visual_style: "description-section-link", - }, + { post: found_model, link_text: node.text, visual_style: }, ] when Domain::User [ "domain/has_description_html/inline_link_domain_user", - { - user: found_model, - link_text: node.text, - visual_style: "description-section-link", - }, + { user: found_model, link_text: node.text, visual_style: }, ] else raise "Unknown model type: #{found_link.model.class}" @@ -191,14 +204,24 @@ module Domain::DescriptionsHelper end replacements[node] = Nokogiri::HTML5.fragment( - render( - partial: "domain/has_description_html/inline_link_external", - locals: { - url: url.to_s, - title:, - icon_path: icon_path_for_domain(url.host), - }, - ), + if is_bsky_description + render( + partial: "domain/has_description_html/external_link", + locals: { + link_text: node.text, + url: url.to_s, + }, + ) + else + render( + partial: "domain/has_description_html/inline_link_external", + locals: { + url: url.to_s, + title:, + icon_path: icon_path_for_domain(url.host), + }, + ) + end, ) next { node_whitelist: [node] } end diff --git a/app/helpers/domain/domains_helper.rb b/app/helpers/domain/domains_helper.rb index 408d8c28..0e0fd8b2 100644 --- a/app/helpers/domain/domains_helper.rb +++ b/app/helpers/domain/domains_helper.rb @@ -11,6 +11,7 @@ module Domain::DomainsHelper e621.net furaffinity.net inkbunny.net + bsky.app ].freeze # If a link is detected in an anchor tag and is one of these domains, diff --git a/app/jobs/domain/bluesky/job/scan_posts_job.rb b/app/jobs/domain/bluesky/job/scan_posts_job.rb index 4f6d0085..ec345e5c 100644 --- a/app/jobs/domain/bluesky/job/scan_posts_job.rb +++ b/app/jobs/domain/bluesky/job/scan_posts_job.rb @@ -242,7 +242,7 @@ class Domain::Bluesky::Job::ScanPostsJob < Domain::Bluesky::Job::Base def enqueue_pending_files_job(post) post.files.each do |post_file| if post_file.state_pending? - defer_job(Domain::StaticFileJob, { post_file: }) + defer_job(Domain::StaticFileJob, { post_file: }, { queue: "bluesky" }) end end end diff --git a/app/lib/bluesky/process_post_helper.rb b/app/lib/bluesky/process_post_helper.rb index d0698ed9..7addd7d0 100644 --- a/app/lib/bluesky/process_post_helper.rb +++ b/app/lib/bluesky/process_post_helper.rb @@ -69,7 +69,11 @@ class Bluesky::ProcessPostHelper set_alt_text(post_file, image_data["alt"]) post_file.save! - @deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: }) + @deferred_job_sink.defer_job( + Domain::StaticFileJob, + { post_file: }, + { queue: "bluesky" }, + ) logger.debug( format_tags( @@ -93,7 +97,11 @@ class Bluesky::ProcessPostHelper set_aspect_ratio(post_file, embed_data["aspectRatio"]) set_alt_text(post_file, embed_data["alt"]) post_file.save! - @deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: }) + @deferred_job_sink.defer_job( + Domain::StaticFileJob, + { post_file: }, + { queue: "bluesky" }, + ) logger.debug( format_tags( diff --git a/rake/bluesky.rake b/rake/bluesky.rake index a90e465e..92c9cdd7 100644 --- a/rake/bluesky.rake +++ b/rake/bluesky.rake @@ -97,6 +97,28 @@ namespace :bluesky do puts "added #{user.did} / #{user.handle} to monitor" added += 1 end + + if added % 25 == 0 + loop do + queue_size = + GoodJob::Job + .where("queue_name IN ('bluesky', 'static_file')") + .where(finished_at: nil, performed_at: nil, error: nil) + .where( + [ + "(serialized_params->'exception_executions' = '{}')", + "(serialized_params->'exception_executions' is null)", + ].join(" OR "), + ) + .count + puts "queue size: #{queue_size}" + if queue_size > 150 + sleep 10 + else + break + end + end + end else puts "user not found: #{line}" end diff --git a/spec/helpers/domain/descriptions_helper_spec.rb b/spec/helpers/domain/descriptions_helper_spec.rb index 933197dd..90102bf2 100644 --- a/spec/helpers/domain/descriptions_helper_spec.rb +++ b/spec/helpers/domain/descriptions_helper_spec.rb @@ -20,10 +20,22 @@ class ModelWithDescriptionHtml end RSpec.describe Domain::DescriptionsHelper, type: :helper do - def sanitize_description_html(html, base_domain = "www.furaffinity.net") - helper.sanitize_description_html( - ModelWithDescriptionHtml.new(base_domain, html), - ) + def sanitize_description_html( + html, + base_domain = "www.furaffinity.net", + bsky_model: false + ) + model = + if bsky_model + create( + :domain_user_bluesky_user, + handle: "lost_spots", + description: html, + ) + else + ModelWithDescriptionHtml.new(base_domain, html) + end + helper.sanitize_description_html(model) end # Mock the policy for posts to avoid Devise authentication errors @@ -225,6 +237,35 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do end end + describe "#extract_weak_url" do + shared_examples "extract_weak_url" do |url| + it "works for #{url}" do + expect(helper.extract_weak_url(url)).to eq(url) + expect(helper.extract_weak_url("before #{url} after")).to eq(url) + end + end + + %w[http://twitter.com/foo http://t.me/foo].each do |url| + include_examples "extract_weak_url", url + end + end + + describe "bare links" do + it "works for t.me" do + html = "https://t.me/lost_spots" + sanitized = sanitize_description_html(html, bsky_model: true) + expect(sanitized).to include("t.me/lost_spots") + expect(sanitized).to include("href=\"https://t.me/lost_spots\"") + end + + it "works for twitter.com" do + html = "https://twitter.com/lost_spots" + sanitized = sanitize_description_html(html, bsky_model: true) + expect(sanitized).to include("twitter.com/lost_spots") + expect(sanitized).to include("href=\"https://twitter.com/lost_spots\"") + end + end + describe "relative links" do it "works for posts" do post1 =