fix bsky user profile link sanitizer

This commit is contained in:
Dylan Knutson
2025-08-17 18:51:55 +00:00
parent 6bf64cf8c6
commit 15ea73a350
6 changed files with 126 additions and 31 deletions

View File

@@ -57,7 +57,12 @@ module Domain::DescriptionsHelper
end end
WEAK_URL_MATCHER_REGEX = WEAK_URL_MATCHER_REGEX =
%r{(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)} %r{(http(s)?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)}
# Returns the first portion of +str+ matched by WEAK_URL_MATCHER_REGEX, or
# nil when nothing in +str+ matches.
sig { params(str: String).returns(T.nilable(String)) }
def extract_weak_url(str)
  # String#[] with a Regexp yields the whole matched substring (or nil).
  str[WEAK_URL_MATCHER_REGEX]
end
ALLOWED_INFERRED_URL_DOMAINS = ALLOWED_INFERRED_URL_DOMAINS =
T.let( T.let(
@@ -72,6 +77,16 @@ module Domain::DescriptionsHelper
html = model.description_html_for_view html = model.description_html_for_view
return nil if html.blank? return nil if html.blank?
is_bsky_description = model.is_a?(Domain::User::BlueskyUser)
visual_style =
(
if model.is_a?(Domain::User::BlueskyUser)
"description-section-link-light"
else
"description-section-link"
end
)
case model case model
when Domain::Post::E621Post when Domain::Post::E621Post
dtext_result = DText.parse(html) dtext_result = DText.parse(html)
@@ -95,17 +110,23 @@ module Domain::DescriptionsHelper
next unless node.text? next unless node.text?
next unless node.ancestors("a").empty? next unless node.ancestors("a").empty?
next unless (node_text = T.cast(node.text, T.nilable(String))) next unless (node_text = T.cast(node.text, T.nilable(String)))
next unless (match = node_text.match(WEAK_URL_MATCHER_REGEX)) next unless (url_text = extract_weak_url(node_text))
next unless (url_text = match[0]) next if url_text.blank?
unless ( unless (
uri = uri =
try_parse_uri(model.description_html_base_domain, url_text) try_parse_uri(model.description_html_base_domain, url_text)
) )
next next
end end
unless ALLOWED_PLAIN_TEXT_URL_DOMAINS.any? { |domain| if is_bsky_description
url_matches_domain?(domain, uri.host) unless ALLOWED_EXTERNAL_LINK_DOMAINS.any? { |domain|
} url_matches_domain?(domain, uri.host)
}
next
end
elsif ALLOWED_PLAIN_TEXT_URL_DOMAINS.none? do |domain|
url_matches_domain?(domain, uri.host)
end
next next
end end
@@ -157,20 +178,12 @@ module Domain::DescriptionsHelper
when Domain::Post when Domain::Post
[ [
"domain/has_description_html/inline_link_domain_post", "domain/has_description_html/inline_link_domain_post",
{ { post: found_model, link_text: node.text, visual_style: },
post: found_model,
link_text: node.text,
visual_style: "description-section-link",
},
] ]
when Domain::User when Domain::User
[ [
"domain/has_description_html/inline_link_domain_user", "domain/has_description_html/inline_link_domain_user",
{ { user: found_model, link_text: node.text, visual_style: },
user: found_model,
link_text: node.text,
visual_style: "description-section-link",
},
] ]
else else
raise "Unknown model type: #{found_link.model.class}" raise "Unknown model type: #{found_link.model.class}"
@@ -191,14 +204,24 @@ module Domain::DescriptionsHelper
end end
replacements[node] = Nokogiri::HTML5.fragment( replacements[node] = Nokogiri::HTML5.fragment(
render( if is_bsky_description
partial: "domain/has_description_html/inline_link_external", render(
locals: { partial: "domain/has_description_html/external_link",
url: url.to_s, locals: {
title:, link_text: node.text,
icon_path: icon_path_for_domain(url.host), url: url.to_s,
}, },
), )
else
render(
partial: "domain/has_description_html/inline_link_external",
locals: {
url: url.to_s,
title:,
icon_path: icon_path_for_domain(url.host),
},
)
end,
) )
next { node_whitelist: [node] } next { node_whitelist: [node] }
end end

View File

@@ -11,6 +11,7 @@ module Domain::DomainsHelper
e621.net e621.net
furaffinity.net furaffinity.net
inkbunny.net inkbunny.net
bsky.app
].freeze ].freeze
# If a link is detected in an anchor tag and is one of these domains, # If a link is detected in an anchor tag and is one of these domains,

View File

@@ -242,7 +242,7 @@ class Domain::Bluesky::Job::ScanPostsJob < Domain::Bluesky::Job::Base
def enqueue_pending_files_job(post) def enqueue_pending_files_job(post)
post.files.each do |post_file| post.files.each do |post_file|
if post_file.state_pending? if post_file.state_pending?
defer_job(Domain::StaticFileJob, { post_file: }) defer_job(Domain::StaticFileJob, { post_file: }, { queue: "bluesky" })
end end
end end
end end

View File

@@ -69,7 +69,11 @@ class Bluesky::ProcessPostHelper
set_alt_text(post_file, image_data["alt"]) set_alt_text(post_file, image_data["alt"])
post_file.save! post_file.save!
@deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: }) @deferred_job_sink.defer_job(
Domain::StaticFileJob,
{ post_file: },
{ queue: "bluesky" },
)
logger.debug( logger.debug(
format_tags( format_tags(
@@ -93,7 +97,11 @@ class Bluesky::ProcessPostHelper
set_aspect_ratio(post_file, embed_data["aspectRatio"]) set_aspect_ratio(post_file, embed_data["aspectRatio"])
set_alt_text(post_file, embed_data["alt"]) set_alt_text(post_file, embed_data["alt"])
post_file.save! post_file.save!
@deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: }) @deferred_job_sink.defer_job(
Domain::StaticFileJob,
{ post_file: },
{ queue: "bluesky" },
)
logger.debug( logger.debug(
format_tags( format_tags(

View File

@@ -97,6 +97,28 @@ namespace :bluesky do
puts "added #{user.did} / #{user.handle} to monitor" puts "added #{user.did} / #{user.handle} to monitor"
added += 1 added += 1
end end
# Whenever the running count of added users is a multiple of 25, poll the
# job table and hold off until the backlog is at most 150 jobs.
if (added % 25).zero?
  # Matches jobs whose exception_executions entry is empty or absent.
  exception_filter = [
    "(serialized_params->'exception_executions' = '{}')",
    "(serialized_params->'exception_executions' is null)",
  ].join(" OR ")

  loop do
    # Jobs on the two queues with no finished_at, performed_at or error set.
    backlog =
      GoodJob::Job
        .where("queue_name IN ('bluesky', 'static_file')")
        .where(finished_at: nil, performed_at: nil, error: nil)
        .where(exception_filter)
    queue_size = backlog.count
    puts "queue size: #{queue_size}"
    break unless queue_size > 150

    # Backlog still too large: wait before checking again.
    sleep 10
  end
end
else else
puts "user not found: #{line}" puts "user not found: #{line}"
end end

View File

@@ -20,10 +20,22 @@ class ModelWithDescriptionHtml
end end
RSpec.describe Domain::DescriptionsHelper, type: :helper do RSpec.describe Domain::DescriptionsHelper, type: :helper do
def sanitize_description_html(html, base_domain = "www.furaffinity.net") def sanitize_description_html(
helper.sanitize_description_html( html,
ModelWithDescriptionHtml.new(base_domain, html), base_domain = "www.furaffinity.net",
) bsky_model: false
)
model =
if bsky_model
create(
:domain_user_bluesky_user,
handle: "lost_spots",
description: html,
)
else
ModelWithDescriptionHtml.new(base_domain, html)
end
helper.sanitize_description_html(model)
end end
# Mock the policy for posts to avoid Devise authentication errors # Mock the policy for posts to avoid Devise authentication errors
@@ -225,6 +237,35 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do
end end
end end
# extract_weak_url should hand back the URL itself, both when the URL is the
# entire input and when it sits between other words.
describe "#extract_weak_url" do
  shared_examples "extract_weak_url" do |url|
    it "works for #{url}" do
      [url, "before #{url} after"].each do |input|
        expect(helper.extract_weak_url(input)).to eq(url)
      end
    end
  end

  include_examples "extract_weak_url", "http://twitter.com/foo"
  include_examples "extract_weak_url", "http://t.me/foo"
end
# A description consisting of a plain-text URL, sanitized for a Bluesky user
# model, should come back containing both the link text and a matching href.
describe "bare links" do
  %w[t.me twitter.com].each do |domain|
    it "works for #{domain}" do
      target = "#{domain}/lost_spots"
      sanitized =
        sanitize_description_html("https://#{target}", bsky_model: true)
      expect(sanitized).to include(target)
      expect(sanitized).to include("href=\"https://#{target}\"")
    end
  end
end
describe "relative links" do describe "relative links" do
it "works for posts" do it "works for posts" do
post1 = post1 =