fix bsky user profile link sanitizer
This commit is contained in:
@@ -57,7 +57,12 @@ module Domain::DescriptionsHelper
|
||||
end
|
||||
|
||||
WEAK_URL_MATCHER_REGEX =
|
||||
%r{(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)}
|
||||
%r{(http(s)?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)}
|
||||
|
||||
sig { params(str: String).returns(T.nilable(String)) }
|
||||
def extract_weak_url(str)
|
||||
str.match(WEAK_URL_MATCHER_REGEX)&.[](0)
|
||||
end
|
||||
|
||||
ALLOWED_INFERRED_URL_DOMAINS =
|
||||
T.let(
|
||||
@@ -72,6 +77,16 @@ module Domain::DescriptionsHelper
|
||||
html = model.description_html_for_view
|
||||
return nil if html.blank?
|
||||
|
||||
is_bsky_description = model.is_a?(Domain::User::BlueskyUser)
|
||||
visual_style =
|
||||
(
|
||||
if model.is_a?(Domain::User::BlueskyUser)
|
||||
"description-section-link-light"
|
||||
else
|
||||
"description-section-link"
|
||||
end
|
||||
)
|
||||
|
||||
case model
|
||||
when Domain::Post::E621Post
|
||||
dtext_result = DText.parse(html)
|
||||
@@ -95,19 +110,25 @@ module Domain::DescriptionsHelper
|
||||
next unless node.text?
|
||||
next unless node.ancestors("a").empty?
|
||||
next unless (node_text = T.cast(node.text, T.nilable(String)))
|
||||
next unless (match = node_text.match(WEAK_URL_MATCHER_REGEX))
|
||||
next unless (url_text = match[0])
|
||||
next unless (url_text = extract_weak_url(node_text))
|
||||
next if url_text.blank?
|
||||
unless (
|
||||
uri =
|
||||
try_parse_uri(model.description_html_base_domain, url_text)
|
||||
)
|
||||
next
|
||||
end
|
||||
unless ALLOWED_PLAIN_TEXT_URL_DOMAINS.any? { |domain|
|
||||
if is_bsky_description
|
||||
unless ALLOWED_EXTERNAL_LINK_DOMAINS.any? { |domain|
|
||||
url_matches_domain?(domain, uri.host)
|
||||
}
|
||||
next
|
||||
end
|
||||
elsif ALLOWED_PLAIN_TEXT_URL_DOMAINS.none? do |domain|
|
||||
url_matches_domain?(domain, uri.host)
|
||||
end
|
||||
next
|
||||
end
|
||||
|
||||
before, after = node.text.split(url_text, 2)
|
||||
new_node = "#{before}<a href=\"#{url_text}\">#{url_text}</a>#{after}"
|
||||
@@ -157,20 +178,12 @@ module Domain::DescriptionsHelper
|
||||
when Domain::Post
|
||||
[
|
||||
"domain/has_description_html/inline_link_domain_post",
|
||||
{
|
||||
post: found_model,
|
||||
link_text: node.text,
|
||||
visual_style: "description-section-link",
|
||||
},
|
||||
{ post: found_model, link_text: node.text, visual_style: },
|
||||
]
|
||||
when Domain::User
|
||||
[
|
||||
"domain/has_description_html/inline_link_domain_user",
|
||||
{
|
||||
user: found_model,
|
||||
link_text: node.text,
|
||||
visual_style: "description-section-link",
|
||||
},
|
||||
{ user: found_model, link_text: node.text, visual_style: },
|
||||
]
|
||||
else
|
||||
raise "Unknown model type: #{found_link.model.class}"
|
||||
@@ -191,6 +204,15 @@ module Domain::DescriptionsHelper
|
||||
end
|
||||
|
||||
replacements[node] = Nokogiri::HTML5.fragment(
|
||||
if is_bsky_description
|
||||
render(
|
||||
partial: "domain/has_description_html/external_link",
|
||||
locals: {
|
||||
link_text: node.text,
|
||||
url: url.to_s,
|
||||
},
|
||||
)
|
||||
else
|
||||
render(
|
||||
partial: "domain/has_description_html/inline_link_external",
|
||||
locals: {
|
||||
@@ -198,7 +220,8 @@ module Domain::DescriptionsHelper
|
||||
title:,
|
||||
icon_path: icon_path_for_domain(url.host),
|
||||
},
|
||||
),
|
||||
)
|
||||
end,
|
||||
)
|
||||
next { node_whitelist: [node] }
|
||||
end
|
||||
|
||||
@@ -11,6 +11,7 @@ module Domain::DomainsHelper
|
||||
e621.net
|
||||
furaffinity.net
|
||||
inkbunny.net
|
||||
bsky.app
|
||||
].freeze
|
||||
|
||||
# If a link is detected in an anchor tag and is one of these domains,
|
||||
|
||||
@@ -242,7 +242,7 @@ class Domain::Bluesky::Job::ScanPostsJob < Domain::Bluesky::Job::Base
|
||||
def enqueue_pending_files_job(post)
|
||||
post.files.each do |post_file|
|
||||
if post_file.state_pending?
|
||||
defer_job(Domain::StaticFileJob, { post_file: })
|
||||
defer_job(Domain::StaticFileJob, { post_file: }, { queue: "bluesky" })
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -69,7 +69,11 @@ class Bluesky::ProcessPostHelper
|
||||
set_alt_text(post_file, image_data["alt"])
|
||||
|
||||
post_file.save!
|
||||
@deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: })
|
||||
@deferred_job_sink.defer_job(
|
||||
Domain::StaticFileJob,
|
||||
{ post_file: },
|
||||
{ queue: "bluesky" },
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
format_tags(
|
||||
@@ -93,7 +97,11 @@ class Bluesky::ProcessPostHelper
|
||||
set_aspect_ratio(post_file, embed_data["aspectRatio"])
|
||||
set_alt_text(post_file, embed_data["alt"])
|
||||
post_file.save!
|
||||
@deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: })
|
||||
@deferred_job_sink.defer_job(
|
||||
Domain::StaticFileJob,
|
||||
{ post_file: },
|
||||
{ queue: "bluesky" },
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
format_tags(
|
||||
|
||||
@@ -97,6 +97,28 @@ namespace :bluesky do
|
||||
puts "added #{user.did} / #{user.handle} to monitor"
|
||||
added += 1
|
||||
end
|
||||
|
||||
if added % 25 == 0
|
||||
loop do
|
||||
queue_size =
|
||||
GoodJob::Job
|
||||
.where("queue_name IN ('bluesky', 'static_file')")
|
||||
.where(finished_at: nil, performed_at: nil, error: nil)
|
||||
.where(
|
||||
[
|
||||
"(serialized_params->'exception_executions' = '{}')",
|
||||
"(serialized_params->'exception_executions' is null)",
|
||||
].join(" OR "),
|
||||
)
|
||||
.count
|
||||
puts "queue size: #{queue_size}"
|
||||
if queue_size > 150
|
||||
sleep 10
|
||||
else
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
else
|
||||
puts "user not found: #{line}"
|
||||
end
|
||||
|
||||
@@ -20,10 +20,22 @@ class ModelWithDescriptionHtml
|
||||
end
|
||||
|
||||
RSpec.describe Domain::DescriptionsHelper, type: :helper do
|
||||
def sanitize_description_html(html, base_domain = "www.furaffinity.net")
|
||||
helper.sanitize_description_html(
|
||||
ModelWithDescriptionHtml.new(base_domain, html),
|
||||
def sanitize_description_html(
|
||||
html,
|
||||
base_domain = "www.furaffinity.net",
|
||||
bsky_model: false
|
||||
)
|
||||
model =
|
||||
if bsky_model
|
||||
create(
|
||||
:domain_user_bluesky_user,
|
||||
handle: "lost_spots",
|
||||
description: html,
|
||||
)
|
||||
else
|
||||
ModelWithDescriptionHtml.new(base_domain, html)
|
||||
end
|
||||
helper.sanitize_description_html(model)
|
||||
end
|
||||
|
||||
# Mock the policy for posts to avoid Devise authentication errors
|
||||
@@ -225,6 +237,35 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do
|
||||
end
|
||||
end
|
||||
|
||||
describe "#extract_weak_url" do
|
||||
shared_examples "extract_weak_url" do |url|
|
||||
it "works for #{url}" do
|
||||
expect(helper.extract_weak_url(url)).to eq(url)
|
||||
expect(helper.extract_weak_url("before #{url} after")).to eq(url)
|
||||
end
|
||||
end
|
||||
|
||||
%w[http://twitter.com/foo http://t.me/foo].each do |url|
|
||||
include_examples "extract_weak_url", url
|
||||
end
|
||||
end
|
||||
|
||||
describe "bare links" do
|
||||
it "works for t.me" do
|
||||
html = "https://t.me/lost_spots"
|
||||
sanitized = sanitize_description_html(html, bsky_model: true)
|
||||
expect(sanitized).to include("t.me/lost_spots")
|
||||
expect(sanitized).to include("href=\"https://t.me/lost_spots\"")
|
||||
end
|
||||
|
||||
it "works for twitter.com" do
|
||||
html = "https://twitter.com/lost_spots"
|
||||
sanitized = sanitize_description_html(html, bsky_model: true)
|
||||
expect(sanitized).to include("twitter.com/lost_spots")
|
||||
expect(sanitized).to include("href=\"https://twitter.com/lost_spots\"")
|
||||
end
|
||||
end
|
||||
|
||||
describe "relative links" do
|
||||
it "works for posts" do
|
||||
post1 =
|
||||
|
||||
Reference in New Issue
Block a user