fix bsky user profile link sanitizer
This commit is contained in:
@@ -57,7 +57,12 @@ module Domain::DescriptionsHelper
|
|||||||
end
|
end
|
||||||
|
|
||||||
WEAK_URL_MATCHER_REGEX =
|
WEAK_URL_MATCHER_REGEX =
|
||||||
%r{(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)}
|
%r{(http(s)?:\/\/)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)}
|
||||||
|
|
||||||
|
sig { params(str: String).returns(T.nilable(String)) }
|
||||||
|
def extract_weak_url(str)
|
||||||
|
str.match(WEAK_URL_MATCHER_REGEX)&.[](0)
|
||||||
|
end
|
||||||
|
|
||||||
ALLOWED_INFERRED_URL_DOMAINS =
|
ALLOWED_INFERRED_URL_DOMAINS =
|
||||||
T.let(
|
T.let(
|
||||||
@@ -72,6 +77,16 @@ module Domain::DescriptionsHelper
|
|||||||
html = model.description_html_for_view
|
html = model.description_html_for_view
|
||||||
return nil if html.blank?
|
return nil if html.blank?
|
||||||
|
|
||||||
|
is_bsky_description = model.is_a?(Domain::User::BlueskyUser)
|
||||||
|
visual_style =
|
||||||
|
(
|
||||||
|
if model.is_a?(Domain::User::BlueskyUser)
|
||||||
|
"description-section-link-light"
|
||||||
|
else
|
||||||
|
"description-section-link"
|
||||||
|
end
|
||||||
|
)
|
||||||
|
|
||||||
case model
|
case model
|
||||||
when Domain::Post::E621Post
|
when Domain::Post::E621Post
|
||||||
dtext_result = DText.parse(html)
|
dtext_result = DText.parse(html)
|
||||||
@@ -95,17 +110,23 @@ module Domain::DescriptionsHelper
|
|||||||
next unless node.text?
|
next unless node.text?
|
||||||
next unless node.ancestors("a").empty?
|
next unless node.ancestors("a").empty?
|
||||||
next unless (node_text = T.cast(node.text, T.nilable(String)))
|
next unless (node_text = T.cast(node.text, T.nilable(String)))
|
||||||
next unless (match = node_text.match(WEAK_URL_MATCHER_REGEX))
|
next unless (url_text = extract_weak_url(node_text))
|
||||||
next unless (url_text = match[0])
|
next if url_text.blank?
|
||||||
unless (
|
unless (
|
||||||
uri =
|
uri =
|
||||||
try_parse_uri(model.description_html_base_domain, url_text)
|
try_parse_uri(model.description_html_base_domain, url_text)
|
||||||
)
|
)
|
||||||
next
|
next
|
||||||
end
|
end
|
||||||
unless ALLOWED_PLAIN_TEXT_URL_DOMAINS.any? { |domain|
|
if is_bsky_description
|
||||||
url_matches_domain?(domain, uri.host)
|
unless ALLOWED_EXTERNAL_LINK_DOMAINS.any? { |domain|
|
||||||
}
|
url_matches_domain?(domain, uri.host)
|
||||||
|
}
|
||||||
|
next
|
||||||
|
end
|
||||||
|
elsif ALLOWED_PLAIN_TEXT_URL_DOMAINS.none? do |domain|
|
||||||
|
url_matches_domain?(domain, uri.host)
|
||||||
|
end
|
||||||
next
|
next
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -157,20 +178,12 @@ module Domain::DescriptionsHelper
|
|||||||
when Domain::Post
|
when Domain::Post
|
||||||
[
|
[
|
||||||
"domain/has_description_html/inline_link_domain_post",
|
"domain/has_description_html/inline_link_domain_post",
|
||||||
{
|
{ post: found_model, link_text: node.text, visual_style: },
|
||||||
post: found_model,
|
|
||||||
link_text: node.text,
|
|
||||||
visual_style: "description-section-link",
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
when Domain::User
|
when Domain::User
|
||||||
[
|
[
|
||||||
"domain/has_description_html/inline_link_domain_user",
|
"domain/has_description_html/inline_link_domain_user",
|
||||||
{
|
{ user: found_model, link_text: node.text, visual_style: },
|
||||||
user: found_model,
|
|
||||||
link_text: node.text,
|
|
||||||
visual_style: "description-section-link",
|
|
||||||
},
|
|
||||||
]
|
]
|
||||||
else
|
else
|
||||||
raise "Unknown model type: #{found_link.model.class}"
|
raise "Unknown model type: #{found_link.model.class}"
|
||||||
@@ -191,14 +204,24 @@ module Domain::DescriptionsHelper
|
|||||||
end
|
end
|
||||||
|
|
||||||
replacements[node] = Nokogiri::HTML5.fragment(
|
replacements[node] = Nokogiri::HTML5.fragment(
|
||||||
render(
|
if is_bsky_description
|
||||||
partial: "domain/has_description_html/inline_link_external",
|
render(
|
||||||
locals: {
|
partial: "domain/has_description_html/external_link",
|
||||||
url: url.to_s,
|
locals: {
|
||||||
title:,
|
link_text: node.text,
|
||||||
icon_path: icon_path_for_domain(url.host),
|
url: url.to_s,
|
||||||
},
|
},
|
||||||
),
|
)
|
||||||
|
else
|
||||||
|
render(
|
||||||
|
partial: "domain/has_description_html/inline_link_external",
|
||||||
|
locals: {
|
||||||
|
url: url.to_s,
|
||||||
|
title:,
|
||||||
|
icon_path: icon_path_for_domain(url.host),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
end,
|
||||||
)
|
)
|
||||||
next { node_whitelist: [node] }
|
next { node_whitelist: [node] }
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ module Domain::DomainsHelper
|
|||||||
e621.net
|
e621.net
|
||||||
furaffinity.net
|
furaffinity.net
|
||||||
inkbunny.net
|
inkbunny.net
|
||||||
|
bsky.app
|
||||||
].freeze
|
].freeze
|
||||||
|
|
||||||
# If a link is detected in an anchor tag and is one of these domains,
|
# If a link is detected in an anchor tag and is one of these domains,
|
||||||
|
|||||||
@@ -242,7 +242,7 @@ class Domain::Bluesky::Job::ScanPostsJob < Domain::Bluesky::Job::Base
|
|||||||
def enqueue_pending_files_job(post)
|
def enqueue_pending_files_job(post)
|
||||||
post.files.each do |post_file|
|
post.files.each do |post_file|
|
||||||
if post_file.state_pending?
|
if post_file.state_pending?
|
||||||
defer_job(Domain::StaticFileJob, { post_file: })
|
defer_job(Domain::StaticFileJob, { post_file: }, { queue: "bluesky" })
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -69,7 +69,11 @@ class Bluesky::ProcessPostHelper
|
|||||||
set_alt_text(post_file, image_data["alt"])
|
set_alt_text(post_file, image_data["alt"])
|
||||||
|
|
||||||
post_file.save!
|
post_file.save!
|
||||||
@deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: })
|
@deferred_job_sink.defer_job(
|
||||||
|
Domain::StaticFileJob,
|
||||||
|
{ post_file: },
|
||||||
|
{ queue: "bluesky" },
|
||||||
|
)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
format_tags(
|
format_tags(
|
||||||
@@ -93,7 +97,11 @@ class Bluesky::ProcessPostHelper
|
|||||||
set_aspect_ratio(post_file, embed_data["aspectRatio"])
|
set_aspect_ratio(post_file, embed_data["aspectRatio"])
|
||||||
set_alt_text(post_file, embed_data["alt"])
|
set_alt_text(post_file, embed_data["alt"])
|
||||||
post_file.save!
|
post_file.save!
|
||||||
@deferred_job_sink.defer_job(Domain::StaticFileJob, { post_file: })
|
@deferred_job_sink.defer_job(
|
||||||
|
Domain::StaticFileJob,
|
||||||
|
{ post_file: },
|
||||||
|
{ queue: "bluesky" },
|
||||||
|
)
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
format_tags(
|
format_tags(
|
||||||
|
|||||||
@@ -97,6 +97,28 @@ namespace :bluesky do
|
|||||||
puts "added #{user.did} / #{user.handle} to monitor"
|
puts "added #{user.did} / #{user.handle} to monitor"
|
||||||
added += 1
|
added += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if added % 25 == 0
|
||||||
|
loop do
|
||||||
|
queue_size =
|
||||||
|
GoodJob::Job
|
||||||
|
.where("queue_name IN ('bluesky', 'static_file')")
|
||||||
|
.where(finished_at: nil, performed_at: nil, error: nil)
|
||||||
|
.where(
|
||||||
|
[
|
||||||
|
"(serialized_params->'exception_executions' = '{}')",
|
||||||
|
"(serialized_params->'exception_executions' is null)",
|
||||||
|
].join(" OR "),
|
||||||
|
)
|
||||||
|
.count
|
||||||
|
puts "queue size: #{queue_size}"
|
||||||
|
if queue_size > 150
|
||||||
|
sleep 10
|
||||||
|
else
|
||||||
|
break
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
else
|
else
|
||||||
puts "user not found: #{line}"
|
puts "user not found: #{line}"
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -20,10 +20,22 @@ class ModelWithDescriptionHtml
|
|||||||
end
|
end
|
||||||
|
|
||||||
RSpec.describe Domain::DescriptionsHelper, type: :helper do
|
RSpec.describe Domain::DescriptionsHelper, type: :helper do
|
||||||
def sanitize_description_html(html, base_domain = "www.furaffinity.net")
|
def sanitize_description_html(
|
||||||
helper.sanitize_description_html(
|
html,
|
||||||
ModelWithDescriptionHtml.new(base_domain, html),
|
base_domain = "www.furaffinity.net",
|
||||||
)
|
bsky_model: false
|
||||||
|
)
|
||||||
|
model =
|
||||||
|
if bsky_model
|
||||||
|
create(
|
||||||
|
:domain_user_bluesky_user,
|
||||||
|
handle: "lost_spots",
|
||||||
|
description: html,
|
||||||
|
)
|
||||||
|
else
|
||||||
|
ModelWithDescriptionHtml.new(base_domain, html)
|
||||||
|
end
|
||||||
|
helper.sanitize_description_html(model)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Mock the policy for posts to avoid Devise authentication errors
|
# Mock the policy for posts to avoid Devise authentication errors
|
||||||
@@ -225,6 +237,35 @@ RSpec.describe Domain::DescriptionsHelper, type: :helper do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
describe "#extract_weak_url" do
|
||||||
|
shared_examples "extract_weak_url" do |url|
|
||||||
|
it "works for #{url}" do
|
||||||
|
expect(helper.extract_weak_url(url)).to eq(url)
|
||||||
|
expect(helper.extract_weak_url("before #{url} after")).to eq(url)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
%w[http://twitter.com/foo http://t.me/foo].each do |url|
|
||||||
|
include_examples "extract_weak_url", url
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
describe "bare links" do
|
||||||
|
it "works for t.me" do
|
||||||
|
html = "https://t.me/lost_spots"
|
||||||
|
sanitized = sanitize_description_html(html, bsky_model: true)
|
||||||
|
expect(sanitized).to include("t.me/lost_spots")
|
||||||
|
expect(sanitized).to include("href=\"https://t.me/lost_spots\"")
|
||||||
|
end
|
||||||
|
|
||||||
|
it "works for twitter.com" do
|
||||||
|
html = "https://twitter.com/lost_spots"
|
||||||
|
sanitized = sanitize_description_html(html, bsky_model: true)
|
||||||
|
expect(sanitized).to include("twitter.com/lost_spots")
|
||||||
|
expect(sanitized).to include("href=\"https://twitter.com/lost_spots\"")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
describe "relative links" do
|
describe "relative links" do
|
||||||
it "works for posts" do
|
it "works for posts" do
|
||||||
post1 =
|
post1 =
|
||||||
|
|||||||
Reference in New Issue
Block a user