better description sanitization

This commit is contained in:
Dylan Knutson
2025-02-15 06:42:55 +00:00
parent e375570a0f
commit c1b3887c58
8 changed files with 132 additions and 169 deletions

View File

@@ -123,16 +123,30 @@ module Domain::PostsHelper
nil
end
fa_host_matcher = /(www\.)?furaffinity\.net/
fa_post_matcher = %r{/view/(\d+)/?}
fa_user_matcher = %r{/user/(\w+)/?}
if uri.nil?
if href.match?(fa_host_matcher)
if m = href.match(fa_post_matcher)
fa_id = m[1].to_i
fa_post_id_to_node[fa_id] = node
next { node_whitelist: [node] }
elsif m = href.match(fa_user_matcher)
fa_url_name = m[1]
fa_user_url_name_to_node[fa_url_name] = node
next { node_whitelist: [node] }
end
end
end
valid_type = !uri.is_a?(URI::MailTo)
next { node_whitelist: [node] } if uri.nil? || !valid_type
next { node_whitelist: [] } if uri.nil? || !valid_type
uri.host ||= "www.furaffinity.net"
path = uri.path
fa_host_matcher = /^(www\.)?furaffinity\.net$/
fa_post_matcher = %r{^/view/(\d+)/?$}
fa_user_matcher = %r{^/user/(\w+)/?$}
if fa_host_matcher.match?(uri.host) && path
if match = path.match(fa_post_matcher)
fa_id = match[1].to_i
@@ -158,7 +172,9 @@ module Domain::PostsHelper
if fa_post_id_to_node.any?
# Batch load posts and their titles, ensuring fa_post_ids are strings
posts_by_id =
Domain::Fa::Post.where(fa_id: fa_post_id_to_node.keys).index_by(&:fa_id)
Domain::Post::FaPost.where(fa_id: fa_post_id_to_node.keys).index_by(
&:fa_id
)
# Replace the link text with post titles if available
fa_post_id_to_node.each do |fa_id, node|
@@ -166,7 +182,7 @@ module Domain::PostsHelper
node.replace(
Nokogiri::HTML5.fragment(
render(
partial: "domain/fa/posts/description_inline_link_fa_post",
partial: "domain/posts/description/inline_link_domain_post",
locals: {
post: post,
},
@@ -182,7 +198,7 @@ module Domain::PostsHelper
if fa_user_url_name_to_node.any?
# Batch load users and their names, ensuring fa_user_url_names are strings
users_by_url_name =
Domain::Fa::User
Domain::User::FaUser
.where(url_name: fa_user_url_name_to_node.keys)
.includes(:avatar)
.index_by(&:url_name)
@@ -193,7 +209,7 @@ module Domain::PostsHelper
node.replace(
Nokogiri::HTML5.fragment(
render(
partial: "domain/fa/posts/description_inline_link_fa_user",
partial: "domain/posts/description/inline_link_domain_user",
locals: {
user: user,
},

View File

@@ -20,6 +20,27 @@ module Domain::UsersHelper
asset_path("user-circle.svg")
end
end
# Builds the small inline avatar markup for a user.
#
# When the avatar has a log entry carrying a response SHA-256, the result is
# an <img> tag whose source is the blob path for that hash (jpg format, with
# the optional thumb variant). Otherwise an inline placeholder SVG is returned.
#
# @param avatar [Domain::UserAvatar, nil] avatar record, may be nil
# @param thumb [String, nil] thumb variant name forwarded to `blob_path`
# @return [String] markup for the avatar image or the placeholder icon
sig do
  params(avatar: T.nilable(Domain::UserAvatar), thumb: T.nilable(String)).returns(
    String,
  )
end
def domain_user_avatar_img_tag(avatar, thumb: nil)
  sha256 = avatar&.log_entry&.response_sha256

  # No stored image for this avatar: fall back to the placeholder icon.
  return raw(<<~SVG) unless sha256
    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor" class="w-4 h-4">
    <path stroke-linecap="round" stroke-linejoin="round" d="M15.75 6a3.75 3.75 0 11-7.5 0 3.75 3.75 0 017.5 0zM4.501 20.118a7.5 7.5 0 0114.998 0A17.933 17.933 0 0112 21.75c-2.676 0-5.216-.584-7.499-1.632z" />
    </svg>
  SVG

  src = blob_path(HexUtil.bin2hex(sha256), format: "jpg", thumb: thumb)
  alt_text = avatar&.user&.name_for_view || "user avatar"
  image_tag(
    src,
    class: "inline-block h-4 w-4 flex-shrink-0 rounded-sm object-cover",
    alt: alt_text,
  )
end
sig { params(user: Domain::User).returns(String) }
def site_name_for_user(user)
@@ -69,90 +90,6 @@ module Domain::UsersHelper
"#{domain_user_path(user)}/favorites"
end
# Sanitizes user-supplied profile HTML and rewrites FurAffinity user links.
#
# Only `br img b i span strong` elements (and `font-size` / `color` styles on
# spans) are allowed by the base config. Anchors are handled by a transformer:
# an `<a>` whose href is a FurAffinity user link (`/user/<name>`, either
# host-less or on www.furaffinity.net) is kept and restyled; when a matching
# Domain::User::FaUser exists the link is re-pointed at the local user page and
# the user's avatar image is prepended. Any other anchor -- including one with
# a missing or unparseable href -- is left to the base config, which does not
# list `a` as an allowed element.
#
# @param html [String] raw profile HTML
# @return [String] sanitized markup, passed through `raw`
sig { params(html: String).returns(String) }
def sanitized_user_profile_html(html)
  # Single source of truth for "is this href a FA user link?", shared by the
  # preload pass and the transformer so they cannot disagree. Returns the
  # downcased url name, or nil when the href is absent, malformed, on another
  # host, or not a /user/<name> path.
  url_name_for =
    Kernel.lambda do |raw_href|
      uri =
        begin
          Addressable::URI.parse(raw_href)
        rescue Addressable::URI::InvalidURIError
          nil
        end
      next nil if uri.nil?
      next nil unless uri.host.nil? || uri.host == "www.furaffinity.net"

      # Anchored so that only paths *starting* with /user/<name> count; the
      # name is the first path segment after /user/.
      uri.path.to_s[%r{\A/user/([^/]+)}, 1]&.downcase&.presence
    end

  # Try to preload all the FA users referenced in the profile with one query.
  maybe_url_names =
    Nokogiri
      .HTML(html)
      .css("a")
      .filter_map do |node|
        url_name_for.call(T.cast(node, Nokogiri::XML::Element)["href"])
      end
      .uniq

  preloaded_users =
    T.cast(
      Domain::User::FaUser
        .where(url_name: maybe_url_names)
        .includes(:avatar)
        .index_by(&:url_name),
      T::Hash[String, Domain::User],
    )

  user_link_transformer =
    Kernel.lambda do |env|
      next unless env[:node_name] == "a"

      node = T.cast(env[:node], Nokogiri::XML::Element)
      url_name = url_name_for.call(node["href"])
      next if url_name.nil?

      # Strip everything from the anchor except its href.
      Sanitize.node!(
        node,
        { elements: %w[a], attributes: { "a" => %w[href] } },
      )

      # The lookup fallback is kept in case the preload pass (a separate
      # parse of the document) did not see this link.
      user =
        preloaded_users[url_name] ||
          Domain::User::FaUser.find_by(url_name: url_name)

      node["href"] = domain_user_path(user) if user.present?
      node["class"] = "text-slate-200 underline decoration-slate-200 " +
        "decoration-dashed decoration-dashed decoration-1"

      node_allowlist = [node]
      if user.present?
        avatar = user.avatar
        img = Nokogiri::XML::Node.new("img", node.document)
        img["class"] = "inline w-5"
        img["src"] = domain_user_avatar_img_src_path(
          avatar,
          thumb: "32-avatar",
        )
        node.prepend_child(img)
        node_allowlist << img
      end

      { node_allowlist: }
    end

  raw Sanitize.fragment(
        html,
        elements: %w[br img b i span strong],
        attributes: {
          "span" => %w[style],
          "a" => [],
        },
        css: {
          properties: %w[font-size color],
        },
        transformers: user_link_transformer,
      )
end
sig { params(user: Domain::User).returns(T::Array[[String, Integer]]) }
def stat_rows_for_user(user)
rows = []

View File

@@ -84,4 +84,8 @@ module HelpersInterface
end
def link_to(link_text, link_url, options = {}, &block)
end
# Abstract declaration of `image_tag`: takes an image source string plus
# untyped options and returns a String. The body is intentionally empty
# because the sig marks the method abstract.
# NOTE(review): presumably satisfied by Rails' ActionView `image_tag` in the
# including context -- confirm.
sig { abstract.params(src: String, options: T.untyped).returns(String) }
def image_tag(src, options = {})
end
end

View File

@@ -206,8 +206,6 @@ class Domain::MigrateToDomain
user_post_creation.post_id = post.id
user_post_creation
end
rescue ActiveRecord::StatementInvalid
binding.pry
end
pb.progress = [pb.progress + batch.size, pb.total].min
end

View File

@@ -0,0 +1,6 @@
<%# Inline link to a post's page: an icon element followed by the post title. Expects a `post` local. %>
<%= link_to domain_post_path(post),
class:
"text-sky-200 transition-all hover:text-sky-800 inline-flex items-center hover:bg-gray-100 rounded-md gap-1 px-1" do %>
<i class="fa-regular fa-image h-4 w-4 flex-shrink-0"></i>
<span><%= post.title %></span>
<% end %>

View File

@@ -0,0 +1,6 @@
<%# Inline link to a user's page: the avatar image tag ("32-avatar" thumb) followed by the user's name. Expects a `user` local. %>
<%= link_to domain_user_path(user),
class:
"text-sky-200 transition-all hover:text-sky-800 inline-flex items-center hover:bg-gray-100 rounded-md gap-1 px-1 align-bottom" do %>
<%= domain_user_avatar_img_tag(user.avatar, thumb: "32-avatar") %>
<span><%= user.name %></span>
<% end %>

View File

@@ -3,7 +3,7 @@
<% if (profile_html = user.profile_html) %>
<div class="bg-slate-800 p-4 text-slate-200">
<% cache(user, expires_in: 12.hours) do %>
<%= sanitized_user_profile_html(profile_html) %>
<%= description_sanitized(profile_html) %>
<% end %>
</div>
<% else %>

View File

@@ -1,42 +1,7 @@
# typed: false
require "rails_helper"
RSpec.describe Domain::Fa::PostsHelper, type: :helper do
describe "#post_state_string" do
let(:post) { build(:domain_fa_post) }
context "when post has a file" do
before { allow(post).to receive(:have_file?).and_return(true) }
it 'returns "file"' do
expect(helper.post_state_string(post)).to eq("file")
end
end
context "when post is scanned but has no file" do
before do
allow(post).to receive(:have_file?).and_return(false)
allow(post).to receive(:scanned?).and_return(true)
end
it 'returns "scanned"' do
expect(helper.post_state_string(post)).to eq("scanned")
end
end
context "when post is neither scanned nor has file" do
before do
allow(post).to receive(:have_file?).and_return(false)
allow(post).to receive(:scanned?).and_return(false)
allow(post).to receive(:state).and_return("pending")
end
it "returns the post state" do
expect(helper.post_state_string(post)).to eq("pending")
end
end
end
RSpec.describe Domain::PostsHelper, type: :helper do
describe "#page_str" do
context "when page is greater than 1" do
it "returns page string" do
@@ -52,11 +17,11 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
end
end
describe "#fa_post_description_sanitized" do
describe "#description_sanitized" do
describe "basic HTML sanitization" do
it "works" do
sanitized =
helper.fa_post_description_sanitized(
helper.description_sanitized(
'<b>Bold</b> <i>Italic</i> <span style="color: red; font-size: 12px;">Styled</span> <script>alert("bad")</script>',
)
[
@@ -66,6 +31,28 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
].each { |text| expect(sanitized).to include(text) }
["<script>"].each { |text| expect(sanitized).not_to include(text) }
end
it "handles manually embedded links" do
user =
create(
:domain_user_fa_user,
url_name: "starringer",
name: "Starringer",
)
sanitized =
helper.description_sanitized(
"my mate &lt;<a href=\"http://www.furaffinity.net/user/starringer/&gt;.\" title=\"http://www.furaffinity.net/user/starringer/&gt;.\" class=\"auto_link\">http://www.furaffinity.net/user/starringer/&gt;.</a> It was fun.",
)
expect(sanitized).to include_html(
render(
partial: "domain/posts/description/inline_link_domain_user",
locals: {
user: user,
},
),
)
end
end
describe "FA post link handling" do
@@ -83,12 +70,13 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
Furaffinity.net/view/123456
].each do |url|
it "processes #{url}" do
post = create(:domain_fa_post, fa_id: "123456", title: "Post Title")
post =
create(:domain_post_fa_post, fa_id: "123456", title: "Post Title")
html = %(<a href="#{url}">FA Link</a>)
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to eq_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_post",
partial: "domain/posts/description/inline_link_domain_post",
locals: {
post: post,
},
@@ -100,7 +88,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
it "sanitizes nested content within valid FA links" do
html =
'<a href="https://www.furaffinity.net/view/123456/"><b>Bold</b> <script>bad</script> <span style="color: red; background: blue;">Text</span></a>'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to eq_html(
'<b>Bold</b> <span style="color: red; ">Text</span>',
@@ -109,10 +97,10 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
describe "post title lookup" do
let!(:post1) do
create(:domain_fa_post, fa_id: "123", title: "First Post")
create(:domain_post_fa_post, fa_id: "123", title: "First Post")
end
let!(:post2) do
create(:domain_fa_post, fa_id: "456", title: "Second Post")
create(:domain_post_fa_post, fa_id: "456", title: "Second Post")
end
it "replaces link text with post titles when posts exist" do
@@ -121,11 +109,11 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
<a href="https://www.furaffinity.net/view/123/">Original Text 1</a>
<a href="https://www.furaffinity.net/view/456/">Original Text 2</a>
'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to include_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_post",
partial: "domain/posts/description/inline_link_domain_post",
locals: {
post: post1,
},
@@ -133,7 +121,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
)
expect(sanitized).to include_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_post",
partial: "domain/posts/description/inline_link_domain_post",
locals: {
post: post2,
},
@@ -144,7 +132,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
it "removes the link when the post doesn't exist" do
html =
'<a href="https://www.furaffinity.net/view/789/">Original Text</a>'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to eq_html("Original Text")
end
@@ -152,11 +140,11 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
it "replaces nested elements when replacing titles" do
html =
'<a href="https://www.furaffinity.net/view/123/"><b>Bold</b> <i>Text</i></a>'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to eq_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_post",
partial: "domain/posts/description/inline_link_domain_post",
locals: {
post: post1,
},
@@ -173,7 +161,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
'
expect {
helper.fa_post_description_sanitized(html)
helper.description_sanitized(html)
}.to make_database_queries(count: 1)
end
end
@@ -193,7 +181,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
"<b>Bold</b> and <i>italic</i> text",
}.each do |input, expected|
it "processes '#{input}' correctly" do
sanitized = helper.fa_post_description_sanitized(input)
sanitized = helper.description_sanitized(input)
expect(sanitized).to eq_html(expected)
end
end
@@ -203,7 +191,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
it "only allows specified CSS properties" do
input =
'<span style="font-size: 12px; color: red; background: blue; position: absolute;">Test</span>'
sanitized = helper.fa_post_description_sanitized(input)
sanitized = helper.description_sanitized(input)
expect(sanitized).to include("font-size")
expect(sanitized).to include("color")
@@ -224,7 +212,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
},
}.each do |description, test_case|
it "handles #{description}" do
sanitized = helper.fa_post_description_sanitized(test_case[:input])
sanitized = helper.description_sanitized(test_case[:input])
expect(sanitized).to eq(test_case[:expected])
end
end
@@ -232,9 +220,13 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
describe "multiple link handling" do
it "correctly processes multiple links of different types" do
post1 = create(:domain_fa_post, fa_id: "123", title: "Post Title")
post1 = create(:domain_post_fa_post, fa_id: "123", title: "Post Title")
user1 =
create(:domain_fa_user, url_name: "username1", name: "User Name 1")
create(
:domain_user_fa_user,
url_name: "username1",
name: "User Name 1",
)
html =
'
@@ -243,11 +235,11 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
<a href="https://google.com">Google</a>
<a>No href</a>
'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to include_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_post",
partial: "domain/posts/description/inline_link_domain_post",
locals: {
post: post1,
},
@@ -255,7 +247,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
)
expect(sanitized).to include_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_user",
partial: "domain/posts/description/inline_link_domain_user",
locals: {
user: user1,
},
@@ -271,13 +263,17 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
describe "FA user link handling" do
let!(:user1) do
create(:domain_fa_user, url_name: "artistone", name: "Artist One")
create(:domain_user_fa_user, url_name: "artistone", name: "Artist One")
end
let!(:user2) do
create(:domain_fa_user, url_name: "artisttwo", name: "Artist Two")
create(:domain_user_fa_user, url_name: "artisttwo", name: "Artist Two")
end
let!(:user3) do
create(:domain_fa_user, url_name: "artistthree", name: "Artist Three")
create(
:domain_user_fa_user,
url_name: "artistthree",
name: "Artist Three",
)
end
it "replaces link text with user names when users exist" do
@@ -286,11 +282,11 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
<a href="https://www.furaffinity.net/user/artistone/">Original Text 1</a>
<a href="https://www.furaffinity.net/user/artisttwo/">Original Text 2</a>
'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to include_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_user",
partial: "domain/posts/description/inline_link_domain_user",
locals: {
user: user1,
},
@@ -299,7 +295,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
expect(sanitized).to include_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_user",
partial: "domain/posts/description/inline_link_domain_user",
locals: {
user: user2,
},
@@ -310,7 +306,7 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
it "Removes the link when the user doesn't exist" do
html =
'<a href="https://www.furaffinity.net/user/nonexistent/">Original Text</a>'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to eq_html("Original Text")
end
@@ -318,11 +314,11 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
it "replaces nested elements when replacing names" do
html =
'<a href="https://www.furaffinity.net/user/artistone/"><b>Bold</b> <i>Text</i></a>'
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to include_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_user",
partial: "domain/posts/description/inline_link_domain_user",
locals: {
user: user1,
},
@@ -339,9 +335,9 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
<a href="https://www.furaffinity.net/user/nonexistent/">Link 4</a>
'
expect {
helper.fa_post_description_sanitized(html)
}.to make_database_queries(count: 2)
expect { helper.description_sanitized(html) }.to make_database_queries(
count: 2,
)
end
%w[
@@ -357,10 +353,10 @@ RSpec.describe Domain::Fa::PostsHelper, type: :helper do
].each do |url|
it "processes #{url}" do
html = %(<a href="#{url}">FA User Link</a>)
sanitized = helper.fa_post_description_sanitized(html)
sanitized = helper.description_sanitized(html)
expect(sanitized).to eq_html(
render(
partial: "domain/fa/posts/description_inline_link_fa_user",
partial: "domain/posts/description/inline_link_domain_user",
locals: {
user: user1,
},