Files
redux-scraper/app/helpers/domain/descriptions_helper.rb
2025-08-13 08:20:32 +00:00

375 lines
12 KiB
Ruby

# typed: strict
# frozen_string_literal: true
require "dtext"
require "erb"
# Helpers for rendering description HTML: best-effort URL parsing,
# sanitizing/transforming description markup, CSS class selection for links,
# and building prop hashes for user/post hover previews.
#
# The module mixes in HelpersInterface plus the post/domain/user helper
# modules, and is marked abstract via T::Helpers. `requires_ancestor { Object }`
# declares to Sorbet that anything including this module descends from Object.
module Domain::DescriptionsHelper
extend T::Sig
extend T::Helpers
include HelpersInterface
include Domain::PostsHelper
include Domain::DomainsHelper
include Domain::UsersHelper
requires_ancestor { Object }
abstract!
# Best-effort conversion of a possibly partial URL string into an absolute
# Addressable::URI.
#
# assumed_host - host to use when the string is a site-relative path
#                (e.g. "/user/foo") or otherwise ends up without a host.
# url_string   - raw text or href value; if it contains something URI.extract
#                recognises, the first such match is used, otherwise the
#                whole string is used.
#
# Handling, in order:
#   * "//host/path" (protocol-relative) gets an "https:" scheme and keeps
#     its own host.
#   * "/path" is prefixed with assumed_host.
#   * anything not already looking like "http...://" is prefixed with
#     "https://".
#
# Returns nil when Addressable rejects the resulting string.
sig do
  params(assumed_host: String, url_string: String).returns(
    T.nilable(Addressable::URI),
  )
end
def try_parse_uri(assumed_host, url_string)
  # NOTE(review): URI.extract is marked obsolete in Ruby's stdlib; its
  # behaviour is kept here unchanged.
  candidate = URI.extract(url_string).first || url_string

  candidate =
    if candidate.start_with?("//")
      # A protocol-relative reference already names its own host; gluing it
      # onto assumed_host would produce "assumed//other-host/..." and resolve
      # to the wrong site.
      "https:#{candidate}"
    elsif candidate.start_with?("/")
      # if the url string starts with a slash, add the assumed host to it
      "#{assumed_host}#{candidate}"
    else
      candidate
    end

  # if the url string doesn't have a protocol, add https:// to it
  unless candidate.start_with?("http") && candidate.include?("://")
    candidate = "https://#{candidate}"
  end

  uri = Addressable::URI.parse(candidate)
  uri.host ||= assumed_host
  uri.scheme ||= "https"
  uri
rescue Addressable::URI::InvalidURIError
  nil
end
# Reports whether a link's visible text is just its URL: both sides are
# stripped and lower-cased, and the text matches if it equals the URL either
# as-is or once "http://" or "https://" is put in front of the text.
sig { params(text: String, url: String).returns(T::Boolean) }
def text_same_as_url?(text, url)
  wanted = url.strip.downcase
  bare = text.strip.downcase
  candidates = ["", "http://", "https://"].map { |scheme| scheme + bare }
  candidates.include?(wanted)
end
# CSS classes for the container wrapping a model's description.
# FA and E621 posts/users all share one dark-slate style; any other model
# gets nil.
sig { params(model: HasDescriptionHtmlForView).returns(T.nilable(String)) }
def description_section_class_for_model(model)
  case model
  when Domain::Post::FaPost,
       Domain::User::FaUser,
       Domain::Post::E621Post,
       Domain::User::E621User
    "bg-slate-700 p-4 text-slate-200 text-sm"
  end
end
# Deliberately loose pattern for spotting URL-like text inside plain text.
# Matches, in order: an optional "http://" / "https://" followed by one
# character, an optional "www.", 2-256 characters from a permissive set, a
# dot, a 2-6 letter lowercase suffix ending at a word boundary, and then any
# run of path/query-style characters.
WEAK_URL_MATCHER_REGEX =
%r{(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)}
# Each listed domain together with its "www." variant, frozen.
# NOTE(review): not referenced anywhere in the visible part of this file;
# the plain-text linkifier uses ALLOWED_PLAIN_TEXT_URL_DOMAINS instead —
# confirm whether this constant is still needed.
ALLOWED_INFERRED_URL_DOMAINS =
T.let(
%w[furaffinity.net inkbunny.net e621.net]
.flat_map { |domain| [domain, "www.#{domain}"] }
.freeze,
T::Array[String],
)
# Builds display-ready HTML for a model's description.
#
# Steps:
#   1. Convert the source markup to HTML: DText.parse for e621 posts,
#      otherwise strip "<br>" tags and run try_convert_bbcode_to_html.
#   2. Run a Sanitize pass with four custom transformers that
#      (a) turn bare URLs on allowed domains into anchors,
#      (b) normalise class/style attributes,
#      (c) queue rendered replacements for anchors that resolve to a known
#          post/user or point at an allowed external domain, and
#      (d) unwrap every remaining anchor into its contents.
#   3. Swap the queued replacements into the tree and serialise it.
#
# Returns nil when the description is blank (or DText yields nothing).
# If a StandardError is raised it is re-raised in staging, test and
# development; in any other environment the placeholder string
# "(error generating description)" is returned instead.
sig { params(model: HasDescriptionHtmlForView).returns(T.nilable(String)) }
def sanitize_description_html(model)
  html = model.description_html_for_view
  return nil if html.blank?

  case model
  when Domain::Post::E621Post
    dtext_result = DText.parse(html)
    return nil if dtext_result.blank?
    # the first element of the parse result is used as the HTML
    html = dtext_result[0]
  else
    # profiles often contain bbcode, so first re-parse that
    # for some reason, lots of duplicate <br> tags appear as well
    html = html.gsub("<br>", "").strip
    html = try_convert_bbcode_to_html(html)
  end

  # anchor node => rendered fragment; swapped in only after the sanitizer
  # pass has finished
  replacements = {}

  # Transform bare text that is not contained within an anchor tag into an anchor tag
  text_link_transformer =
    lambda do |env|
      node = T.cast(env[:node], Nokogiri::XML::Node)
      next if env[:is_allowlisted]
      next unless node.text?
      next unless node.ancestors("a").empty?
      next unless (node_text = T.cast(node.text, T.nilable(String)))
      next unless (match = node_text.match(WEAK_URL_MATCHER_REGEX))
      next unless (url_text = match[0])

      uri = try_parse_uri(model.description_html_base_domain, url_text)
      next unless uri
      unless ALLOWED_PLAIN_TEXT_URL_DOMAINS.any? { |domain|
               url_matches_domain?(domain, uri.host)
             }
        next
      end

      before, after = node_text.split(url_text, 2)
      # The replacement is handed to Nokogiri as a string and parsed as
      # markup, and the node's text is already entity-decoded, so every
      # piece must be re-escaped or text such as "&lt;b&gt;" would come back
      # as a live element.
      escaped_url = ERB::Util.html_escape(url_text)
      linked_html =
        "#{ERB::Util.html_escape(before)}" \
          "<a href=\"#{escaped_url}\">#{escaped_url}</a>" \
          "#{ERB::Util.html_escape(after)}"
      node.replace(linked_html)
    end

  tag_class_and_style_transformer =
    lambda do |env|
      node = T.cast(env[:node], Nokogiri::XML::Node)
      node_name = T.cast(env[:node_name], String)
      next if env[:is_allowlisted] || !node.element?

      # Convert bbcode_center class to text-align: center style
      # and remove all other styling
      add_node_styles = []
      if node["class"]&.include?("bbcode_center")
        add_node_styles << "text-align: center"
      end
      node.name = "div" if node_name == "code"
      node.remove_attribute("class")

      # add to original styles
      node["style"] = (node["style"] || "")
        .split(";")
        .map(&:strip)
        .concat(add_node_styles)
        .map { |s| s + ";" }
        .join(" ")
    end

  link_to_model_link_transformer =
    lambda do |env|
      node = T.cast(env[:node], Nokogiri::XML::Node)
      node_name = T.cast(env[:node_name], String)
      next if env[:is_allowlisted] || !node.element?
      next unless node_name == "a"

      href_str = node["href"]&.downcase || ""
      url = try_parse_uri(model.description_html_base_domain, href_str)
      next { node_allowlist: [] } if url.nil?

      # Anchors that resolve to a known post/user become rich inline links.
      found_link = link_for_source(url.to_s)
      if found_link.present? && (found_model = found_link.model)
        partial, locals =
          case found_model
          when Domain::Post
            [
              "domain/has_description_html/inline_link_domain_post",
              {
                post: found_model,
                link_text: node.text,
                visual_style: "description-section-link",
              },
            ]
          when Domain::User
            [
              "domain/has_description_html/inline_link_domain_user",
              {
                user: found_model,
                link_text: node.text,
                visual_style: "description-section-link",
              },
            ]
          else
            raise "Unknown model type: #{found_link.model.class}"
          end
        replacements[node] = Nokogiri::HTML5.fragment(
          render(partial:, locals:),
        )
        # allowlisting keeps the later transformers from touching this anchor
        next { node_allowlist: [node] }
      end

      # Otherwise only anchors to allowed external domains are kept; anything
      # else falls through to disallowed_link_transformer.
      unless ALLOWED_EXTERNAL_LINK_DOMAINS.any? { |domain|
               url_matches_domain?(domain, url.host)
             }
        next
      end

      title =
        if node.text.blank? || text_same_as_url?(node.text, url.to_s)
          title_for_url(url.to_s)
        else
          node.text
        end
      replacements[node] = Nokogiri::HTML5.fragment(
        render(
          partial: "domain/has_description_html/inline_link_external",
          locals: {
            url: url.to_s,
            title:,
            icon_path: icon_path_for_domain(url.host),
          },
        ),
      )
      next { node_allowlist: [node] }
    end

  disallowed_link_transformer =
    lambda do |env|
      node = T.cast(env[:node], Nokogiri::XML::Node)
      node_name = T.cast(env[:node_name], String)
      next if env[:is_allowlisted] || !node.element?

      if node_name == "a"
        # by the time we're here, we know this is not a valid link node,
        # and it should be replaced with its text
        node.replace(node.inner_html)
      end
    end

  sanitizer =
    Sanitize.new(
      elements: %w[a code div br img b i span strong hr p],
      attributes: {
        "a" => %w[href class],
        :all => %w[class style],
      },
      css: {
        properties: %w[font-size color text-align class],
      },
      transformers: [
        text_link_transformer,
        tag_class_and_style_transformer,
        link_to_model_link_transformer,
        disallowed_link_transformer,
      ],
    )

  # `preprocess` is invoked through `send`, presumably because Sanitize does
  # not expose it publicly.
  fragment = Nokogiri::HTML5.fragment(sanitizer.send(:preprocess, html))
  sanitizer.node!(fragment)
  replacements.each { |node, replacement| node.replace(replacement) }
  raw fragment.to_html(preserve_newline: true)
rescue StandardError
  raise if Rails.env == "staging" || Rails.env.test? || Rails.env.development?
  # if anything goes wrong in production, bail out and don't display anything
  "(error generating description)"
end
# Maps a visual style name to the CSS class string used for inline links.
# "sky-link" and unrecognised names get the plain blue-link classes; the two
# "description-section-link" variants share their layout classes and differ
# only in colours.
sig { params(visual_style: String).returns(String) }
def link_classes_for_visual_style(visual_style)
  return "blue-link truncate" if visual_style == "sky-link"

  colour_classes =
    case visual_style
    when "description-section-link"
      [
        "text-sky-200 border-slate-200",
        "border border-transparent hover:border-slate-300 hover:text-sky-800 hover:bg-slate-100",
      ]
    when "description-section-link-light"
      [
        "text-sky-600 border-slate-300",
        "border border-transparent hover:border-slate-500 hover:text-sky-800 hover:bg-slate-200",
      ]
    end
  return "blue-link" if colour_classes.nil?

  [
    *colour_classes,
    "rounded-md px-1 transition-all",
    "inline-flex items-center align-bottom",
  ].join(" ")
end
# Props hash for a user's hover-preview link.
#
# Everything except :visualStyle is cached under a key made of the user, the
# user's policy, a fixed tag and icon_size; :visualStyle is set on the hash
# after the cache lookup, so it is not part of the cached block's result.
# The post and follow counts are nil when the matching has_* predicate on
# the user is false.
sig do
  params(user: Domain::User, visual_style: String, icon_size: String).returns(
    T::Hash[Symbol, T.untyped],
  )
end
def props_for_user_hover_preview(user, visual_style, icon_size)
  key_parts = [
    user,
    policy(user),
    "popover_inline_link_domain_user",
    icon_size,
  ]

  props =
    Rails.cache.fetch(key_parts) do
      post_count = user.user_post_creations.size if user.has_created_posts?
      registered = domain_user_registered_at_string_for_view(user)
      follower_count =
        (user.user_user_follows_to.size if user.has_followed_by_users?)
      following_count =
        (user.user_user_follows_from.size if user.has_followed_users?)
      thumb_size =
        if icon_size == "large"
          "64-avatar"
        else
          "32-avatar"
        end

      {
        iconSize: icon_size,
        linkText: user.name_for_view,
        userId: user.to_param,
        userName: user.name_for_view,
        userPath: domain_user_path(user),
        userSmallAvatarPath:
          domain_user_avatar_img_src_path(user.avatar, thumb: thumb_size),
        userAvatarPath: domain_user_avatar_img_src_path(user.avatar),
        userAvatarAlt: "View #{user.name_for_view}'s profile",
        userDomainIcon: domain_model_icon_path(user),
        userNumPosts: post_count,
        userRegisteredAt: registered,
        userNumFollowedBy: follower_count,
        userNumFollowed: following_count,
      }
    end

  props[:visualStyle] = visual_style
  props
end
# Props hash for a post's hover-preview link.
#
# post         - the post being linked to.
# link_text    - text shown for the link.
# visual_style - style name; set on the hash after the cache lookup.
# domain_icon  - when false, :postDomainIcon is nil.
# link_params  - extra params passed to domain_post_path.
#
# Everything except :visualStyle is cached. The cache key covers every input
# that shapes the cached hash, including domain_icon and link_params, so a
# call with different values does not reuse another call's path or icon.
# Creator name/avatar are added only when the post has a primary creator.
sig do
  params(
    post: Domain::Post,
    link_text: String,
    visual_style: String,
    domain_icon: T::Boolean,
    link_params: T::Hash[Symbol, T.untyped],
  ).returns(T::Hash[Symbol, T.untyped])
end
def props_for_post_hover_preview(
  post,
  link_text,
  visual_style,
  domain_icon: true,
  link_params: {}
)
  cache_key = [
    post,
    policy(post),
    link_text,
    domain_icon,
    link_params,
    "popover_inline_link_domain_post",
  ]

  props =
    Rails.cache.fetch(cache_key) do
      cached = {
        linkText: link_text,
        postId: post.to_param,
        postTitle: post.title,
        postPath:
          Rails.application.routes.url_helpers.domain_post_path(
            post,
            link_params,
          ),
        postThumbnailPath: thumbnail_for_post_path(post),
        postThumbnailAlt: "View on #{domain_name_for_model(post)}",
        postDomainIcon: domain_icon ? domain_model_icon_path(post) : nil,
      }
      if (creator = post.primary_creator_for_view)
        cached[:creatorName] = creator.name_for_view
        cached[:creatorAvatarPath] = user_avatar_path_for_view(creator)
      end
      cached
    end

  props[:visualStyle] = visual_style
  props
end
end