add dtext parsing

This commit is contained in:
Dylan Knutson
2025-03-01 03:47:20 +00:00
parent 7437586dda
commit ad3d564d58
19 changed files with 197 additions and 62 deletions

View File

@@ -130,6 +130,9 @@ gem "zstd-ruby"
gem "rouge"
gem "docx"
gem "ruby-bbcode"
gem "dtext_rb",
git: "https://github.com/e621ng/dtext_rb",
ref: "5ef8fd7a5205c832f4c18197911717e7d491494e"
# gem "pghero", git: "https://github.com/dymk/pghero", ref: "e314f99"
gem "pghero", "~> 3.6"

View File

@@ -1,3 +1,10 @@
GIT
remote: https://github.com/e621ng/dtext_rb
revision: 5ef8fd7a5205c832f4c18197911717e7d491494e
ref: 5ef8fd7a5205c832f4c18197911717e7d491494e
specs:
dtext_rb (1.11.0)
GIT
remote: https://github.com/railsjazz/rails_live_reload
revision: dcd3b73904594e2c5134c2f6e05954f3937a8d29
@@ -563,6 +570,7 @@ DEPENDENCIES
discard
disco
docx
dtext_rb!
factory_bot_rails
faiss
good_job (~> 4.6)

View File

@@ -14,6 +14,10 @@
@apply px-4 py-3 font-medium text-slate-900;
}
.sky-section-header {
@apply px-4 py-3 font-medium text-slate-900;
}
.sky-link {
@apply text-sky-600 underline decoration-dotted transition-colors hover:text-sky-800;
}

View File

@@ -1,5 +1,7 @@
# typed: strict
# frozen_string_literal: true
require "dtext"
module Domain::DescriptionsHelper
extend T::Sig
extend T::Helpers
@@ -46,20 +48,23 @@ module Domain::DescriptionsHelper
html = model.description_html_for_view
return nil if html.blank?
# profiles often contain bbcode, so first re-parse that
# for some reason, lots of duplicate <br> tags appear as well
html = html.gsub("<br>", "").strip
begin
html = T.cast(T.unsafe(html).bbcode_to_html(false), String)
rescue RuntimeError
# if the bbcode is invalid, skip parsing it
case model
when Domain::Post::E621Post
dtext_result = DText.parse(html)
return nil if dtext_result.blank?
html = dtext_result[0]
else
# profiles often contain bbcode, so first re-parse that
# for some reason, lots of duplicate <br> tags appear as well
html = html.gsub("<br>", "").strip
html = try_convert_bbcode_to_html(html)
end
replacements = {}
sanitizer =
Sanitize.new(
elements: %w[code div br img b i span strong hr],
elements: %w[code div br img b i span strong hr p],
attributes: {
all: %w[class style],
},
@@ -152,7 +157,8 @@ module Domain::DescriptionsHelper
replacements.each { |node, replacement| node.replace(replacement) }
raw fragment.to_html(preserve_newline: true)
rescue StandardError
# if anything goes wrong, bail out and don't display anything
raise if Rails.env == "staging"
# if anything goes wrong in production, bail out and don't display anything
"(error generating description)"
end
end

View File

@@ -375,6 +375,11 @@ module Domain::PostsHelper
nil
end
sig { params(post: Domain::Post::FaPost).returns(T::Array[String]) }
# Returns the post's keywords with surrounding whitespace stripped and blank
# entries removed.
#
# nil entries are dropped before stripping: `strip` would raise on them, so a
# `compact` placed after `reject(&:blank?)` could never have any effect.
def keywords_for_fa_post(post)
  post.keywords.compact.map(&:strip).reject(&:blank?)
end
private
sig { params(url: String).returns(T.nilable(String)) }

28
app/helpers/ui_helper.rb Normal file
View File

@@ -0,0 +1,28 @@
# typed: strict
# frozen_string_literal: true
module UiHelper
  extend T::Sig
  extend T::Helpers
  include HelpersInterface

  # Renders a "sky-section" wrapper: a header div containing `title`, followed
  # by a body div containing the captured block output.
  #
  # The body div always carries the `sky-section-body` class plus either the
  # caller-supplied `class:` value or, when that is missing or nil, the default
  # "bg-slate-100 p-4".
  #
  # `has_collapse_widget` and any keyword arguments other than `class:` are
  # accepted but not used by this method.
  sig do
    params(
      title: String,
      has_collapse_widget: T::Boolean,
      kwargs: T.untyped,
      block: T.proc.void,
    ).returns(String)
  end
  def sky_section_tag(title, has_collapse_widget: false, **kwargs, &block)
    # Capture the caller's markup first so it can be placed inside the body div.
    inner_html = capture(&block)
    body_classes = kwargs[:class] || "bg-slate-100 p-4"

    content_tag(:div, class: "sky-section") do
      concat(content_tag(:div, title, class: "sky-section-header"))
      body =
        content_tag(:div, class: "sky-section-body #{body_classes}") do
          concat(inner_html)
        end
      concat(body)
    end
  end
end

View File

@@ -1,5 +1,4 @@
# typed: strict
class DeferredJob < T::ImmutableStruct
extend T::Sig
include T::Struct::ActsAsComparable
@@ -24,17 +23,3 @@ class DeferredJob < T::ImmutableStruct
.join("")
end
end
# Immutable matcher for DeferredJob instances: a job class plus a hash of
# params that a job must agree with.
class SuppressedJob < T::ImmutableStruct
extend T::Sig
include T::Struct::ActsAsComparable
const :job_class, T.class_of(Scraper::JobBase)
const :params, T::Hash[Symbol, T.untyped]
# Returns true when `job` has exactly this job_class and, for every key in
# this struct's `params`, `job.params` holds an equal value. Keys that exist
# only on the job are ignored; an empty `params` matches any job of the class.
sig { params(job: DeferredJob).returns(T::Boolean) }
def matches?(job)
job.job_class == job_class &&
params.all? { |key, value| job.params[key] == value }
end
end

14
app/lib/suppressed_job.rb Normal file
View File

@@ -0,0 +1,14 @@
# typed: strict
# Immutable matcher describing which DeferredJob instances to single out:
# a job class together with a subset of params the job must carry.
class SuppressedJob < T::ImmutableStruct
  extend T::Sig
  include T::Struct::ActsAsComparable

  const :job_class, T.class_of(Scraper::JobBase)
  const :params, T::Hash[Symbol, T.untyped]

  # True when `job` is of this exact job class and every key/value pair in
  # `params` is also present, with an equal value, in `job.params`.
  sig { params(job: DeferredJob).returns(T::Boolean) }
  def matches?(job)
    return false unless job.job_class == job_class

    params.all? { |name, expected| job.params[name] == expected }
  end
end

View File

@@ -129,6 +129,11 @@ class Domain::Post::E621Post < Domain::Post
self.class.first_valid_artist_tag_in(self.artists_array)
end
sig { override.returns(T.nilable(String)) }
# The value handed to the description view for this post: the `description`
# attribute, returned unchanged.
def description_html_for_view
  description
end
INVALID_ARTIST_TAGS = %w[unknown unknown_artist sound_warning].freeze
sig { params(list: T.nilable(T::Array[String])).returns(T.nilable(String)) }

View File

@@ -1,10 +1 @@
<section class="sky-section">
<div class="section-header"><%= description_title %></div>
<% if (description_html = sanitize_description_html(model)) %>
<div class="bg-slate-800 p-4 text-slate-200">
<%= description_html %>
</div>
<% else %>
<div class="p-4 text-center text-slate-500"><%= no_description_text %></div>
<% end %>
</section>
<h1>REPLACE ME</h1>

View File

@@ -0,0 +1,8 @@
<%# Description section for `post`: shows the output of sanitize_description_html, or a placeholder when that helper returns nil/false. %>
<% description_html = sanitize_description_html(post) %>
<%= sky_section_tag("Description") do %>
<% if description_html %>
<%= description_html %>
<% else %>
<div class="p-4 text-center text-slate-500">No description available</div>
<% end %>
<% end %>

View File

@@ -0,0 +1,8 @@
<%# Description section for `post`: shows the output of sanitize_description_html, or a placeholder when that helper returns nil/false. %>
<%# The dark body classes are passed only when a description exists; otherwise `class: nil` is passed to sky_section_tag. %>
<% description_html = sanitize_description_html(post) %>
<%= sky_section_tag("Description", class: description_html ? "bg-slate-700 p-4 text-slate-200" : nil) do %>
<% if description_html %>
<%= description_html %>
<% else %>
<div class="p-4 text-center text-slate-500">No description available</div>
<% end %>
<% end %>

View File

@@ -1,8 +1,6 @@
<section class="sky-section">
<div class="section-header">Tags</div>
<div class="bg-slate-100 p-4">
<% if post.tags_array.any? %>
<% tags_array =
<%= sky_section_tag("Tags") do %>
<% if post.tags_array.any? %>
<% tags_array =
(
if post.tags_array.is_a?(Hash)
post.tags_array
@@ -10,23 +8,22 @@
{ "general" => post.tags_array }
end
) %>
<div class="flex flex-wrap gap-2">
<% tag_category_order.each do |category| %>
<% (tags_array[category.to_s] || []).each do |tag| %>
<span
<div class="flex flex-wrap gap-2">
<% tag_category_order.each do |category| %>
<% (tags_array[category.to_s] || []).each do |tag| %>
<span
class="<%= tailwind_tag_category_class(category) %> rounded px-2 py-1 text-sm text-slate-600"
>
<% icon = font_awesome_category_icon(category) %>
<% if icon %>
<i class="fa-solid <%= icon %> mr-1"></i>
<% end %>
<%= tag %>
</span>
<% end %>
<% icon = font_awesome_category_icon(category) %>
<% if icon %>
<i class="fa-solid <%= icon %> mr-1"></i>
<% end %>
<%= tag %>
</span>
<% end %>
</div>
<% else %>
<div class="text-center italic text-slate-400">No tags</div>
<% end %>
</div>
</section>
<% end %>
</div>
<% else %>
<div class="text-center italic text-slate-400">No tags</div>
<% end %>
<% end %>

View File

@@ -0,0 +1,8 @@
<%# Description section for `post`: shows the output of sanitize_description_html, or a placeholder when that helper returns nil/false. %>
<%# The dark body classes are passed only when a description exists; otherwise `class: nil` is passed to sky_section_tag. %>
<% description_html = sanitize_description_html(post) %>
<%= sky_section_tag("Description", class: description_html ? "bg-slate-700 p-4 text-slate-200" : nil) do %>
<% if description_html %>
<%= description_html %>
<% else %>
<div class="p-4 text-center text-slate-500">No description available</div>
<% end %>
<% end %>

View File

@@ -0,0 +1,20 @@
<%# Keywords section for an FA post: one chip per cleaned keyword, or a "No keywords" placeholder when the list is empty. %>
<%= sky_section_tag("Keywords") do %>
  <% keywords = keywords_for_fa_post(post) %>
  <% if keywords.any? %>
    <%# Every chip uses the :general category, so look up its class and icon once rather than per keyword. %>
    <%# NOTE(review): assumes both helpers return the same value for the same category on every call; confirm. %>
    <% chip_class = tailwind_tag_category_class(:general) %>
    <% icon = font_awesome_category_icon(:general) %>
    <div class="flex flex-wrap gap-2">
      <% keywords.each do |keyword| %>
        <span
          class="<%= chip_class %> rounded px-2 py-1 text-sm text-slate-600"
        >
          <% if icon %>
            <i class="fa-solid <%= icon %> mr-1"></i>
          <% end %>
          <%= keyword %>
        </span>
      <% end %>
    </div>
  <% else %>
    <div class="text-center italic text-slate-400">No keywords</div>
  <% end %>
<% end %>

View File

@@ -2,12 +2,7 @@
<%= render_for_model(@post, "section_post_title", as: :post) %>
<%= render_for_model(@post, "section_post_groups", as: :post) %>
<%= render_for_model(@post, "section_primary_file", as: :post) %>
<% cache [@post, "section_description_sanitized"], expires_in: 1.hour do %>
<%= render "domain/has_description_html/section_description_sanitized",
model: @post,
description_title: "Description",
no_description_text: "No description available" %>
<% end %>
<%= render_for_model(@post, "section_description", as: :post) %>
<%= render_for_model(@post, "section_tags", as: :post) %>
<%= render_for_model(@post, "section_sources", as: :post) %>
<%= render_for_model(@post, "section_similar_posts", as: :post) %>

View File

@@ -0,0 +1,9 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for types exported from the `dtext_rb` gem.
# Please instead update this file by running `bin/tapioca gem dtext_rb`.
# THIS IS AN EMPTY RBI FILE.
# see https://github.com/Shopify/tapioca#manually-requiring-parts-of-a-gem

View File

@@ -0,0 +1,22 @@
# typed: true
# RBI declaration of `DText.parse` for Sorbet. The method body is empty: this
# file only describes the signature.
# NOTE(review): presumably hand-written because the generated RBI for the
# `dtext_rb` gem is empty; confirm.
class DText
extend T::Sig
# Declared contract: takes the DText source string plus optional keyword
# flags (defaults: inline false, allow_color false, max_thumbs 25,
# base_url nil) and returns either nil or a two-element array.
# NOTE(review): the second tuple element is typed as String here; its meaning
# and actual type are not visible in this file. Verify against the dtext_rb
# gem before relying on it.
sig do
params(
str: String,
inline: T::Boolean,
allow_color: T::Boolean,
max_thumbs: Integer,
base_url: T.nilable(String),
).returns(T.nilable([String, String]))
end
def self.parse(
str,
inline: false,
allow_color: false,
max_thumbs: 25,
base_url: nil
)
end
end

View File

@@ -75,4 +75,23 @@ module HelpersInterface
# Signature stub for `image_tag`: takes an image source string and an options
# value, and is declared to return a String. The body is empty here.
# NOTE(review): presumably the real implementation comes from the view
# context this interface is mixed into; confirm.
sig { params(src: String, options: T.untyped).returns(String) }
def image_tag(src, options = {})
end
# Signature stub for `capture`: takes a block and is declared to return a
# String. The body is empty here.
# NOTE(review): presumably backed by the view layer's own `capture`; confirm.
sig { params(block: T.proc.void).returns(String) }
def capture(&block)
end
# Signature stub for `content_tag`: takes a tag name symbol, arbitrary
# positional and keyword arguments, and an optional block that returns a
# String; declared to return a String. The body is empty here.
# NOTE(review): presumably backed by the view layer's own `content_tag`;
# confirm.
sig do
params(
name: Symbol,
args: T.untyped,
kwargs: T.untyped,
block: T.nilable(T.proc.returns(String)),
).returns(String)
end
def content_tag(name, *args, **kwargs, &block)
end
# Signature stub for `concat`: takes a String and is declared to return a
# String. The body is empty here.
# NOTE(review): presumably backed by the view layer's own `concat`; confirm.
sig { params(string: String).returns(String) }
def concat(string)
end
end