Files
redux-scraper/app/lib/domain/fa/parser/submission_parser_helper.rb
2025-09-07 18:27:22 +00:00

431 lines
11 KiB
Ruby

# typed: strict
# Extracts the individual fields of one FA submission page from its parsed
# HTML. Every public reader locates its value with CSS selectors (or, for the
# older layouts, by scanning the children of the information cell), dispatches
# on the page layout version, and caches the result in an instance variable.
#
# VERSION_0 and VERSION_1 share almost all selectors. VERSION_2 uses different
# markup; the private helpers that only apply to it carry a `_redux` suffix.
#
# `unimplemented_version!` is not defined in this class; it is presumably
# inherited from the Base superclass.
class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
  VERSION_0 = Domain::Fa::Parser::Page::VERSION_0
  VERSION_1 = Domain::Fa::Parser::Page::VERSION_1
  VERSION_2 = Domain::Fa::Parser::Page::VERSION_2

  # elem         - root node that all CSS lookups in this class run against
  # phtml        - stored on the instance; not read by any method in this class
  # page_version - one of VERSION_0 / VERSION_1 / VERSION_2; selects selectors
  # page         - owning page; read for `logged_in_user_tz` when parsing dates
  sig do
    params(
      elem: Nokogiri::XML::Node,
      phtml: String,
      page_version: Symbol,
      page: Domain::Fa::Parser::Page,
    ).void
  end
  def initialize(elem, phtml, page_version, page)
    @elem = elem
    @phtml = phtml
    @page_version = page_version
    @page = page

    # Memoization slots for the readers below.
    @id = T.let(nil, T.nilable(Integer))
    @small_img = T.let(nil, T.nilable(String))
    @title = T.let(nil, T.nilable(String))
    @artist = T.let(nil, T.nilable(String))
    @artist_user_page_path = T.let(nil, T.nilable(String))
    @artist_avatar_url = T.let(nil, T.nilable(String))
    @description_html = T.let(nil, T.nilable(String))
    @full_res_img = T.let(nil, T.nilable(String))
    @posted_date = T.let(nil, T.nilable(Time))
    @rating = T.let(nil, T.nilable(Symbol))
    @category = T.let(nil, T.nilable(String))
    @theme = T.let(nil, T.nilable(String))
    @category_full_str_redux = T.let(nil, T.nilable(String))
    @species = T.let(nil, T.nilable(String))
    @gender = T.let(nil, T.nilable(String))
    @num_favorites = T.let(nil, T.nilable(Integer))
    @num_comments = T.let(nil, T.nilable(Integer))
    @num_views = T.let(nil, T.nilable(Integer))
    @resolution_str = T.let(nil, T.nilable(String))
    @keywords_array = T.let(nil, T.nilable(T::Array[String]))
    @information_elem = T.let(nil, T.nilable(Nokogiri::XML::Node))
    # `css` yields a NodeSet, so the cache slot is typed accordingly.
    @stats_container_redux = T.let(nil, T.nilable(Nokogiri::XML::NodeSet))
  end

  # Submission id: the last non-empty `/`-separated segment of the `og:url`
  # meta tag's content, converted with `to_i`.
  #
  # NOTE(review): evaluates to nil when the meta tag is missing, which does not
  # satisfy the declared Integer return type.
  sig { returns(Integer) }
  def id
    # Disabled alternative:
    # @elem.css("form[name=myform]").first['action'].split("/").last.to_i
    @id ||=
      begin
        og_url = @elem.css("meta[property='og:url']").first
        og_url["content"].split("/").reject(&:empty?).last.to_i if og_url
      end
  end

  # Stripped `src` attribute of the `#submissionImg` element.
  sig { returns(String) }
  def small_img
    @small_img ||= @elem.css("#submissionImg").first["src"].strip
  end

  # Stripped text of the submission title element for the current layout.
  sig { returns(String) }
  def title
    # Disabled alternative:
    # r = @elem.css(".cat").first.text.strip
    @title ||=
      case @page_version
      when VERSION_0, VERSION_1
        @elem.css("#page-submission td.cat b").first.text.strip
      when VERSION_2
        @elem.css(".submission-title p").first.text.strip
      else
        unimplemented_version!
      end
  end

  # Stripped text of the first artist link for the current layout.
  #
  # NOTE(review): the VERSION_2 branch yields nil when no link matches, which
  # does not satisfy the declared String return type.
  sig { returns(String) }
  def artist
    # Disabled alternative:
    # @elem.css(".cat a").first.text.strip
    @artist ||=
      case @page_version
      when VERSION_0, VERSION_1
        @elem.css("#page-submission table.maintable td.cat a").first.text.strip
      when VERSION_2
        @elem.css(".submission-id-sub-container a").first&.text&.strip
      else
        unimplemented_version!
      end
  end

  # `href` attribute of the first artist link for the current layout.
  # Fails with NoMethodError when no link matches.
  sig { returns(String) }
  def artist_user_page_path
    @artist_user_page_path ||=
      case @page_version
      when VERSION_2
        @elem.css(".submission-id-sub-container a").first["href"]
      when VERSION_0, VERSION_1
        @elem.css("table[align=center] td.cat a").first["href"]
      else
        unimplemented_version!
      end
  end

  # Last `/`-separated segment of `artist_user_page_path`.
  sig { returns(String) }
  def artist_url_name
    T.must(artist_user_page_path.split("/").last)
  end

  # `src` attribute of the artist avatar image for the current layout.
  #
  # NOTE(review): yields nil when no avatar element matches, which does not
  # satisfy the declared String return type.
  sig { returns(String) }
  def artist_avatar_url
    @artist_avatar_url ||=
      case @page_version
      when VERSION_2
        @elem.css(".submission-user-icon.avatar").first&.[]("src")
      when VERSION_0, VERSION_1
        @elem.css("a img.avatar").first&.[]("src")
      else
        unimplemented_version!
      end
  end

  # Inner HTML of the description element for the current layout.
  sig { returns(String) }
  def description_html
    @description_html ||=
      case @page_version
      # Disabled alternative for VERSION_0:
      # @elem.css("#submission td.alt1 .maintable tr .alt1").last.inner_html
      when VERSION_0, VERSION_1
        @elem.css("#page-submission td.alt1 .maintable tr .alt1").last.inner_html
      when VERSION_2
        @elem.css(".submission-description").first.inner_html
      else
        unimplemented_version!
      end
  end

  # Link target of the full-resolution file. VERSION_0 / VERSION_1 take the
  # second matching link; VERSION_2 takes the button whose stripped text is
  # exactly "Download".
  sig { returns(String) }
  def full_res_img
    @full_res_img ||=
      case @page_version
      when VERSION_0, VERSION_1
        # Disabled alternative for VERSION_0:
        # @elem.css("#submission div b a")[1]["href"].strip
        @elem.css("#page-submission div b a")[1]["href"].strip
      when VERSION_2
        download_link =
          @elem
            .css("a.button.standard.mobile-fix")
            .find { |link| link.text.strip == "Download" }
        download_link["href"]
      else
        unimplemented_version!
      end
  end

  # Time the submission was posted, parsed with `@page.logged_in_user_tz`.
  # Returns nil when the date element carries no `title` attribute.
  # Raises ArgumentError when a VERSION_2 date string matches no known format.
  sig { returns(T.nilable(Time)) }
  def posted_date
    @posted_date ||=
      begin
        time_zone_offset = @page.logged_in_user_tz
        case @page_version
        when VERSION_0, VERSION_1
          idx =
            T.must(
              Domain::Fa::Parser::Page.elem_idx_after_text_match(
                info_children,
                /Posted/,
              ),
            )
          # The date lives in the first <span> within a few nodes of "Posted".
          child = info_children[idx..idx + 5].find { |ic| ic.name == "span" }
          date_str = child.try(:[], "title").try(:strip)
          if date_str
            # Drop ordinal suffixes ("1st" -> "1") so `%d` can match the day.
            date_str = date_str.gsub(/(\d+)(st|nd|rd|th)/, '\1')
            time_zone_offset.strptime(date_str, "%b %d, %Y %I:%M %p")
          end
        when VERSION_2
          date_str = @elem.css(".popup_date").first["title"]
          if date_str
            [
              # version 2, pre September 2025 - formatted like "Jan 20, 2025 11:23 AM"
              "%b %d, %Y %I:%M %p",
              # version 2, post September 2025 - formatted like "September 7, 2025, 10:48:53"
              "%B %e, %Y, %H:%M:%S",
            ].lazy
              .map do |format|
                time_zone_offset.strptime(date_str, format)
              rescue ArgumentError
                # This format did not match; fall through to the next one.
                nil
              end
              .find(&:present?) ||
              raise(ArgumentError, "invalid date string: `#{date_str}`")
          end
        else
          # NOTE(review): every other reader calls `unimplemented_version!`
          # here; confirm whether this one should as well.
          raise("unimplemented version #{@page_version}")
        end
      end
  end

  # Content rating, derived from which `.rating-box` variant is present.
  # Only implemented for VERSION_2. Raises when no known variant is found.
  sig { returns(Symbol) }
  def rating
    @rating ||=
      case @page_version
      when VERSION_2
        if @elem.css(".rating-box.mature").first
          :mature
        elsif @elem.css(".rating-box.adult").first
          :adult
        elsif @elem.css(".rating-box.general").first
          :general
        else
          raise("unable to determine rating")
        end
      else
        unimplemented_version!
      end
  end

  # Category of the submission. For VERSION_2 this is the part of the combined
  # "Category / Theme" string before the first " / ".
  sig { returns(T.nilable(String)) }
  def category
    @category ||=
      case @page_version
      when VERSION_0, VERSION_1
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Category/)
          &.text
          &.strip
      when VERSION_2
        category_full_str_redux&.split(" / ")&.first&.strip
      else
        unimplemented_version!
      end
  end

  # Theme of the submission. For VERSION_2 this is the part of the combined
  # "Category / Theme" string after the last " / "; nil when the string has no
  # separator, so the category is never reported as the theme.
  sig { returns(T.nilable(String)) }
  def theme
    @theme ||=
      case @page_version
      when VERSION_0, VERSION_1
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Theme/)
          &.text
          &.strip
      when VERSION_2
        parts = category_full_str_redux&.split(" / ")
        parts.last&.strip if parts && parts.length > 1
      else
        unimplemented_version!
      end
  end

  # FA started combining "Category / Theme" string into one.
  # Raw value of the "Category" info row; only implemented for VERSION_2.
  sig { returns(T.nilable(String)) }
  def category_full_str_redux
    @category_full_str_redux ||=
      case @page_version
      when VERSION_2
        info_text_value_redux("Category")
      else
        unimplemented_version!
      end
  end

  # Species value from the information section.
  #
  # NOTE(review): both lookups can yield nil, which does not satisfy the
  # declared String return type (compare `gender`, which is nilable).
  sig { returns(String) }
  def species
    @species ||=
      case @page_version
      when VERSION_0, VERSION_1
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Species/)
          &.text
          &.strip
      when VERSION_2
        info_text_value_redux("Species")
      else
        unimplemented_version!
      end
  end

  # Gender value from the information section, or nil when not found.
  sig { returns(T.nilable(String)) }
  def gender
    @gender ||=
      case @page_version
      when VERSION_0, VERSION_1
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Gender/)
          &.text
          &.strip
      when VERSION_2
        info_text_value_redux("Gender")
      else
        unimplemented_version!
      end
  end

  # Favorites count, converted with `to_i`.
  sig { returns(Integer) }
  def num_favorites
    @num_favorites ||=
      case @page_version
      when VERSION_0, VERSION_1
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Favorites/)
          &.text
          &.strip
          &.to_i
      when VERSION_2
        stats_container_redux.css(".favorites .font-large").first.text.strip.to_i
      else
        unimplemented_version!
      end
  end

  # Comments count, converted with `to_i`.
  sig { returns(Integer) }
  def num_comments
    @num_comments ||=
      case @page_version
      when VERSION_0, VERSION_1
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Comments/)
          &.text
          &.strip
          &.to_i
      when VERSION_2
        stats_container_redux.css(".comments .font-large").first.text.strip.to_i
      else
        unimplemented_version!
      end
  end

  # Views count, converted with `to_i`.
  sig { returns(Integer) }
  def num_views
    @num_views ||=
      case @page_version
      when VERSION_0, VERSION_1
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Views/)
          &.text
          &.strip
          &.to_i
      when VERSION_2
        stats_container_redux.css(".views .font-large").first.text.strip.to_i
      else
        unimplemented_version!
      end
  end

  # Resolution string. For VERSION_2 the "Size" info row is split on spaces and
  # its first and last tokens are joined with "x".
  sig { returns(String) }
  def resolution_str
    @resolution_str ||=
      case @page_version
      when VERSION_0
        Domain::Fa::Parser::Page
          .elem_after_text_match(info_children, /Resolution/)
          &.text
          &.strip
      when VERSION_1
        idx =
          T.must(
            Domain::Fa::Parser::Page.elem_idx_after_text_match(
              info_children,
              /Resolution/,
            ),
          )
        # VERSION_1 reads the node one past the matched index.
        info_children[idx + 1].try(:text).try(:strip)
      when VERSION_2
        parts = T.must(info_text_value_redux("Size")&.split(" "))
        "#{parts.first}x#{parts.last}"
      else
        unimplemented_version!
      end
  end

  # Keyword / tag link texts with empty entries removed.
  # Only the VERSION_2 branch strips surrounding whitespace from each entry.
  sig { returns(T::Array[String]) }
  def keywords_array
    @keywords_array ||=
      case @page_version
      when VERSION_0, VERSION_1
        @elem.css("#keywords a").map(&:text)
      when VERSION_2
        @elem.css(".tags-row .tags a").map(&:text).map(&:strip)
      else
        unimplemented_version!
      end&.reject(&:empty?) || []
  end

  private

  # Child nodes of the information element (VERSION_0 / VERSION_1).
  sig { returns(Nokogiri::XML::NodeSet) }
  def info_children
    information_elem.children
  end

  # Child of the information element at index `i`.
  sig { params(i: Integer).returns(Nokogiri::XML::Node) }
  def info_child(i)
    information_elem.children[i]
  end

  # First matching information cell; only implemented for VERSION_0 and
  # VERSION_1.
  sig { returns(Nokogiri::XML::Node) }
  def information_elem
    @information_elem ||=
      case @page_version
      # Disabled alternative for VERSION_0:
      # @elem.css("td td td td td td.alt1[align=left]").first
      when VERSION_0, VERSION_1
        @elem.css("#page-submission td td td td.alt1[align=left]").first
      else
        unimplemented_version!
      end
  end

  # First `section.info.text` element (used by the VERSION_2 readers).
  sig { returns(Nokogiri::XML::Node) }
  def info_text_elem_redux
    @elem.css("section.info.text").first
  end

  # Value of one row of the info section: finds the `.highlight` element whose
  # text equals `info_section` exactly, then returns the stripped text of its
  # siblings after the first child of their shared parent. Returns nil when no
  # label matches.
  sig { params(info_section: String).returns(T.nilable(String)) }
  def info_text_value_redux(info_section)
    info_text_elem_redux
      .css(".highlight")
      .find { |label| label.text == info_section }
      &.parent
      &.children
      &.slice(1..)
      &.text
      &.strip
  end

  # Node set matching `.stats-container.text`, looked up once and shared by the
  # VERSION_2 favorites / comments / views readers.
  sig { returns(Nokogiri::XML::NodeSet) }
  def stats_container_redux
    @stats_container_redux ||= @elem.css(".stats-container.text")
  end
end