Files
redux-scraper/app/lib/domain/fa/parser/submission_parser_helper.rb
2025-01-01 03:29:53 +00:00

322 lines
8.0 KiB
Ruby

# typed: true
class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
# Local aliases for the page-version constants defined on
# Domain::Fa::Parser::Page, so the `case @page_version` branches in this
# class can reference them unqualified.
VERSION_0 = Domain::Fa::Parser::Page::VERSION_0
VERSION_1 = Domain::Fa::Parser::Page::VERSION_1
VERSION_2 = Domain::Fa::Parser::Page::VERSION_2
# Stores the parsed submission document and the page layout version.
#
# @param elem document object that the accessors query through `css`
# @param phtml stored in @phtml; not read by any method visible in this class body
# @param page_version value compared against the VERSION_* constants to pick selectors
def initialize(elem, phtml, page_version)
  @elem, @phtml, @page_version = elem, phtml, page_version
end
# Numeric submission id taken from the page's `og:url` meta tag (memoized).
#
# The id is the last non-empty path segment of the tag's `content` value,
# converted with `to_i`.
#
# @return [Integer, nil] nil when the meta tag or its `content` attribute
#   is absent
def id
  # @elem.css("form[name=myform]").first['action'].split("/").last.to_i
  @id ||=
    begin
      # Safe navigation covers both a missing tag and a tag without `content`.
      og_url = @elem.css("meta[property='og:url']").first&.[]("content")
      og_url.split("/").reject(&:empty?).last.to_i if og_url
    end
end
# `src` attribute of the first `#submissionImg` element, with surrounding
# whitespace stripped. Raises if the element or attribute is missing.
def small_img
  image_node = @elem.css("#submissionImg").first
  image_node["src"].strip
end
# Submission title text, stripped of surrounding whitespace.
# The selector depends on the page layout version.
def title
  # r = @elem.css(".cat").first.text.strip
  case @page_version
  when VERSION_0, VERSION_1
    heading = @elem.css("#page-submission td.cat b").first
    heading.text.strip
  when VERSION_2
    heading = @elem.css(".submission-title p").first
    heading.text.strip
  else
    unimplemented_version!
  end
end
# Display name of the submitting artist (memoized).
#
# On VERSION_2 pages a missing link yields nil; on VERSION_0/1 pages a
# missing link raises because the lookup is not nil-guarded.
def artist
  # @elem.css(".cat a").first.text.strip
  return @artist if @artist

  @artist =
    case @page_version
    when VERSION_0, VERSION_1
      link = @elem.css("#page-submission table.maintable td.cat a").first
      link.text.strip
    when VERSION_2
      links = @elem.css(".submission-id-sub-container a")
      links&.first&.text&.strip
    else
      unimplemented_version!
    end
end
# `href` of the link pointing at the artist's user page (memoized).
#
# The safe navigation sits on the first matched node, which is the value
# that can actually be nil, so a page without the link yields nil instead
# of raising NoMethodError. This matches `artist_avatar_url`.
#
# @return [String, nil] the raw `href` value, or nil when no link is found
def artist_user_page_path
  @artist_user_page_path ||=
    case @page_version
    when VERSION_2
      @elem.css(".submission-id-sub-container a").first&.[]("href")
    when VERSION_0, VERSION_1
      @elem.css("table[align=center] td.cat a").first&.[]("href")
    else
      unimplemented_version!
    end
end
# Last "/"-separated segment of `artist_user_page_path`.
#
# @return [String, nil] nil when the user page path itself is nil
def artist_url_name
  artist_user_page_path&.split("/")&.last
end
# `src` of the artist's avatar image (memoized); nil when no avatar
# element matches.
def artist_avatar_url
  return @artist_avatar_url if @artist_avatar_url

  @artist_avatar_url =
    case @page_version
    when VERSION_2
      avatar = @elem.css(".submission-user-icon.avatar")&.first
      avatar && avatar["src"]
    when VERSION_0, VERSION_1
      avatar = @elem.css("a img.avatar")&.first
      avatar && avatar["src"]
    else
      unimplemented_version!
    end
end
# Inner HTML of the submission description container.
# VERSION_0/1 take the last matching cell; VERSION_2 takes the first
# `.submission-description` element.
def description_html
  case @page_version
  # when VERSION_0
  #  @elem.css("#submission td.alt1 .maintable tr .alt1").last.inner_html
  when VERSION_0, VERSION_1
    cells = @elem.css("#page-submission td.alt1 .maintable tr .alt1")
    cells.last.inner_html
  when VERSION_2
    container = @elem.css(".submission-description").first
    container.inner_html
  else
    unimplemented_version!
  end
end
# URL of the full-resolution file.
#
# VERSION_0/1: stripped `href` of the second link matching
# "#page-submission div b a". VERSION_2: `href` of the button whose
# stripped text is exactly "Download" (not stripped, and raises if no such
# button exists).
def full_res_img
  case @page_version
  when VERSION_0, VERSION_1
    # @elem.css("#submission div b a")[1]["href"].strip
    links = @elem.css("#page-submission div b a")
    links[1]["href"].strip
  when VERSION_2
    buttons = @elem.css("a.button.standard.mobile-fix")
    download = buttons.find { |button| button.text.strip == "Download" }
    download["href"]
  else
    unimplemented_version!
  end
end
# Timestamp at which the submission was posted (memoized).
#
# Both layouts read the date from a `title` attribute and hand it to the
# same parser, so whitespace and ordinal day suffixes ("18th") are
# normalized identically for every page version.
#
# @return [DateTime, nil] nil when no date title could be found
# @raise [RuntimeError] for an unknown page version
def posted_date
  @posted_date ||=
    case @page_version
    # when VERSION_0
    #  info_child(6)["title"].strip
    when VERSION_0, VERSION_1
      idx = elem_idx_after_text_match(info_children, /Posted/)
      # The date lives on the first <span> among the six nodes starting at idx.
      child = info_children[idx..idx + 5].find { |ic| ic.name == "span" }
      parse_posted_date_title(child.try(:[], "title"))
    when VERSION_2
      # e.g. 'Feb 18, 2023 06:47 PM'; nil-safe when `.popup_date` is absent.
      parse_posted_date_title(@elem.css(".popup_date").first&.[]("title"))
    else
      raise("unimplemented version #{@page_version}")
    end
end

# Parses a date `title` value such as "Feb 18, 2023 06:47 PM" or
# "Feb 18th, 2023 06:47 PM". The format string carries no time zone.
#
# @param raw_title [String, nil]
# @return [DateTime, nil] nil when raw_title is nil
private def parse_posted_date_title(raw_title)
  date_str = raw_title&.strip
  return nil unless date_str

  # Drop ordinal suffixes so "%d" can consume the day number.
  DateTime.strptime(
    date_str.gsub(/(\d+)(st|nd|rd|th)/, '\1'),
    "%b %d, %Y %I:%M %p",
  )
end
# Content rating of the submission as a symbol.
#
# Only VERSION_2 pages are handled: the first of `.rating-box.mature`,
# `.rating-box.adult`, `.rating-box.general` (checked in that order) that
# matches an element decides the result.
#
# @return [Symbol] :mature, :adult or :general
# @raise [RuntimeError] when none of the rating boxes is present
def rating
  case @page_version
  when VERSION_2
    detected =
      %i[mature adult general].find do |level|
        @elem.css(".rating-box.#{level}").first
      end
    detected || raise("unable to determine rating")
  else
    unimplemented_version!
  end
end
# Submission category (memoized).
#
# VERSION_0/1 read the node found after the "Category" label; VERSION_2
# takes the part before " / " in the combined category string.
def category
  return @category if @category

  @category =
    case @page_version
    when VERSION_0, VERSION_1
      category_node = elem_after_text_match(info_children, /Category/)
      category_node.text.strip
    when VERSION_2
      segments = category_full_str_redux&.split(" / ")
      segments&.first&.strip
    else
      unimplemented_version!
    end
end
# Submission theme (memoized).
#
# VERSION_0/1 read the node found after the "Theme" label; VERSION_2 takes
# the part after the last " / " in the combined category string (the whole
# string when it contains no separator).
def theme
  return @theme if @theme

  @theme =
    case @page_version
    when VERSION_0, VERSION_1
      theme_node = elem_after_text_match(info_children, /Theme/)
      theme_node.text.strip
    when VERSION_2
      segments = category_full_str_redux&.split(" / ")
      segments&.last&.strip
    else
      unimplemented_version!
    end
end
# FA started combining "Category / Theme" into a single string; this returns
# that raw combined value from the "Category" info row (memoized).
# Only implemented for VERSION_2 pages.
def category_full_str_redux
  return @category_full_str_redux if @category_full_str_redux

  @category_full_str_redux =
    case @page_version
    when VERSION_2 then info_text_value_redux("Category")
    else unimplemented_version!
    end
end
# Species listed for the submission (memoized); nil when not present.
def species
  return @species if @species

  @species =
    case @page_version
    when VERSION_0, VERSION_1
      species_node = elem_after_text_match(info_children, /Species/)
      species_node.try(:text).try(:strip)
    when VERSION_2
      info_text_value_redux("Species")
    else
      unimplemented_version!
    end
end
# Gender listed for the submission (memoized); nil when not present.
def gender
  return @gender if @gender

  @gender =
    case @page_version
    when VERSION_0, VERSION_1
      gender_node = elem_after_text_match(info_children, /Gender/)
      gender_node.try(:text).try(:strip)
    when VERSION_2
      info_text_value_redux("Gender")
    else
      unimplemented_version!
    end
end
# Favorites count as an Integer (memoized), parsed with `to_i` from the
# stripped text of the matching stats element.
def num_favorites
  return @num_favorites if @num_favorites

  @num_favorites =
    case @page_version
    when VERSION_0, VERSION_1
      count_node = elem_after_text_match(info_children, /Favorites/)
      count_node.text.strip.to_i
    when VERSION_2
      count_node = stats_container_redux.css(".favorites .font-large").first
      count_node.text.strip.to_i
    else
      unimplemented_version!
    end
end
# Comment count as an Integer (memoized), parsed with `to_i` from the
# stripped text of the matching stats element.
def num_comments
  return @num_comments if @num_comments

  @num_comments =
    case @page_version
    when VERSION_0, VERSION_1
      count_node = elem_after_text_match(info_children, /Comments/)
      count_node.text.strip.to_i
    when VERSION_2
      count_node = stats_container_redux.css(".comments .font-large").first
      count_node.text.strip.to_i
    else
      unimplemented_version!
    end
end
# View count as an Integer (memoized), parsed with `to_i` from the
# stripped text of the matching stats element.
def num_views
  return @num_views if @num_views

  @num_views =
    case @page_version
    when VERSION_0, VERSION_1
      count_node = elem_after_text_match(info_children, /Views/)
      count_node.text.strip.to_i
    when VERSION_2
      count_node = stats_container_redux.css(".views .font-large").first
      count_node.text.strip.to_i
    else
      unimplemented_version!
    end
end
# Image resolution as a "WIDTHxHEIGHT"-style string (memoized).
#
# VERSION_0/1 read text near the "Resolution" label. VERSION_2 joins the
# first and last whitespace-separated tokens of the "Size" info row with
# "x" (presumably the row reads like "1280 x 720" — TODO confirm).
#
# @return [String, nil] nil when the page has no resolution/size entry
def resolution_str
  @resolution_str ||=
    case @page_version
    when VERSION_0
      elem_after_text_match(info_children, /Resolution/).try(:text).try(
        :strip,
      )
    when VERSION_1
      idx = elem_idx_after_text_match(info_children, /Resolution/)
      info_children[idx + 1].try(:text).try(:strip)
    when VERSION_2
      # info_text_value_redux returns nil when the row is missing, so guard
      # before splitting instead of raising NoMethodError.
      parts = info_text_value_redux("Size")&.split(" ")
      "#{parts.first}x#{parts.last}" if parts && !parts.empty?
    else
      unimplemented_version!
    end
end
# Keyword/tag texts as an Array of Strings (memoized).
# VERSION_0/1 return the link texts unmodified; VERSION_2 strips each one.
def keywords_array
  return @keywords_array if @keywords_array

  @keywords_array =
    case @page_version
    when VERSION_0, VERSION_1
      @elem.css("#keywords a").map { |link| link.text }
    when VERSION_2
      @elem.css(".tags-row .tags a").map { |link| link.text.strip }
    else
      unimplemented_version!
    end
end
private
# Child nodes of the information element.
def info_children
  info_root = information_elem
  info_root.children
end
# The i-th child node of the information element.
def info_child(i)
  child_nodes = information_elem.children
  child_nodes[i]
end
# First element matching the VERSION_0/1 information-cell selector
# (memoized). Not implemented for other page versions.
def information_elem
  return @information_elem if @information_elem

  @information_elem =
    case @page_version
    # when VERSION_0
    #  @elem.css("td td td td td td.alt1[align=left]").first
    when VERSION_0, VERSION_1
      cells = @elem.css("#page-submission td td td td.alt1[align=left]")
      cells.first
    else
      unimplemented_version!
    end
end
# First `section.info.text` element of the document, or nil if none matches.
def info_text_elem_redux
  sections = @elem.css("section.info.text")
  sections.first
end
# Looks up the value text of a labelled row in the info section.
#
# Finds the `.highlight` element whose text equals `info_section` exactly,
# then returns the stripped text of its parent's children with the first
# child dropped (presumably the label itself — TODO confirm).
#
# @param info_section [String] label text to match, e.g. "Category"
# @return [String, nil] nil when the info section or the label is absent
def info_text_value_redux(info_section)
  # The head of the chain is nil-guarded too, so a page without
  # `section.info.text` yields nil like every later missing link does.
  label =
    info_text_elem_redux
      &.css(".highlight")
      &.find { |e| e.text == info_section }
  return nil unless label

  label.parent&.children&.slice(1..)&.text&.strip
end
# All elements matching `.stats-container.text`; callers query counts
# (favorites, comments, views) inside the returned set.
def stats_container_redux
  selector = ".stats-container.text"
  @elem.css(selector)
end
end