Files
redux-scraper/app/lib/domain/fa/parser/user_page_helper.rb
2025-08-16 04:44:04 +00:00

517 lines
14 KiB
Ruby

# typed: strict
class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
extend T::Sig
# Page-layout version markers re-exported from Domain::Fa::Parser::Page so the
# `case @page_version` branches in this class can reference them unqualified.
VERSION_0 = Domain::Fa::Parser::Page::VERSION_0
VERSION_1 = Domain::Fa::Parser::Page::VERSION_1
VERSION_2 = Domain::Fa::Parser::Page::VERSION_2
# Wraps a parsed user page and declares the lazily-filled caches used by the
# reader methods of this class.
#
# elem         - the parsed HTML document for the page
# page_version - layout version marker, compared against VERSION_0/1/2
# page         - the owning Page parser; this class reads logged_in_user_tz
#                from it when parsing timestamps
sig do
params(
elem: Nokogiri::HTML::Document,
page_version: Symbol,
page: Domain::Fa::Parser::Page,
).void
end
def initialize(elem, page_version, page)
# Memoization slots: each starts as nil and is populated on first read by the
# method of the same name.
@name = T.let(nil, T.nilable(String))
@account_status = T.let(nil, T.nilable(Symbol))
@full_name = T.let(nil, T.nilable(String))
@artist_type = T.let(nil, T.nilable(String))
@profile_thumb_url = T.let(nil, T.nilable(String))
@registered_since = T.let(nil, T.nilable(Time))
@mood = T.let(nil, T.nilable(String))
@profile_html = T.let(nil, T.nilable(String))
@num_pageviews = T.let(nil, T.nilable(Integer))
@num_submissions = T.let(nil, T.nilable(Integer))
# "recieved" is misspelled, but it matches the public reader's name.
@num_comments_recieved = T.let(nil, T.nilable(Integer))
@num_comments_given = T.let(nil, T.nilable(Integer))
@num_journals = T.let(nil, T.nilable(Integer))
@num_favorites = T.let(nil, T.nilable(Integer))
@recent_fav_fa_ids = T.let(nil, T.nilable(T::Array[Integer]))
@recent_gallery_fa_ids = T.let(nil, T.nilable(T::Array[Integer]))
@recent_watchers = T.let(nil, T.nilable(T::Array[RecentUser]))
@recent_watching = T.let(nil, T.nilable(T::Array[RecentUser]))
@statistics = T.let(nil, T.nilable(Nokogiri::XML::Element))
@main_about = T.let(nil, T.nilable(Nokogiri::XML::Element))
@num_watched_by = T.let(nil, T.nilable(Integer))
@num_watching = T.let(nil, T.nilable(Integer))
@submissions_json_data = T.let(nil, T.nilable(T::Array[SubmissionData]))
# True when the document contains a `.js-displayName` element. Readers use
# this flag to pick the selectors for that newer markup (dated to March 2025
# by the variable name).
@march_2025_update =
T.let(elem.css(".js-displayName").first.present?, T::Boolean)
@elem = elem
@page_version = page_version
@page = page
end
# Status markers that can precede a user name in the page text. `name` strips
# them and `account_status` maps them to symbols.
# https://webcache.googleusercontent.com/search?q=cache:L85loB7VJXwJ:https://forums.furaffinity.net/threads/question-about-the-username-prefix.1655691/&cd=10&hl=en&ct=clnk&gl=us
PREFIXES = [
"~", # regular user
"!", # suspended
"-", # banned (permanent)
"@", # admin
# NOTE(review): `name` tests `PREFIXES.include?(name[0])`, and `name[0]` is a
# one-character string or nil, so this empty entry can never match there.
# Possibly the deceased marker glyph was lost from this literal — confirm.
"", # deceased
]
# Returns the user's display name with any status prefix removed, or nil when
# the name element is missing from the page.
#
# Pages with the March 2025 markup expose the name in `.js-displayName` and
# it is returned as-is. Older pages embed the status prefix (see PREFIXES) in
# the name text, so it is stripped here. Admins (flagged by a header icon)
# and FA+ users (flagged by a `faplus-icon` image) carry no prefix.
#
# Raises RuntimeError when an admin name starts with a prefix, or when a
# non-admin name has no recognised prefix and the page has no FA+ icon.
sig { returns(T.nilable(String)) }
def name
  @name ||=
    if @march_2025_update
      # Evaluated inside the `||=` (not via an early `return`) so this branch
      # is memoized like the others.
      @elem.css(".js-displayName")&.first&.text&.strip
    else
      name_nodes =
        case @page_version
        when VERSION_0, VERSION_1
          @elem.css(".addpad.lead b")
        when VERSION_2
          @elem.css("userpage-nav-user-details username")
        else
          unimplemented_version!
        end
      raw_name = name_nodes&.first&.text&.strip

      if raw_name.nil?
        nil
      elsif @elem.css("userpage-nav-header img.userIcon.type-admin").first
        # in newer versions of FA user pages, admins have no '@' prefix,
        # but rather an indicator image icon
        if PREFIXES.include?(raw_name[0])
          raise("invalid prefix for admin user name: #{raw_name}")
        end
        raw_name
      elsif PREFIXES.include?(raw_name[0])
        raw_name[1..]
      elsif @elem.css("img.userIcon.faplus-icon").first
        # FA+ users have no prefix. `.first` is required: `css` returns a
        # NodeSet, which is truthy even when it is empty.
        raw_name
      else
        raise("invalid prefix for name: #{raw_name}")
      end
    end
end
# Derives the account status from the page.
#
# An admin icon in the page header short-circuits to :admin. Otherwise the
# status marker is read from `.c-usernameBlock__symbol` (March 2025 markup)
# or from the first character of the VERSION_2 user name, and mapped to a
# symbol. Unrecognised markers yield nil. Without the March 2025 markup, any
# page version other than VERSION_2 goes through unimplemented_version!.
sig { returns(T.nilable(Symbol)) }
def account_status
  @account_status ||=
    if @elem.css("userpage-nav-header img.userIcon.type-admin").first
      :admin
    else
      marker =
        if @march_2025_update
          symbol_nodes = @elem.css(".c-usernameBlock__symbol")
          symbol_nodes&.first&.text&.strip || ""
        else
          name_nodes =
            case @page_version
            when VERSION_2 then @elem.css("userpage-nav-user-details username")
            else unimplemented_version!
            end
          prefixed_name = name_nodes&.first&.text&.strip || ""
          prefixed_name[0]
        end

      # No `else`: an unknown marker makes the `case` evaluate to nil.
      case marker
      when "~" then :active
      when "!" then :suspended
      when "@" then :admin
      when "-" then :banned
      when "" then :deceased
      end
    end
end
# Memoized, stripped text of the node that Page.elem_after_text_match returns
# for /Full/ among the about section's children; nil when no node is returned.
sig { returns(T.nilable(String)) }
def full_name
  @full_name ||=
    begin
      value_node =
        Domain::Fa::Parser::Page.elem_after_text_match(
          main_about.children,
          /Full/,
        )
      value_node&.text&.strip
    end
end
# Memoized, stripped text of the node that Page.elem_after_text_match returns
# for /Type/ among the about section's children; nil when no node is returned.
sig { returns(T.nilable(String)) }
def artist_type
  @artist_type ||=
    begin
      value_node =
        Domain::Fa::Parser::Page.elem_after_text_match(
          main_about.children,
          /Type/,
        )
      value_node&.text&.strip
    end
end
# Memoized `src` attribute of the avatar image, or nil when the image is not
# found. Only VERSION_0 and VERSION_2 layouts are handled; anything else goes
# through unimplemented_version!.
sig { returns(T.nilable(String)) }
def profile_thumb_url
  @profile_thumb_url ||=
    begin
      avatar_img =
        case @page_version
        when VERSION_0
          @elem.css(".addpad.alt1 a img.avatar").first
        when VERSION_2
          @elem.css("userpage-nav-avatar a.current img").first
        else
          unimplemented_version!
        end
      # `try` keeps the nil-tolerant lookup when no avatar element matched.
      avatar_img.try(:[], "src")
    end
end
# Memoized registration timestamp, parsed with the page's logged_in_user_tz.
# Returns nil when no registration text is found on the page.
#
# VERSION_0/1 read the value via Page.elem_after_text_match(/Registered/) on
# the about section. VERSION_2 finds the element whose stripped text is
# "Registered:" and reads the text of its next sibling; only the selector for
# that element differs between the March 2025 markup and the earlier one.
sig { returns(T.nilable(Time)) }
def registered_since
  @registered_since ||=
    begin
      raw_time =
        case @page_version
        when VERSION_0, VERSION_1
          value_node =
            Domain::Fa::Parser::Page.elem_after_text_match(
              main_about.children,
              /Registered/,
            )
          value_node&.text&.strip
        when VERSION_2
          label_selector =
            if @march_2025_update
              "userpage-nav-user-details .user-title .hideonmobile"
            else
              "username span"
            end
          label_node =
            @elem
              .css(label_selector)
              .find { |node| node&.text&.strip == "Registered:" }
          label_node&.next_sibling&.text&.strip
        else
          unimplemented_version!
        end
      raw_time ? @page.logged_in_user_tz.parse(raw_time) : nil
    end
end
# Memoized, stripped text of the node that Page.elem_after_text_match returns
# for /mood/ among the about section's children; nil when no node is returned.
sig { returns(T.nilable(String)) }
def mood
  @mood ||=
    begin
      value_node =
        Domain::Fa::Parser::Page.elem_after_text_match(
          main_about.children,
          /mood/,
        )
      value_node&.text&.strip
    end
end
# Memoized inner HTML of the about section (see main_about), with its
# encoding tag forced to UTF-8.
sig { returns(String) }
def profile_html
@profile_html ||= main_about.inner_html.force_encoding("utf-8")
end
# Memoized page-view count (per the method name), read via stat_value with
# legacy key :pvs and VERSION_2 index 0.
sig { returns(Integer) }
def num_pageviews
@num_pageviews ||= stat_value(:pvs, 0)
end
# Memoized submission count (per the method name), read via stat_value with
# legacy key :subs and VERSION_2 index 1.
sig { returns(Integer) }
def num_submissions
@num_submissions ||= stat_value(:subs, 1)
end
# Memoized count of comments received (per the method name), read via
# stat_value with legacy key :crec and VERSION_2 index 3.
# The "recieved" spelling is part of the public method name.
sig { returns(Integer) }
def num_comments_recieved
@num_comments_recieved ||= stat_value(:crec, 3)
end
# Memoized count of comments given (per the method name), read via
# stat_value with legacy key :cgiv and VERSION_2 index 4.
sig { returns(Integer) }
def num_comments_given
@num_comments_given ||= stat_value(:cgiv, 4)
end
# Memoized journal count (per the method name), read via stat_value with
# legacy key :njr and VERSION_2 index 5.
sig { returns(Integer) }
def num_journals
@num_journals ||= stat_value(:njr, 5)
end
# Memoized favorites count (per the method name), read via stat_value with
# legacy key :nfav and VERSION_2 index 2.
sig { returns(Integer) }
def num_favorites
@num_favorites ||= stat_value(:nfav, 2)
end
# Memoized numeric ids taken from the `/view/<id>` links in the page's
# favorites area.
#
# VERSION_0 silently skips links whose href has no `/view/<id>` part.
# VERSION_2 raises RuntimeError for such a link and returns [] when the
# `#gallery-latest-favorites` container is absent.
sig { returns(T::Array[Integer]) }
def recent_fav_fa_ids
  @recent_fav_fa_ids ||=
    case @page_version
    when VERSION_0
      fav_links =
        @elem.css(".userpage-first-favorite s a, .userpage-favorites s a")
      fav_links.filter_map do |link|
        # A nil result (no id in the href) is dropped by filter_map.
        %r{/view/(\d+)}.match(link["href"])&.[](1)&.to_i
      end
    when VERSION_2
      favorites_box = @elem.css("#gallery-latest-favorites").first
      fa_ids =
        favorites_box&.css("figure a")&.map do |link|
          target = link["href"]
          id_match = %r{/view/(\d+)}.match(target)
          id_match&.[](1)&.to_i || raise("invalid url: #{target}")
        end
      fa_ids || []
    else
      unimplemented_version!
    end
end
# Immutable record for one entry of the page's embedded submission JSON, as
# built by submissions_json_data.
class SubmissionData < T::ImmutableStruct
include T::Struct::ActsAsComparable
# the entry's JSON object key, converted with to_i
const :fa_id, Integer
# data["title"]
const :title, String
# data["username"]
const :user_full_name, String
# data["lower"]
const :url_name, String
# data["date_full"], or the title attribute of the span in data["html_date"]
const :date_full, String
# data["icon_rating"]
const :icon_rating, String
# date_full parsed with the page's logged_in_user_tz
const :faved_at, Time
end
# Memoized list of SubmissionData parsed from the JSON embedded in the page.
#
# The JSON is taken from the `#js-submissionData` element when present,
# otherwise from a `submission_data = ...;` assignment found in the page
# HTML. Each entry's date comes from "date_full", falling back to the title
# attribute of the span inside "html_date".
#
# Raises RuntimeError when no JSON is found or when an entry has no usable
# date. Only VERSION_0 and VERSION_2 are handled.
sig { returns(T::Array[SubmissionData]) }
def submissions_json_data
  @submissions_json_data ||=
    case @page_version
    when VERSION_0, VERSION_2
      data_node = @elem.css("#js-submissionData").first
      raw_json =
        T.cast(
          if data_node
            data_node.inner_html&.to_s
          else
            @elem.inner_html.match(/submission_data = (.*);$/)&.[](1)&.to_s
          end,
          T.nilable(String),
        )
      raise("no submission data json found") unless raw_json

      JSON.parse(raw_json).map do |fa_id, data|
        date_full = data["date_full"]
        unless date_full
          html_date = data["html_date"]
          fragment = html_date && Nokogiri::XML.parse(html_date)
          date_full =
            fragment&.css("span")&.attr("title")&.to_s ||
              raise("invalid html date: #{data}")
        end

        SubmissionData.new(
          fa_id: fa_id.to_i,
          title: data["title"],
          user_full_name: data["username"],
          url_name: data["lower"],
          date_full: date_full,
          icon_rating: data["icon_rating"],
          faved_at: @page.logged_in_user_tz.parse(date_full),
        )
      end
    else
      unimplemented_version!
    end
end
# Memoized numeric ids taken from the `/view/<id>` links inside
# `#gallery-latest-submissions` (VERSION_2 only).
#
# Returns [] when the container is absent and raises RuntimeError for a link
# whose href has no `/view/<id>` part.
sig { returns(T::Array[Integer]) }
def recent_gallery_fa_ids
  @recent_gallery_fa_ids ||=
    case @page_version
    when VERSION_2
      gallery_box = @elem.css("#gallery-latest-submissions").first
      fa_ids =
        gallery_box&.css("figure a")&.map do |link|
          target = link["href"]
          id_match = %r{/view/(\d+)}.match(target)
          id_match&.[](1)&.to_i || raise("invalid url: #{target}")
        end
      fa_ids || []
    else
      unimplemented_version!
    end
end
# Immutable record pairing a submission id and title with its creator.
# NOTE(review): nothing in the visible part of this file constructs it —
# confirm it is still used elsewhere.
class JSONSubmissionData < T::ImmutableStruct
include T::Struct::ActsAsComparable
const :fa_id, Integer
const :title, String
const :creator, Domain::User::FaUser
end
# A user listed in one of the page's recent-watcher sections, as built by
# recent_users_for_section.
class RecentUser < T::Struct
include T::Struct::ActsAsComparable
extend T::Sig
# display text of the user's link
const :name, String
# the `/user/<url_name>/` part of the link href, URL-unescaped
const :url_name, String
# Returns the two fields as `[name, url_name]`.
sig { returns(T::Array[String]) }
def to_a
[name, url_name]
end
end
# Memoized users listed in the page section headed "Recent Watchers".
sig { returns(T::Array[RecentUser]) }
def recent_watchers
@recent_watchers ||= recent_users_for_section("Recent Watchers")
end
# Memoized users listed in the page section headed "Recently Watched".
sig { returns(T::Array[RecentUser]) }
def recent_watching
@recent_watching ||= recent_users_for_section("Recently Watched")
end
# Memoized number captured from the "Watched by <n>" text in the first
# `.floatright` element under the parent of the "Recent Watchers" section.
sig { returns(Integer) }
def num_watched_by
  @num_watched_by ||=
    begin
      section = recent_users_section("Recent Watchers")
      counter_text = section.parent.css(".floatright").first.text
      counter_text.match(/Watched by (\d+)/)[1].to_i
    end
end
# Memoized number captured from the "Watching <n>" text in the first
# `.floatright` element under the parent of the "Recently Watched" section.
sig { returns(Integer) }
def num_watching
  @num_watching ||=
    begin
      section = recent_users_section("Recently Watched")
      counter_text = section.parent.css(".floatright").first.text
      counter_text.match(/Watching (\d+)/)[1].to_i
    end
end
private
# Builds a RecentUser for every link in the body of the left-column section
# whose header text equals section_name.
#
# section_name - exact, stripped header text of the section to read
#
# The section lookup and the page-version check are delegated to
# recent_users_section, so both methods locate sections the same way.
# Raises RuntimeError when a link's href has no `/user/<name>/` part.
sig { params(section_name: String).returns(T::Array[RecentUser]) }
def recent_users_for_section(section_name)
  section_body =
    recent_users_section(section_name).css(".section-body").first

  # The element holding the display name was renamed in the March 2025 markup.
  name_selector =
    if @march_2025_update
      ".c-usernameBlockSimple__displayName"
    else
      ".artist_name"
    end

  section_body
    .css("a")
    .map do |link_elem|
      href = link_elem["href"]
      url_name =
        %r{/user/(.+)/}.match(href)&.[](1) || raise("invalid url: #{href}")
      RecentUser.new(
        name: link_elem.css(name_selector).first.text.strip,
        url_name: CGI.unescape(url_name),
      )
    end
end
# Finds the `.userpage-section-left` element whose `.section-header h2` text,
# once stripped, equals section_name (VERSION_2 only).
sig { params(section_name: String).returns(Nokogiri::XML::Element) }
def recent_users_section(section_name)
  case @page_version
  when VERSION_2
    left_sections = @elem.css(".userpage-section-left")
    left_sections.find do |section|
      heading = section.css(".section-header h2")&.first
      heading&.text&.strip == section_name
    end
  else
    unimplemented_version!
  end
end
# Reads one integer from the profile statistics block (see statistics).
#
# legacy_name - key into the child-node index table used for VERSION_0/1
# redux_idx   - position among the `.highlight` elements used for VERSION_2;
#               the value is the text of that element's next sibling
#
# Returns the parsed Integer, or 0 when the value is at or above 2**32 - 1.
# Raises RuntimeError when legacy_name is unknown or when the VERSION_2
# statistic cannot be located.
sig { params(legacy_name: Symbol, redux_idx: Integer).returns(Integer) }
def stat_value(legacy_name, redux_idx)
  value =
    case @page_version
    when VERSION_0, VERSION_1
      # Child-node index of each statistic inside the legacy stats cell.
      # An earlier layout ("old mode?") apparently used
      # { pvs: 2, subs: 5, crec: 8, cgiv: 11, njr: 14, nfav: 17 } instead.
      legacy_map = { pvs: 2, subs: 6, crec: 10, cgiv: 14, njr: 18, nfav: 22 }
      child_idx =
        legacy_map[legacy_name] ||
          raise("unknown legacy stat name: #{legacy_name}")
      statistics.children[child_idx].text.strip.to_i
    when VERSION_2
      # Fail with a descriptive error rather than letting a nil reach the
      # comparison below.
      statistics.css(".highlight")[redux_idx]&.next_sibling&.text&.strip&.to_i ||
        raise("stat value not found at index #{redux_idx}")
    else
      unimplemented_version!
    end

  # FA databases can be a little weird
  value >= (2**32 - 1) ? 0 : value
end
# Memoized element that holds the profile statistics.
#
# VERSION_0/1: the first cell in the last row of the `.ldot` table.
# VERSION_2: the right-column section whose header text is "Stats".
sig { returns(Nokogiri::XML::Element) }
def statistics
  @statistics ||=
    case @page_version
    when VERSION_0, VERSION_1
      @elem.css(".ldot table tr:last-child td").first
    when VERSION_2
      right_sections =
        @elem.css(
          ".userpage-layout-right-col-content .userpage-section-right",
        )
      right_sections.find do |section|
        heading = section.css(".section-header h2")&.first
        heading&.text&.strip == "Stats"
      end
    else
      unimplemented_version!
    end
end
# Memoized element containing the profile's "about" content: the first
# `.alt1.addpad .ldot` match for VERSION_0/1, or the first
# `.section-body.userpage-profile` match for VERSION_2.
sig { returns(Nokogiri::XML::Element) }
def main_about
  @main_about ||=
    case @page_version
    when VERSION_0, VERSION_1 then @elem.css(".alt1.addpad .ldot").first
    when VERSION_2 then @elem.css(".section-body.userpage-profile").first
    else unimplemented_version!
    end
end
end