Files
redux-scraper/app/lib/domain/sofurry/gallery_page_parser.rb
2025-07-01 21:02:23 +00:00

87 lines
2.4 KiB
Ruby

# typed: strict
# Extracts folder links, post entries and the displayed username from the
# HTML of a gallery page. The document is parsed once in the constructor;
# `folders` and `posts` are computed lazily and memoized.
class Domain::Sofurry::GalleryPageParser
  extend T::Sig

  # A folder link found on the page: its link text and the integer taken
  # from the `folder` query parameter of its href.
  class Folder < T::Struct
    include T::Struct::ActsAsComparable
    const :name, String
    const :id, Integer
  end

  # Media type names. Not referenced by the parser methods in this class.
  class MediaType < T::Enum
    enums do
      Stories = new("stories")
      Art = new("art")
      Music = new("music")
      Journals = new("journals")
      Photos = new("photos")
    end
  end

  # A post listed on the page: the integer taken from the last path segment
  # of its link, and its title.
  class ShortGalleryEntry < T::Struct
    include T::Struct::ActsAsComparable
    const :id, Integer
    const :title, String
  end

  # page    - raw HTML of the gallery page.
  # user_id - id every folder link's `by` query parameter must match.
  sig { params(page: String, user_id: Integer).void }
  def initialize(page, user_id:)
    @doc = T.let(Nokogiri.HTML(page), Nokogiri::HTML::Document)
    @page = T.let(page, String)
    # Memo slot for #folders; holds Folder structs, not strings.
    @folders = T.let(nil, T.nilable(T::Array[Folder]))
    @user_id = T.let(user_id, Integer)
    @posts = T.let(nil, T.nilable(T::Array[ShortGalleryEntry]))
  end

  # Returns the folders linked from the folder list (memoized).
  #
  # Raises RuntimeError when a folder link's `by` parameter does not match
  # the user_id given to the constructor, or when the link carries no
  # `folder` parameter.
  sig { returns(T::Array[Folder]) }
  def folders
    @folders ||=
      @doc
        .css(".sfBrowseListFolders .items .sfArtworkSmallWrapper strong a")
        .map do |elem|
          href = elem.attr("href")
          # URI.parse(nil) is nil and query_values is nil without a query
          # string; fall back to an empty hash so the explicit errors below
          # fire instead of a NoMethodError.
          href_params = Addressable::URI.parse(href)&.query_values || {}

          user_id = href_params["by"]
          if user_id&.to_i != @user_id
            raise "user_id mismatch: #{user_id} != #{@user_id}"
          end

          folder_id = href_params["folder"]
          if folder_id.nil?
            raise "folder id missing in folder link: #{href}"
          end

          Folder.new(name: elem.text, id: folder_id.to_i)
        end
  end

  # Returns the posts found on the page (memoized): entries from the
  # shortlist rows first, followed by entries from the thumbnail links.
  sig { returns(T::Array[ShortGalleryEntry]) }
  def posts
    @posts ||= shortlist_posts + thumbnail_posts
  end

  # Returns the stripped text of the first `.user-info .user-text` element,
  # or nil when the page has no such element.
  sig { returns(T.nilable(String)) }
  def username
    # strip already removes trailing newlines, so no chomp is needed.
    @doc.at_css(".user-info .user-text")&.text&.strip
  end

  private

  # Entries from shortlist rows. Rows without a title link, or whose link
  # yields no id, are skipped.
  sig { returns(T::Array[ShortGalleryEntry]) }
  def shortlist_posts
    @doc
      .css(".sf-browse-shortlist,.sf-browse-shortlist-zebra")
      .filter_map do |row|
        link = row.at_css(".sf-browse-shortlist-title a") || next
        id = id_from_href(link.attr("href")) || next
        ShortGalleryEntry.new(id:, title: link.text)
      end
  end

  # Entries from thumbnail links. Links that yield no id, or that lack an
  # `img.sfArtworkItem` child with an alt attribute, are skipped.
  sig { returns(T::Array[ShortGalleryEntry]) }
  def thumbnail_posts
    @doc
      .css("a.sfArtworkSmallInner")
      .filter_map do |link|
        id = id_from_href(link.attr("href")) || next
        img = link.at_css("img.sfArtworkItem") || next
        alt = img.attr("alt")&.strip || next
        # The title is whatever precedes the last "|by" in the alt text.
        # rpartition yields an empty head when the separator is absent, so
        # use the whole alt text in that case rather than an empty title.
        head, separator, = alt.rpartition("|by")
        title = separator.empty? ? alt : head
        ShortGalleryEntry.new(id:, title:)
      end
  end

  # Integer value of the last "/"-separated segment of href, or nil when
  # href is nil or has no segments.
  sig { params(href: T.nilable(String)).returns(T.nilable(Integer)) }
  def id_from_href(href)
    href&.split("/")&.last&.to_i
  end
end