implement fa parser for browse page / gallery page / submission page

This commit is contained in:
2023-02-18 16:32:41 -08:00
parent b13851780a
commit d4072f6aca
8 changed files with 4499 additions and 16 deletions

View File

@@ -67,12 +67,20 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
end
# Returns the listing elements (one per submission) for the current page.
#
# The result is memoized in @submission_elems, so the document is queried
# on the first call only.
#
# Selectors by @page_version:
# * VERSION_0: `.t-image`
# * VERSION_1: `.submission-list > .gallery > figure`
# * VERSION_2: the gallery selector first; when it matches nothing, the
#   `#gallery-browse > figure` selector instead.
#
# Any other version is handed to unimplemented_version!.
def submission_elems
  @submission_elems ||=
    case @page_version
    when VERSION_0 then @page.css(".t-image")
    when VERSION_1 then @page.css(".submission-list > .gallery > figure")
    when VERSION_2
      # user gallery pages are under .submission-list
      gallery_figures = @page.css(".submission-list > .gallery > figure")
      # /browse/ page is under #gallery-browse
      gallery_figures.empty? ? @page.css("#gallery-browse > figure") : gallery_figures
    else unimplemented_version!
    end
end
def logged_in_user

View File

@@ -23,13 +23,32 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
# Returns the submission title with surrounding whitespace stripped.
#
# * VERSION_0 / VERSION_1: text of the first `#page-submission td.cat b`.
# * VERSION_2: text of the first `.submission-title p`.
#
# Any other version is handed to unimplemented_version!.
# There is no nil guard: when the selector matches nothing, the
# `.first.text` chain fails.
def title
  case @page_version
  when VERSION_0, VERSION_1
    @elem.css("#page-submission td.cat b").first.text.strip
  when VERSION_2
    @elem.css(".submission-title p").first.text.strip
  else unimplemented_version!
  end
end
# Returns the artist's display name, memoized in @artist.
#
# * VERSION_0 / VERSION_1: stripped text of the first
#   `#page-submission table.maintable td.cat a`.
# * VERSION_2: stripped text of the first `.submission-id-sub-container a`,
#   or nil when that anchor is absent (safe navigation).
#
# Any other version is handed to unimplemented_version!.
def artist
  @artist ||=
    case @page_version
    when VERSION_0, VERSION_1
      @elem.css("#page-submission table.maintable td.cat a").first.text.strip
    when VERSION_2
      @elem.css(".submission-id-sub-container a")&.first&.text&.strip
    else unimplemented_version!
    end
end
# Returns the `href` of the artist link, memoized in
# @artist_user_page_path.
#
# Only VERSION_2 is supported: the value comes from the first
# `.submission-id-sub-container a`. Returns nil when that anchor is absent,
# mirroring how `artist` treats the same selector.
#
# Any other version is handed to unimplemented_version!.
def artist_user_page_path
  @artist_user_page_path ||=
    case @page_version
    when VERSION_2
      # Guard the element, not the node set: `first` is what can be nil.
      @elem.css(".submission-id-sub-container a").first&.[]("href")
    else unimplemented_version!
    end
end
def description_html
@@ -38,6 +57,8 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
# @elem.css("#submission td.alt1 .maintable tr .alt1").last.inner_html
when VERSION_0, VERSION_1
@elem.css("#page-submission td.alt1 .maintable tr .alt1").last.inner_html
when VERSION_2
@elem.css(".submission-description").first.inner_html
else unimplemented_version!
end
end
@@ -49,6 +70,10 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
# @elem.css("#submission div b a")[1]["href"].strip
when VERSION_1
@elem.css("#page-submission div b a")[1]["href"].strip
when VERSION_2
@elem.css("a.button.standard.mobile-fix").find do |elem|
elem.text.strip == "Download"
end["href"]
else unimplemented_version!
end
end
@@ -61,37 +86,81 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
idx = elem_idx_after_text_match(info_children, /Posted/)
child = info_children[idx..idx + 5].find { |ic| ic.name == "span" }
child.try(:[], "title").try(:strip)
when VERSION_2
date_str = @elem.css(".popup_date").first["title"]
# e.g. 'Feb 18, 2023 06:47 PM'
DateTime.strptime(date_str, "%b %d, %Y %I:%M %p") if date_str
else
raise("unimplemented version #{@page_version}")
end
end
# Returns the submission's category string, memoized in @category.
#
# * VERSION_0 / VERSION_1: stripped text of the element that
#   elem_after_text_match finds for /Category/ in info_children.
# * VERSION_2: info_text_value_redux("Category").
#
# Any other version is handed to unimplemented_version!.
def category
  @category ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Category/).text.strip
    when VERSION_2
      info_text_value_redux("Category")
    else unimplemented_version!
    end
end
# Returns the submission's theme string, memoized in @theme.
#
# Only VERSION_0 / VERSION_1 are supported: stripped text of the element
# that elem_after_text_match finds for /Theme/ in info_children.
#
# Every other version (including VERSION_2) is handed to
# unimplemented_version!.
def theme
  @theme ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Theme/).text.strip
    else unimplemented_version!
    end
end
# Returns the submission's species string, memoized in @species.
#
# * VERSION_0 / VERSION_1: stripped text of the element that
#   elem_after_text_match finds for /Species/; the `try` chain yields nil
#   when no element is found.
# * VERSION_2: info_text_value_redux("Species").
#
# Any other version is handed to unimplemented_version!.
def species
  @species ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Species/).try(:text).try(:strip)
    when VERSION_2
      info_text_value_redux("Species")
    else unimplemented_version!
    end
end
# Returns the submission's gender string, memoized in @gender.
#
# * VERSION_0 / VERSION_1: stripped text of the element that
#   elem_after_text_match finds for /Gender/; the `try` chain yields nil
#   when no element is found.
# * VERSION_2: info_text_value_redux("Gender").
#
# Any other version is handed to unimplemented_version!.
def gender
  @gender ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Gender/).try(:text).try(:strip)
    when VERSION_2
      info_text_value_redux("Gender")
    else unimplemented_version!
    end
end
# Returns the favorites count as an Integer, memoized in @num_favorites.
#
# * VERSION_0 / VERSION_1: text of the element that elem_after_text_match
#   finds for /Favorites/, converted with `to_i`.
# * VERSION_2: text of the first `.favorites .font-large` inside
#   stats_container_redux, converted with `to_i`.
#
# Any other version is handed to unimplemented_version!.
def num_favorites
  @num_favorites ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Favorites/).text.strip.to_i
    when VERSION_2
      stats_container_redux.css(".favorites .font-large").first.text.strip.to_i
    else unimplemented_version!
    end
end
# Returns the comment count as an Integer, memoized in @num_comments.
#
# * VERSION_0 / VERSION_1: text of the element that elem_after_text_match
#   finds for /Comments/, converted with `to_i`.
# * VERSION_2: text of the first `.comments .font-large` inside
#   stats_container_redux, converted with `to_i`.
#
# Any other version is handed to unimplemented_version!.
def num_comments
  @num_comments ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Comments/).text.strip.to_i
    when VERSION_2
      stats_container_redux.css(".comments .font-large").first.text.strip.to_i
    else unimplemented_version!
    end
end
# Returns the view count as an Integer, memoized in @num_views.
#
# * VERSION_0 / VERSION_1: text of the element that elem_after_text_match
#   finds for /Views/, converted with `to_i`.
# * VERSION_2: text of the first `.views .font-large` inside
#   stats_container_redux, converted with `to_i`.
#
# Any other version is handed to unimplemented_version!.
def num_views
  @num_views ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Views/).text.strip.to_i
    when VERSION_2
      stats_container_redux.css(".views .font-large").first.text.strip.to_i
    else unimplemented_version!
    end
end
def resolution_str
@@ -101,6 +170,9 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
when VERSION_1
idx = elem_idx_after_text_match(info_children, /Resolution/)
info_children[idx + 1].try(:text).try(:strip)
when VERSION_2
parts = info_text_value_redux("Size").split(" ")
parts.first + "x" + parts.last
else unimplemented_version!
end
end
@@ -128,4 +200,20 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
else unimplemented_version!
end
end
# Returns the first `section.info.text` element found under @elem
# (whatever `first` yields when nothing matches).
def info_text_elem_redux
  info_sections = @elem.css("section.info.text")
  info_sections.first
end
# Looks up the value shown next to a label in the info section.
#
# info_section - exact text of the `.highlight` label to look for
#                (e.g. "Category", "Species", "Size").
#
# Returns the stripped text of the label's siblings: every child of the
# label's parent except the first. Returns nil when no `.highlight`
# element carries that label.
def info_text_value_redux(info_section)
  label = info_text_elem_redux.css(".highlight").find { |e| e.text == info_section }
  # Not every page carries every label; report "no value" instead of
  # failing on nil.
  return nil unless label

  # children[0] is skipped as the label; the remaining children hold the value.
  label.parent.children[1..].text.strip
end
# Returns everything under @elem that matches `.stats-container.text`;
# the favorites / comments / views counters are read from this result.
def stats_container_redux
  stats_selector = ".stats-container.text"
  @elem.css(stats_selector)
end
end

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -28,6 +28,7 @@ class Domain::Fa::Parser::ReduxPageTest < ActiveSupport::TestCase
def test_user_page_is_correct
parser = get_parser "user_page_miles_df.html"
assert parser.logged_in?
assert parser.probably_user_page?
up = parser.user_page
@@ -43,6 +44,118 @@ class Domain::Fa::Parser::ReduxPageTest < ActiveSupport::TestCase
assert_equal DateTime.new(2006, 1, 12, 7, 52), up.registered_since
end
# A user gallery fixture parses into 48 listings with the expected first
# entry and two submission folders; an empty gallery fixture is still
# recognised as a listings page and yields no listings.
def test_gallery_is_correct
  gallery = get_parser "gallery_page_miles_df.html"
  assert gallery.logged_in?
  assert gallery.probably_listings_page?

  entries = gallery.submissions_parsed
  assert_equal 48, entries.length
  assert_equal 51_039_083, entries.first.id

  # Attribute => expected value for the first listing, checked in order.
  head = entries.first
  {
    artist: "Miles-DF",
    artist_path: "/user/miles-df/",
    title: "booty",
    view_path: "/view/51039083/",
    thumb_path: "//t.furaffinity.net/51039083@300-1676577120.jpg",
  }.each do |attribute, expected|
    assert_equal expected, head.public_send(attribute)
  end

  assert_equal 2, gallery.submission_folders.length

  empty_gallery = get_parser "gallery_page_empty_miles_df.html"
  assert empty_gallery.probably_listings_page?
  assert_equal 0, empty_gallery.submissions_parsed.length
end
# A gallery fixture with many folders: checks the first and last of the 48
# listings and spot-checks folder entries at indexes 0, 1, 2, 9 and 13.
def test_gallery_is_correct_with_folders
  gallery = get_parser "gallery_page_with_folders_feretta.html"
  assert gallery.probably_listings_page?

  entries = gallery.submissions_parsed
  assert_equal 48, entries.length

  # Each pair is [listing, attribute => expected value], checked in order.
  [
    [
      entries.first,
      {
        id: 51_066_954,
        artist: "Feretta",
        artist_path: "/user/feretta/",
        title: "[Comm] Size and property - 2/3",
        view_path: "/view/51066954/",
        thumb_path: "//t.furaffinity.net/51066954@200-1676761927.jpg",
      },
    ],
    [
      entries.last,
      {
        id: 50_320_568,
        artist: "Feretta",
        artist_path: "/user/feretta/",
        title: "[Comm] Sizable date - 1/3",
        view_path: "/view/50320568/",
        thumb_path: "//t.furaffinity.net/50320568@200-1671750577.jpg",
      },
    ],
  ].each do |listing, expectations|
    expectations.each do |attribute, expected|
      assert_equal expected, listing.public_send(attribute)
    end
  end

  # Folder index => expected entry.
  expected_folders = {
    0 => { href: "/scraps/feretta/", title: "Scraps" },
    1 => { href: "/gallery/feretta/folder/60236/Tale-of-Tails", title: "Tale of Tails" },
    2 => { href: "/gallery/feretta/folder/60234/Illustrations", title: "Illustrations" },
    9 => { href: "/gallery/feretta/folder/229520/Marketing", title: "Marketing" },
    13 => { href: "/gallery/feretta/folder/820310/3D", title: "3D" },
  }
  folders = gallery.submission_folders
  expected_folders.each do |index, folder|
    assert_equal(folder, folders[index])
  end
end
# The /browse/ fixture is recognised as a listings page (and not as a
# submission page) and its first listing parses to the expected values.
def test_browse_page
  # https://www.furaffinity.net/browse/
  browse = get_parser "browse_page.html"
  assert browse.probably_listings_page?
  refute browse.probably_submission?

  entries = browse.submissions_parsed
  # FA appears to respond with only 47?
  assert_equal 47, entries.length

  head = entries.first
  assert_equal 51_067_352, head.id
  assert_equal "CleverDerpy", head.artist
  assert_equal "/user/cleverderpy/", head.artist_path
  assert_equal "The Second Kind of Tarpit", head.title
  assert_equal "/view/51067352/", head.view_path
  assert_equal "//t.furaffinity.net/51067352@400-1676764142.jpg", head.thumb_path
end
# A single submission fixture parses into the expected id, artist, title,
# image URLs, posted date, info fields, counters, resolution, keywords and
# description.
def test_submission_is_correct
  page = get_parser "submission_51067333_blauhaher.html"
  assert page.probably_submission?

  submission = page.submission
  image_url = "//d.furaffinity.net/art/blauhaher/1676764049/1676764049.blauhaher_хорнь.png"

  # identity
  assert_equal 51_067_333, submission.id
  assert_equal "BlauHaher", submission.artist
  assert_equal "/user/blauhaher/", submission.artist_user_page_path
  assert_equal '"CUTIE!"', submission.title

  # images: small and full-resolution are expected to be the same URL here
  assert_equal image_url, submission.small_img
  assert_equal image_url, submission.full_res_img

  # info section
  assert_equal DateTime.new(2023, 2, 18, 18, 47), submission.posted_date
  assert_equal "Artwork (Digital) / Doodle", submission.category
  assert_equal "Unspecified / Any", submission.species
  assert_equal "Any", submission.gender

  # counters
  assert_equal 1, submission.num_favorites
  assert_equal 0, submission.num_comments
  assert_equal 17, submission.num_views

  assert_equal "1024x1024", submission.resolution_str
  assert_equal [], submission.keywords_array
  assert_match(/really adore at the way this/, submission.description_html)

  # Disabled: assertions against a second fixture, kept for reference.
  # parser = get_parser "submission_19190013_korichi.html"
  # assert parser.probably_submission?
  # sub = parser.submission
  # assert_equal 19_190_013, sub.id
  # assert_equal "Korichi", sub.artist
  # assert_equal "General Furry Art", sub.theme
  # assert_equal "Artwork (Digital)", sub.category
  # assert_equal "1280x914", sub.resolution_str
  # assert_equal 82, sub.num_views
  # assert_equal "Male", sub.gender
  # assert_equal "Dragon (Other)", sub.species
  # assert_equal "Feb 24th, 2016 12:51 AM", sub.posted_date
  # assert_equal %w[kobold frog hunting male frog mighty hunter], sub.keywords_array
end
def get_parser(file, require_logged_in: true)
path = File.join("domain/fa/parser/redux", file)
contents = read_fixture_file(path) || raise("Couldn't open #{path}")