implement fa parser for browse page / gallery page / submission page
This commit is contained in:
@@ -67,12 +67,20 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
|
||||
end
|
||||
|
||||
# Returns the elements that each represent one submission on a listing
# page, memoized in @submission_elems.
#
# The selector depends on @page_version. For VERSION_2, user gallery pages
# keep their figures under `.submission-list`, while the /browse/ page keeps
# them under `#gallery-browse`; the latter is used only when the former
# matches nothing.
#
# Any other page version is delegated to `unimplemented_version!`.
def submission_elems
  @submission_elems ||=
    case @page_version
    when VERSION_0 then @page.css(".t-image")
    when VERSION_1 then @page.css(".submission-list > .gallery > figure")
    when VERSION_2
      # user gallery pages are under .submission-list
      gallery_figures = @page.css(".submission-list > .gallery > figure")
      # /browse/ page is under #gallery-browse
      gallery_figures.empty? ? @page.css("#gallery-browse > figure") : gallery_figures
    else unimplemented_version!
    end
end
|
||||
|
||||
def logged_in_user
|
||||
|
||||
@@ -23,13 +23,32 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
|
||||
# Returns the submission title with surrounding whitespace stripped.
#
# VERSION_0 / VERSION_1 pages carry the title in a bold tag inside the
# `#page-submission td.cat` cell; VERSION_2 pages carry it in
# `.submission-title p`. Any other page version is delegated to
# `unimplemented_version!`.
#
# The lookup is done only inside the branch for the detected version, so a
# VERSION_2 page is never queried with the legacy selector.
def title
  case @page_version
  when VERSION_0, VERSION_1
    @elem.css("#page-submission td.cat b").first.text.strip
  when VERSION_2
    @elem.css(".submission-title p").first.text.strip
  else unimplemented_version!
  end
end
|
||||
|
||||
# Returns the artist name shown on the submission page, memoized in @artist.
#
# VERSION_0 / VERSION_1 read the first link in the
# `#page-submission table.maintable td.cat` cell. VERSION_2 reads the first
# link under `.submission-id-sub-container` and yields nil when there is no
# such link. Any other page version is delegated to `unimplemented_version!`.
def artist
  @artist ||=
    case @page_version
    when VERSION_0, VERSION_1
      @elem.css("#page-submission table.maintable td.cat a").first.text.strip
    when VERSION_2
      # Only `first` can be nil here, so that is where safe navigation starts.
      @elem.css(".submission-id-sub-container a").first&.text&.strip
    else unimplemented_version!
    end
end
|
||||
|
||||
# Returns the href of the artist link under `.submission-id-sub-container`,
# memoized in @artist_user_page_path.
#
# Only VERSION_2 pages are supported; every other page version is delegated
# to `unimplemented_version!`. Yields nil when the page has no such link,
# matching how `artist` treats the same missing element, instead of raising
# NoMethodError on nil.
def artist_user_page_path
  @artist_user_page_path ||=
    case @page_version
    when VERSION_2
      artist_link = @elem.css(".submission-id-sub-container a").first
      artist_link && artist_link["href"]
    else unimplemented_version!
    end
end
|
||||
|
||||
def description_html
|
||||
@@ -38,6 +57,8 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
# @elem.css("#submission td.alt1 .maintable tr .alt1").last.inner_html
|
||||
when VERSION_0, VERSION_1
|
||||
@elem.css("#page-submission td.alt1 .maintable tr .alt1").last.inner_html
|
||||
when VERSION_2
|
||||
@elem.css(".submission-description").first.inner_html
|
||||
else unimplemented_version!
|
||||
end
|
||||
end
|
||||
@@ -49,6 +70,10 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
# @elem.css("#submission div b a")[1]["href"].strip
|
||||
when VERSION_1
|
||||
@elem.css("#page-submission div b a")[1]["href"].strip
|
||||
when VERSION_2
|
||||
@elem.css("a.button.standard.mobile-fix").find do |elem|
|
||||
elem.text.strip == "Download"
|
||||
end["href"]
|
||||
else unimplemented_version!
|
||||
end
|
||||
end
|
||||
@@ -61,37 +86,81 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
idx = elem_idx_after_text_match(info_children, /Posted/)
|
||||
child = info_children[idx..idx + 5].find { |ic| ic.name == "span" }
|
||||
child.try(:[], "title").try(:strip)
|
||||
when VERSION_2
|
||||
date_str = @elem.css(".popup_date").first["title"]
|
||||
# e.g. 'Feb 18, 2023 06:47 PM'
|
||||
DateTime.strptime(date_str, "%b %d, %Y %I:%M %p") if date_str
|
||||
else
|
||||
raise("unimplemented version #{@page_version}")
|
||||
end
|
||||
end
|
||||
|
||||
# Returns the submission's category text, memoized in @category.
#
# VERSION_0 / VERSION_1 take the element following the "Category" label in
# `info_children`; VERSION_2 reads the "Category" entry through
# `info_text_value_redux`. Any other page version is delegated to
# `unimplemented_version!`.
#
# The legacy lookup runs only for legacy page versions, so it can neither
# fail on a VERSION_2 page nor pre-empt the VERSION_2 branch.
def category
  @category ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Category/).text.strip
    when VERSION_2
      info_text_value_redux("Category")
    else unimplemented_version!
    end
end
|
||||
|
||||
# Returns the submission's theme text, memoized in @theme.
#
# Only VERSION_0 / VERSION_1 are handled, by taking the element following
# the "Theme" label in `info_children`. Every other page version, including
# VERSION_2, is delegated to `unimplemented_version!` without first
# attempting the legacy lookup.
def theme
  @theme ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Theme/).text.strip
    else unimplemented_version!
    end
end
|
||||
|
||||
# Returns the submission's species text, memoized in @species.
#
# VERSION_0 / VERSION_1 take the element following the "Species" label in
# `info_children` and yield nil when no such element is found. VERSION_2
# reads the "Species" entry through `info_text_value_redux`. Any other page
# version is delegated to `unimplemented_version!`.
def species
  @species ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Species/)&.text&.strip
    when VERSION_2
      info_text_value_redux("Species")
    else unimplemented_version!
    end
end
|
||||
|
||||
# Returns the submission's gender text, memoized in @gender.
#
# VERSION_0 / VERSION_1 take the element following the "Gender" label in
# `info_children` and yield nil when no such element is found. VERSION_2
# reads the "Gender" entry through `info_text_value_redux`. Any other page
# version is delegated to `unimplemented_version!`.
def gender
  @gender ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Gender/)&.text&.strip
    when VERSION_2
      info_text_value_redux("Gender")
    else unimplemented_version!
    end
end
|
||||
|
||||
# Returns the favorites count as an Integer, memoized in @num_favorites.
#
# VERSION_0 / VERSION_1 take the element following the "Favorites" label in
# `info_children`; VERSION_2 reads `.favorites .font-large` inside
# `stats_container_redux`. Any other page version is delegated to
# `unimplemented_version!`.
#
# The text is converted with `to_i`, so non-numeric text yields 0.
def num_favorites
  @num_favorites ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Favorites/).text.strip.to_i
    when VERSION_2
      stats_container_redux.css(".favorites .font-large").first.text.strip.to_i
    else unimplemented_version!
    end
end
|
||||
|
||||
# Returns the comment count as an Integer, memoized in @num_comments.
#
# VERSION_0 / VERSION_1 take the element following the "Comments" label in
# `info_children`; VERSION_2 reads `.comments .font-large` inside
# `stats_container_redux`. Any other page version is delegated to
# `unimplemented_version!`.
#
# The text is converted with `to_i`, so non-numeric text yields 0.
def num_comments
  @num_comments ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Comments/).text.strip.to_i
    when VERSION_2
      stats_container_redux.css(".comments .font-large").first.text.strip.to_i
    else unimplemented_version!
    end
end
|
||||
|
||||
# Returns the view count as an Integer, memoized in @num_views.
#
# VERSION_0 / VERSION_1 take the element following the "Views" label in
# `info_children`; VERSION_2 reads `.views .font-large` inside
# `stats_container_redux`. Any other page version is delegated to
# `unimplemented_version!`.
#
# The text is converted with `to_i`, so non-numeric text yields 0.
def num_views
  @num_views ||=
    case @page_version
    when VERSION_0, VERSION_1
      elem_after_text_match(info_children, /Views/).text.strip.to_i
    when VERSION_2
      stats_container_redux.css(".views .font-large").first.text.strip.to_i
    else unimplemented_version!
    end
end
|
||||
|
||||
def resolution_str
|
||||
@@ -101,6 +170,9 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
when VERSION_1
|
||||
idx = elem_idx_after_text_match(info_children, /Resolution/)
|
||||
info_children[idx + 1].try(:text).try(:strip)
|
||||
when VERSION_2
|
||||
parts = info_text_value_redux("Size").split(" ")
|
||||
parts.first + "x" + parts.last
|
||||
else unimplemented_version!
|
||||
end
|
||||
end
|
||||
@@ -128,4 +200,20 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
|
||||
else unimplemented_version!
|
||||
end
|
||||
end
|
||||
|
||||
# Returns the first element matching `section.info.text` within @elem, or
# nil when nothing matches.
def info_text_elem_redux
  @elem.css("section.info.text").first
end
|
||||
|
||||
# Looks up the value shown next to a label in the info section.
#
# info_section - the exact text of the `.highlight` label to find
#                (callers in this file pass e.g. "Category", "Species",
#                "Gender", "Size").
#
# Finds the `.highlight` element whose text equals `info_section`, then
# returns the stripped text of its parent's children from index 1 onward,
# i.e. everything after the first child.
#
# Returns nil when the info section or the requested label is absent,
# rather than raising NoMethodError on nil.
def info_text_value_redux(info_section)
  label = info_text_elem_redux&.css(".highlight")&.find { |e| e.text == info_section }
  return nil unless label

  label.parent.children[1..].text.strip
end
|
||||
|
||||
# Returns whatever @elem yields for the `.stats-container.text` selector;
# the VERSION_2 counters (favorites, comments, views) are read from it.
def stats_container_redux
  @elem.css(".stats-container.text")
end
|
||||
end
|
||||
|
||||
1260
test/fixtures/files/domain/fa/parser/redux/browse_page.html
vendored
Normal file
1260
test/fixtures/files/domain/fa/parser/redux/browse_page.html
vendored
Normal file
File diff suppressed because one or more lines are too long
686
test/fixtures/files/domain/fa/parser/redux/gallery_page_empty_miles_df.html
vendored
Normal file
686
test/fixtures/files/domain/fa/parser/redux/gallery_page_empty_miles_df.html
vendored
Normal file
File diff suppressed because one or more lines are too long
732
test/fixtures/files/domain/fa/parser/redux/gallery_page_miles_df.html
vendored
Normal file
732
test/fixtures/files/domain/fa/parser/redux/gallery_page_miles_df.html
vendored
Normal file
File diff suppressed because one or more lines are too long
769
test/fixtures/files/domain/fa/parser/redux/gallery_page_with_folders_feretta.html
vendored
Normal file
769
test/fixtures/files/domain/fa/parser/redux/gallery_page_with_folders_feretta.html
vendored
Normal file
File diff suppressed because one or more lines are too long
827
test/fixtures/files/domain/fa/parser/redux/submission_51067333_blauhaher.html
vendored
Normal file
827
test/fixtures/files/domain/fa/parser/redux/submission_51067333_blauhaher.html
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -28,6 +28,7 @@ class Domain::Fa::Parser::ReduxPageTest < ActiveSupport::TestCase
|
||||
|
||||
def test_user_page_is_correct
|
||||
parser = get_parser "user_page_miles_df.html"
|
||||
assert parser.logged_in?
|
||||
assert parser.probably_user_page?
|
||||
up = parser.user_page
|
||||
|
||||
@@ -43,6 +44,118 @@ class Domain::Fa::Parser::ReduxPageTest < ActiveSupport::TestCase
|
||||
assert_equal DateTime.new(2006, 1, 12, 7, 52), up.registered_since
|
||||
end
|
||||
|
||||
# Checks listing parsing against a populated user gallery fixture (count,
# first listing's fields, folder count) and against an empty gallery
# fixture, which must still be detected as a listings page with zero
# submissions.
def test_gallery_is_correct
  parser = get_parser "gallery_page_miles_df.html"
  assert parser.logged_in?
  assert parser.probably_listings_page?

  listings = parser.submissions_parsed
  assert_equal 48, listings.length
  assert_equal 51_039_083, listings.first.id

  first_listing = listings.first
  assert_equal "Miles-DF", first_listing.artist
  assert_equal "/user/miles-df/", first_listing.artist_path
  assert_equal "booty", first_listing.title
  assert_equal "/view/51039083/", first_listing.view_path
  assert_equal "//t.furaffinity.net/51039083@300-1676577120.jpg", first_listing.thumb_path

  assert_equal 2, parser.submission_folders.length

  parser_empty = get_parser "gallery_page_empty_miles_df.html"
  assert parser_empty.probably_listings_page?
  assert_equal 0, parser_empty.submissions_parsed.length
end
|
||||
|
||||
# Checks a gallery fixture that has folders: the first and last listings'
# fields, and a sample of the parsed folder entries by index.
def test_gallery_is_correct_with_folders
  parser = get_parser "gallery_page_with_folders_feretta.html"
  assert parser.probably_listings_page?

  listings = parser.submissions_parsed
  assert_equal 48, listings.length

  first_listing = listings.first
  assert_equal 51_066_954, first_listing.id
  assert_equal "Feretta", first_listing.artist
  assert_equal "/user/feretta/", first_listing.artist_path
  assert_equal "[Comm] Size and property - 2/3", first_listing.title
  assert_equal "/view/51066954/", first_listing.view_path
  assert_equal "//t.furaffinity.net/51066954@200-1676761927.jpg", first_listing.thumb_path

  last_listing = listings.last
  assert_equal 50_320_568, last_listing.id
  assert_equal "Feretta", last_listing.artist
  assert_equal "/user/feretta/", last_listing.artist_path
  assert_equal "[Comm] Sizable date - 1/3", last_listing.title
  assert_equal "/view/50320568/", last_listing.view_path
  assert_equal "//t.furaffinity.net/50320568@200-1671750577.jpg", last_listing.thumb_path

  folders = parser.submission_folders
  assert_equal({ href: "/scraps/feretta/", title: "Scraps" }, folders[0])
  assert_equal({ href: "/gallery/feretta/folder/60236/Tale-of-Tails", title: "Tale of Tails" }, folders[1])
  assert_equal({ href: "/gallery/feretta/folder/60234/Illustrations", title: "Illustrations" }, folders[2])
  assert_equal({ href: "/gallery/feretta/folder/229520/Marketing", title: "Marketing" }, folders[9])
  assert_equal({ href: "/gallery/feretta/folder/820310/3D", title: "3D" }, folders[13])
end
|
||||
|
||||
# Checks listing parsing against a /browse/ page fixture: it must be
# detected as a listings page (and not as a submission page), and the first
# listing's fields must match.
def test_browse_page
  # https://www.furaffinity.net/browse/

  parser = get_parser "browse_page.html"
  assert parser.probably_listings_page?
  refute parser.probably_submission?

  listings = parser.submissions_parsed
  # FA appears to respond with only 47?
  assert_equal 47, listings.length

  first = listings.first
  assert_equal 51_067_352, first.id
  assert_equal "CleverDerpy", first.artist
  assert_equal "/user/cleverderpy/", first.artist_path
  assert_equal "The Second Kind of Tarpit", first.title
  assert_equal "/view/51067352/", first.view_path
  assert_equal "//t.furaffinity.net/51067352@400-1676764142.jpg", first.thumb_path
end
|
||||
|
||||
# Checks every parsed field of a single submission page fixture.
def test_submission_is_correct
  parser = get_parser "submission_51067333_blauhaher.html"
  assert parser.probably_submission?

  sub = parser.submission
  assert_equal 51_067_333, sub.id
  assert_equal "BlauHaher", sub.artist
  assert_equal "/user/blauhaher/", sub.artist_user_page_path
  assert_equal '"CUTIE!"', sub.title
  assert_equal "//d.furaffinity.net/art/blauhaher/1676764049/1676764049.blauhaher_хорнь.png", sub.small_img
  assert_equal "//d.furaffinity.net/art/blauhaher/1676764049/1676764049.blauhaher_хорнь.png", sub.full_res_img
  assert_equal DateTime.new(2023, 2, 18, 18, 47), sub.posted_date
  assert_equal "Artwork (Digital) / Doodle", sub.category
  assert_equal "Unspecified / Any", sub.species
  assert_equal "Any", sub.gender
  assert_equal 1, sub.num_favorites
  assert_equal 0, sub.num_comments
  assert_equal 17, sub.num_views
  assert_equal "1024x1024", sub.resolution_str
  assert_equal [], sub.keywords_array
  assert_match(/really adore at the way this/, sub.description_html)

  # Disabled assertions for a second submission fixture, kept for reference:
  # parser = get_parser "submission_19190013_korichi.html"
  # assert parser.probably_submission?

  # sub = parser.submission
  # assert_equal 19_190_013, sub.id
  # assert_equal "Korichi", sub.artist
  # assert_equal "General Furry Art", sub.theme
  # assert_equal "Artwork (Digital)", sub.category
  # assert_equal "1280x914", sub.resolution_str
  # assert_equal 82, sub.num_views
  # assert_equal "Male", sub.gender
  # assert_equal "Dragon (Other)", sub.species
  # assert_equal "Feb 24th, 2016 12:51 AM", sub.posted_date
  # assert_equal %w[kobold frog hunting male frog mighty hunter], sub.keywords_array
end
|
||||
|
||||
def get_parser(file, require_logged_in: true)
|
||||
path = File.join("domain/fa/parser/redux", file)
|
||||
contents = read_fixture_file(path) || raise("Couldn't open #{path}")
|
||||
|
||||
Reference in New Issue
Block a user