more parsing fixes

This commit is contained in:
Dylan Knutson
2023-05-16 23:08:20 -07:00
parent c34faef0dc
commit df712f65db
4 changed files with 1262 additions and 5 deletions

View File

@@ -34,9 +34,10 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
strip
if name
if @elem.css("img.userIcon.type-admin").first
if @elem.css("userpage-nav-header img.userIcon.type-admin").first
# in newer versions of FA user pages, admins have no '@' prefix,
# but rather an indicator image icon
raise("invalid prefix for admin user name: #{name}") if PREFIXES.include?(name[0])
name
elsif PREFIXES.include?(name[0])
name[1..]

View File

@@ -5,21 +5,23 @@ namespace :fa do
Domain::Fa::User.find_each(start: start_at, batch_size: 100) do |user|
page_log_entry = user.guess_user_page_log_entry
unless page_log_entry
Rails.logger.error("user #{user.id} / #{user.url_name} does not have a scanned user page")
# Rails.logger.error("user #{user.id} / #{user.url_name} does not have a scanned user page")
next
end
parser = Domain::Fa::Parser::Page.new(page_log_entry.response.contents, require_logged_in: false)
unless parser.probably_user_page?
Rails.logger.error("user #{user.id} / #{user.url_name} page #{page_log_entry.id} is not a user page")
# Rails.logger.error("user #{user.id} / #{user.url_name} page #{page_log_entry.id} is not a user page")
next
end
old_name = user.name
new_name = parser.user_page.name
user.name = new_name
if old_name != new_name
user.name = new_name
user.save!
puts ""
Rails.logger.info("updated #{user.id} / #{user.url_name} - #{old_name} -> #{new_name}")
else
Rails.logger.info("no change #{user.id} / #{user.url_name} - #{old_name}")
print "."
end
end
end

View File

@@ -56,6 +56,16 @@ describe Domain::Fa::Parser::Page do
assert_match /Site Owner/, up.profile_html
end
# Spec: a user page must still parse correctly when, per this spec's
# description, an admin appears elsewhere on the page.
# NOTE(review): presumably guards the admin-icon lookup in
# Domain::Fa::Parser::UserPageHelper against matching icons that do not
# belong to the profile owner — confirm against the fixture.
it "correctly parses a user when there's an admin elsewhere on the page" do
parser = get_parser "user_page_childofpeace.html"
# The fixture must be recognised both as a logged-in page and as a user page.
assert parser.logged_in?
assert_page_type parser, :probably_user_page?
up = parser.user_page
# Expected values for the user parsed out of the fixture.
assert_equal "Childofpeace2004", up.name
assert_equal 419, up.num_pageviews
assert_match /Not Available/, up.profile_html
end
it "gallery_is_correct" do
parser = get_parser "gallery_page_miles_df.html"
assert parser.logged_in?

File diff suppressed because it is too large Load Diff