Files
redux-scraper/app/jobs/domain/fa/job/scan_user_utils.rb
2025-01-01 03:29:53 +00:00

117 lines
3.8 KiB
Ruby

# typed: true
module Domain::Fa::Job
class ScanUserUtils
# Regexps tested against a response body by user_disabled_or_not_found?.
# A match on any of them means the page is an "account disabled",
# "user not found" or "pending deletion" notice.
# Frozen so the shared list cannot be mutated at runtime.
DISABLED_PAGE_PATTERNS = [
  /User ".+" has voluntarily disabled access/,
  /User ".+" was not found in our database./,
  /The page you are trying to reach is currently pending deletion/,
].freeze
# Reports whether +response+ is one of the notice pages listed in
# DISABLED_PAGE_PATTERNS and, when it is, records that on +user+.
#
# On a match the user is put into the :scan_error state, the reason and the
# id of the response's log entry are written into state_detail (keys already
# present are kept), and the user is persisted with save!.
#
# Returns true when a pattern matched; false otherwise, in which case
# +user+ is not touched.
def self.user_disabled_or_not_found?(user, response)
  body = response.body
  return false if DISABLED_PAGE_PATTERNS.none? { |re| re.match?(body) }

  user.state = :scan_error
  user.state_detail ||= {}
  # Re-read through the getter so we mutate the object the user holds.
  detail = user.state_detail
  detail["scan_error"] =
    "account disabled or not found, see last_scanned_page_id"
  detail["last_scanned_page_id"] = response.log_entry.id
  user.save!
  true
end
# Classifies the response to a user-page request and, for accounts that are
# disabled or missing, records that on +user+.
#
# Returns a two-element array [status, details]:
#   [:ok,    { page: page }]     - the parsed page looks like a user page
#   [:stop,  { message: "..." }] - account disabled or not found; +user+ was
#                                  put into :scan_error and saved with save!
#   [:fatal, { message: "..." }] - non-200 status, or an unrecognised page
#
# In the :stop cases state_detail is replaced (not merged) with the reason
# and the id of the response's log entry, and user.name is only filled in
# when it is not already set.
def self.check_disabled_or_not_found(user, response)
  if response.status_code != 200
    return [
      :fatal,
      {
        message:
          "http #{response.status_code}, log entry #{response.log_entry.id}",
      },
    ]
  end

  body = response.body
  page = Domain::Fa::Parser::Page.new(body, require_logged_in: false)
  return :ok, { page: page } if page.probably_user_page?

  if /has voluntarily disabled access/.match?(body)
    user.state = :scan_error
    user.state_detail = {
      scan_error:
        "(user scan) user has disabled account, see last_user_page_id",
      last_user_page_id: response.log_entry.id,
    }
    # Prefer the name quoted in the notice; fall back to url_name.
    user.name ||=
      body[/User "(.+)" has voluntarily disabled/, 1] || user.url_name
    user.save!
    return :stop, { message: "account disabled" }
  end

  # The "was not found in our database" notice is listed in
  # DISABLED_PAGE_PATTERNS, so it is treated as not-found here as well
  # instead of falling through to :fatal.
  not_found_patterns = [
    /This user cannot be found./,
    /User ".+" was not found in our database./,
    /The page you are trying to reach is currently pending deletion/,
  ]
  if not_found_patterns.any? { |re| re.match?(body) }
    user.state = :scan_error
    user.state_detail = {
      scan_error: "(user scan) user was not found, see last_user_page_id",
      last_user_page_id: response.log_entry.id,
    }
    user.name ||= user.url_name
    user.save!
    return :stop, { message: "account not found" }
  end

  [
    :fatal,
    { message: "not a user page - log entry #{response.log_entry.id}" },
  ]
end
# Copies the profile fields parsed from +page+ (via page.user_page) onto
# +user+ and refreshes the user's avatar record.
#
# - Scalar fields (name, registration date, counters) are copied as-is.
# - profile_html is re-encoded to UTF-8 with invalid/undefined bytes replaced.
# - The id of the response's log entry is stored under
#   log_entry_detail["last_user_page_id"].
# - The avatar's file_uri is set to the page's profile thumbnail URL; only
#   when that leaves the avatar changed is it saved and a UserAvatarJob
#   enqueued.
#
# The user record itself is not saved in this method.
def self.update_user_fields_from_page(user, page, response)
  user_page = page.user_page

  user.name = user_page.name
  user.registered_at = user_page.registered_since
  user.num_pageviews = user_page.num_pageviews
  user.num_submissions = user_page.num_submissions
  user.num_comments_recieved = user_page.num_comments_recieved
  user.num_comments_given = user_page.num_comments_given
  user.num_journals = user_page.num_journals
  user.num_favorites = user_page.num_favorites
  user.profile_html =
    user_page.profile_html.encode(
      "UTF-8",
      invalid: :replace,
      undef: :replace,
    )

  # Guard against a nil detail hash, mirroring how state_detail is handled
  # in user_disabled_or_not_found?.
  user.log_entry_detail ||= {}
  user.log_entry_detail["last_user_page_id"] = response.log_entry.id

  # ensure_avatar! is called for its side effect; its return value is not
  # relied upon (presumably it guarantees user.avatar exists - confirm).
  user.ensure_avatar!
  avatar = user.avatar
  avatar.file_uri = user_page.profile_thumb_url
  if avatar.changed?
    avatar.save!
    Domain::Fa::Job::UserAvatarJob.perform_later(
      { user: user, caused_by_entry: response.log_entry },
    )
  end
end
# Returns the Domain::Fa::User records for +names+, creating any that are
# not found by url_name.
#
# names           - an array of structs responding to #name and #url_name,
#                   i.e. [Struct(:name, :url_name)]
# caused_by_entry - passed through to the UserPageJob enqueued for every
#                   newly created user (default nil)
#
# Users found by the initial query come first in the result, followed by
# newly created users in the order they first appear in +names+. A url_name
# that occurs more than once in +names+ is created (and enqueued) only once.
def self.find_or_create_by_names(names, caused_by_entry: nil)
  users = Domain::Fa::User.where(url_name: names.map(&:url_name)).to_a

  # url_name => true for every user we already hold; updated as users are
  # created so repeated names do not trigger a second create!.
  known_url_names = users.to_h { |u| [u.url_name, true] }

  names.each do |name|
    next if known_url_names.key?(name.url_name)

    user =
      Domain::Fa::User.create!(url_name: name.url_name, name: name.name)
    Domain::Fa::Job::UserPageJob.perform_later(
      { user: user, caused_by_entry: caused_by_entry },
    )
    known_url_names[name.url_name] = true
    users << user
  end

  users
end
end
end