Files
redux-scraper/app/models/domain/fa/post.rb
2024-12-25 21:53:47 +00:00

177 lines
4.4 KiB
Ruby

class Domain::Fa::Post < ReduxApplicationRecord
# Table backing this model.
self.table_name = "domain_fa_posts"
# NOTE(review): `has_lite_trail` is defined outside this file; presumably it
# enables change tracking for the model, with `file_sha256` values passed
# through Sha256AttributeMapper -- confirm against the macro's definition.
has_lite_trail(
schema_version: 1,
map_attribute: {
file_sha256: ::Sha256AttributeMapper,
},
)
# NOTE(review): HasIndexedPost is a project concern whose behavior is not
# visible from this file.
include HasIndexedPost
# Lifecycle state of the post. The states are declared as an array, so with
# Rails' array form each entry is persisted as its position in the list:
# append new states at the end rather than reordering existing ones.
enum :state,
[
:ok, # so far so good, post may not yet be scanned or have file downloaded
:removed, # post has been removed
:scan_error, # error scanning post page (see state_detail)
:file_error, # error downloading post file (see state_detail)
]
# Only state names declared in the enum above are accepted.
validates_inclusion_of(:state, in: self.states.keys)
# Normalizes every record as it is instantiated: guarantees the two detail
# containers exist, rewrites a bare-string `state_detail` into hash form, and
# defaults `state` to :ok.
after_initialize do
  self.state_detail = {} unless state_detail

  # NOTE(review): presumably some older rows stored this error message as a
  # plain string; it is wrapped under :file_error so state_detail is a hash.
  unresolvable = "d9.facdn.net is unresolvable"
  self.state_detail = { file_error: unresolvable } if state_detail == unresolvable

  self.log_entry_detail = {} unless log_entry_detail
  self.state = :ok unless state
end
# A post must always carry its FA id and a state.
validates_presence_of(:fa_id, :state)

# The FA user the post belongs to. Optional, and autosaved with the post.
belongs_to :creator,
           class_name: "::Domain::Fa::User",
           inverse_of: :posts,
           optional: true,
           autosave: true

# If the file was scraped, this is the blob entry that represents it.
# `optional:` is given as a real boolean rather than the symbol `:true`.
belongs_to :file,
           class_name: "::HttpLogEntry",
           optional: true,
           autosave: true

# Favorite join rows for this post, and the users reached through them.
has_many :fav_post_joins, class_name: "::Domain::Fa::Fav", inverse_of: :post
has_many :faved_by,
         class_name: "::Domain::Fa::User",
         through: :fav_post_joins,
         source: :user

# Associated PostFactor row (keyed by post_id); destroyed together with the post.
has_one :disco,
        class_name: "::Domain::Fa::PostFactor",
        inverse_of: :post,
        foreign_key: :post_id,
        dependent: :destroy
# Identifies the post by its FA id (as a string) instead of the primary key.
#
# @return [String]
def to_param
  fa_id.to_s
end
# Parsed form of `file_url_str`.
#
# @return [Addressable::URI, nil] nil when no file URL is stored
def file_uri
  raw = file_url_str
  return unless raw

  Addressable::URI.parse(raw)
end
# Stores the file URL as a string, defaulting a missing scheme to https.
# A falsy argument clears the stored URL.
#
# @param uri [String, Addressable::URI, nil]
def file_uri=(uri)
  unless uri
    self.file_url_str = nil
    return
  end

  parsed = Addressable::URI.parse(uri)
  # Scheme-less URLs (e.g. protocol-relative ones) are treated as https.
  parsed.scheme = "https" if parsed.scheme.blank?
  self.file_url_str = parsed.to_s
end
# Parsed form of the thumbnail URL kept in state_detail["thumbnail_url_str"].
#
# @return [Addressable::URI, nil] nil when no thumbnail URL is stored
def thumbnail_uri
  raw = state_detail["thumbnail_url_str"]
  return unless raw

  Addressable::URI.parse(raw)
end
# Stores the thumbnail URL string under state_detail["thumbnail_url_str"],
# defaulting a missing scheme to https. A falsy argument stores nil.
#
# @param uri [String, Addressable::URI, nil]
def thumbnail_uri=(uri)
  unless uri
    state_detail["thumbnail_url_str"] = nil
    return
  end

  parsed = Addressable::URI.parse(uri)
  # Scheme-less URLs (e.g. protocol-relative ones) are treated as https.
  parsed.scheme = "https" if parsed.scheme.blank?
  state_detail["thumbnail_url_str"] = parsed.to_s
end
# A post counts as scanned once a file URL has been recorded for it.
#
# @return [Boolean]
def scanned?
  file_url_str.present?
end
# When the post was scanned.
#
# Prefers the epoch value stored in state_detail["scanned_at"]. Records
# without that value fall back to the creation time of the post's
# `last_submission_page` log entry, if there is one.
#
# @return [Time, nil]
def scanned_at
  epoch = state_detail["scanned_at"]
  return Time.at(epoch) if epoch

  last_submission_page&.created_at
end
# Records when the post was scanned, stored as integer epoch seconds under
# state_detail["scanned_at"]. Passing nil stores nil.
#
# @param time [Time, nil]
# @raise [ArgumentError] if `time` is neither nil nor a Time
def scanned_at=(time)
  unless time.nil? || time.is_a?(Time)
    # Must be `raise Class, message`: writing `ArgumentError(...)` is a call
    # to a method of that name and would surface as NoMethodError instead.
    raise ArgumentError, "time must be Time, was #{time.class}"
  end

  state_detail["scanned_at"] = time&.to_i
end
# Posting time of the submission.
#
# Returns the value from `super` when it is set. Otherwise the contents of
# the best-guess submission page are parsed and, if the page looks like a
# submission, its posted date is used. A truthy derived value is memoized
# in @posted_at; a nil result is recomputed on the next call.
def posted_at
  stored = super
  return stored if stored
  return @posted_at if @posted_at

  html = guess_last_submission_page&.response&.contents
  @posted_at =
    if html
      page = Domain::Fa::Parser::Page.new(html)
      page.probably_submission? ? page.submission.posted_date : nil
    end
end
# Remembers the given log entry as this post's latest submission page by
# storing its id under log_entry_detail["last_submission_page_id"].
#
# @param log_entry [#id]
def last_submission_page=(log_entry)
  entry_id = log_entry.id
  log_entry_detail["last_submission_page_id"] = entry_id
end
# Log entry recorded as this post's latest submission page.
#
# @return [HttpLogEntry, nil] nil when no id is recorded or no row matches
def last_submission_page
  entry_id = log_entry_detail["last_submission_page_id"]
  # Without a recorded id there is nothing to look up; skip the query.
  return if entry_id.nil?

  HttpLogEntry.find_by(id: entry_id)
end
# Best-effort lookup of this post's submission page.
#
# Uses the explicitly recorded `last_submission_page` when there is one.
# Otherwise returns the newest HttpLogEntry with status 200 and no query
# string for this post's /view/<fa_id> path on www.furaffinity.net, with
# or without a trailing slash.
#
# @return [HttpLogEntry, nil]
def guess_last_submission_page
  recorded = last_submission_page
  return recorded if recorded

  candidates =
    HttpLogEntry.where(
      uri_host: "www.furaffinity.net",
      uri_path: ["/view/#{fa_id}/", "/view/#{fa_id}"],
      uri_query: nil,
      status_code: 200,
    )
  candidates.order(created_at: :desc).first
end
# Whether a file record is linked to this post (i.e. `file_id` is set).
#
# @return [Boolean]
def have_file?
  file_id.present?
end
# Builds the attribute hash for a post from a parsed submission.
#
# Looks up the creator by `url_name`, creating the user (named after the
# submission's artist) when no such user exists yet.
#
# @param submission [#id, #title, #artist, #artist_url_name, #thumb_path]
# @param first_seen_log_entry [#id, nil] log entry the post was first seen in
# @return [Hash] with :fa_id, :creator_id, :title and :state_detail
def self.hash_from_submission_parser_helper(submission, first_seen_log_entry: nil)
  artist =
    Domain::Fa::User.find_or_create_by(url_name: submission.artist_url_name) do |new_user|
      # Runs only for a newly built user.
      new_user.name = submission.artist
    end

  detail = {
    "first_seen_entry" => first_seen_log_entry&.id,
    "thumbnail_url_str" => submission.thumb_path,
  }

  {
    fa_id: submission.id,
    creator_id: artist.id,
    title: submission.title,
    state_detail: detail,
  }
end
end