234 lines
6.8 KiB
Ruby
234 lines
6.8 KiB
Ruby
# typed: strict
|
|
# ActiveRecord model backed by the `domain_fa_posts` table.
#
# A post carries:
#   * a `state` enum describing how scanning / file download went,
#   * an optional `creator` (Domain::Fa::User) and an optional downloaded
#     `file` (HttpLogEntry),
#   * a set of `attr_json` attributes holding the scraped metadata (title,
#     keywords, counters, timestamps, ...),
#   * references to the HttpLogEntry rows on which the post was seen.
class Domain::Fa::Post < ReduxApplicationRecord
  self.table_name = "domain_fa_posts"

  include HasIndexedPost
  include Pundit::Authorization
  include AttrJsonRecordAliases

  enum :state,
       [
         :ok, # so far so good, post may not yet be scanned or have file downloaded
         :removed, # post has been removed
         :scan_error, # error scanning post page (see state_detail)
         :file_error, # error downloading post file (see state_detail)
       ]
  validates_inclusion_of(:state, in: self.states.keys)
  validates_presence_of(:fa_id, :state)

  # Default the state to :ok. `after_initialize` also runs for records loaded
  # from the database, including ones loaded with a partial `select` that does
  # not fetch `state`; reading the attribute there would raise
  # ActiveModel::MissingAttributeError, hence the `has_attribute?` guard.
  after_initialize { self.state ||= :ok if has_attribute?(:state) }

  belongs_to :creator,
             class_name: "::Domain::Fa::User",
             inverse_of: :posts,
             optional: true,
             autosave: true

  # If the file was scraped, this is the blob entry that represents it
  belongs_to :file, class_name: "::HttpLogEntry", optional: true, autosave: true

  has_many :fav_post_joins, class_name: "::Domain::Fa::Fav", inverse_of: :post

  has_many :faved_by,
           class_name: "::Domain::Fa::User",
           through: :fav_post_joins,
           source: :user

  has_one :disco,
          class_name: "::Domain::Fa::PostFactor",
          inverse_of: :post,
          foreign_key: :post_id,
          dependent: :destroy

  # Scraped post metadata.
  attr_json :title, :string
  attr_json :category, :string
  attr_json :theme, :string
  attr_json :species, :string
  attr_json :gender, :string
  attr_json :description, :string
  attr_json :keywords, :string, array: true, default: []
  attr_json :num_favorites, :integer
  attr_json :num_comments, :integer
  attr_json :num_views, :integer
  attr_json :posted_at, :datetime
  attr_json :scanned_at, :datetime

  # Ids backing the HttpLogEntry associations declared below.
  attr_json :last_user_page_id, :integer
  attr_json :last_submission_page_id, :integer
  attr_json :first_browse_page_id, :integer
  attr_json :first_gallery_page_id, :integer
  attr_json :first_seen_entry_id, :integer

  attr_json :scan_file_error, :string

  belongs_to :last_user_page, class_name: "HttpLogEntry", optional: true
  belongs_to :last_submission_page, class_name: "HttpLogEntry", optional: true
  belongs_to :first_browse_page, class_name: "HttpLogEntry", optional: true
  belongs_to :first_gallery_page, class_name: "HttpLogEntry", optional: true
  belongs_to :first_seen_entry, class_name: "HttpLogEntry", optional: true

  # Looks up a post by its `fa_id`.
  #
  # @param id [Integer] value of the `fa_id` column
  # @return [Domain::Fa::Post, nil] the first matching post, or nil
  sig { params(id: Integer).returns(T.nilable(Domain::Fa::Post)) }
  def self.find_by_fa_id(id)
    where(fa_id: id).first
  end

  # Same as {find_by_fa_id}, but uses `first!`, which raises when no row
  # matches.
  #
  # @param id [Integer] value of the `fa_id` column
  # @return [Domain::Fa::Post]
  sig { params(id: Integer).returns(Domain::Fa::Post) }
  def self.find_by_fa_id!(id)
    where(fa_id: id).first!
  end

  # URL parameter for this record: the `fa_id` rendered as a string.
  sig { returns(String) }
  def to_param
    fa_id.to_s
  end

  # `file_url_str` parsed into an Addressable::URI.
  #
  # @return [Addressable::URI, nil] nil when `file_url_str` is not set
  sig { returns(T.nilable(Addressable::URI)) }
  def file_uri
    Addressable::URI.parse(self.file_url_str) if self.file_url_str
  end

  # Stores `uri` in `file_url_str`. A URI without a scheme gets "https"
  # before it is serialised; a nil `uri` clears `file_url_str`.
  #
  # @param uri [String, Addressable::URI, nil]
  sig { params(uri: T.nilable(T.any(String, Addressable::URI))).void }
  def file_uri=(uri)
    unless uri
      self.file_url_str = nil
      return
    end

    parsed = Addressable::URI.parse(uri)
    parsed.scheme = "https" if parsed.scheme.blank?
    self.file_url_str = parsed.to_s
  end

  # Re-points `file` at the HttpLogEntry found for `file_url_str` when the
  # currently attached entry's uri differs from `file_url_str`, saving the
  # record if the association changed. Always logs a one-line summary of what
  # was (or was not) done.
  sig { void }
  def fix_file_by_uri!
    parts = []
    parts << "[fa_id: #{self.fa_id}]"
    attached_uri = self.file&.uri
    expected_url = self.file_url_str

    if expected_url.present? && attached_uri.present? &&
         (attached_uri.to_s != expected_url)
      old_file = self.file
      new_file = HttpLogEntry.find_by_uri_host_path(expected_url)
      if old_file == new_file
        # The lookup resolved to the entry that is already attached even
        # though the uri strings differ; nothing to update.
        parts << "[no change][invariant]"
      elsif new_file
        self.file = new_file
        parts << "[old: #{old_file&.id} / #{old_file&.uri.to_s}]"
        parts << "[new: #{self.file&.id} / #{self.file&.uri.to_s}]"
        self.save!
      else
        parts << "[new_file_not_found: #{expected_url}]"
      end
    else
      parts << "[no change]"
      parts << "[file: #{self.file&.id} / #{self.file&.uri.to_s}]"
      parts << "[file_url_str: #{self.file_url_str}]"
    end

    logger.info parts.join(" ")
  end

  # Whether `file_url_str` is populated.
  sig { returns(T::Boolean) }
  def scanned?
    self.file_url_str.present?
  end

  # The stored `scanned_at` attribute, falling back to the creation time of
  # the `last_submission_page` log entry when the attribute is not populated.
  #
  # @return [ActiveSupport::TimeWithZone, nil]
  sig { returns(T.nilable(ActiveSupport::TimeWithZone)) }
  def scanned_at
    # Previous implementation, kept for reference:
    #
    # # at some point, `scanned_at` was populated to avoid having to look up the
    # # post's `last_submission_page` log entry, but we fall back to that
    # # if scanned_at isn't populated yet
    # if state_detail["scanned_at"]
    #   Time.at(state_detail["scanned_at"])
    # else
    #   last_submission_page&.created_at
    # end
    super || last_submission_page&.created_at
  end

  # sig { params(time: T.nilable(Time)).void }
  # def scanned_at=(time)
  #   self.state_detail["scanned_at"] = time&.to_i
  # end

  # The stored `posted_at` when present; otherwise the posted date parsed out
  # of the response body of the best-guess submission page.
  #
  # @return [ActiveSupport::TimeWithZone, nil] nil when there is no page
  #   content, or the page does not look like a submission page
  sig { returns(T.nilable(ActiveSupport::TimeWithZone)) }
  def guess_posted_at
    pa = posted_at
    return pa if pa

    contents = guess_last_submission_page&.response_bytes
    return nil unless contents

    parser = Domain::Fa::Parser::Page.new(contents)
    parser.submission.posted_date if parser.probably_submission?
  end

  # sig { params(log_entry: T.nilable(HttpLogEntry)).void }
  # def last_submission_page=(log_entry)
  #   self.last_submission_page_id = log_entry&.id
  # end

  # sig { returns(T.nilable(HttpLogEntry)) }
  # def last_submission_page
  #   HttpLogEntry.find_by(id: self.last_submission_page_id)
  # end

  # The associated `last_submission_page`, or - when none is associated - the
  # most recently created HttpLogEntry with status 200 for this post's
  # `/view/<fa_id>` path (with or without trailing slash) on
  # www.furaffinity.net and no query string.
  #
  # @return [HttpLogEntry, nil]
  sig { returns(T.nilable(HttpLogEntry)) }
  def guess_last_submission_page
    last_submission_page ||
      HttpLogEntry
        .where(
          uri_host: "www.furaffinity.net",
          uri_path: ["/view/#{self.fa_id}/", "/view/#{self.fa_id}"],
          uri_query: nil,
          status_code: 200,
        )
        .order(created_at: :desc)
        .first
  end

  # The stored description with its leading boilerplate removed.
  #
  # @return [String, nil] nil when the stored description is nil or blank
  sig { returns(T.nilable(String)) }
  def description
    content = super
    # NOTE(review): `blank?` alone already covers nil; the explicit `nil?` is
    # presumably kept so the type checker narrows `content` to String -
    # confirm before simplifying.
    return nil if content.nil? || content.blank?

    # This is a hack to drop the first three lines of the description when
    # they are: an empty line, a line starting with `<a href=`, and a
    # `<br><br>` line. Note that in that case every remaining line is returned
    # whitespace-stripped; otherwise the content is returned untouched.
    lines = content.lines.map(&:strip)
    has_header =
      lines.length > 3 && lines[0] == "" &&
        lines[1]&.start_with?("<a href=") && lines[2] == "<br><br>"
    return lines.drop(3).join("\n") if has_header

    content
  end

  # Whether a `file` is attached (i.e. `file_id` is populated).
  sig { returns(T::Boolean) }
  def have_file?
    self.file_id.present?
  end

  # Finds the post whose `fa_id` matches `submission.id`, or initializes (but
  # does not save) a new one.
  #
  # The creating user is looked up by `submission.artist_url_name` and is
  # created (persisted) if missing, with `name` taken from
  # `submission.artist`. Creator, title and first-seen entry are assigned only
  # to a newly initialized post; an existing post is returned unmodified.
  #
  # @param submission parsed submission exposing `id`, `title`, `artist` and
  #   `artist_url_name`
  # @param first_seen_log_entry [HttpLogEntry, nil] log entry on which the
  #   post was first seen
  # @return [Domain::Fa::Post]
  sig do
    params(
      submission: T.untyped,
      first_seen_log_entry: T.nilable(HttpLogEntry),
    ).returns(Domain::Fa::Post)
  end
  def self.find_or_initialize_by_submission_parser(
    submission,
    first_seen_log_entry: nil
  )
    creator =
      Domain::Fa::User.find_or_create_by!(
        { url_name: submission.artist_url_name },
      ) { |user| user.name = submission.artist }

    Domain::Fa::Post.find_or_initialize_by(fa_id: submission.id) do |post|
      post.creator = creator
      post.title = submission.title
      post.first_seen_entry = first_seen_log_entry
    end
  end
end
|