Files
redux-scraper/app/jobs/domain/inkbunny/job/latest_posts_job.rb
2024-12-19 06:04:37 +00:00

173 lines
6.3 KiB
Ruby

module Domain::Inkbunny::Job
class LatestPostsJob < Base
# Job entry point: requests a page of submissions from Inkbunny's api_search
# endpoint (ordered by create_datetime, with keyword/title/description
# matching switched off) and passes the parsed JSON to
# handle_search_response.
#
# The HTTP response is kept in @api_search_response; its log entry is reused
# later as the cause of the follow-up api_submissions request.
#
# @param args [Object] job arguments; not read by this method
# @return [Object] whatever handle_search_response returns
def perform(args)
  search_url =
    "https://inkbunny.net/api_search.php?orderby=create_datetime&keywords=no&title=no&description=no"

  @api_search_response =
    http_client.post(
      search_url,
      caused_by_entry: @first_browse_page_entry || @caused_by_entry
    )

  # fatal_error is defined outside this file; the code below relies on it
  # aborting the job (presumably by raising).
  status = @api_search_response.status_code
  fatal_error("api_search failed: #{status}") unless status == 200

  handle_search_response(JSON.parse(@api_search_response.body))
end
private
# Reconciles one api_search result page with the local database.
#
# Steps, in order:
#   1. Load the posts already stored for the submission ids on the page
#      into @ib_post_id_to_model (keyed by ib_post_id).
#   2. Create a post (and its creator, when unknown) for every id that has
#      no stored post yet.
#   3. Shallow-update every post from the search JSON.
#   4. Fetch api_submissions for the posts flagged as needing a deep update
#      and apply the detailed data.
#
# @param api_search_json [Hash] parsed api_search.php response body
# @return [Array<Hash>, nil] the api_submissions entries that were applied,
#   or nil when no post needed a deep update
def handle_search_response(api_search_json)
  # A response without a submissions array cannot be processed; fail with a
  # descriptive message instead of a NoMethodError on nil.
  ib_submission_jsons =
    api_search_json["submissions"] ||
      fatal_error(
        "no submissions[] array in api_search response: #{api_search_json.keys.join(", ")}"
      )
  ib_submission_ids =
    ib_submission_jsons.map { |json| json["submission_id"]&.to_i }

  @ib_post_id_to_model =
    Domain::Inkbunny::Post
      .where(ib_post_id: ib_submission_ids)
      .includes(:files, :creator)
      .index_by(&:ib_post_id)

  create_missing_posts(ib_submission_jsons)

  # do shallow updates of all posts
  needs_deep_update_posts = []
  Domain::Inkbunny::Post.transaction do
    ib_submission_jsons.each do |submission_json|
      needs_deep_update, post =
        shallow_update_post_from_submission_json(submission_json)
      needs_deep_update_posts << post if needs_deep_update
    end
  end

  # TODO - check condition for needing a deep update
  # Such as:
  # - Never been deep updated before
  # - Number of files changed
  # - Latest file updated timestamp changed
  # - Don't have a user avatar yet
  fetch_and_apply_deep_updates(needs_deep_update_posts)
end

# Builds and saves a post for every submission on the page that is not yet in
# @ib_post_id_to_model, registering each new post in that map.
#
# @param ib_submission_jsons [Array<Hash>] entries of the api_search
#   "submissions" array
# @return [void]
def create_missing_posts(ib_submission_jsons)
  new_posts = []
  ib_submission_jsons.each do |submission_json|
    ib_post_id = submission_json["submission_id"]&.to_i
    next if @ib_post_id_to_model[ib_post_id]

    creator =
      Domain::Inkbunny::User.find_or_initialize_by(
        { ib_user_id: submission_json["user_id"].to_i }
      ) { |new_user| new_user.name = submission_json["username"] }
    # Persisted immediately: a later submission by the same user in this
    # loop is then found by find_or_initialize_by instead of initializing a
    # second unsaved record for the same ib_user_id.
    creator.save!

    post = Domain::Inkbunny::Post.new({ ib_post_id: ib_post_id })
    post.creator = creator
    new_posts << post
    @ib_post_id_to_model[ib_post_id] = post
  end

  Domain::Inkbunny::Post.transaction { new_posts.each(&:save!) }
end

# Requests full submission details for the given posts in a single
# api_submissions call and applies each returned entry in its own
# transaction. Does nothing when the list is empty.
#
# @param posts [Array<Domain::Inkbunny::Post>] posts flagged by the shallow
#   update as needing a deep update
# @return [Array<Hash>, nil] the api_submissions entries, or nil when posts
#   is empty
def fetch_and_apply_deep_updates(posts)
  return if posts.empty?

  ids_list = posts.map(&:ib_post_id).join(",")
  url =
    "https://inkbunny.net/api_submissions.php?" \
    "submission_ids=#{ids_list}" \
    "&show_description=yes&show_writing=yes&show_pools=yes"
  @api_submissions_response =
    http_client.get(url, caused_by_entry: @api_search_response.log_entry)
  if @api_submissions_response.status_code != 200
    fatal_error(
      "api_submissions failed: #{@api_submissions_response.status_code}"
    )
  end

  api_submissions_json = JSON.parse(@api_submissions_response.body)
  submissions =
    api_submissions_json["submissions"] ||
      fatal_error(
        "no submissions[] array in api_submissions response: #{api_submissions_json.keys.join(", ")}"
      )
  logger.info("api_submissions page has #{submissions.size} posts")

  submissions.each do |submission_json|
    Domain::Inkbunny::Post.transaction do
      deep_update_post_from_submission_json(submission_json)
    end
  end
end
# Copies the fields present in an api_search entry onto the matching post,
# saves it, and reports whether the post needs a deep update.
#
# A deep update is requested when the last-file-updated timestamp changed,
# the file count reported by the API changed, or the number of files
# associated with the post differs from the reported count.
#
# @param json [Hash] one entry of the api_search "submissions" array
# @return [Array] two elements: the needs-deep-update flag, then the post
# @raise [TypeError] from Time.parse when a datetime field is missing
def shallow_update_post_from_submission_json(json)
  post = post_for_json(json)
  post.shallow_updated_at = Time.now
  post.title = json["title"]
  post.posted_at = Time.parse(json["create_datetime"])
  post.last_file_updated_at = Time.parse(json["last_file_update_datetime"])
  post.num_files = json["pagecount"]&.to_i
  post.rating = json["rating_id"]&.to_i
  post.submission_type = json["submission_type_id"]&.to_i
  post.ib_detail_raw = json

  # Evaluated before save!, because saving clears the *_changed? flags.
  # `size` rather than `count`: posts loaded with includes(:files) already
  # have their files in memory, and `count` would issue a COUNT query for
  # every post regardless.
  needs_deep_update =
    post.last_file_updated_at_changed? || post.num_files_changed? ||
      post.files.size != post.num_files
  post.save!
  [needs_deep_update, post]
end
# Applies one api_submissions entry to its stored post.
#
# Sets the description and deep_updated_at, stores the creator's large icon
# URL when the entry carries one, creates a file record for every entry of
# "files" whose initial md5 is not already attached to the post, and defers a
# FileJob for each file created.
#
# @param submission_json [Hash] one entry of the api_submissions
#   "submissions" array
# @return [Object] the result of post.save!
def deep_update_post_from_submission_json(submission_json)
  post = post_for_json(submission_json)
  logger.info "deep update post #{post.ib_post_id.to_s.bold}"
  post.deep_updated_at = Time.now
  post.description = submission_json["description"]

  # TODO - enqueue avatar download job if needed
  if submission_json["user_icon_url_large"]
    creator = post.creator
    creator.avatar_url_str = submission_json["user_icon_url_large"]
    creator.save! if creator.changed?
  end

  # Files already attached to the post, keyed by initial md5, so that known
  # files are not created a second time.
  known_files_by_md5 = post.files.index_by(&:md5_initial)
  file_jsons = submission_json["files"] || fatal_error("no files[] array")

  file_jsons.each do |file_json|
    md5_initial = file_json["initial_file_md5"]
    next if known_files_by_md5[md5_initial]

    md5_full = file_json["full_file_md5"]
    md5s = {
      initial_file_md5: md5_initial,
      full_file_md5: md5_full,
      large_file_md5: file_json["large_file_md5"],
      small_file_md5: file_json["small_file_md5"],
      thumbnail_md5: file_json["thumbnail_md5"]
    }
    attributes = {
      ib_file_id: file_json["file_id"]&.to_i,
      ib_created_at: Time.parse(file_json["create_datetime"]),
      file_order: file_json["submission_file_order"]&.to_i,
      ib_detail_raw: file_json,
      file_name: file_json["file_name"],
      url_str: file_json["file_url_full"],
      md5_initial: md5_initial,
      md5_full: md5_full,
      md5s: md5s
    }
    file = post.files.create!(attributes)

    logger.info(
      "[ib_post_id #{post.ib_post_id.to_s.bold}] " \
      "new file #{file.ib_file_id.to_s.bold} - #{file.file_name.black.bold}"
    )

    defer_job(
      Domain::Inkbunny::Job::FileJob,
      { file: file, caused_by_entry: @api_submissions_response.log_entry }
    )
  end

  post.save!
end
# Looks up the post for a submission JSON in @ib_post_id_to_model.
#
# Calls fatal_error when the JSON has no "submission_id", or when no post is
# registered under that id.
#
# @param submission_json [Hash] a submission entry carrying "submission_id"
# @return [Object] the entry stored in @ib_post_id_to_model for that id
def post_for_json(submission_json)
  raw_id = submission_json["submission_id"]
  post_id =
    if raw_id.nil?
      fatal_error(
        "submission_id not found in submission_json: #{submission_json.keys.join(", ")}"
      )
    else
      raw_id.to_i
    end

  known_post = @ib_post_id_to_model[post_id]
  return known_post if known_post

  fatal_error("post not found for ib_post_id #{post_id}")
end
end
end