Refactor Inkbunny job handling and enhance gallery scanning

- Removed the per-class `ignore_signature_args :caused_by_entry` declarations from the E621, FA, Inkbunny, and Twitter job classes; the declaration now lives once on `Scraper::JobBase`, which collects ignored signature arguments from every superclass.
- Introduced a `scanned_gallery_at` attribute on the Inkbunny user model to track when a user's gallery was last scanned.
- Added `UserGalleryJob` to page through an Inkbunny user's gallery via the search API, skipping users whose gallery was scanned within the past week.
- Implemented `ApiSearchPageProcessor` to upsert posts and users from a page of `api_search.php` results, shared by `UserGalleryJob` and `LatestPostsJob` (sketched just below this list).
- Updated `LatestPostsJob` to fetch and process submissions through the new processor, stopping once a page yields fewer new posts than a full page.
- Expanded test coverage for the Inkbunny jobs, including specs for the new `UserGalleryJob` and the reworked `LatestPostsJob`.
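A rough sketch of the paging loop both jobs now share, condensed from the new code in the diffs below (the `http_client` helper comes from the job base class; the stop condition shown is the `LatestPostsJob` variant):

processor = Domain::Inkbunny::Job::ApiSearchPageProcessor.new
first_log_entry = nil
rid = nil
page = 1

loop do
  url = Domain::Inkbunny::Job::ApiSearchPageProcessor.build_api_search_url(rid: rid, page: page)
  response = http_client.post(url, caused_by_entry: first_log_entry)
  first_log_entry ||= response.log_entry
  result = processor.process!(JSON.parse(response.body))

  # LatestPostsJob stops as soon as a page is not entirely new posts;
  # UserGalleryJob instead stops on the last page or when nothing new appears.
  break if result[:num_new_posts] < Domain::Inkbunny::Job::ApiSearchPageProcessor::SUBMISSIONS_PER_PAGE

  rid ||= result[:rid] # reuse the result-set id for subsequent pages
  page += 1
end
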
Author: Dylan Knutson
Date:   2024-12-31 20:08:33 +00:00
Parent: 93259d2676
Commit: fc8b75f014
35 changed files with 964 additions and 394 deletions

View File

@@ -1,6 +1,5 @@
class Domain::E621::Job::Base < Scraper::JobBase
discard_on ActiveJob::DeserializationError
def self.http_factory_method
:get_e621_http_client
end

View File

@@ -1,7 +1,6 @@
module Domain::E621::Job
class PostsIndexJob < Base
queue_as :e621
ignore_signature_args :caused_by_entry
def perform(args)
caused_by_entry = args[:caused_by_entry]

View File

@@ -1,7 +1,6 @@
module Domain::E621::Job
class ScanPostJob < Base
queue_as :e621
ignore_signature_args :caused_by_entry
def perform(args)
post = args[:post] || raise("no post provided")

View File

@@ -1,7 +1,6 @@
module Domain::E621::Job
class StaticFileJob < Base
queue_as :static_file
ignore_signature_args :caused_by_entry
def perform(args)
post = args[:post] || fatal_error("post is required")

View File

@@ -1,6 +1,5 @@
class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
queue_as :fa_browse_page
ignore_signature_args :caused_by_entry
def perform(args)
@caused_by_entry = args[:caused_by_entry]
@@ -17,7 +16,7 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
end
logger.info(
"finished, #{@total_num_new_posts_seen.to_s.bold} new, #{@total_num_posts_seen.to_s.bold} total posts across #{@page_number.to_s.bold} pages"
"finished, #{@total_num_new_posts_seen.to_s.bold} new, #{@total_num_posts_seen.to_s.bold} total posts across #{@page_number.to_s.bold} pages",
)
end
@@ -33,14 +32,14 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
response =
http_client.get(
url,
caused_by_entry: @first_browse_page_entry || @caused_by_entry
caused_by_entry: @first_browse_page_entry || @caused_by_entry,
)
log_entry = response.log_entry
@first_browse_page_entry ||= log_entry
if response.status_code != 200
fatal_error(
"non 200 response for /browse: #{response.status_code.to_s.underline}"
"non 200 response for /browse: #{response.status_code.to_s.underline}",
)
end
@@ -52,7 +51,7 @@ class Domain::Fa::Job::BrowsePageJob < Domain::Fa::Job::Base
log_entry,
enqueue_posts_pri: :high,
page_desc: "Browse@#{@page_number}",
fill_id_gaps: true
fill_id_gaps: true,
)
@total_num_new_posts_seen += listing_page_stats.new_seen

View File

@@ -4,7 +4,6 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
USERS_PER_FULL_PAGE = Rails.env.test? ? 9 : 190
queue_as :fa_user_favs
ignore_signature_args :caused_by_entry
def perform(args)
@first_job_entry = nil

View File

@@ -1,6 +1,5 @@
class Domain::Fa::Job::HomePageJob < Domain::Fa::Job::Base
queue_as :fa_browse_page
ignore_signature_args :caused_by_entry
def perform(args)
@caused_by_entry = args[:caused_by_entry]
@@ -12,7 +11,7 @@ class Domain::Fa::Job::HomePageJob < Domain::Fa::Job::Base
scan_home_page
logger.info(
"finished, #{@total_num_new_posts_seen.to_s.bold} new, #{@total_num_posts_seen.to_s.bold} total posts"
"finished, #{@total_num_new_posts_seen.to_s.bold} new, #{@total_num_posts_seen.to_s.bold} total posts",
)
end
@@ -27,7 +26,7 @@ class Domain::Fa::Job::HomePageJob < Domain::Fa::Job::Base
if response.status_code != 200
fatal_error(
"non 200 response for /: #{response.status_code.to_s.underline}"
"non 200 response for /: #{response.status_code.to_s.underline}",
)
end
@@ -39,7 +38,7 @@ class Domain::Fa::Job::HomePageJob < Domain::Fa::Job::Base
log_entry,
enqueue_posts_pri: :high,
page_desc: "HomePage",
continue_for: @continue_for
continue_for: @continue_for,
)
@total_num_new_posts_seen += listing_page_stats.new_seen

View File

@@ -1,6 +1,5 @@
class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
queue_as :static_file
ignore_signature_args :caused_by_entry
def perform(args)
@post = args[:post]
@@ -12,7 +11,7 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
if args[:fa_id]
defer_job(
Domain::Fa::Job::ScanPostJob,
{ fa_id: args[:fa_id], caused_by_entry: @caused_by_entry }
{ fa_id: args[:fa_id], caused_by_entry: @caused_by_entry },
)
end
return
@@ -85,7 +84,7 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
if response.status_code != 200
defer_job(
Domain::Fa::Job::ScanPostJob,
{ post: @post, caused_by_entry: response.log_entry, force_scan: true }
{ post: @post, caused_by_entry: response.log_entry, force_scan: true },
)
err_msg =

View File

@@ -1,6 +1,5 @@
class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
queue_as :fa_post
ignore_signature_args :caused_by_entry
def perform(args)
@post =
@@ -28,7 +27,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
defer_job(
Domain::Fa::Job::ScanFileJob,
{ post: @post, caused_by_entry: @submission_entry || @caused_by_entry },
{ priority: self.priority }
{ priority: self.priority },
)
end
@@ -37,7 +36,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
if @submission_entry && @submission_entry.status_code == 200
enqueue_jobs_from_found_links(
@submission_entry,
suppress_jobs: [{ job: self.class, fa_id: @post.fa_id }]
suppress_jobs: [{ job: self.class, fa_id: @post.fa_id }],
)
end
logger.info "finished post scan"
@@ -49,11 +48,11 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
response =
http_client.get(
"https://www.furaffinity.net/view/#{@post.fa_id}/",
caused_by_entry: @caused_by_entry
caused_by_entry: @caused_by_entry,
)
if response.status_code != 200
fatal_error(
"error scanning fa_id #{@post.fa_id}: #{response.status_code}, log entry #{response.log_entry.id}"
"error scanning fa_id #{@post.fa_id}: #{response.status_code}, log entry #{response.log_entry.id}",
)
end
@@ -94,7 +93,7 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
submission.description_html.encode(
"UTF-8",
invalid: :replace,
undef: :replace
undef: :replace,
)
@post.keywords = submission.keywords_array
@post.file_uri = submission.full_res_img

View File

@@ -1,6 +1,5 @@
class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
queue_as :fa_user_avatar
ignore_signature_args :caused_by_entry
def perform(args)
init_from_args!(args, build_user: false)

View File

@@ -8,14 +8,13 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
USERS_PER_FULL_PAGE = Rails.env.test? ? 9 : 190
queue_as :fa_user_follows
ignore_signature_args :caused_by_entry
def perform(args)
init_from_args!(args)
@first_job_entry = nil
if !@user.due_for_follows_scan? && !@force_scan
logger.warn(
"scanned #{time_ago_in_words(@user.scanned_follows_at)}, skipping"
"scanned #{time_ago_in_words(@user.scanned_follows_at)}, skipping",
)
return
end
@@ -46,7 +45,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
end
if to_add.any?
@user.follower_joins.insert_all!(
to_add.map { |id| { followed_id: id } }
to_add.map { |id| { followed_id: id } },
)
end
@user.scanned_follows_at = Time.now
@@ -58,7 +57,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
if @created_user
logger.info("user was new record, enqueue page scan job")
Domain::Fa::Job::UserPageJob.perform_later(
{ user: @user, caused_by_entry: best_caused_by_entry }
{ user: @user, caused_by_entry: best_caused_by_entry },
)
end
end
@@ -79,7 +78,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
if response.status_code != 200
fatal_error(
"http #{response.status_code.to_s.red.bold}, " +
"log entry #{response.log_entry.id.to_s.bold}"
"log entry #{response.log_entry.id.to_s.bold}",
)
end
@@ -93,7 +92,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
if user_list.last.url_name == @last_in_user_list
logger.info(
"page #{@page_number.to_s.bold} saw same user as last page, break"
"page #{@page_number.to_s.bold} saw same user as last page, break",
)
return :break
end
@@ -119,8 +118,8 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
url_name: user.url_name,
name: user.name,
state_detail: {
"first_seen_entry" => response.log_entry.id
}
"first_seen_entry" => response.log_entry.id,
},
}
end
@@ -130,14 +129,14 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
users_to_create_hashes,
unique_by: :url_name,
update_only: :url_name,
returning: %i[id url_name]
returning: %i[id url_name],
)
.map { |row| row["id"] } unless users_to_create_hashes.empty?
logger.info [
"[page #{@page_number.to_s.bold}]",
"[users: #{user_list.length.to_s.bold}]",
"[created: #{users_to_create_hashes.size.to_s.bold}]"
"[created: #{users_to_create_hashes.size.to_s.bold}]",
].join(" ")
enqueue_new_user_pagescan_jobs(users_to_create_hashes)
@@ -154,8 +153,8 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
Domain::Fa::Job::UserPageJob.perform_later(
{
url_name: user_hash[:url_name],
caused_by_entry: best_caused_by_entry
}
caused_by_entry: best_caused_by_entry,
},
)
end
end

View File

@@ -1,6 +1,5 @@
class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
queue_as :fa_user_gallery
ignore_signature_args :caused_by_entry
MAX_PAGE_NUMBER = 350
@@ -25,7 +24,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
if !@user.due_for_gallery_scan? && !@force_scan
logger.warn(
"gallery scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping"
"gallery scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping",
)
return
end
@@ -35,8 +34,8 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
Set.new(
[
{ href: "/gallery/#{@user.url_name}/", title: "Main Gallery" },
{ href: "/scraps/#{@user.url_name}/", title: "Scraps" }
]
{ href: "/scraps/#{@user.url_name}/", title: "Scraps" },
],
)
until (@folders - @visited).empty?
@@ -61,7 +60,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
while true
if page_number >= @max_page_number
fatal_error(
"hit max #{page_number}) pages, num submissions: #{@user.num_submissions}"
"hit max #{page_number}) pages, num submissions: #{@user.num_submissions}",
)
end
@@ -77,13 +76,13 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
if response.status_code != 200
fatal_error(
"http #{response.status_code}, log entry #{response.log_entry.id}"
"http #{response.status_code}, log entry #{response.log_entry.id}",
)
end
if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
@user,
response
response,
)
logger.error("account disabled / not found, abort")
return :break
@@ -102,7 +101,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
enqueue_posts_pri: :low,
enqueue_gallery_scan: false,
enqueue_page_scan: false,
page_desc: "#{folder[:title]}@#{page_number}"
page_desc: "#{folder[:title]}@#{page_number}",
)
total_num_new_posts_seen += listing_page_stats.new_seen
total_num_posts_seen += listing_page_stats.total_seen
@@ -119,7 +118,7 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
if response && response.status_code == 200
enqueue_jobs_from_found_links(
response.log_entry,
suppress_jobs: [{ job: self.class, url_name: @user.url_name }]
suppress_jobs: [{ job: self.class, url_name: @user.url_name }],
)
end
end

View File

@@ -1,7 +1,6 @@
module Domain::Fa::Job
class UserIncrementalJob < Base
queue_as :fa_user_page
ignore_signature_args :caused_by_entry
def perform(args)
init_from_args!(args)
@@ -20,7 +19,7 @@ module Domain::Fa::Job
if !@user.due_for_incremental_scan? && !@force_scan
logger.warn(
"scanned #{@user.time_ago_for_incremental_scan.bold}, skipping"
"scanned #{@user.time_ago_for_incremental_scan.bold}, skipping",
)
return
end
@@ -28,14 +27,14 @@ module Domain::Fa::Job
response =
http_client.get(
"https://www.furaffinity.net/user/#{@user.url_name}/",
caused_by_entry: @caused_by_entry
caused_by_entry: @caused_by_entry,
)
@log_entry = response.log_entry
ret, opts =
Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(
@user,
response
response,
)
case ret
when :ok
@@ -50,7 +49,7 @@ module Domain::Fa::Job
Domain::Fa::Job::ScanUserUtils.update_user_fields_from_page(
@user,
page,
response
response,
)
check_favs(@user, page.user_page.recent_fav_fa_ids)
@@ -65,7 +64,7 @@ module Domain::Fa::Job
if response && response.status_code == 200
enqueue_jobs_from_found_links(
response.log_entry,
suppress_jobs: [{ job: self.class, url_name: @user.url_name }]
suppress_jobs: [{ job: self.class, url_name: @user.url_name }],
)
end
end
@@ -74,7 +73,7 @@ module Domain::Fa::Job
recent_fav_posts =
Domain::Fa::Job::ScanPostUtils.find_or_create_by_fa_ids(
recent_fav_fa_ids,
caused_by_entry: @log_entry
caused_by_entry: @log_entry,
)
recent_fav_post_ids = recent_fav_posts.map(&:id)
@@ -91,17 +90,17 @@ module Domain::Fa::Job
if num_missing >= 0
logger.info("add #{num_missing.to_s.bold} new favs for user")
@user.fav_post_joins.insert_all!(
missing_fav_post_ids.map { |post_id| { post_id: post_id } }
missing_fav_post_ids.map { |post_id| { post_id: post_id } },
)
end
if missing_fav_post_ids.include? recent_fav_post_ids.last
logger.info(
"last fav is new (#{num_missing.to_s.bold} missing), enqueue full favs scan"
"last fav is new (#{num_missing.to_s.bold} missing), enqueue full favs scan",
)
defer_job(
Domain::Fa::Job::FavsJob,
{ user: user, caused_by_entry: @log_entry }
{ user: user, caused_by_entry: @log_entry },
)
else
@user.scanned_favs_at = Time.now
@@ -128,7 +127,7 @@ module Domain::Fa::Job
num_missing = missing.size
user.followed_joins.insert_all!(
missing.map { |watcher| { follower_id: watcher.id } }
missing.map { |watcher| { follower_id: watcher.id } },
)
logger.info("added #{num_missing.to_s.bold} new watchers")
end
@@ -150,14 +149,14 @@ module Domain::Fa::Job
num_missing = missing.size
user.follower_joins.insert_all!(
missing.map { |watcher| { followed_id: watcher.id } }
missing.map { |watcher| { followed_id: watcher.id } },
)
logger.info("added #{num_missing.to_s.bold} new users watched")
if missing.any? { |w| w.url_name == recent_watching.last.url_name }
logger.info("last user watched is new, enqueue full follows scan")
Domain::Fa::Job::UserFollowsJob.perform_later(
{ user: user, caused_by_entry: @log_entry }
{ user: user, caused_by_entry: @log_entry },
)
else
@user.scanned_follows_at = Time.now

View File

@@ -1,6 +1,5 @@
class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
queue_as :fa_user_page
ignore_signature_args :caused_by_entry
def perform(args)
init_from_args!(args)
@@ -10,7 +9,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
if !@user.due_for_page_scan? && !@force_scan
logger.warn(
"scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping"
"scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping",
)
return
end
@@ -18,13 +17,13 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
response =
http_client.get(
"https://www.furaffinity.net/user/#{@user.url_name}/",
caused_by_entry: @caused_by_entry
caused_by_entry: @caused_by_entry,
)
ret, opts =
Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(
@user,
response
response,
)
case ret
when :ok
@@ -39,7 +38,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
Domain::Fa::Job::ScanUserUtils.update_user_fields_from_page(
@user,
page,
response
response,
)
@user.scanned_page_at = Time.now
@user.save!
@@ -48,7 +47,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
if response && response.status_code == 200
enqueue_jobs_from_found_links(
response.log_entry,
suppress_jobs: [{ job: self.class, url_name: @user.url_name }]
suppress_jobs: [{ job: self.class, url_name: @user.url_name }],
)
end
end

View File

@@ -0,0 +1,162 @@
# Processes a page of submissions from the API search endpoint.
#
# This is a separate class so it can be used by the UserGalleryJob, which
# needs to process multiple pages of submissions, and also by the
# LatestPostsJob, which needs to process a single page of submissions.
class Domain::Inkbunny::Job::ApiSearchPageProcessor
SUBMISSIONS_PER_PAGE = 100
MAX_LOOP_COUNT = 50
attr_reader :changed_posts
def initialize
@shallow_posts_by_ib_post_id = {}
@users_by_ib_user_id = {}
@changed_posts = []
@total_new_posts = 0
end
def all_posts
@shallow_posts_by_ib_post_id.values
end
def all_users
@users_by_ib_user_id.values
end
# Process a page of submissions from the API search endpoint.
#
# Returns a hash with the following keys:
# - num_total_posts: the total number of posts processed for all submission_jsons so far
# - num_total_users: the total number of users processed for all submission_jsons so far
# - num_total_changed_posts: the number of posts that were changed for all submission_jsons so far
# - num_total_new_posts: the total number of new posts processed for all submission_jsons so far
# - num_new_posts: the number of new posts in the page for the submission_json that was just processed (indicates to the caller that the next page should be fetched)
# - num_pages: the total number of pages in the submission_json that was just processed
# - rid: the RID for the submission_json that was just processed
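#
# An illustrative example of the returned hash (values are hypothetical; the
# rid matches the gallery fixture further below):
#   {
#     num_total_posts: 200, num_total_users: 37,
#     num_total_changed_posts: 12, num_total_new_posts: 180,
#     num_new_posts: 80, num_pages: 4, rid: "8abcdfca27"
#   }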
def process!(submissions_json)
num_new_posts = 0
submissions_json["submissions"].each do |submission_json|
if upsert_post_from_submission_json!(submission_json)
num_new_posts += 1
@total_new_posts += 1
end
end
{
num_total_posts: @shallow_posts_by_ib_post_id.size,
num_total_users: @users_by_ib_user_id.size,
num_total_changed_posts: @changed_posts.size,
num_total_new_posts: @total_new_posts,
num_new_posts: num_new_posts,
num_pages: submissions_json["pages_count"]&.to_i,
rid: submissions_json["rid"],
}
end
# Builds the URL for the API search endpoint.
#
# If rid is provided, then the page is for a specific RID set, constructed from a previous API search.
# If ib_user_id is provided, then the page is for the user's gallery.
# If neither is provided, then the page is for the latest posts.
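#
# Illustrative results, matching the URLs exercised in the specs below:
#   build_api_search_url(page: 1)
#     # => "https://inkbunny.net/api_search.php?submissions_per_page=100&page=1&get_rid=yes&orderby=create_datetime&keywords=no"
#   build_api_search_url(rid: "8abcdfca27", page: 2)
#     # => "https://inkbunny.net/api_search.php?submissions_per_page=100&page=2&rid=8abcdfca27"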
def self.build_api_search_url(
ib_user_id: nil,
username: nil,
rid: nil,
page: 1
)
base_url = URI.parse("https://inkbunny.net/api_search.php")
query_params = {
"submissions_per_page" => SUBMISSIONS_PER_PAGE,
"page" => page,
}
if rid.present?
query_params["rid"] = rid
elsif ib_user_id.present?
query_params["get_rid"] = "yes"
query_params["orderby"] = "create_datetime"
query_params["user_id"] = ib_user_id
query_params["keywords"] = "no"
else
query_params["get_rid"] = "yes"
query_params["orderby"] = "create_datetime"
query_params["keywords"] = "no"
end
base_url.query = URI.encode_www_form(query_params)
base_url.to_s
end
private
# Update a post from a submission JSON. Submission JSONs are returned from
# the API search endpoint, which contains a shallow version of post data.
#
# Returns true if the post was newly created.
#
# Expected information from the endpoint (see fixture api_search.json):
# - submission_id
# - title
# - create_datetime
# - last_file_update_datetime
# - pagecount
# - rating_id
# - submission_type_id
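#
# An abbreviated example entry, taken from the gallery fixture further below:
#   {
#     "submission_id" => "3507105",
#     "title" => "Dad of the Year",
#     "create_datetime" => "2024-12-24 10:49:53.744874+00",
#     "last_file_update_datetime" => "2024-12-24 10:48:04.614708+00",
#     "pagecount" => "5",
#     "rating_id" => "2",
#     "submission_type_id" => "1"
#   }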
def upsert_post_from_submission_json!(submission_json)
ib_post_id = submission_json["submission_id"]&.to_i
raise "ib_post_id is blank" if ib_post_id.blank?
if @shallow_posts_by_ib_post_id[ib_post_id]
# already upserted earlier in this run; not a newly created post
return false
end
post =
Domain::Inkbunny::Post.includes(:creator).find_or_initialize_by(
ib_post_id: ib_post_id,
)
@shallow_posts_by_ib_post_id[ib_post_id] = post
is_new_post = post.new_record?
creator = upsert_user_from_submission_json!(submission_json)
if post.creator && post.creator.ib_user_id != creator.ib_user_id
raise "post.creator.ib_user_id != creator.ib_user_id"
end
post.creator = creator
post.shallow_updated_at = Time.now
post.title = submission_json["title"]
post.posted_at = Time.parse submission_json["create_datetime"]
post.last_file_updated_at =
Time.parse submission_json["last_file_update_datetime"]
post.num_files = submission_json["pagecount"]&.to_i
post.rating = submission_json["rating_id"]&.to_i
post.submission_type = submission_json["submission_type_id"]&.to_i
post.ib_detail_raw["submission_json"] = submission_json
if post.changed? || post.files.count != post.num_files ||
post.creator.avatar_url_str.blank?
@changed_posts << post
end
post.save! if post.changed?
is_new_post
end
# Update a user from a submission JSON. Submission JSONs are returned from
# the API search endpoint, which contains a shallow version of user data.
#
# Expected information from the endpoint (see fixture api_search.json):
# - username
# - user_id
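#
# e.g. (abbreviated from the gallery fixture): { "user_id" => "26540", "username" => "Zaush" }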
def upsert_user_from_submission_json!(submission_json)
ib_user_id = submission_json["user_id"]&.to_i
if user = @users_by_ib_user_id[ib_user_id]
return user
end
raise "ib_user_id is blank" if ib_user_id.blank?
user = Domain::Inkbunny::User.find_or_initialize_by(ib_user_id: ib_user_id)
@users_by_ib_user_id[ib_user_id] = user
user.name = submission_json["username"]
user.save! if user.changed?
user
end
end

View File

@@ -1,28 +0,0 @@
module Domain::Inkbunny::Job::JobHelper
def self.find_or_create_post_from_submission_json(submission_json)
ib_post_id = submission_json["submission_id"]&.to_i
raise "ib_post_id is blank" if ib_post_id.blank?
post =
Domain::Inkbunny::Post.includes(:creator).find_or_initialize_by(
ib_post_id: ib_post_id,
)
creator = find_or_create_user_from_submission_json(submission_json)
if post.creator && post.creator.ib_user_id != creator.ib_user_id
raise "post.creator.ib_user_id != creator.ib_user_id"
end
post.creator = creator
post.save! if post.changed?
post
end
def self.find_or_create_user_from_submission_json(submission_json)
ib_user_id = submission_json["user_id"]&.to_i
raise "ib_user_id is blank" if ib_user_id.blank?
user = Domain::Inkbunny::User.find_or_initialize_by(ib_user_id: ib_user_id)
user.name = submission_json["username"]
user.save! if user.changed?
user
end
end

View File

@@ -1,61 +1,72 @@
module Domain::Inkbunny::Job
class LatestPostsJob < Base
API_SEARCH_URL =
"https://inkbunny.net/api_search.php?orderby=create_datetime&keywords=no&title=no&description=no"
def perform(args)
caused_by_entry = args[:caused_by_entry]
@api_search_response =
http_client.post(API_SEARCH_URL, caused_by_entry: caused_by_entry)
processor = ApiSearchPageProcessor.new
first_log_entry = nil
rid = nil
page = 1
loop_count = 0
if @api_search_response.status_code != 200
fatal_error("api_search failed: #{@api_search_response.status_code}")
while true
loop_count += 1
if loop_count > ApiSearchPageProcessor::MAX_LOOP_COUNT
raise("loop_count: #{loop_count}")
end
url = ApiSearchPageProcessor.build_api_search_url(rid: rid, page: page)
response =
http_client.post(
url,
caused_by_entry: first_log_entry || caused_by_entry,
)
first_log_entry ||= response.log_entry
if response.status_code != 200
fatal_error("api_search failed: #{response.status_code}")
end
result = processor.process!(JSON.parse(response.body))
num_new_posts = result[:num_new_posts]
logger.info(
[
"[rid: #{rid}]",
"[page #{page}]",
"[new posts: #{num_new_posts}]",
"[total changed posts: #{result[:num_total_changed_posts]}]",
"[total changed users: #{result[:num_total_changed_users]}]",
"[total posts: #{result[:num_total_posts]}]",
"[total users: #{result[:num_total_users]}]",
].join(" "),
)
if num_new_posts < ApiSearchPageProcessor::SUBMISSIONS_PER_PAGE
logger.info("[no new posts, stopping]")
break
end
rid ||= result[:rid] || raise("no rid")
page += 1
end
api_search_json = JSON.parse(@api_search_response.body)
ib_submission_jsons = api_search_json["submissions"]
@need_deep_update_ib_post_ids = []
ib_submission_jsons.each do |submission_json|
shallow_update_post!(submission_json)
processor.all_users.each do |user|
if user.due_for_gallery_scan?
defer_job(
Domain::Inkbunny::Job::UserGalleryJob,
{ user: user, caused_by_entry: first_log_entry },
)
end
end
if @need_deep_update_ib_post_ids.any?
posts_to_update = processor.all_posts.reject(&:deep_updated_at)
if posts_to_update.any?
defer_job(
Domain::Inkbunny::Job::UpdatePostsJob,
{
ib_post_ids: @need_deep_update_ib_post_ids,
caused_by_entry: @api_search_response.log_entry,
ib_post_ids: posts_to_update.map(&:ib_post_id),
caused_by_entry: first_log_entry,
},
)
end
end
private
def shallow_update_post!(submission_json)
post =
Domain::Inkbunny::Job::JobHelper.find_or_create_post_from_submission_json(
submission_json,
)
post.shallow_updated_at = Time.now
post.title = submission_json["title"]
post.posted_at = Time.parse submission_json["create_datetime"]
post.last_file_updated_at =
Time.parse submission_json["last_file_update_datetime"]
post.num_files = submission_json["pagecount"]&.to_i
post.rating = submission_json["rating_id"]&.to_i
post.submission_type = submission_json["submission_type_id"]&.to_i
post.ib_detail_raw["submission_json"] = submission_json
if post.last_file_updated_at_changed? || post.num_files_changed? ||
post.files.count != post.num_files ||
post.creator.avatar_url_str.blank?
@need_deep_update_ib_post_ids << post.ib_post_id
end
post.save!
end
end
end

View File

@@ -2,23 +2,23 @@ module Domain::Inkbunny::Job
class UpdatePostsJob < Base
def perform(args)
@caused_by_entry = args[:caused_by_entry]
@ib_post_ids = args[:ib_post_ids]
ib_post_ids = args[:ib_post_ids]
if @ib_post_ids.empty?
if ib_post_ids.empty?
logger.info "empty ib_post_ids"
return
end
@ib_posts =
Domain::Inkbunny::Post
.where(ib_post_id: @ib_post_ids)
.includes(:files, :creator)
.index_by(&:ib_post_id)
ib_post_ids.each_slice(100) do |ib_post_ids_chunk|
process_ib_post_ids_chunk(ib_post_ids_chunk)
end
end
ids_list = @ib_posts.keys.join(",")
def process_ib_post_ids_chunk(ib_post_ids_chunk)
ib_post_ids_list = ib_post_ids_chunk.join(",")
url =
"https://inkbunny.net/api_submissions.php?" +
"submission_ids=#{ids_list}" +
"submission_ids=#{ib_post_ids_list}" +
"&show_description=yes&show_writing=yes&show_pools=yes"
@api_submissions_response =
http_client.get(url, caused_by_entry: @caused_by_entry)
@@ -33,13 +33,19 @@ module Domain::Inkbunny::Job
logger.info("api_submissions page has #{submissions.size} posts")
submissions.each do |submission_json|
Domain::Inkbunny::Post.transaction do
deep_update_post_from_submission_json(submission_json)
deep_update_post_from_submission_json(
submission_json,
caused_by_entry: @log_entry,
)
end
end
end
def deep_update_post_from_submission_json(submission_json)
post = JobHelper.find_or_create_post_from_submission_json(submission_json)
def deep_update_post_from_submission_json(submission_json, caused_by_entry:)
post =
Domain::Inkbunny::Post.find_by!(
ib_post_id: submission_json["submission_id"],
)
logger.info "deep update post #{post.ib_post_id.to_s.bold}"
post.deep_updated_at = Time.now
post.description = submission_json["description"]
@@ -58,7 +64,7 @@ module Domain::Inkbunny::Job
logger.info "avatar url changed, enqueuing download for user #{user.name}"
defer_job(
Domain::Inkbunny::Job::UserAvatarJob,
{ user: user, caused_by_entry: @log_entry },
{ user: user, caused_by_entry: caused_by_entry },
)
end
user.save!
@@ -96,7 +102,7 @@ module Domain::Inkbunny::Job
defer_job(
Domain::Inkbunny::Job::FileJob,
{ file: file, caused_by_entry: @log_entry },
{ file: file, caused_by_entry: caused_by_entry },
)
end
post.save!

View File

@@ -1,7 +1,6 @@
module Domain::Inkbunny::Job
class UserAvatarJob < Base
queue_as :static_file
ignore_signature_args :caused_by_entry
def perform(args)
@user = args[:user] || raise("user must exist")

View File

@@ -0,0 +1,80 @@
module Domain::Inkbunny::Job
class UserGalleryJob < Base
def perform(args)
user = args[:user] || raise("user must exist")
caused_by_entry = args[:caused_by_entry]
logger.prefix = "[#{user.name.bold} / #{user.ib_user_id.to_s.bold}]"
if user.scanned_gallery_at&.after?(1.week.ago)
logger.warn(
"gallery scanned #{time_ago_in_words(user.scanned_gallery_at)}, skipping",
)
return
end
processor = ApiSearchPageProcessor.new
first_log_entry = nil
rid = nil
page = 1
loop_count = 0
while true
loop_count += 1
if loop_count > ApiSearchPageProcessor::MAX_LOOP_COUNT
raise("loop_count: #{loop_count}")
end
url =
ApiSearchPageProcessor.build_api_search_url(
ib_user_id: user.ib_user_id,
rid: rid,
page: page,
)
response =
http_client.post(
url,
caused_by_entry: first_log_entry || caused_by_entry,
)
first_log_entry ||= response.log_entry
if response.status_code != 200
fatal_error("api_search failed: #{response.status_code}")
end
result = processor.process!(JSON.parse(response.body))
num_new_posts = result[:num_new_posts]
logger.info(
[
"[rid: #{rid}]",
"[page: #{page}]",
"[new posts: #{num_new_posts}]",
"[total new posts: #{result[:num_total_new_posts]}]",
"[total changed posts: #{result[:num_total_changed_posts]}]",
"[total posts: #{result[:num_total_posts]}]",
].join(" "),
)
if user.scanned_gallery_at.present? && num_new_posts == 0
logger.info("[no new posts, stopping]")
break
end
rid = result[:rid] || raise("no rid")
break if result[:num_pages] == page
page += 1
end
logger.info("[total new posts: #{result[:num_total_new_posts]}]")
user.scanned_gallery_at = Time.current
user.save!
if processor.changed_posts.any?
defer_job(
Domain::Inkbunny::Job::UpdatePostsJob,
{
ib_post_ids: processor.changed_posts.map(&:ib_post_id),
caused_by_entry: first_log_entry,
},
)
end
end
end
end
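
For reference, a minimal enqueue mirroring the arguments used in the new UserGalleryJob spec further below (assumes `user` is an existing Domain::Inkbunny::User record):

Domain::Inkbunny::Job::UserGalleryJob.perform_later(
  { user: user, caused_by_entry: nil },
)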

View File

@@ -1,6 +1,5 @@
class Domain::Twitter::Job::MediaJob < Domain::Twitter::Job::TwitterJobBase
queue_as :static_file
ignore_signature_args :caused_by_entry
def perform(args)
logger.level = :warn

View File

@@ -1,4 +1,6 @@
class Scraper::JobBase < ApplicationJob
ignore_signature_args :caused_by_entry
class JobError < RuntimeError
end
@@ -27,7 +29,14 @@ class Scraper::JobBase < ApplicationJob
first_argument = arguments || []
first_argument = first_argument[0] || {}
ignore_signature_args = self.class.ignore_signature_args
# collect all ignore_signature_args from all superclasses
ignore_signature_args = []
target = self.class
while target.respond_to?(:ignore_signature_args)
ignore_signature_args += target.ignore_signature_args
target = target.superclass
end
ignore_signature_args << :_aj_symbol_keys
ignore_signature_args << :_aj_ruby2_keywords
sig_arguments =
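
The practical effect, as a hedged sketch (the subclass name and the extra :force_scan argument below are hypothetical, not from this commit):

# With the declaration on Scraper::JobBase and the superclass walk above,
# a job only declares the extra args it wants ignored; the base class's
# entries are picked up automatically when the signature is computed.
class ExampleJob < Scraper::JobBase
  ignore_signature_args :force_scan
end

# Collected ignore list for ExampleJob (order follows the walk, with the
# ActiveJob bookkeeping keys appended at the end):
#   [:force_scan, :caused_by_entry, :_aj_symbol_keys, :_aj_ruby2_keywords]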

View File

@@ -20,12 +20,17 @@ class Domain::Inkbunny::User < ReduxApplicationRecord
validates_presence_of :ib_user_id, :name
enum :state, %i[ok error]
enum :avatar_state, %i[ok not_found error], prefix: :avatar
after_initialize do
self.state ||= :ok
self.state_detail ||= {}
self.avatar_state_detail ||= {}
end
def due_for_gallery_scan?
scanned_gallery_at.blank? || scanned_gallery_at < 1.month.ago
end
def to_param
name
end

View File

@@ -0,0 +1,5 @@
class AddScannedGalleryAtToInkbunnyUsers < ActiveRecord::Migration[7.2]
def change
add_column :domain_inkbunny_users, :scanned_gallery_at, :datetime
end
end

db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.2].define(version: 2024_12_30_220636) do
ActiveRecord::Schema[7.2].define(version: 2024_12_31_061234) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_prewarm"
enable_extension "pg_stat_statements"
@@ -1544,6 +1544,7 @@ ActiveRecord::Schema[7.2].define(version: 2024_12_30_220636) do
t.datetime "avatar_downloaded_at"
t.integer "avatar_state"
t.jsonb "avatar_state_detail", default: {}, null: false
t.datetime "scanned_gallery_at"
t.index ["ib_user_id"], name: "index_domain_inkbunny_users_on_ib_user_id", unique: true
end

View File

@@ -6,20 +6,20 @@ describe Domain::E621::Job::PostsIndexJob do
it "works" do
file = create(:http_log_entry)
mock_log_entries =
SpecUtil.init_http_client_mock(
http_client_mock,
[
{
uri: "https://e621.net/posts.json",
status_code: 200,
content_type: "application/json; charset=utf-8",
contents:
SpecUtil.read_fixture_file("domain/e621/job/posts_index_1.json"),
caused_by_entry: file,
},
],
)
SpecUtil.init_http_client_mock(
http_client_mock,
[
{
uri: "https://e621.net/posts.json",
status_code: 200,
content_type: "application/json; charset=utf-8",
contents:
SpecUtil.read_fixture_file("domain/e621/job/posts_index_1.json"),
caused_by_entry: file,
},
],
)
described_class.perform_now({ caused_by_entry: file })

View File

@@ -0,0 +1,147 @@
require "rails_helper"
describe Domain::Inkbunny::Job::LatestPostsJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
let(:api_search_url) do
"https://inkbunny.net/api_search.php?submissions_per_page=100&page=1&get_rid=yes&orderby=create_datetime&keywords=no"
end
context "the files do not change in the response" do
let! :log_entries do
SpecUtil.init_http_client_mock(
http_client_mock,
[
{
method: :post,
uri: api_search_url,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file("domain/inkbunny/job/api_search.json"),
},
],
)
end
it "creates posts" do
expect { perform_now({}) }.to(
change(Domain::Inkbunny::Post, :count)
.by(3)
.and(change(Domain::Inkbunny::User, :count).by(3))
.and(change(Domain::Inkbunny::File, :count).by(0)),
)
user_thendyart = Domain::Inkbunny::User.find_by!(ib_user_id: 941_565)
expect(user_thendyart.name).to eq("ThendyArt")
user_seff = Domain::Inkbunny::User.find_by!(ib_user_id: 229_331)
expect(user_seff.name).to eq("Seff")
expect(user_seff.avatar_url_str).to be_nil
post_3104202 = Domain::Inkbunny::Post.find_by!(ib_post_id: 3_104_202)
expect(post_3104202.title).to eq("Phantom Touch - Page 25")
expect(post_3104202.posted_at).to eq(
Time.parse("2023-08-27 21:31:40.365597+02"),
)
expect(post_3104202.creator).to eq(user_thendyart)
expect(post_3104202.last_file_updated_at).to eq(
Time.parse("2023-08-27 21:30:06.222262+02"),
)
expect(post_3104202.num_files).to eq(1)
expect(post_3104202.rating).to eq("adult")
expect(post_3104202.submission_type).to eq("comic")
expect(post_3104202.shallow_updated_at).to be_within(1.second).of(
Time.now,
)
expect(post_3104202.deep_updated_at).to be_nil
user_soulcentinel = Domain::Inkbunny::User.find_by!(ib_user_id: 349_747)
expect(user_soulcentinel.scanned_gallery_at).to be_nil
expect(
SpecUtil.enqueued_job_args(Domain::Inkbunny::Job::UpdatePostsJob),
).to match(
[
{
ib_post_ids: [3_104_202, 3_104_200, 3_104_197],
caused_by_entry: log_entries[0],
},
],
)
expect(
SpecUtil.enqueued_job_args(Domain::Inkbunny::Job::UserGalleryJob),
).to match(
[
{ user: user_thendyart, caused_by_entry: log_entries[0] },
{ user: user_seff, caused_by_entry: log_entries[0] },
{ user: user_soulcentinel, caused_by_entry: log_entries[0] },
],
)
end
end
context "an existing file changed in the response" do
let! :log_entries do
SpecUtil.init_http_client_mock(
http_client_mock,
[
{
method: :post,
uri: api_search_url,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file(
"domain/inkbunny/job/api_search_1047334_before.json",
),
},
{
method: :post,
uri: api_search_url,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file(
"domain/inkbunny/job/api_search_1047334_after.json",
),
},
],
)
end
it "updates posts and files" do
expect { perform_now({}) }.to(
change(Domain::Inkbunny::Post, :count).by(1).and(
change(Domain::Inkbunny::User, :count).by(1),
),
)
post_1047334 = Domain::Inkbunny::Post.find_by!(ib_post_id: 1_047_334)
expect(post_1047334.title).to eq("Yellow Snake")
expect(post_1047334.last_file_updated_at).to eq(
Time.parse("2016-03-13 22:18:52.32319+01"),
)
expect(
SpecUtil.enqueued_job_args(Domain::Inkbunny::Job::UpdatePostsJob),
).to match([including(ib_post_ids: [1_047_334])])
SpecUtil.clear_enqueued_jobs!(Domain::Inkbunny::Job::UpdatePostsJob)
# second perform should update the post
expect { perform_now({}) }.to(
change(Domain::Inkbunny::Post, :count)
.by(0)
.and(change(Domain::Inkbunny::User, :count).by(0))
.and(change(Domain::Inkbunny::File, :count).by(0)),
)
post_1047334.reload
expect(post_1047334.title).to eq("How to Photograph Snakes")
expect(post_1047334.last_file_updated_at).to eq(
Time.parse("2023-09-14 19:07:45.735562+02"),
)
expect(
SpecUtil.enqueued_job_args(Domain::Inkbunny::Job::UpdatePostsJob),
).to match([including(ib_post_ids: [1_047_334])])
end
end
end

View File

@@ -0,0 +1,72 @@
require "rails_helper"
RSpec.describe Domain::Inkbunny::Job::UserGalleryJob do
let!(:user) do
create(:domain_inkbunny_user, name: "the_user", ib_user_id: 26_540)
end
let(:args) { { user: user, caused_by_entry: nil } }
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
describe "#perform" do
it "skips if user already scanned recently" do
at = 1.hour.ago
user.update!(scanned_gallery_at: at)
perform_now(args)
expect(user.reload.scanned_gallery_at).to be_within(1.second).of(at)
end
context "when fetching posts" do
before do
SpecUtil.init_http_client_mock(
http_client_mock,
[
{
uri:
"https://inkbunny.net/api_search.php?submissions_per_page=100&page=1&get_rid=yes&orderby=create_datetime&user_id=26540&keywords=no",
status_code: 200,
method: :post,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file(
"domain/inkbunny/job/api_search_zaush_gallery_page_1.json",
),
caused_by_entry: nil,
},
{
uri:
"https://inkbunny.net/api_search.php?submissions_per_page=100&page=2&rid=8abcdfca27",
status_code: 200,
method: :post,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file(
"domain/inkbunny/job/api_search_zaush_gallery_page_2.json",
),
caused_by_entry_idx: 0,
},
],
)
end
it "fetches all pages and enqueues update jobs" do
expect { perform_now(args) }.to(
change(Domain::Inkbunny::Post, :count)
.by(4)
.and(change(Domain::Inkbunny::File, :count).by(0))
.and(change(Domain::Inkbunny::User, :count).by(0)),
)
user.reload
expect(user.name).to eq("Zaush")
expect(user.scanned_gallery_at).to be_present
expect(user.posts.count).to eq(4)
post_3507105 = user.posts.find_by(ib_post_id: 3_507_105)
expect(post_3507105).to be_present
expect(post_3507105.num_files).to eq(5)
expect(post_3507105.files.count).to eq(0)
end
end
end
end

View File

@@ -1,226 +0,0 @@
require "rails_helper"
describe Domain::Inkbunny::Job::LatestPostsJob do
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
let(:api_search_url) do
"https://inkbunny.net/api_search.php?orderby=create_datetime&keywords=no&title=no&description=no"
end
let(:api_submissions_url) do
"https://inkbunny.net/api_submissions.php?submission_ids=3104202,3104200,3104197&show_description=yes&show_writing=yes&show_pools=yes"
end
let(:api_submissions_1047334_url) do
"https://inkbunny.net/api_submissions.php?submission_ids=1047334&show_description=yes&show_writing=yes&show_pools=yes"
end
context "the files do not change in the response" do
let! :log_entries do
SpecUtil.init_http_client_mock(
http_client_mock,
[
{
method: :post,
uri: api_search_url,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file("domain/inkbunny/job/api_search.json"),
},
# {
# method: :get,
# uri: api_submissions_url,
# content_type: "application/json",
# contents:
# SpecUtil.read_fixture_file(
# "domain/inkbunny/job/api_submissions.json",
# ),
# caused_by_entry_idx: 0,
# },
# # same as the first, should not update or touch any posts
# {
# method: :post,
# uri: api_search_url,
# content_type: "application/json",
# contents:
# SpecUtil.read_fixture_file("domain/inkbunny/job/api_search.json"),
# },
],
)
end
it "creates posts" do
expect { perform_now({}) }.to(
change(Domain::Inkbunny::Post, :count).by(3).and(
change(Domain::Inkbunny::User, :count).by(3),
),
)
user_thendyart = Domain::Inkbunny::User.find_by!(ib_user_id: 941_565)
expect(user_thendyart.name).to eq("ThendyArt")
user_seff = Domain::Inkbunny::User.find_by!(ib_user_id: 229_331)
expect(user_seff.name).to eq("Seff")
expect(user_seff.avatar_url_str).to be_nil
# this gets populated in the update_posts job
# expect(user_seff.avatar_url_str).to eq(
# "https://us.ib.metapix.net/usericons/large/176/176443_Seff_seffpfp.png",
# )
post_3104202 = Domain::Inkbunny::Post.find_by!(ib_post_id: 3_104_202)
expect(post_3104202.title).to eq("Phantom Touch - Page 25")
expect(post_3104202.posted_at).to eq(
Time.parse("2023-08-27 21:31:40.365597+02"),
)
expect(post_3104202.creator).to eq(user_thendyart)
expect(post_3104202.last_file_updated_at).to eq(
Time.parse("2023-08-27 21:30:06.222262+02"),
)
expect(post_3104202.num_files).to eq(1)
expect(post_3104202.rating).to eq("adult")
expect(post_3104202.submission_type).to eq("comic")
expect(post_3104202.shallow_updated_at).to be_within(1.second).of(
Time.now,
)
expect(post_3104202.deep_updated_at).to be_nil
update_post_jobs =
SpecUtil.enqueued_jobs(Domain::Inkbunny::Job::UpdatePostsJob)
expect(update_post_jobs.size).to eq(1)
expect(update_post_jobs[0][:args][0][:ib_post_ids]).to eq(
[3_104_202, 3_104_200, 3_104_197],
)
expect(update_post_jobs[0][:args][0][:caused_by_entry]).to eq(
log_entries[0],
)
# expect(post_3104202.files.count).to eq(1)
# file_4652537 = post_3104202.files.first
# expect(file_4652537.ib_file_id).to eq(4_652_537)
# expect(file_4652537.file_order).to eq(0)
# expect(file_4652537.md5_initial).to eq("fbeb553c483a346108beeada93d90086")
# expect(file_4652537.md5_full).to eq("15eea2648c8afaee1fef970befb28b24")
# expect(file_4652537.url_str).to eq(
# "https://us.ib.metapix.net/files/full/4652/4652537_ThendyArt_pt_pg_25.jpg",
# )
# post_3104200 = Domain::Inkbunny::Post.find_by!(ib_post_id: 3_104_200)
# expect(post_3104200.creator).to eq(user_seff)
# expect(post_3104200.title).to eq("Camp Pines Sketch Dump (Aug 2023)")
# expect(post_3104200.description).to match(/Not sure how canon/)
# expect(post_3104200.num_files).to eq(4)
# # should enqueue file download jobs as all are new
# file_jobs = SpecUtil.enqueued_jobs(Domain::Inkbunny::Job::FileJob)
# expect(file_jobs.length).to eq(6)
# expect(
# file_jobs.map { |job| job[:args][0][:file].ib_file_id }.sort,
# ).to eq(
# [4_652_528, 4_652_530, 4_652_531, 4_652_534, 4_652_535, 4_652_537],
# )
# file_jobs.each do |job|
# expect(job[:args][0][:caused_by_entry]).to eq(log_entries[1])
# end
# # perform another scan, nothing should change
# expect { perform_now({}) }.to(
# change(Domain::Inkbunny::Post, :count)
# .by(0)
# .and(change(Domain::Inkbunny::File, :count).by(0))
# .and(change(Domain::Inkbunny::User, :count).by(0)),
# )
# expect(
# SpecUtil.enqueued_jobs(Domain::Inkbunny::Job::FileJob).length,
# ).to eq(file_jobs.length)
end
end
context "an existing file changed in the response" do
let! :log_entries do
SpecUtil.init_http_client_mock(
http_client_mock,
[
{
method: :post,
uri: api_search_url,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file(
"domain/inkbunny/job/api_search_1047334_before.json",
),
},
# {
# method: :get,
# uri: api_submissions_1047334_url,
# content_type: "application/json",
# contents:
# SpecUtil.read_fixture_file(
# "domain/inkbunny/job/api_submissions_1047334_before.json",
# ),
# caused_by_entry_idx: 0,
# },
{
method: :post,
uri: api_search_url,
content_type: "application/json",
contents:
SpecUtil.read_fixture_file(
"domain/inkbunny/job/api_search_1047334_after.json",
),
},
# {
# method: :get,
# uri: api_submissions_1047334_url,
# content_type: "application/json",
# contents:
# SpecUtil.read_fixture_file(
# "domain/inkbunny/job/api_submissions_1047334_after.json",
# ),
# caused_by_entry_idx: 1,
# },
],
)
end
it "updates posts and files" do
expect { perform_now({}) }.to(
change(Domain::Inkbunny::Post, :count).by(1).and(
change(Domain::Inkbunny::User, :count).by(1),
),
)
post_1047334 = Domain::Inkbunny::Post.find_by!(ib_post_id: 1_047_334)
expect(post_1047334.title).to eq("New Submission")
# file_1445274 = post_1047334.files.find_by!(ib_file_id: 1_445_274)
# expect(file_1445274.md5_initial).to eq("0127e88651e73140718f3b8f7f2037d5")
# expect(file_1445274.md5_full).to eq("aa0e22f86a9c345ead2bd711a1c91986")
# file_jobs = SpecUtil.enqueued_jobs(Domain::Inkbunny::Job::FileJob)
# expect(file_jobs.size).to eq(1)
# SpecUtil.clear_enqueued_jobs!(Domain::Inkbunny::Job::FileJob)
update_post_jobs =
SpecUtil.enqueued_jobs(Domain::Inkbunny::Job::UpdatePostsJob)
expect(update_post_jobs.size).to eq(1)
expect(update_post_jobs[0][:args][0][:ib_post_ids]).to eq([1_047_334])
SpecUtil.clear_enqueued_jobs!(Domain::Inkbunny::Job::UpdatePostsJob)
# second perform should create the new file
expect { perform_now({}) }.to(
change(Domain::Inkbunny::Post, :count).by(0).and(
change(Domain::Inkbunny::User, :count).by(0),
),
)
post_1047334.reload
# expect(post_1047334.files.count).to eq(2)
# file_4680214 = post_1047334.files.find_by!(ib_file_id: 4_680_214)
# expect(file_4680214.ib_file_id).to eq(4_680_214)
# expect(file_4680214.md5_initial).to eq("9fbfbdf3cc6d8b3538b7edbfe36bde8c")
# expect(file_4680214.md5_full).to eq("d2e30d953f4785e22c3d9c722249a974")
update_post_jobs =
SpecUtil.enqueued_jobs(Domain::Inkbunny::Job::UpdatePostsJob)
expect(update_post_jobs.size).to eq(1)
expect(update_post_jobs[0][:args][0][:ib_post_ids]).to eq([1_047_334])
end
end
end

View File

@@ -26,7 +26,7 @@
"thumb_huge_noncustom_x": "300",
"thumb_huge_noncustom_y": "225",
"file_name": "4680214_zzreg_how-to-photograph-snakes-15-1200x900-cropped.jpg",
"title": "New Submission",
"title": "How to Photograph Snakes",
"deleted": "f",
"public": "f",
"mimetype": "image/jpeg",

View File

@@ -26,7 +26,7 @@
"thumb_huge_noncustom_x": "300",
"thumb_huge_noncustom_y": "225",
"file_name": "1445274_zzreg_sname-yellow-small-border.png",
"title": "New Submission",
"title": "Yellow Snake",
"deleted": "f",
"public": "f",
"mimetype": "image/jpeg",

View File

@@ -0,0 +1,182 @@
{
"sid": "9B5BPQqTZ,Mj-nYbL2tuIPoO,b",
"results_count_all": "316",
"results_count_thispage": 2,
"pages_count": 2,
"page": 2,
"user_location": "",
"rid": "8abcdfca27",
"rid_ttl": "15 minutes",
"search_params": [
{
"param_name": "field_join_type",
"param_value": "or"
},
{
"param_name": "text",
"param_value": ""
},
{
"param_name": "string_join_type",
"param_value": "and"
},
{
"param_name": "keywords",
"param_value": "yes"
},
{
"param_name": "keyword_id",
"param_value": false
},
{
"param_name": "title",
"param_value": "no"
},
{
"param_name": "description",
"param_value": "no"
},
{
"param_name": "md5",
"param_value": "no"
},
{
"param_name": "username",
"param_value": "zaush"
},
{
"param_name": "user_id",
"param_value": ""
},
{
"param_name": "favs_user_id",
"param_value": ""
},
{
"param_name": "unread_submissions",
"param_value": "no"
},
{
"param_name": "type",
"param_value": ""
},
{
"param_name": "pool_id",
"param_value": ""
},
{
"param_name": "orderby",
"param_value": "create_datetime"
},
{
"param_name": "dayslimit",
"param_value": ""
},
{
"param_name": "random",
"param_value": "no"
},
{
"param_name": "scraps",
"param_value": "both"
},
{
"param_name": "count_limit",
"param_value": 18000
}
],
"submissions": [
{
"submission_id": "3507105",
"hidden": "f",
"username": "Zaush",
"user_id": "26540",
"create_datetime": "2024-12-24 10:49:53.744874+00",
"create_datetime_usertime": "24 Dec 2024 11:49 CET",
"last_file_update_datetime": "2024-12-24 10:48:04.614708+00",
"last_file_update_datetime_usertime": "24 Dec 2024 11:48 CET",
"thumbnail_url_huge_noncustom": "https://tx.ib.metapix.net/files/preview/5357/5357477_Zaush_dad-of-the-year-1-awa.jpg",
"thumbnail_url_large_noncustom": "https://tx.ib.metapix.net/thumbnails/large/5357/5357477_Zaush_dad-of-the-year-1-awa_noncustom.jpg",
"thumbnail_url_medium_noncustom": "https://tx.ib.metapix.net/thumbnails/medium/5357/5357477_Zaush_dad-of-the-year-1-awa_noncustom.jpg",
"thumb_medium_noncustom_x": "88",
"thumb_medium_noncustom_y": "120",
"thumb_large_noncustom_x": "146",
"thumb_large_noncustom_y": "200",
"thumb_huge_noncustom_x": "219",
"thumb_huge_noncustom_y": "300",
"file_name": "5357477_Zaush_dad-of-the-year-1-awa.jpg",
"title": "Dad of the Year",
"deleted": "f",
"public": "t",
"mimetype": "image/jpeg",
"pagecount": "5",
"rating_id": "2",
"rating_name": "Adult",
"file_url_full": "https://tx.ib.metapix.net/files/full/5357/5357477_Zaush_dad-of-the-year-1-awa.jpg",
"file_url_screen": "https://tx.ib.metapix.net/files/screen/5357/5357477_Zaush_dad-of-the-year-1-awa.jpg",
"file_url_preview": "https://tx.ib.metapix.net/files/preview/5357/5357477_Zaush_dad-of-the-year-1-awa.jpg",
"submission_type_id": "1",
"type_name": "Picture/Pinup",
"friends_only": "f",
"guest_block": "f",
"scraps": "f",
"latest_file_name": "5357481_Zaush_dad-of-the-year-5-awa.jpg",
"latest_mimetype": "image/jpeg",
"latest_thumbnail_url_huge_noncustom": "https://tx.ib.metapix.net/files/preview/5357/5357481_Zaush_dad-of-the-year-5-awa.jpg",
"latest_thumbnail_url_large_noncustom": "https://tx.ib.metapix.net/thumbnails/large/5357/5357481_Zaush_dad-of-the-year-5-awa_noncustom.jpg",
"latest_thumbnail_url_medium_noncustom": "https://tx.ib.metapix.net/thumbnails/medium/5357/5357481_Zaush_dad-of-the-year-5-awa_noncustom.jpg",
"latest_thumb_medium_noncustom_x": "120",
"latest_thumb_medium_noncustom_y": "75",
"latest_thumb_large_noncustom_x": "200",
"latest_thumb_large_noncustom_y": "125",
"latest_thumb_huge_noncustom_x": "300",
"latest_thumb_huge_noncustom_y": "187"
},
{
"submission_id": "3507104",
"hidden": "f",
"username": "Zaush",
"user_id": "26540",
"create_datetime": "2024-12-24 10:47:18.829492+00",
"create_datetime_usertime": "24 Dec 2024 11:47 CET",
"last_file_update_datetime": "2024-12-24 10:45:05.386632+00",
"last_file_update_datetime_usertime": "24 Dec 2024 11:45 CET",
"thumbnail_url_huge_noncustom": "https://tx.ib.metapix.net/files/preview/5357/5357474_Zaush_tommy-fathers-day-1-awa.jpg",
"thumbnail_url_large_noncustom": "https://tx.ib.metapix.net/thumbnails/large/5357/5357474_Zaush_tommy-fathers-day-1-awa_noncustom.jpg",
"thumbnail_url_medium_noncustom": "https://tx.ib.metapix.net/thumbnails/medium/5357/5357474_Zaush_tommy-fathers-day-1-awa_noncustom.jpg",
"thumb_medium_noncustom_x": "120",
"thumb_medium_noncustom_y": "92",
"thumb_large_noncustom_x": "200",
"thumb_large_noncustom_y": "153",
"thumb_huge_noncustom_x": "300",
"thumb_huge_noncustom_y": "229",
"file_name": "5357474_Zaush_tommy-fathers-day-1-awa.jpg",
"title": "Father's Day: Part 1",
"deleted": "f",
"public": "t",
"mimetype": "image/jpeg",
"pagecount": "3",
"rating_id": "1",
"rating_name": "Mature",
"file_url_full": "https://tx.ib.metapix.net/files/full/5357/5357474_Zaush_tommy-fathers-day-1-awa.jpg",
"file_url_screen": "https://tx.ib.metapix.net/files/screen/5357/5357474_Zaush_tommy-fathers-day-1-awa.jpg",
"file_url_preview": "https://tx.ib.metapix.net/files/preview/5357/5357474_Zaush_tommy-fathers-day-1-awa.jpg",
"submission_type_id": "1",
"type_name": "Picture/Pinup",
"friends_only": "f",
"guest_block": "f",
"scraps": "f",
"latest_file_name": "5357476_Zaush_tommy-fathers-day-1-postcard-nude-awa.jpg",
"latest_mimetype": "image/jpeg",
"latest_thumbnail_url_huge_noncustom": "https://tx.ib.metapix.net/files/preview/5357/5357476_Zaush_tommy-fathers-day-1-postcard-nude-awa.jpg",
"latest_thumbnail_url_large_noncustom": "https://tx.ib.metapix.net/thumbnails/large/5357/5357476_Zaush_tommy-fathers-day-1-postcard-nude-awa_noncustom.jpg",
"latest_thumbnail_url_medium_noncustom": "https://tx.ib.metapix.net/thumbnails/medium/5357/5357476_Zaush_tommy-fathers-day-1-postcard-nude-awa_noncustom.jpg",
"latest_thumb_medium_noncustom_x": "120",
"latest_thumb_medium_noncustom_y": "86",
"latest_thumb_large_noncustom_x": "200",
"latest_thumb_large_noncustom_y": "143",
"latest_thumb_huge_noncustom_x": "300",
"latest_thumb_huge_noncustom_y": "214"
}
]
}

View File

@@ -0,0 +1,160 @@
{
"sid": "9B5BPQqTZ,Mj-nYbL2tuIPoO,b",
"results_count_all": "316",
"results_count_thispage": 2,
"pages_count": 2,
"page": 1,
"user_location": "",
"rid": "8abcdfca27",
"rid_ttl": "15 minutes",
"search_params": [
{
"param_name": "field_join_type",
"param_value": "or"
},
{
"param_name": "text",
"param_value": ""
},
{
"param_name": "string_join_type",
"param_value": "and"
},
{
"param_name": "keywords",
"param_value": "yes"
},
{
"param_name": "keyword_id",
"param_value": false
},
{
"param_name": "title",
"param_value": "no"
},
{
"param_name": "description",
"param_value": "no"
},
{
"param_name": "md5",
"param_value": "no"
},
{
"param_name": "username",
"param_value": "zaush"
},
{
"param_name": "user_id",
"param_value": ""
},
{
"param_name": "favs_user_id",
"param_value": ""
},
{
"param_name": "unread_submissions",
"param_value": "no"
},
{
"param_name": "type",
"param_value": ""
},
{
"param_name": "pool_id",
"param_value": ""
},
{
"param_name": "orderby",
"param_value": "create_datetime"
},
{
"param_name": "dayslimit",
"param_value": ""
},
{
"param_name": "random",
"param_value": "no"
},
{
"param_name": "scraps",
"param_value": "both"
},
{
"param_name": "count_limit",
"param_value": 18000
}
],
"submissions": [
{
"submission_id": "3507108",
"hidden": "f",
"username": "Zaush",
"user_id": "26540",
"create_datetime": "2024-12-24 10:53:22.946706+00",
"create_datetime_usertime": "24 Dec 2024 11:53 CET",
"last_file_update_datetime": "2024-12-24 10:51:50.901031+00",
"last_file_update_datetime_usertime": "24 Dec 2024 11:51 CET",
"thumbnail_url_huge_noncustom": "https://tx.ib.metapix.net/files/preview/5357/5357484_Zaush_tommy-fathers-day-2alt-awa.jpg",
"thumbnail_url_large_noncustom": "https://tx.ib.metapix.net/thumbnails/large/5357/5357484_Zaush_tommy-fathers-day-2alt-awa_noncustom.jpg",
"thumbnail_url_medium_noncustom": "https://tx.ib.metapix.net/thumbnails/medium/5357/5357484_Zaush_tommy-fathers-day-2alt-awa_noncustom.jpg",
"thumb_medium_noncustom_x": "120",
"thumb_medium_noncustom_y": "98",
"thumb_large_noncustom_x": "200",
"thumb_large_noncustom_y": "163",
"thumb_huge_noncustom_x": "300",
"thumb_huge_noncustom_y": "244",
"file_name": "5357484_Zaush_tommy-fathers-day-2alt-awa.jpg",
"title": "Father's Day: Part 2",
"deleted": "f",
"public": "t",
"mimetype": "image/jpeg",
"pagecount": "1",
"rating_id": "2",
"rating_name": "Adult",
"file_url_full": "https://tx.ib.metapix.net/files/full/5357/5357484_Zaush_tommy-fathers-day-2alt-awa.jpg",
"file_url_screen": "https://tx.ib.metapix.net/files/screen/5357/5357484_Zaush_tommy-fathers-day-2alt-awa.jpg",
"file_url_preview": "https://tx.ib.metapix.net/files/preview/5357/5357484_Zaush_tommy-fathers-day-2alt-awa.jpg",
"submission_type_id": "1",
"type_name": "Picture/Pinup",
"friends_only": "f",
"guest_block": "f",
"scraps": "f"
},
{
"submission_id": "3507107",
"hidden": "f",
"username": "Zaush",
"user_id": "26540",
"create_datetime": "2024-12-24 10:51:33.642069+00",
"create_datetime_usertime": "24 Dec 2024 11:51 CET",
"last_file_update_datetime": "2024-12-24 10:50:12.682677+00",
"last_file_update_datetime_usertime": "24 Dec 2024 11:50 CET",
"thumbnail_url_huge_noncustom": "https://tx.ib.metapix.net/files/preview/5357/5357483_Zaush_zhen-captured-awa.jpg",
"thumbnail_url_large_noncustom": "https://tx.ib.metapix.net/thumbnails/large/5357/5357483_Zaush_zhen-captured-awa_noncustom.jpg",
"thumbnail_url_medium_noncustom": "https://tx.ib.metapix.net/thumbnails/medium/5357/5357483_Zaush_zhen-captured-awa_noncustom.jpg",
"thumb_medium_noncustom_x": "115",
"thumb_medium_noncustom_y": "120",
"thumb_large_noncustom_x": "191",
"thumb_large_noncustom_y": "200",
"thumb_huge_noncustom_x": "287",
"thumb_huge_noncustom_y": "300",
"file_name": "5357483_Zaush_zhen-captured-awa.jpg",
"title": "Zhen Captured",
"deleted": "f",
"public": "t",
"mimetype": "image/jpeg",
"pagecount": "1",
"rating_id": "2",
"rating_name": "Adult",
"file_url_full": "https://tx.ib.metapix.net/files/full/5357/5357483_Zaush_zhen-captured-awa.jpg",
"file_url_screen": "https://tx.ib.metapix.net/files/screen/5357/5357483_Zaush_zhen-captured-awa.jpg",
"file_url_preview": "https://tx.ib.metapix.net/files/preview/5357/5357483_Zaush_zhen-captured-awa.jpg",
"submission_type_id": "1",
"type_name": "Picture/Pinup",
"friends_only": "f",
"guest_block": "f",
"scraps": "f"
}
]
}