275 lines
7.8 KiB
Ruby
275 lines
7.8 KiB
Ruby
# typed: strict
|
|
# Scans a SoFurry user's gallery: discovers folders and posts per media type,
# records them on the user's `folders` / `posts` associations, links posts to
# the folders they were found in, and enqueues follow-up scan jobs.
class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
  self.default_priority = -20

  Folder = Domain::Sofurry::GalleryPageParser::Folder
  ShortGalleryEntry = Domain::Sofurry::GalleryPageParser::ShortGalleryEntry

  # A listing page returning fewer items than this ends pagination.
  # NOTE(review): presumably the API's fixed page size -- confirm.
  API_PAGE_SIZE = T.let(30, Integer)

  # Folders and post stubs parsed from one HTML gallery page.
  class GallerySummary < T::Struct
    const :folders, T::Set[Folder]
    const :posts, T::Set[ShortGalleryEntry]
  end

  # One post as returned by the JSON listing endpoint.
  class GalleryEntry < T::Struct
    include T::Struct::ActsAsComparable

    const :sofurry_id, Integer
    const :title, String
    const :content_level, Integer
    const :media_type, Domain::Sofurry::GalleryPageParser::MediaType
    const :tags, T::Array[String]
    const :description, T.nilable(String)
    const :posted_at, Time
    # Log entry of the HTTP response the post was read from.
    const :gallery_log_entry, T.nilable(HttpLogEntry)
  end

  sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
  def initialize(*args)
    super
    # Listing URLs already fetched by this job instance; used to avoid
    # requesting the same page twice.
    @visited_urls = T.let(Set.new, T::Set[String])
  end

  # Job entry point. Scans the gallery when the scan is forced or due, and
  # saves the user afterwards whether or not the scan raised.
  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    user = user_from_args!
    logger.push_tags(make_arg_tag(user))
    logger.info(
      format_tags(make_arg_tag(user.gallery_scan, name: "gallery_scan")),
    )
    scan_gallery(user) if force_scan? || user.gallery_scan.due?
  ensure
    # `user` is nil here when `user_from_args!` itself raised.
    user&.save!
  end

  private

  # Runs the full gallery scan for `user`:
  #   1. fetch one summary page per media type to sync folders and learn
  #      which media types have any posts,
  #   2. page through every due folder and every media type with posts,
  #   3. copy the scraped attributes onto new or existing post models,
  #   4. persist everything and upsert the folder/post joins,
  #   5. enqueue follow-up jobs.
  sig { params(user: Domain::User::SofurryUser).returns(T.untyped) }
  def scan_gallery(user)
    user_id = T.must(user.sofurry_id)
    posts = T.let(Set.new, T::Set[GalleryEntry])
    media_types = Domain::Sofurry::GalleryPageParser::MediaType.values

    media_types_with_posts =
      T.let(Set.new, T::Set[Domain::Sofurry::GalleryPageParser::MediaType])

    existing_folders =
      user
        .folders
        .filter_map { |folder| (sfid = folder.sofurry_id) && [sfid, folder] }
        .to_h

    media_types.each do |media_type|
      gallery_summary = gather_gallery_summary(user, media_type)

      gallery_summary.folders.each do |folder|
        if (existing = existing_folders[folder.sofurry_id])
          existing.name = folder.name
        else
          existing_folders[folder.sofurry_id] = user.folders.build(
            { sofurry_id: folder.sofurry_id, name: folder.name },
          )
        end
      end

      # Decided once per media type, independent of the folder list, so users
      # without any folders still get their posts scanned.
      if gallery_summary.posts.any?
        media_types_with_posts << media_type
      else
        logger.info(
          format_tags(make_tag("skip media type", media_type.serialize)),
        )
      end
    end

    existing_posts =
      user
        .posts
        .filter_map { |post| (sfid = post.sofurry_id) && [sfid, post] }
        .to_h
    existing_post_ids = existing_posts.keys.to_set

    posts_by_folder =
      T.let(
        {},
        T::Hash[Domain::PostGroup::SofurryFolder, T::Set[GalleryEntry]],
      )

    user.folders.each do |folder|
      next unless folder.folder_scan.due?

      # Without a remote folder id the listing URL would fall back to the
      # user's whole gallery, so such folders cannot be scanned.
      folder_sofurry_id = folder.sofurry_id
      next unless folder_sofurry_id

      # The folder listing endpoint is per media type, so each media type
      # that has posts is queried for this folder.
      folder_posts = T.let(Set.new, T::Set[GalleryEntry])
      media_types_with_posts.each do |media_type|
        folder_posts.merge(
          scan_gallery_posts(
            user_id,
            media_type,
            existing_post_ids,
            folder_id: folder_sofurry_id,
          ),
        )
      end

      posts_by_folder[folder] = folder_posts
      posts.merge(folder_posts)
      folder.scanned_folder_at = Time.now
    end

    logger.info(
      format_tags(
        make_tag(
          "media types with posts",
          media_types_with_posts.map(&:serialize),
        ),
      ),
    )
    media_types_with_posts.each do |media_type|
      posts.merge(scan_gallery_posts(user_id, media_type, existing_post_ids))
    end

    posts.each do |post|
      post_model =
        existing_posts[post.sofurry_id] ||
          user.posts.build({ sofurry_id: post.sofurry_id })

      post_model.title = post.title
      post_model.tags_array = post.tags
      post_model.content_level = post.content_level
      post_model.description = post.description
      post_model.posted_at = post.posted_at.in_time_zone("UTC")
      post_model.media_type = post.media_type.serialize
      post_model.last_gallery_log_entry = post.gallery_log_entry
    end

    ReduxApplicationRecord.transaction do
      user.scanned_gallery_at = Time.now
      user.save!
      upsert_folder_joins(user, posts_by_folder)
    end

    enqueue_follow_up_jobs(user)
  end

  # Upserts one folder/post join row for every post found in a scanned folder.
  # Reads `folder.id` and post ids, so the caller must have saved `user`
  # (and with it the built folders/posts) beforehand.
  sig do
    params(
      user: Domain::User::SofurryUser,
      posts_by_folder:
        T::Hash[Domain::PostGroup::SofurryFolder, T::Set[GalleryEntry]],
    ).void
  end
  def upsert_folder_joins(user, posts_by_folder)
    posts_by_sofurry_id =
      user.posts.map { |post| [post.sofurry_id, post] }.to_h

    join_rows =
      posts_by_folder.flat_map do |folder, folder_entries|
        folder_entries.map do |entry|
          {
            type: "Domain::PostGroupJoin::SofurryFolderJoin",
            group_id: T.must(folder.id),
            post_id: T.must(posts_by_sofurry_id[entry.sofurry_id]&.id),
          }
        end
      end

    # `upsert_all` raises ArgumentError on an empty list on older Rails
    # versions; with nothing to write there is nothing to do.
    return if join_rows.empty?

    Domain::PostGroupJoin::SofurryFolderJoin.upsert_all(
      join_rows,
      unique_by: %i[group_id post_id],
    )
  end

  # Defers a ScanPostJob for every post of `user` in the pending state and a
  # ScanUserJob when the user's page scan is due.
  sig { params(user: Domain::User::SofurryUser).returns(T.untyped) }
  def enqueue_follow_up_jobs(user)
    bulk_enqueue_jobs do
      user.posts.each do |post|
        if post.state_pending?
          defer_job(Domain::Sofurry::Job::ScanPostJob, { post: })
        end
      end

      if user.page_scan.due?
        defer_job(Domain::Sofurry::Job::ScanUserJob, { user: })
      end
    end
  end

  # Fetches the HTML gallery page of `user` for one media type and returns the
  # folders and post stubs the parser finds on it. Also fills in
  # `user.username` from the page when it is not set yet.
  # Calls `fatal_error` on a non-200 response.
  sig do
    params(
      user: Domain::User::SofurryUser,
      media_type: Domain::Sofurry::GalleryPageParser::MediaType,
    ).returns(GallerySummary)
  end
  def gather_gallery_summary(user, media_type)
    user_sofurry_id = T.must(user.sofurry_id)
    response =
      http_client.get(
        "https://www.sofurry.com/browse/user/#{media_type.serialize}" +
          "?uid=#{user_sofurry_id}&#{media_type.serialize}-display=45",
      )

    if response.status_code != 200
      fatal_error("failed to get gallery: #{response.status_code}")
    end

    parser =
      Domain::Sofurry::GalleryPageParser.new(
        response.body,
        user_id: user_sofurry_id,
      )
    user.username ||= parser.username

    GallerySummary.new(
      folders: parser.folders.to_set,
      posts: parser.posts.to_set,
    )
  end

  # Pages through the JSON listing of a user's posts of one media type
  # (restricted to a folder when `folder_id` is given) and returns every
  # entry seen.
  #
  # Pagination stops when:
  #   * the page URL was already fetched by this job instance,
  #   * a page contains no ids that were not seen on earlier pages
  #     (this includes an empty page),
  #   * every id on the page is already in `existing_post_ids`
  #     (the entries of that page are still included in the result),
  #   * the page holds fewer than API_PAGE_SIZE items.
  #
  # Calls `fatal_error` on a non-200 response.
  sig do
    params(
      user_id: Integer,
      media_type: Domain::Sofurry::GalleryPageParser::MediaType,
      existing_post_ids: T::Set[Integer],
      folder_id: T.nilable(Integer),
    ).returns(T::Set[GalleryEntry])
  end
  def scan_gallery_posts(user_id, media_type, existing_post_ids, folder_id: nil)
    posts = T.let(Set.new, T::Set[GalleryEntry])
    page = 1
    seen_post_ids = T.let(Set.new, T::Set[Integer])

    loop do
      url = gallery_page_url(user_id, media_type, page, folder_id: folder_id)

      # Set#add? returns nil when the URL was already recorded.
      break unless @visited_urls.add?(url)

      response = http_client.get(url)

      if response.status_code != 200
        fatal_error("failed to get gallery: #{response.status_code}")
      end

      posts_json =
        T.cast(
          JSON.parse(response.body)["items"],
          T::Array[T::Hash[String, T.untyped]],
        )

      post_ids = posts_json.map { |post_json| post_json["id"].to_i }.to_set

      break if post_ids.subset?(seen_post_ids)
      seen_post_ids.merge(post_ids)

      posts.merge(
        posts_json.map do |post_json|
          GalleryEntry.new(
            sofurry_id: post_json["id"]&.to_i,
            title: post_json["title"],
            tags: post_json["tags"].split(", ").map(&:strip),
            description: post_json["description"],
            content_level: post_json["contentLevel"].to_i,
            posted_at: Time.at(post_json["postTime"].to_i),
            media_type: media_type,
            gallery_log_entry: response.log_entry,
          )
        end,
      )
      break if post_ids.subset?(existing_post_ids)
      break if posts_json.length < API_PAGE_SIZE
      page += 1
    end

    posts
  end

  # Builds the JSON listing URL for one page of a user's posts of the given
  # media type; uses the folder endpoint when `folder_id` is given.
  sig do
    params(
      user_id: Integer,
      media_type: Domain::Sofurry::GalleryPageParser::MediaType,
      page: Integer,
      folder_id: T.nilable(Integer),
    ).returns(String)
  end
  def gallery_page_url(user_id, media_type, page, folder_id: nil)
    kind = media_type.serialize
    if folder_id
      "https://api2.sofurry.com/browse/folder/#{kind}" +
        "?by=#{user_id}&folder=#{folder_id}&#{kind}-page=#{page}&format=json"
    else
      "https://api2.sofurry.com/browse/user/#{kind}" +
        "?uid=#{user_id}&#{kind}-page=#{page}&format=json"
    end
  end
end
|