# typed: strict

# Scans a Sofurry user's gallery.
#
# For every media type the job fetches the user's gallery page to discover
# folders, then pages through the JSON browse API (per folder and per media
# type) to collect post metadata. Collected posts are written onto the user's
# post models, folder memberships are upserted, and follow-up jobs are enqueued
# for pending posts and for the user page when that scan is due.
class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
  self.default_priority = -20

  Folder = Domain::Sofurry::GalleryPageParser::Folder
  ShortGalleryEntry = Domain::Sofurry::GalleryPageParser::ShortGalleryEntry

  # Folders and short post entries parsed from a single gallery page.
  class GallerySummary < T::Struct
    const :folders, T::Set[Folder]
    const :posts, T::Set[ShortGalleryEntry]
  end

  # Metadata of one post as built from an item of the JSON browse API response.
  class GalleryEntry < T::Struct
    include T::Struct::ActsAsComparable

    const :sofurry_id, Integer
    const :title, String
    const :content_level, Integer
    const :media_type, Domain::Sofurry::GalleryPageParser::MediaType
    const :tags, T::Array[String]
    const :description, T.nilable(String)
    const :posted_at, Time
    # Log entry of the HTTP response this entry was parsed from.
    const :gallery_log_entry, T.nilable(HttpLogEntry)
  end

  sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
  def initialize(*args)
    super
    # URLs already requested by this job instance; used to stop paging loops
    # from fetching the same URL twice.
    @visited_urls = T.let(Set.new, T::Set[String])
  end

  # Job entry point: scans the gallery when forced or when the gallery scan is
  # due. The user record is saved in all cases (including on error), provided
  # it was resolved from the job arguments.
  sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
  def perform(args)
    user = user_from_args!
    logger.push_tags(make_arg_tag(user))
    logger.info(
      format_tags(make_arg_tag(user.gallery_scan, name: "gallery_scan")),
    )
    scan_gallery(user) if force_scan? || user.gallery_scan.due?
  ensure
    user.save! if user
  end

  private

  # Discovers folders and posts for `user`, persists them, and enqueues
  # follow-up jobs.
  sig { params(user: Domain::User::SofurryUser).returns(T.untyped) }
  def scan_gallery(user)
    user_id = T.must(user.sofurry_id)
    posts = T.let(Set.new, T::Set[GalleryEntry])
    media_types = Domain::Sofurry::GalleryPageParser::MediaType.values
    media_types_with_posts =
      T.let(Set.new, T::Set[Domain::Sofurry::GalleryPageParser::MediaType])

    existing_folders =
      user
        .folders
        .filter_map { |folder| (sfid = folder.sofurry_id) && [sfid, folder] }
        .to_h

    media_types.each do |media_type|
      gallery_summary = gather_gallery_summary(user, media_type)

      # Rename known folders, build models for newly discovered ones.
      gallery_summary.folders.each do |folder|
        if (existing = existing_folders[folder.sofurry_id])
          existing.name = folder.name
        else
          existing_folders[folder.sofurry_id] = user.folders.build(
            { sofurry_id: folder.sofurry_id, name: folder.name },
          )
        end
      end

      # Decided once per media type, independent of whether the page lists any
      # folders, so media types of users without folders are still scanned.
      if gallery_summary.posts.any?
        media_types_with_posts << media_type
      else
        logger.info(
          format_tags(make_tag("skip media type", media_type.serialize)),
        )
      end
    end

    existing_posts =
      user
        .posts
        .filter_map { |post| (sfid = post.sofurry_id) && [sfid, post] }
        .to_h
    # Snapshot of ids known before this scan; lets paging stop early.
    existing_post_ids = existing_posts.keys.to_set

    posts_by_folder =
      T.let(
        Hash.new,
        T::Hash[Domain::PostGroup::SofurryFolder, T::Set[GalleryEntry]],
      )

    user.folders.each do |folder|
      next unless folder.folder_scan.due?

      # The folder browse endpoint is addressed per media type, so each folder
      # is scanned once for every media type the user has posts in.
      # NOTE(review): assumes a folder cannot hold posts of a media type whose
      # gallery page is empty - confirm.
      folder_posts = T.let(Set.new, T::Set[GalleryEntry])
      media_types_with_posts.each do |media_type|
        folder_posts.merge(
          scan_gallery_posts(
            user_id,
            media_type,
            existing_post_ids,
            folder_id: folder.sofurry_id,
          ),
        )
      end

      posts_by_folder[folder] = folder_posts
      posts.merge(folder_posts)
      folder.scanned_folder_at = Time.now
    end

    logger.info(
      format_tags(
        make_tag(
          "media types with posts",
          media_types_with_posts.map(&:serialize),
        ),
      ),
    )

    media_types_with_posts.each do |media_type|
      posts.merge(scan_gallery_posts(user_id, media_type, existing_post_ids))
    end

    posts.each do |post|
      # Memoise freshly built models so a post seen by more than one scan
      # (e.g. in a folder and in the media type listing) is built only once.
      post_model =
        existing_posts[post.sofurry_id] ||=
          user.posts.build({ sofurry_id: post.sofurry_id })
      post_model.title = post.title
      post_model.tags_array = post.tags
      post_model.content_level = post.content_level
      post_model.description = post.description
      post_model.posted_at = post.posted_at.in_time_zone("UTC")
      post_model.media_type = post.media_type.serialize
      post_model.last_gallery_log_entry = post.gallery_log_entry
    end

    ReduxApplicationRecord.transaction do
      user.scanned_gallery_at = Time.now
      user.save!

      posts_by_sofurry_id =
        user.posts.map { |post| [post.sofurry_id, post] }.to_h

      folder_join_rows =
        posts_by_folder.flat_map do |folder, folder_posts|
          folder_posts.map do |post|
            {
              type: "Domain::PostGroupJoin::SofurryFolderJoin",
              group_id: T.must(folder.id),
              post_id: T.must(posts_by_sofurry_id[post.sofurry_id]&.id),
            }
          end
        end
      # A single upsert statement must not target the same
      # (group_id, post_id) pair twice.
      folder_join_rows = folder_join_rows.uniq

      unless folder_join_rows.empty?
        Domain::PostGroupJoin::SofurryFolderJoin.upsert_all(
          folder_join_rows,
          unique_by: %i[group_id post_id],
        )
      end
    end

    bulk_enqueue_jobs do
      user.posts.each do |post|
        if post.state_pending?
          defer_job(Domain::Sofurry::Job::ScanPostJob, { post: })
        end
      end

      if user.page_scan.due?
        defer_job(Domain::Sofurry::Job::ScanUserJob, { user: })
      end
    end
  end

  # Fetches the user's gallery page for `media_type` and returns the folders
  # and short post entries found on it. Also fills in `user.username` when it
  # is not set yet. Calls `fatal_error` on a non-200 response.
  sig do
    params(
      user: Domain::User::SofurryUser,
      media_type: Domain::Sofurry::GalleryPageParser::MediaType,
    ).returns(GallerySummary)
  end
  def gather_gallery_summary(user, media_type)
    user_sofurry_id = T.must(user.sofurry_id)
    response =
      http_client.get(
        "https://www.sofurry.com/browse/user/#{media_type.serialize}" +
          "?uid=#{user_sofurry_id}&#{media_type.serialize}-display=45",
      )
    if response.status_code != 200
      fatal_error("failed to get gallery: #{response.status_code}")
    end

    parser =
      Domain::Sofurry::GalleryPageParser.new(
        response.body,
        user_id: user_sofurry_id,
      )
    user.username ||= parser.username

    GallerySummary.new(
      folders: parser.folders.to_set,
      posts: parser.posts.to_set,
    )
  end

  # Pages through the JSON browse API for `media_type` (restricted to
  # `folder_id` when given) and returns every entry seen.
  #
  # Paging stops when a URL was already requested by this job, when a page
  # brings no ids that were not already seen in this call, when every id on
  # the page is in `existing_post_ids`, or when a page has fewer than 30
  # items. Calls `fatal_error` on a non-200 response.
  sig do
    params(
      user_id: Integer,
      media_type: Domain::Sofurry::GalleryPageParser::MediaType,
      existing_post_ids: T::Set[Integer],
      folder_id: T.nilable(Integer),
    ).returns(T::Set[GalleryEntry])
  end
  def scan_gallery_posts(user_id, media_type, existing_post_ids, folder_id: nil)
    posts = T.let(Set.new, T::Set[GalleryEntry])
    page = 1
    seen_post_ids = T.let(Set.new, T::Set[Integer])

    loop do
      url =
        if folder_id
          "https://api2.sofurry.com/browse/folder/#{media_type.serialize}" +
            "?by=#{user_id}&folder=#{folder_id}&#{media_type.serialize}-page=#{page}&format=json"
        else
          "https://api2.sofurry.com/browse/user/#{media_type.serialize}" +
            "?uid=#{user_id}&#{media_type.serialize}-page=#{page}&format=json"
        end

      # `add?` returns nil when the URL was already recorded.
      break unless @visited_urls.add?(url)

      response = http_client.get(url)
      if response.status_code != 200
        fatal_error("failed to get gallery: #{response.status_code}")
      end

      posts_json =
        T.cast(
          JSON.parse(response.body)["items"],
          T::Array[T::Hash[String, T.untyped]],
        )

      post_ids =
        posts_json
          .map { |post_json| T.cast(post_json["id"].to_i, Integer) }
          .to_set
      # An empty page, or one repeating only already-seen ids, ends the scan.
      break if post_ids.subset?(seen_post_ids)
      seen_post_ids.merge(post_ids)

      posts.merge(
        posts_json.map do |post_json|
          GalleryEntry.new(
            sofurry_id: post_json["id"]&.to_i,
            title: post_json["title"],
            tags: post_json["tags"].split(", ").map(&:strip),
            description: post_json["description"],
            content_level: post_json["contentLevel"].to_i,
            posted_at: Time.at(post_json["postTime"].to_i),
            media_type: media_type,
            gallery_log_entry: response.log_entry,
          )
        end,
      )

      # Everything on this page is already stored; older pages are skipped.
      break if post_ids.subset?(existing_post_ids)
      # NOTE(review): 30 is presumably the API's page size - confirm.
      break if posts_json.length < 30

      page += 1
    end

    posts
  end
end