SoFurry: WIP for now (SoFurry is currently down)
This commit is contained in:
3
Gemfile
3
Gemfile
@@ -1,8 +1,7 @@
|
||||
source "https://rubygems.org"
|
||||
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
||||
|
||||
ruby "3.2.6"
|
||||
# ruby "3.0.3"
|
||||
ruby "~> 3.2"
|
||||
|
||||
# Bundle edge Rails instead: gem "rails", github: "rails/rails", branch: "main"
|
||||
gem "rails", "~> 7.2"
|
||||
|
||||
6
TODO.md
6
TODO.md
@@ -39,3 +39,9 @@
|
||||
- [ ] Rich inline links to e621 e.g. https://refurrer.com/posts/fa@60070060
|
||||
- [ ] Find FaPost records that have favs recorded but no scan / file, and enqueue a scan for each
|
||||
- [x] Bunch of posts with empty responses: posts = Domain::Post.joins(files: :log_entry).where(files: { http_log_entries: { response_sha256: BlobFile::EMPTY_FILE_SHA256 }}).limit(10)
|
||||
- [ ] Create GlobalState entries for the last FA ID on the browse page; add a periodic scan that scans from the newest FA ID to the stored one
|
||||
- [ ] Add GlobalState entries for long-running backfill jobs, and automatically restart them if they fail
|
||||
- [ ] Add a flag that can be passed to jobs to log HTTP requests / responses to a directory, plus an HTTP mock helper that reads from that directory
|
||||
- [ ] Fix incorrect IP address for Cloudflare-proxied requests
|
||||
- [ ] SOCKS5 proxy for additional workers
|
||||
- [ ] Backup FA scraper using foxbot & g6jy5jkx466lrqojcngbnksugrcfxsl562bzuikrka5rv7srgguqbjid.onion
|
||||
|
||||
@@ -22,6 +22,12 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
||||
const :gallery_log_entry, T.nilable(HttpLogEntry)
|
||||
end
|
||||
|
||||
sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
|
||||
def initialize(*args)
|
||||
super
|
||||
@visited_urls = T.let(Set.new, T::Set[String])
|
||||
end
|
||||
|
||||
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
|
||||
def perform(args)
|
||||
user = user_from_args!
|
||||
@@ -45,29 +51,26 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
||||
media_types_with_posts =
|
||||
T.let(Set.new, T::Set[Domain::Sofurry::GalleryPageParser::MediaType])
|
||||
|
||||
existing_folders =
|
||||
user
|
||||
.folders
|
||||
.filter_map { |folder| (sfid = folder.sofurry_id) && [sfid, folder] }
|
||||
.to_h
|
||||
|
||||
media_types.each do |media_type|
|
||||
gallery_summary = gather_gallery_summary(user, media_type)
|
||||
folders = gallery_summary.folders
|
||||
gallery_posts = gallery_summary.posts
|
||||
|
||||
existing_folders =
|
||||
user
|
||||
.folders
|
||||
.filter { |folder| folder.media_type == media_type.serialize }
|
||||
.filter_map { |folder| (sfid = folder.sofurry_id) && [sfid, folder] }
|
||||
.to_h
|
||||
|
||||
folders.each do |folder|
|
||||
if (existing = existing_folders[folder.id])
|
||||
if (existing = existing_folders[folder.sofurry_id])
|
||||
existing.name = folder.name
|
||||
else
|
||||
user.folders.build(
|
||||
{
|
||||
sofurry_id: folder.id,
|
||||
name: folder.name,
|
||||
media_type: media_type.serialize,
|
||||
},
|
||||
)
|
||||
folder_model =
|
||||
user.folders.build(
|
||||
{ sofurry_id: folder.sofurry_id, name: folder.name },
|
||||
)
|
||||
existing_folders[folder.sofurry_id] = folder_model
|
||||
end
|
||||
|
||||
if gallery_posts.any?
|
||||
@@ -97,7 +100,6 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
||||
folder_posts =
|
||||
scan_gallery_posts(
|
||||
user_id,
|
||||
T.must(folder.media_type_enum),
|
||||
existing_posts.keys.to_set,
|
||||
folder_id: folder.sofurry_id,
|
||||
)
|
||||
@@ -212,16 +214,21 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
||||
def scan_gallery_posts(user_id, media_type, existing_post_ids, folder_id: nil)
|
||||
posts = T.let(Set.new, T::Set[GalleryEntry])
|
||||
page = 1
|
||||
seen_post_ids = T.let(Set.new, T::Set[Integer])
|
||||
|
||||
loop do
|
||||
url =
|
||||
if folder_id
|
||||
"https://api2.sofurry.com/browse/folder/#{media_type.serialize}" +
|
||||
"?by=#{user_id}&folder=#{folder_id}&format=json"
|
||||
"?by=#{user_id}&folder=#{folder_id}&#{media_type.serialize}-page=#{page}&format=json"
|
||||
else
|
||||
"https://api2.sofurry.com/browse/user/#{media_type.serialize}" +
|
||||
"?uid=#{user_id}&#{media_type.serialize}-page=#{page}&format=json"
|
||||
end
|
||||
|
||||
break if @visited_urls.include?(url)
|
||||
@visited_urls << url
|
||||
|
||||
response = http_client.get(url)
|
||||
|
||||
if response.status_code != 200
|
||||
@@ -240,6 +247,9 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
||||
.compact
|
||||
.to_set
|
||||
|
||||
break if post_ids.subset?(seen_post_ids)
|
||||
seen_post_ids.merge(post_ids)
|
||||
|
||||
posts.merge(
|
||||
posts_json.map do |post_json|
|
||||
GalleryEntry.new(
|
||||
|
||||
@@ -5,7 +5,7 @@ class Domain::Sofurry::GalleryPageParser
|
||||
class Folder < T::Struct
|
||||
include T::Struct::ActsAsComparable
|
||||
const :name, String
|
||||
const :id, Integer
|
||||
const :sofurry_id, Integer
|
||||
end
|
||||
|
||||
class MediaType < T::Enum
|
||||
@@ -20,7 +20,7 @@ class Domain::Sofurry::GalleryPageParser
|
||||
|
||||
class ShortGalleryEntry < T::Struct
|
||||
include T::Struct::ActsAsComparable
|
||||
const :id, Integer
|
||||
const :sofurry_id, Integer
|
||||
const :title, String
|
||||
end
|
||||
|
||||
@@ -46,8 +46,8 @@ class Domain::Sofurry::GalleryPageParser
|
||||
raise "user_id mismatch: #{user_id} != #{@user_id}"
|
||||
end
|
||||
name = elem.text
|
||||
id = href_params["folder"]&.to_i
|
||||
Folder.new(name:, id:)
|
||||
sofurry_id = href_params["folder"]&.to_i
|
||||
Folder.new(name:, sofurry_id:)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -60,19 +60,19 @@ class Domain::Sofurry::GalleryPageParser
|
||||
.css(".sf-browse-shortlist,.sf-browse-shortlist-zebra")
|
||||
.map do |elem|
|
||||
title_id = elem.css(".sf-browse-shortlist-title a").first || next
|
||||
id = title_id&.attr("href").split("/")&.last&.to_i || next
|
||||
sofurry_id = title_id&.attr("href").split("/")&.last&.to_i || next
|
||||
title = title_id&.text || next
|
||||
ShortGalleryEntry.new(id:, title:)
|
||||
ShortGalleryEntry.new(sofurry_id:, title:)
|
||||
end
|
||||
.compact,
|
||||
@doc
|
||||
.css("a.sfArtworkSmallInner")
|
||||
.map do |elem|
|
||||
id = elem.attr("href").split("/")&.last&.to_i || next
|
||||
sofurry_id = elem.attr("href").split("/")&.last&.to_i || next
|
||||
img_elem = elem.css("img.sfArtworkItem").first || next
|
||||
title =
|
||||
img_elem.attr("alt")&.strip&.rpartition("|by")&.first || next
|
||||
ShortGalleryEntry.new(id:, title:)
|
||||
ShortGalleryEntry.new(sofurry_id:, title:)
|
||||
end
|
||||
.compact,
|
||||
].flatten
|
||||
|
||||
@@ -9,17 +9,11 @@ class Domain::PostGroup::SofurryFolder < Domain::PostGroup
|
||||
attr_json :sofurry_id, :integer
|
||||
attr_json :name, :string
|
||||
attr_json :owner_id, :integer
|
||||
attr_json :media_types, :string, array: true
|
||||
attr_json_due_timestamp :scanned_folder_at, 3.months
|
||||
|
||||
belongs_to :owner, class_name: "::Domain::User::SofurryUser", optional: true
|
||||
|
||||
attr_json_enum(
|
||||
:media_type,
|
||||
:integer,
|
||||
{ stories: 1, art: 2, music: 3, journals: 4, photos: 5 },
|
||||
prefix: true,
|
||||
)
|
||||
|
||||
sig { override.returns([String, Symbol]) }
|
||||
def self.param_prefix_and_attribute
|
||||
["sf", :sofurry_folder_id]
|
||||
@@ -32,11 +26,4 @@ class Domain::PostGroup::SofurryFolder < Domain::PostGroup
|
||||
"https://www.sofurry.com/browse/folder/#{type}?by=#{owner_id}&folder=#{sofurry_id}"
|
||||
end
|
||||
end
|
||||
|
||||
sig { returns(T.nilable(Domain::Sofurry::GalleryPageParser::MediaType)) }
|
||||
def media_type_enum
|
||||
Domain::Sofurry::GalleryPageParser::MediaType.try_deserialize(
|
||||
self.media_type,
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
class Domain::UserJobEvent::AddTrackedObject < Domain::UserJobEvent
|
||||
self.table_name = "domain_user_job_event_add_tracked_objects"
|
||||
belongs_to :log_entry, class_name: "HttpLogEntry"
|
||||
validates :log_entry_id, uniqueness: { scope: :kind }
|
||||
|
||||
attr_json :requested_at, ActiveModelUtcTimeValue.new
|
||||
|
||||
enum :kind, %i[favs followed_by following gallery]
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
"@types/lodash": "^4.14.192",
|
||||
"@types/react": "^18.0.33",
|
||||
"env-cmd": "^10.1.0",
|
||||
"prettier": "^3.5.2",
|
||||
"prettier": "^3.6.2",
|
||||
"prettier-plugin-tailwindcss": "^0.6.9",
|
||||
"react-refresh": "^0.14.0",
|
||||
"typescript": "^5.0.3",
|
||||
|
||||
@@ -4025,10 +4025,10 @@ prettier-plugin-tailwindcss@^0.6.9:
|
||||
resolved "https://registry.npmjs.org/prettier-plugin-tailwindcss/-/prettier-plugin-tailwindcss-0.6.9.tgz"
|
||||
integrity sha512-r0i3uhaZAXYP0At5xGfJH876W3HHGHDp+LCRUJrs57PBeQ6mYHMwr25KH8NPX44F2yGTvdnH7OqCshlQx183Eg==
|
||||
|
||||
prettier@^3.5.2:
|
||||
version "3.5.2"
|
||||
resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.5.2.tgz#d066c6053200da0234bf8fa1ef45168abed8b914"
|
||||
integrity sha512-lc6npv5PH7hVqozBR7lkBNOGXV9vMwROAPlumdBkX0wTbbzPu/U1hk5yL8p2pt4Xoc+2mkT8t/sow2YrV/M5qg==
|
||||
prettier@^3.6.2:
|
||||
version "3.6.2"
|
||||
resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.6.2.tgz#ccda02a1003ebbb2bfda6f83a074978f608b9393"
|
||||
integrity sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==
|
||||
|
||||
process-nextick-args@~2.0.0:
|
||||
version "2.0.1"
|
||||
|
||||
Reference in New Issue
Block a user