sofurry WIP for now, sofurry is down
This commit is contained in:
3
Gemfile
3
Gemfile
@@ -1,8 +1,7 @@
|
|||||||
source "https://rubygems.org"
|
source "https://rubygems.org"
|
||||||
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
git_source(:github) { |repo| "https://github.com/#{repo}.git" }
|
||||||
|
|
||||||
ruby "3.2.6"
|
ruby "~> 3.2"
|
||||||
# ruby "3.0.3"
|
|
||||||
|
|
||||||
# Bundle edge Rails instead: gem "rails", github: "rails/rails", branch: "main"
|
# Bundle edge Rails instead: gem "rails", github: "rails/rails", branch: "main"
|
||||||
gem "rails", "~> 7.2"
|
gem "rails", "~> 7.2"
|
||||||
|
|||||||
6
TODO.md
6
TODO.md
@@ -39,3 +39,9 @@
|
|||||||
- [ ] Rich inline links to e621 e.g. https://refurrer.com/posts/fa@60070060
|
- [ ] Rich inline links to e621 e.g. https://refurrer.com/posts/fa@60070060
|
||||||
- [ ] Find FaPost that have favs recorded but no scan / file, enqueue scan
|
- [ ] Find FaPost that have favs recorded but no scan / file, enqueue scan
|
||||||
- [x] Bunch of posts with empty responses: posts = Domain::Post.joins(files: :log_entry).where(files: { http_log_entries: { response_sha256: BlobFile::EMPTY_FILE_SHA256 }}).limit(10)
|
- [x] Bunch of posts with empty responses: posts = Domain::Post.joins(files: :log_entry).where(files: { http_log_entries: { response_sha256: BlobFile::EMPTY_FILE_SHA256 }}).limit(10)
|
||||||
|
- [ ] Create GlobalState entries for the last FA ID on the browse page; periodic scan to scan from the newest FA ID to the stored one
|
||||||
|
- [ ] GlobalState entries for long running backfill jobs, automatically restart them if they fail
|
||||||
|
- [ ] Flag to pass to jobs to log HTTP requests / responses to a directory, HTTP mock helper to read from that directory
|
||||||
|
- [ ] Fix incorrect IP address for Cloudflare-proxied requests
|
||||||
|
- [ ] SOCKS5 proxy for additional workers
|
||||||
|
- [ ] Backup FA scraper using foxbot & g6jy5jkx466lrqojcngbnksugrcfxsl562bzuikrka5rv7srgguqbjid.onion
|
||||||
|
|||||||
@@ -22,6 +22,12 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
|||||||
const :gallery_log_entry, T.nilable(HttpLogEntry)
|
const :gallery_log_entry, T.nilable(HttpLogEntry)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
sig { override.params(args: T::Hash[Symbol, T.untyped]).void }
|
||||||
|
def initialize(*args)
|
||||||
|
super
|
||||||
|
@visited_urls = T.let(Set.new, T::Set[String])
|
||||||
|
end
|
||||||
|
|
||||||
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
|
sig { override.params(args: T::Hash[Symbol, T.untyped]).returns(T.untyped) }
|
||||||
def perform(args)
|
def perform(args)
|
||||||
user = user_from_args!
|
user = user_from_args!
|
||||||
@@ -45,29 +51,26 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
|||||||
media_types_with_posts =
|
media_types_with_posts =
|
||||||
T.let(Set.new, T::Set[Domain::Sofurry::GalleryPageParser::MediaType])
|
T.let(Set.new, T::Set[Domain::Sofurry::GalleryPageParser::MediaType])
|
||||||
|
|
||||||
|
existing_folders =
|
||||||
|
user
|
||||||
|
.folders
|
||||||
|
.filter_map { |folder| (sfid = folder.sofurry_id) && [sfid, folder] }
|
||||||
|
.to_h
|
||||||
|
|
||||||
media_types.each do |media_type|
|
media_types.each do |media_type|
|
||||||
gallery_summary = gather_gallery_summary(user, media_type)
|
gallery_summary = gather_gallery_summary(user, media_type)
|
||||||
folders = gallery_summary.folders
|
folders = gallery_summary.folders
|
||||||
gallery_posts = gallery_summary.posts
|
gallery_posts = gallery_summary.posts
|
||||||
|
|
||||||
existing_folders =
|
|
||||||
user
|
|
||||||
.folders
|
|
||||||
.filter { |folder| folder.media_type == media_type.serialize }
|
|
||||||
.filter_map { |folder| (sfid = folder.sofurry_id) && [sfid, folder] }
|
|
||||||
.to_h
|
|
||||||
|
|
||||||
folders.each do |folder|
|
folders.each do |folder|
|
||||||
if (existing = existing_folders[folder.id])
|
if (existing = existing_folders[folder.sofurry_id])
|
||||||
existing.name = folder.name
|
existing.name = folder.name
|
||||||
else
|
else
|
||||||
user.folders.build(
|
folder_model =
|
||||||
{
|
user.folders.build(
|
||||||
sofurry_id: folder.id,
|
{ sofurry_id: folder.sofurry_id, name: folder.name },
|
||||||
name: folder.name,
|
)
|
||||||
media_type: media_type.serialize,
|
existing_folders[folder.sofurry_id] = folder_model
|
||||||
},
|
|
||||||
)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if gallery_posts.any?
|
if gallery_posts.any?
|
||||||
@@ -97,7 +100,6 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
|||||||
folder_posts =
|
folder_posts =
|
||||||
scan_gallery_posts(
|
scan_gallery_posts(
|
||||||
user_id,
|
user_id,
|
||||||
T.must(folder.media_type_enum),
|
|
||||||
existing_posts.keys.to_set,
|
existing_posts.keys.to_set,
|
||||||
folder_id: folder.sofurry_id,
|
folder_id: folder.sofurry_id,
|
||||||
)
|
)
|
||||||
@@ -212,16 +214,21 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
|||||||
def scan_gallery_posts(user_id, media_type, existing_post_ids, folder_id: nil)
|
def scan_gallery_posts(user_id, media_type, existing_post_ids, folder_id: nil)
|
||||||
posts = T.let(Set.new, T::Set[GalleryEntry])
|
posts = T.let(Set.new, T::Set[GalleryEntry])
|
||||||
page = 1
|
page = 1
|
||||||
|
seen_post_ids = T.let(Set.new, T::Set[Integer])
|
||||||
|
|
||||||
loop do
|
loop do
|
||||||
url =
|
url =
|
||||||
if folder_id
|
if folder_id
|
||||||
"https://api2.sofurry.com/browse/folder/#{media_type.serialize}" +
|
"https://api2.sofurry.com/browse/folder/#{media_type.serialize}" +
|
||||||
"?by=#{user_id}&folder=#{folder_id}&format=json"
|
"?by=#{user_id}&folder=#{folder_id}&#{media_type.serialize}-page=#{page}&format=json"
|
||||||
else
|
else
|
||||||
"https://api2.sofurry.com/browse/user/#{media_type.serialize}" +
|
"https://api2.sofurry.com/browse/user/#{media_type.serialize}" +
|
||||||
"?uid=#{user_id}&#{media_type.serialize}-page=#{page}&format=json"
|
"?uid=#{user_id}&#{media_type.serialize}-page=#{page}&format=json"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
break if @visited_urls.include?(url)
|
||||||
|
@visited_urls << url
|
||||||
|
|
||||||
response = http_client.get(url)
|
response = http_client.get(url)
|
||||||
|
|
||||||
if response.status_code != 200
|
if response.status_code != 200
|
||||||
@@ -240,6 +247,9 @@ class Domain::Sofurry::Job::ScanGalleryJob < Domain::Sofurry::Job::Base
|
|||||||
.compact
|
.compact
|
||||||
.to_set
|
.to_set
|
||||||
|
|
||||||
|
break if post_ids.subset?(seen_post_ids)
|
||||||
|
seen_post_ids.merge(post_ids)
|
||||||
|
|
||||||
posts.merge(
|
posts.merge(
|
||||||
posts_json.map do |post_json|
|
posts_json.map do |post_json|
|
||||||
GalleryEntry.new(
|
GalleryEntry.new(
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ class Domain::Sofurry::GalleryPageParser
|
|||||||
class Folder < T::Struct
|
class Folder < T::Struct
|
||||||
include T::Struct::ActsAsComparable
|
include T::Struct::ActsAsComparable
|
||||||
const :name, String
|
const :name, String
|
||||||
const :id, Integer
|
const :sofurry_id, Integer
|
||||||
end
|
end
|
||||||
|
|
||||||
class MediaType < T::Enum
|
class MediaType < T::Enum
|
||||||
@@ -20,7 +20,7 @@ class Domain::Sofurry::GalleryPageParser
|
|||||||
|
|
||||||
class ShortGalleryEntry < T::Struct
|
class ShortGalleryEntry < T::Struct
|
||||||
include T::Struct::ActsAsComparable
|
include T::Struct::ActsAsComparable
|
||||||
const :id, Integer
|
const :sofurry_id, Integer
|
||||||
const :title, String
|
const :title, String
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -46,8 +46,8 @@ class Domain::Sofurry::GalleryPageParser
|
|||||||
raise "user_id mismatch: #{user_id} != #{@user_id}"
|
raise "user_id mismatch: #{user_id} != #{@user_id}"
|
||||||
end
|
end
|
||||||
name = elem.text
|
name = elem.text
|
||||||
id = href_params["folder"]&.to_i
|
sofurry_id = href_params["folder"]&.to_i
|
||||||
Folder.new(name:, id:)
|
Folder.new(name:, sofurry_id:)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -60,19 +60,19 @@ class Domain::Sofurry::GalleryPageParser
|
|||||||
.css(".sf-browse-shortlist,.sf-browse-shortlist-zebra")
|
.css(".sf-browse-shortlist,.sf-browse-shortlist-zebra")
|
||||||
.map do |elem|
|
.map do |elem|
|
||||||
title_id = elem.css(".sf-browse-shortlist-title a").first || next
|
title_id = elem.css(".sf-browse-shortlist-title a").first || next
|
||||||
id = title_id&.attr("href").split("/")&.last&.to_i || next
|
sofurry_id = title_id&.attr("href").split("/")&.last&.to_i || next
|
||||||
title = title_id&.text || next
|
title = title_id&.text || next
|
||||||
ShortGalleryEntry.new(id:, title:)
|
ShortGalleryEntry.new(sofurry_id:, title:)
|
||||||
end
|
end
|
||||||
.compact,
|
.compact,
|
||||||
@doc
|
@doc
|
||||||
.css("a.sfArtworkSmallInner")
|
.css("a.sfArtworkSmallInner")
|
||||||
.map do |elem|
|
.map do |elem|
|
||||||
id = elem.attr("href").split("/")&.last&.to_i || next
|
sofurry_id = elem.attr("href").split("/")&.last&.to_i || next
|
||||||
img_elem = elem.css("img.sfArtworkItem").first || next
|
img_elem = elem.css("img.sfArtworkItem").first || next
|
||||||
title =
|
title =
|
||||||
img_elem.attr("alt")&.strip&.rpartition("|by")&.first || next
|
img_elem.attr("alt")&.strip&.rpartition("|by")&.first || next
|
||||||
ShortGalleryEntry.new(id:, title:)
|
ShortGalleryEntry.new(sofurry_id:, title:)
|
||||||
end
|
end
|
||||||
.compact,
|
.compact,
|
||||||
].flatten
|
].flatten
|
||||||
|
|||||||
@@ -9,17 +9,11 @@ class Domain::PostGroup::SofurryFolder < Domain::PostGroup
|
|||||||
attr_json :sofurry_id, :integer
|
attr_json :sofurry_id, :integer
|
||||||
attr_json :name, :string
|
attr_json :name, :string
|
||||||
attr_json :owner_id, :integer
|
attr_json :owner_id, :integer
|
||||||
|
attr_json :media_types, :string, array: true
|
||||||
attr_json_due_timestamp :scanned_folder_at, 3.months
|
attr_json_due_timestamp :scanned_folder_at, 3.months
|
||||||
|
|
||||||
belongs_to :owner, class_name: "::Domain::User::SofurryUser", optional: true
|
belongs_to :owner, class_name: "::Domain::User::SofurryUser", optional: true
|
||||||
|
|
||||||
attr_json_enum(
|
|
||||||
:media_type,
|
|
||||||
:integer,
|
|
||||||
{ stories: 1, art: 2, music: 3, journals: 4, photos: 5 },
|
|
||||||
prefix: true,
|
|
||||||
)
|
|
||||||
|
|
||||||
sig { override.returns([String, Symbol]) }
|
sig { override.returns([String, Symbol]) }
|
||||||
def self.param_prefix_and_attribute
|
def self.param_prefix_and_attribute
|
||||||
["sf", :sofurry_folder_id]
|
["sf", :sofurry_folder_id]
|
||||||
@@ -32,11 +26,4 @@ class Domain::PostGroup::SofurryFolder < Domain::PostGroup
|
|||||||
"https://www.sofurry.com/browse/folder/#{type}?by=#{owner_id}&folder=#{sofurry_id}"
|
"https://www.sofurry.com/browse/folder/#{type}?by=#{owner_id}&folder=#{sofurry_id}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
sig { returns(T.nilable(Domain::Sofurry::GalleryPageParser::MediaType)) }
|
|
||||||
def media_type_enum
|
|
||||||
Domain::Sofurry::GalleryPageParser::MediaType.try_deserialize(
|
|
||||||
self.media_type,
|
|
||||||
)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -2,6 +2,8 @@
|
|||||||
class Domain::UserJobEvent::AddTrackedObject < Domain::UserJobEvent
|
class Domain::UserJobEvent::AddTrackedObject < Domain::UserJobEvent
|
||||||
self.table_name = "domain_user_job_event_add_tracked_objects"
|
self.table_name = "domain_user_job_event_add_tracked_objects"
|
||||||
belongs_to :log_entry, class_name: "HttpLogEntry"
|
belongs_to :log_entry, class_name: "HttpLogEntry"
|
||||||
|
validates :log_entry_id, uniqueness: { scope: :kind }
|
||||||
|
|
||||||
attr_json :requested_at, ActiveModelUtcTimeValue.new
|
attr_json :requested_at, ActiveModelUtcTimeValue.new
|
||||||
|
|
||||||
enum :kind, %i[favs followed_by following gallery]
|
enum :kind, %i[favs followed_by following gallery]
|
||||||
|
|||||||
@@ -52,7 +52,7 @@
|
|||||||
"@types/lodash": "^4.14.192",
|
"@types/lodash": "^4.14.192",
|
||||||
"@types/react": "^18.0.33",
|
"@types/react": "^18.0.33",
|
||||||
"env-cmd": "^10.1.0",
|
"env-cmd": "^10.1.0",
|
||||||
"prettier": "^3.5.2",
|
"prettier": "^3.6.2",
|
||||||
"prettier-plugin-tailwindcss": "^0.6.9",
|
"prettier-plugin-tailwindcss": "^0.6.9",
|
||||||
"react-refresh": "^0.14.0",
|
"react-refresh": "^0.14.0",
|
||||||
"typescript": "^5.0.3",
|
"typescript": "^5.0.3",
|
||||||
|
|||||||
@@ -4025,10 +4025,10 @@ prettier-plugin-tailwindcss@^0.6.9:
|
|||||||
resolved "https://registry.npmjs.org/prettier-plugin-tailwindcss/-/prettier-plugin-tailwindcss-0.6.9.tgz"
|
resolved "https://registry.npmjs.org/prettier-plugin-tailwindcss/-/prettier-plugin-tailwindcss-0.6.9.tgz"
|
||||||
integrity sha512-r0i3uhaZAXYP0At5xGfJH876W3HHGHDp+LCRUJrs57PBeQ6mYHMwr25KH8NPX44F2yGTvdnH7OqCshlQx183Eg==
|
integrity sha512-r0i3uhaZAXYP0At5xGfJH876W3HHGHDp+LCRUJrs57PBeQ6mYHMwr25KH8NPX44F2yGTvdnH7OqCshlQx183Eg==
|
||||||
|
|
||||||
prettier@^3.5.2:
|
prettier@^3.6.2:
|
||||||
version "3.5.2"
|
version "3.6.2"
|
||||||
resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.5.2.tgz#d066c6053200da0234bf8fa1ef45168abed8b914"
|
resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.6.2.tgz#ccda02a1003ebbb2bfda6f83a074978f608b9393"
|
||||||
integrity sha512-lc6npv5PH7hVqozBR7lkBNOGXV9vMwROAPlumdBkX0wTbbzPu/U1hk5yL8p2pt4Xoc+2mkT8t/sow2YrV/M5qg==
|
integrity sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==
|
||||||
|
|
||||||
process-nextick-args@~2.0.0:
|
process-nextick-args@~2.0.0:
|
||||||
version "2.0.1"
|
version "2.0.1"
|
||||||
|
|||||||
Reference in New Issue
Block a user