remove legacy Domain::{Fa,InkBunny,E621} model usage

This commit is contained in:
Dylan Knutson
2025-06-24 17:51:59 +00:00
parent 5cb24a8065
commit 70c65ffdbd
115 changed files with 110 additions and 44529 deletions

222
Rakefile
View File

@@ -40,22 +40,23 @@ task periodic_tasks: %i[environment set_logger_stdout] do
loop { sleep 10 }
end
namespace :db_sampler do
task export: :environment do
url_names = ENV["url_names"] || raise("need 'url_names' (comma-separated)")
outfile = $stdout
DbSampler.new(outfile).export(url_names.split(","))
ensure
outfile.close if outfile
end
# TODO - migrate to Domain::Post / Domain::User
# namespace :db_sampler do
# task export: :environment do
# url_names = ENV["url_names"] || raise("need 'url_names' (comma-separated)")
# outfile = $stdout
# DbSampler.new(outfile).export(url_names.split(","))
# ensure
# outfile.close if outfile
# end
task import: [:environment] do
infile = $stdin
DbSampler.new(infile).import
ensure
infile.close if infile
end
end
# task import: [:environment] do
# infile = $stdin
# DbSampler.new(infile).import
# ensure
# infile.close if infile
# end
# end
task good_job: %i[environment set_ar_stdout set_logger_stdout] do
env_hash = {
@@ -93,138 +94,6 @@ task :reverse_csv do
out_csv.close
end
# Runs the currently enabled Domain::MigrateToDomain steps.
#
# ENV:
#   only_user    - forwarded to each migrator step as `only_user:`
#   only_domains - comma-separated subset of e621, fa, ib (all when empty)
task migrate_to_domain: :environment do
  known_domains = %w[e621 fa ib]
  user_filter = ENV["only_user"]

  requested = ENV.fetch("only_domains", "").split(",")
  requested = known_domains if requested.empty?
  unless (requested - known_domains).empty?
    raise "only_domains must be a subset of #{known_domains.join(", ")}"
  end

  migrator = Domain::MigrateToDomain.new
  wanted = ->(domain) { requested.include?(domain) }

  # Disabled steps, kept for reference:
  # migrator.migrate_e621_users(only_user: only_user)
  # migrator.migrate_e621_posts(only_user: only_user)
  migrator.migrate_e621_users_favs(only_user: user_filter) if wanted.call("e621")

  # Disabled steps, kept for reference:
  # migrator.migrate_fa_users(only_user: only_user)
  # migrator.migrate_fa_posts(only_user: only_user)
  # migrator.migrate_fa_users_favs(only_user: only_user)
  if wanted.call("fa")
    migrator.migrate_fa_users_followed_users(only_user: user_filter)
  end

  if wanted.call("ib")
    migrator.migrate_inkbunny_users(only_user: user_filter)
    migrator.migrate_inkbunny_posts(only_user: user_filter)
    # Pools are only migrated on an unfiltered run.
    migrator.migrate_inkbunny_pools(only_user: nil) if user_filter.nil?
  end
end
# Backfills `last_submission_page_id`, `posted_at` and `state` on
# Domain::Fa::Post rows from the log entry returned by
# `post.guess_last_submission_page`, printing one summary line per post.
#
# ENV:
#   only_fa_id - when set, only the post with this fa_id is processed
#   start_at   - optional integer handed to find_each as `start:`
task infer_last_submission_log_entries: :environment do
only_fa_id = ENV["only_fa_id"]
start = ENV["start_at"]&.to_i || nil
if only_fa_id
relation = Domain::Fa::Post.where(fa_id: only_fa_id)
else
# posts in state :ok that lack either a submission page id or a posted_at
relation =
Domain::Fa::Post
.where(state: :ok)
.where(last_submission_page_id: nil)
.or(Domain::Fa::Post.where(state: :ok).where(posted_at: nil))
end
relation.find_each(batch_size: 10, start:) do |post|
# `parts` accumulates the log line; the `ensure` below prints it even when
# an iteration exits early through `next` or through the `rescue`.
parts = ["[id: #{post.id}]", "[fa_id: #{post.fa_id}]"]
log_entry = post.guess_last_submission_page
unless log_entry
parts << "[no log entry]"
next
end
contents = log_entry.response&.contents
unless contents
parts << "[no contents]"
next
end
parser = Domain::Fa::Parser::Page.new(contents)
if parser.submission_not_found?
parts << "[removed]"
post.state = :removed
else
posted_at = parser.submission.posted_date
# only fills posted_at when it is not already set
post.posted_at ||= posted_at
parts << "[posted at: #{posted_at}]"
end
# flag the case where an existing, different page id is being replaced
if post.last_submission_page_id.present? &&
log_entry.id != post.last_submission_page_id
parts << "[overwrite]"
end
post.last_submission_page_id = log_entry.id
parts << "[log entry: #{log_entry.id}]"
parts << "[uri: #{log_entry.uri.to_s}]"
post.save!
rescue => e
# errors are recorded on the log line and the loop moves on to the next post
parts << "[error: #{e.message}]"
ensure
puts parts.join(" ")
end
end
# Calls `fix_file_by_uri!` on every Domain::Fa::Post whose file_id is listed
# in ENV["file_ids"] (comma-separated). Raises when the variable is not set.
task fix_fa_post_files: :environment do
  raw_ids = ENV["file_ids"]
  raise("need 'file_ids'") if raw_ids.nil?

  matching_posts = Domain::Fa::Post.where(file_id: raw_ids.split(","))
  matching_posts.find_each(&:fix_file_by_uri!)
end
# Calls `fix_file_by_uri!` on every Domain::Fa::Post whose primary key appears
# in the "id" column of the CSV file named by ENV["csv_file"].
# Raises when the variable is not set.
task fix_fa_post_files_by_csv: :environment do
  require "csv"
  csv_file = ENV["csv_file"] || raise("need 'csv_file'")

  # CSV.foreach opens the file, yields each row and closes the handle when the
  # block finishes; the previous CSV.open(...).each form never closed it.
  CSV.foreach(csv_file, headers: true) do |row|
    post = Domain::Fa::Post.find(row["id"].to_i)
    post.fix_file_by_uri!
  end
end
# One-off repair for four hard-coded FurAffinity post ids: re-parses the
# guessed last submission page and detaches the file when the parser reports
# the submission as not found.
task fix_buggy_fa_posts: :environment do
post_fa_ids = %w[7704069 7704068 6432347 6432346].map(&:to_i)
# NOTE(review): "uri" is required here but the body uses Addressable::URI and
# CGI, which must already be loaded by the environment - confirm.
require "uri"
post_fa_ids.each do |fa_id|
post = Domain::Fa::Post.find_by(fa_id: fa_id)
# skip ids with no post or no attached file
next unless post&.file
# NOTE(review): post_file_url_str and file_url_str are assigned but never
# read in this task.
post_file_url_str = Addressable::URI.parse(post.file_url_str).to_s
file_url_str = Addressable::URI.parse(CGI.unescape(post.file.uri.to_s)).to_s
# NOTE(review): another task in this file treats guess_last_submission_page
# as nilable; here a nil `hle` (or nil response) would raise NoMethodError.
hle = post.guess_last_submission_page
parser = Domain::Fa::Parser::Page.new(hle.response.contents)
if parser.submission_not_found?
post.file = nil
post.save!
puts "submission not found"
else
submission = parser.submission
full_res_img = Addressable::URI.parse(submission.full_res_img)
# default to https when the parsed image URL carries no scheme
full_res_img.scheme = "https" if full_res_img.scheme.blank?
# NOTE(review): `matches` is computed but never used or printed.
matches = full_res_img.to_s == post.file_url_str
end
end
end
task enqueue_fa_posts_missing_files: %i[environment set_logger_stdout] do
Domain::Post::FaPost
.where(state: "ok")
@@ -407,56 +276,6 @@ rescue => e
binding.pry
end
# Copies avatar records from legacy Domain::Fa::User rows onto
# Domain::User::FaUser rows that have no avatar yet, matched by url_name.
# NOTE(review): contains interactive `binding.pry` breakpoints, so it is only
# usable from a developer console session.
task fix_fa_user_avatars: :environment do
# url_names of new-model users without an avatar (used as a subquery below)
new_users_missing_avatar =
Domain::User::FaUser.where.missing(:avatar).select(:url_name)
# legacy users with the same url_name that do have an avatar
old_users_with_avatar =
Domain::Fa::User
.where(url_name: new_users_missing_avatar)
.includes(:avatar)
.filter(&:avatar)
old_users_with_avatar.each do |old_user|
old_avatar = old_user.avatar
new_user = Domain::User::FaUser.find_by(url_name: old_user.url_name)
# no log entry on the legacy avatar: create an empty avatar and run the
# avatar job inline instead of copying fields
if old_avatar.log_entry.nil?
puts "enqueue fresh download for #{old_user.url_name}"
new_avatar = Domain::UserAvatar.new
new_user.avatar = new_avatar
new_user.save!
Domain::Fa::Job::UserAvatarJob.perform_now(avatar: new_avatar)
new_avatar.reload
binding.pry
next
end
# otherwise copy the legacy avatar's log entry, url and download time
new_avatar = Domain::UserAvatar.new
new_avatar.log_entry_id = old_avatar.log_entry_id
new_avatar.last_log_entry_id = old_avatar.log_entry_id
new_avatar.url_str = old_avatar.file_url_str
new_avatar.downloaded_at = old_avatar.log_entry&.created_at
# map the legacy state onto the new state names; for non-"ok" states the
# legacy state string is preserved in error_message
new_avatar.state =
case old_avatar.state
when "ok"
old_avatar.log_entry_id.present? ? "ok" : "pending"
when "file_not_found"
new_avatar.error_message = old_avatar.state
"file_404"
else
new_avatar.error_message = old_avatar.state
"http_error"
end
new_user.avatar = new_avatar
new_user.save!
puts "migrated #{old_user.url_name}"
rescue => e
# print the error and drop into the debugger for this user
puts "error: #{e.message}"
binding.pry
end
end
task run_fa_user_avatar_jobs: :environment do
avatars =
Domain::UserAvatar
@@ -473,15 +292,6 @@ task run_fa_user_avatar_jobs: :environment do
end
end
# Prints the legacy and new favourite counts for the last
# Domain::User::FaUser that has `migrated_user_favs_at` set, as a spot check.
task sample_migrated_favs: :environment do
  new_user = Domain::User::FaUser.where.not(migrated_user_favs_at: nil).last
  # `.last` returns nil when no user has been migrated yet; report that
  # instead of failing with NoMethodError.
  if new_user.nil?
    puts "no user with migrated favs found"
    next
  end

  old_user = Domain::Fa::User.find_by(url_name: new_user.url_name)
  puts "user: #{new_user.url_name}"
  # find_by returns nil when there is no legacy row with this url_name
  if old_user
    puts "old fav count: #{old_user.fav_posts.count}"
  else
    puts "old fav count: (no legacy user found)"
  end
  puts "new fav count: #{new_user.faved_posts.count}"
end
task create_post_file_fingerprints: :environment do
def migrate_posts_for_user(user)
puts "migrating posts for #{user.to_param}"

View File

@@ -1,218 +0,0 @@
# typed: true
class Domain::Fa::ApiController < ApplicationController
skip_before_action :authenticate_user!
before_action :validate_api_token!
skip_before_action :verify_authenticity_token,
only: %i[enqueue_objects object_statuses similar_users]
skip_before_action :validate_api_token!,
only: %i[object_statuses similar_users]
# Renders JSON describing what is known about the requested posts and users.
#
# Params:
#   fa_ids    - list of FurAffinity post ids (blank entries dropped, cast to int)
#   url_names - list of user url_names (blank entries dropped)
#
# Response: { posts: { fa_id => {...} }, users: { url_name => {...} } }.
# Objects with no matching record are reported as { state: "not_seen" }.
def object_statuses
fa_ids = (params[:fa_ids] || []).reject(&:blank?).map(&:to_i)
url_names = (params[:url_names] || []).reject(&:blank?)
# load all requested records up front, keyed by the identifier used in params
url_name_to_user =
Domain::User::FaUser
.where(url_name: url_names)
.map { |user| [T.must(user.url_name), user] }
.to_h
fa_id_to_post =
Domain::Post::FaPost
.includes(:file)
.where(fa_id: fa_ids)
.map { |post| [T.must(post.fa_id), post] }
.to_h
posts_response = {}
users_response = {}
fa_ids.each do |fa_id|
post = fa_id_to_post[fa_id]
if post
# reported state: a present file wins, then a recorded scan time, and
# otherwise the post's own state
post_state =
if post.file.present?
"have_file"
elsif post.scanned_at?
"scanned_post"
else
post.state
end
post_response = {
state: post_state,
seen_at: time_ago_or_never(post.created_at),
object_url: request.base_url + helpers.domain_post_path(post),
post_scan: {
last_at: time_ago_or_never(post.scanned_at),
due_for_scan: !post.scanned_at?,
},
file_scan: {
last_at: time_ago_or_never(post.file&.created_at),
# also true when the post has no file record at all
due_for_scan: !post.file&.created_at?,
},
}
else
post_response = { state: "not_seen" }
end
posts_response[fa_id] = post_response
end
url_names.each do |url_name|
user = url_name_to_user[url_name]
if user
user_response = {
created_at: time_ago_or_never(user.created_at),
state: user.state,
object_url: request.base_url + helpers.domain_user_path(user),
page_scan: {
last_at: time_ago_or_never(user.scanned_page_at),
due_for_scan: user.page_scan.due?,
},
gallery_scan: {
last_at: time_ago_or_never(user.gallery_scan.at),
due_for_scan: user.gallery_scan.due?,
},
favs_scan: {
last_at: time_ago_or_never(user.favs_scan.at),
due_for_scan: user.favs_scan.due?,
},
}
else
user_response = { state: "not_seen" }
end
users_response[url_name] = user_response
end
render json: { posts: posts_response, users: users_response }
end
# Defers scan jobs for the requested posts and users, enqueues them in one
# bulk operation, and renders how many jobs of each class were deferred.
#
# Params:
#   fa_ids               - FurAffinity post ids (cast to int)
#   url_names            - user url_names
#   url_names_to_enqueue - url_names whose jobs get the higher priority
def enqueue_objects
  @enqueue_counts ||= Hash.new { |h, k| h[k] = 0 }

  requested_fa_ids = (params[:fa_ids] || []).map(&:to_i)
  requested_url_names = params[:url_names] || []
  high_priority_names = Set.new(params[:url_names_to_enqueue] || [])

  # index the existing records by the identifier used in the request
  posts_by_fa_id = {}
  Domain::Fa::Post
    .includes(:file)
    .where(fa_id: requested_fa_ids)
    .each { |post| posts_by_fa_id[post.fa_id] = post }

  users_by_url_name = {}
  Domain::Fa::User
    .where(url_name: requested_url_names)
    .each { |user| users_by_url_name[user.url_name] = user }

  requested_fa_ids.each do |fa_id|
    defer_post_scan(posts_by_fa_id[fa_id], fa_id)
  end

  requested_url_names.each do |url_name|
    defer_user_scan(
      users_by_url_name[url_name],
      url_name,
      high_priority_names.include?(url_name),
    )
  end

  enqueue_deferred!

  counts = @enqueue_counts
  render json: {
           post_scans: counts[Domain::Fa::Job::ScanPostJob],
           post_files: counts[Domain::Fa::Job::ScanFileJob],
           user_pages: counts[Domain::Fa::Job::UserPageJob],
           user_galleries: counts[Domain::Fa::Job::UserGalleryJob],
         }
end
private
# Defers the jobs a post still needs: a ScanPostJob when the post is unknown
# or not yet scanned, and a ScanFileJob (on the "static_file" queue) when the
# post has a file URI but no file. Returns the result of the file-job
# deferral when that branch is taken, nil otherwise.
def defer_post_scan(post, fa_id)
  unless post && post.scanned?
    defer_manual(Domain::Fa::Job::ScanPostJob, { fa_id: fa_id }, -17)
  end

  return unless post && post.file_uri && !post.file.present?

  defer_manual(
    Domain::Fa::Job::ScanFileJob,
    { post: post },
    -15,
    "static_file",
  )
end
# Defers at most one scan job for a user.
#
# user     - the existing user record, or nil when the url_name is unknown
# url_name - passed to the job as `url_name:`
# highpri  - truthy selects the higher-priority value for the deferred job
#
# Returns nil when a job was deferred and false when nothing was due.
def defer_user_scan(user, url_name, highpri)
  # Unknown users and users due for a page scan both start with the page job.
  if !user || user.due_for_page_scan?
    defer_manual(
      Domain::Fa::Job::UserPageJob,
      { url_name: url_name },
      highpri ? -16 : -6,
    )
    return
  end

  # `user` cannot be nil here (that case returned above), so the redundant
  # `!user` check the original carried on this branch has been dropped.
  if user.due_for_gallery_scan?
    defer_manual(
      Domain::Fa::Job::UserGalleryJob,
      { url_name: url_name },
      highpri ? -14 : -4,
    )
    return
  end

  false
end
# Records a job to be enqueued later by `enqueue_deferred!`, skipping exact
# duplicates of (klass, args, priority) already deferred on this instance.
#
# klass    - job class
# args     - argument hash later passed to `perform_later`
# priority - job priority
# queue    - queue name, "manual" by default
#
# Returns nil for a duplicate, otherwise the updated count for `klass`.
def defer_manual(klass, args, priority, queue = "manual")
  # Instance-level state: the previous `@@enqueue_deduper` class variable was
  # shared by every instance in the process, so a job deferred once was
  # silently dropped on all later calls and the set grew without bound.
  @enqueue_deduper ||= Set.new
  return unless @enqueue_deduper.add?([klass, args, priority])

  @deferred_jobs ||= []
  @deferred_jobs << [klass, args, priority, queue]
  @enqueue_counts[klass] += 1
end
# Drains the jobs collected by `defer_manual`, enqueuing each one with its
# recorded priority and queue inside a single GoodJob::Bulk.enqueue block.
def enqueue_deferred!
  GoodJob::Bulk.enqueue do
    pending = @deferred_jobs || []
    until pending.empty?
      klass, args, priority, queue = pending.shift
      klass.set(priority: priority, queue: queue).perform_later(args)
    end
  end
end
# Formats a timestamp as "<distance> ago" via the view helper
# `time_ago_in_words` (with seconds), or "never" when `time` is nil/false.
def time_ago_or_never(time)
  return "never" unless time

  distance = helpers.time_ago_in_words(time, include_seconds: true)
  distance + " ago"
end
API_TOKENS = {
"a4eb03ac-b33c-439c-9b51-a834d1c5cf48" => "dymk",
"56cc81fe-8c00-4436-8981-4580eab00e66" => "taargus",
"9c38727f-f11d-41de-b775-0effd86d520c" => "xjal",
"e38c568f-a24d-4f26-87f0-dfcd898a359d" => "fyacin",
"41fa1144-d4cd-11ed-afa1-0242ac120002" => "soft_fox_lad",
"9b3cf444-5913-4efb-9935-bf26501232ff" => "syfaro",
}
# before_action guard: lets the request through when the `api_token` param is
# a key of API_TOKENS or when VpnOnlyRouteConstraint matches the request;
# otherwise renders a 403 JSON error.
def validate_api_token!
  token = request.params[:api_token]
  return if API_TOKENS[token] || VpnOnlyRouteConstraint.new.matches?(request)

  render status: 403, json: { error: "not authenticated" }
end
end

View File

@@ -1,165 +0,0 @@
# typed: strict
module Domain::Fa::PostsHelper
extend T::Sig
include ActionView::Helpers::DateHelper
include ActionView::Helpers::SanitizeHelper
include ActionView::Helpers::RenderingHelper
include ActionView::Helpers::TagHelper
sig { params(post: Domain::Fa::Post).returns(String) }
# Short label for a post: "file" when it has a file, "scanned" when it has
# been scanned, otherwise its state (or "unknown" when the state is nil).
def post_state_string(post)
  return "file" if post.have_file?
  return "scanned" if post.scanned?

  post.state || "unknown"
end
sig do
params(
params:
T.any(ActionController::Parameters, T::Hash[T.untyped, T.untyped]),
).returns(T.nilable(String))
end
# Returns "(page N)" when params[:page] is greater than 1, nil otherwise
# (a missing page counts as page 1).
def page_str(params)
  current_page = (params[:page] || 1).to_i
  return nil unless current_page > 1

  "(page #{params[:page]})"
end
sig { params(post: Domain::Fa::Post).returns(T.nilable(HttpLogEntry)) }
# Returns the first HttpLogEntry recorded for the post's FurAffinity view
# URL, or nil when there is none.
def guess_scanned_http_log_entry(post)
  view_url = "https://www.furaffinity.net/view/#{post.fa_id}"
  HttpLogEntry.find_all_by_uri(view_url).first
end
sig { params(post: Domain::Fa::Post).returns(T.nilable(HttpLogEntry)) }
# Returns the first HttpLogEntry recorded for the post's file URI, or nil
# when the post has no file URI or no entry exists.
def guess_file_downloaded_http_log_entry(post)
  file_uri = post.file_uri
  return nil unless file_uri

  HttpLogEntry.find_all_by_uri(file_uri).first
end
sig { params(html: String).returns(String) }
# Sanitizes a FurAffinity post description and rewrites links to known FA
# posts/users into rendered inline-link partials.
#
# Works in two passes: a Sanitize transformer collects <a> nodes that point at
# FA post or user pages, then the collected nodes are replaced with partials
# for records found in the database (or unwrapped to their children when no
# record exists). Returns the resulting HTML via `raw`.
def fa_post_description_sanitized(html)
# nodes collected by the transformer, keyed by the id / url_name in the link
fa_post_id_to_node = {}
fa_user_url_name_to_node = {}
sanitizer =
Sanitize.new(
elements: %w[br img b i span strong],
attributes: {
"span" => %w[style],
},
css: {
properties: %w[font-size color],
},
transformers: [
Kernel.lambda do |env|
# Only allow and transform FA links
if env[:node_name] == "a"
node = env[:node]
# by default, assume the host is www.furaffinity.net
href = node["href"]&.downcase || ""
# scheme-less "furaffinity.net/..." hrefs get a "//" prefix so URI.parse
# reads the leading part as a host
href = "//" + href if href.match?(/^(www\.)?furaffinity\.net/)
uri =
begin
URI.parse(href)
rescue URI::InvalidURIError
nil
end
valid_type = !uri.is_a?(URI::MailTo)
# NOTE(review): unparsable and mailto: hrefs return the node in
# node_whitelist rather than unwrapping it like other non-FA links below;
# confirm that keeping those <a> nodes is intended.
next { node_whitelist: [node] } if uri.nil? || !valid_type
uri.host ||= "www.furaffinity.net"
uri.scheme ||= "https"
path = uri.path
fa_host_matcher = /^(www\.)?furaffinity\.net$/
fa_post_matcher = %r{^/view/(\d+)/?$}
fa_user_matcher = %r{^/user/(\w+)/?$}
if fa_host_matcher.match?(uri.host) && path
if match = path.match(fa_post_matcher)
fa_id = match[1].to_i
fa_post_id_to_node[fa_id] = node
next { node_whitelist: [node] }
elsif match = path.match(fa_user_matcher)
fa_url_name = match[1]
fa_user_url_name_to_node[fa_url_name] = node
next { node_whitelist: [node] }
end
end
# Don't allow any other links
node.replace(node.children)
end
end,
],
)
# NOTE(review): `preprocess` is invoked through `send`, i.e. it is not part
# of Sanitize's public interface - verify on gem upgrades.
fragment = Nokogiri::HTML5.fragment(sanitizer.send(:preprocess, html))
sanitizer.node!(fragment)
if fa_post_id_to_node.any?
# Batch load posts and their titles, ensuring fa_post_ids are strings
posts_by_id =
Domain::Fa::Post.where(fa_id: fa_post_id_to_node.keys).index_by(&:fa_id)
# Replace the link text with post titles if available
fa_post_id_to_node.each do |fa_id, node|
if (post = posts_by_id[fa_id])
node.replace(
Nokogiri::HTML5.fragment(
render(
partial: "domain/fa/posts/description_inline_link_fa_post",
locals: {
post: post,
},
),
),
)
else
# unknown post: drop the link but keep its contents
node.replace(node.children)
end
end
end
if fa_user_url_name_to_node.any?
# Batch load users and their names, ensuring fa_user_url_names are strings
users_by_url_name =
Domain::Fa::User
.where(url_name: fa_user_url_name_to_node.keys)
.includes(:avatar)
.index_by(&:url_name)
# Replace the link text with user names if available
fa_user_url_name_to_node.each do |fa_url_name, node|
if (user = users_by_url_name[fa_url_name])
node.replace(
Nokogiri::HTML5.fragment(
render(
partial: "domain/fa/posts/description_inline_link_fa_user",
locals: {
user: user,
},
),
),
)
else
# unknown user: drop the link but keep its contents
node.replace(node.children)
end
end
end
raw fragment.to_html(preserve_newline: true)
end
end

View File

@@ -1,131 +0,0 @@
# typed: false
module Domain::Fa::UsersHelper
extend T::Sig
# Builds the blob path for an avatar image from its binary SHA-256 digest,
# requesting the JPEG format and the given thumbnail variant.
def avatar_url(sha256, thumb: "32-avatar")
  hex_digest = HexUtil.bin2hex(sha256)
  blob_path(hex_digest, format: "jpg", thumb: thumb)
end
# Path to the user's avatar blob (JPEG, optional thumbnail variant), or the
# bundled "user-circle.svg" asset when the user has no avatar digest.
def fa_user_avatar_path(user, thumb: nil)
  digest = user.avatar&.file_sha256
  # default / 'not found' avatar image
  return asset_path("user-circle.svg") unless digest

  blob_path(HexUtil.bin2hex(digest), format: "jpg", thumb: thumb)
end
# Sanitizes a FurAffinity user profile and rewrites links to FA user pages
# so they point at the local user page, prefixed with the user's avatar when
# a matching record exists. Returns the sanitized HTML via `raw`.
def sanitized_fa_user_profile_html(html)
# try to preload all the FA usernames in the profile
# NOTE(review): URI.parse is not rescued here or in the transformer below;
# an <a> with a missing or malformed href would presumably raise - confirm.
maybe_url_names =
Nokogiri
.HTML(html)
.css("a")
.flat_map do |node|
href = URI.parse(node["href"])
right_host = href.host.nil? || href.host == "www.furaffinity.net"
right_path = href.path =~ %r{/user/.+}
if right_host && right_path
[href]
else
[]
end
end
.map { |href| href.path.split("/")[2]&.downcase }
# NOTE(review): `.joins(:avatar)` presumably limits the preload to users that
# have an avatar; other users fall back to the per-link find_by below.
preloaded_users =
Domain::Fa::User
.where(url_name: maybe_url_names)
.select(:id, :state, :state_detail, :log_entry_detail, :url_name)
.joins(:avatar)
.includes(:avatar)
.index_by(&:url_name)
raw Sanitize.fragment(
html,
elements: %w[br img b i span strong],
attributes: {
"span" => %w[style],
"a" => [],
},
css: {
properties: %w[font-size color],
},
transformers:
lambda do |env|
# only <a> nodes pointing at an FA /user/ path are handled
return unless env[:node_name] == "a"
node = env[:node]
href = URI.parse(node["href"])
unless href.host == nil || href.host == "www.furaffinity.net"
return
end
return unless href.path =~ %r{/user/.+}
url_name = href.path.split("/")[2]&.downcase
# strip everything but href from the link, then point it at the local page
Sanitize.node!(
node,
{ elements: %w[a], attributes: { "a" => %w[href] } },
)
node["href"] = domain_fa_user_path(url_name)
# NOTE(review): "decoration-dashed" appears twice in the class list below.
node["class"] = "text-slate-200 underline decoration-slate-200 " +
"decoration-dashed decoration-dashed decoration-1"
whitelist = [node]
user =
preloaded_users[url_name] ||
Domain::Fa::User.find_by(url_name: url_name)
if user
# prepend a small inline avatar image to the link
img = Nokogiri::XML::Node.new("img", node.document)
img["class"] = "inline w-5"
img["src"] = fa_user_avatar_path(user, thumb: "32-avatar")
node.prepend_child(img)
whitelist << img
end
# NOTE(review): this helper returns `node_allowlist` while
# fa_post_description_sanitized returns `node_whitelist`; which key is
# honoured depends on the installed Sanitize version - confirm.
{ node_allowlist: whitelist }
end,
)
end
# TODO - remove this once we've migrated similarity scores to new user model
sig do
params(
user: Domain::User::FaUser,
limit: Integer,
exclude_followed_by: T.nilable(Domain::User::FaUser),
).returns(T::Array[Domain::User::FaUser])
end
# Returns up to `limit` users whose follow factors are nearest to `user`'s,
# optionally excluding users already followed by `exclude_followed_by`.
# Returns an empty array when `user` has no factors row.
def similar_users_by_followed(user, limit: 10, exclude_followed_by: nil)
  user_factors = Domain::Factors::UserUserFollowToFactors.find_by(user: user)
  return [] if user_factors.nil?

  neighbors = Domain::NeighborFinder.find_neighbors(user_factors)
  neighbors = neighbors.limit(limit).includes(:user)

  if exclude_followed_by
    already_followed = exclude_followed_by.followed_users.select(:to_id)
    neighbors = neighbors.where.not(user_id: already_followed)
  end

  neighbors.map(&:user)
end
# Account status parsed from the user's last recorded user page.
# Returns "unknown" when no page id is recorded, the log entry is missing,
# or the stored contents do not look like a user page.
def fa_user_account_status(user)
  page_id = user.log_entry_detail["last_user_page_id"]
  entry = page_id.nil? ? nil : HttpLogEntry.find_by(id: page_id)
  return "unknown" if entry.nil?

  page =
    Domain::Fa::Parser::Page.new(
      entry.response.contents,
      require_logged_in: false,
    )
  page.probably_user_page? ? page.user_page.account_status : "unknown"
end
end

View File

@@ -68,4 +68,18 @@ module Domain::PaginationHelper
path += "?#{uri.query}" if uri.query.present?
path
end
sig do
params(
params:
T.any(ActionController::Parameters, T::Hash[T.untyped, T.untyped]),
).returns(T.nilable(String))
end
# Label for the current page, e.g. "(page 3)". Returns nil for the first
# page, including when params carries no :page at all.
def page_str(params)
  page_number = (params[:page] || 1).to_i
  page_number > 1 ? "(page #{params[:page]})" : nil
end
end

View File

@@ -1,5 +1,5 @@
# typed: false
module Domain::E621::PostsHelper
module Domain::Posts::E621PostsHelper
def icon_asset_for_url(url)
domain = extract_domain(url)
return nil unless domain
@@ -81,7 +81,7 @@ module Domain::E621::PostsHelper
return unless %w[www.furaffinity.net furaffinity.net].include?(uri.host)
fa_id = uri.path.match(%r{/view/(\d+)})[1]
return unless fa_id
Domain::Fa::Post.find_by(fa_id: fa_id)
raise("not implemented")
rescue StandardError
nil
end

View File

@@ -0,0 +1,32 @@
# typed: true
module Domain::Users::FaUsersHelper
extend T::Sig
include HelpersInterface
# TODO - remove this once we've migrated similarity scores to new user model
sig do
params(
user: Domain::User::FaUser,
limit: Integer,
exclude_followed_by: T.nilable(Domain::User::FaUser),
).returns(T::Array[Domain::User::FaUser])
end
# Finds users similar to `user` via nearest-neighbour lookup on follow
# factors. At most `limit` users are returned; users followed by
# `exclude_followed_by` are filtered out when that argument is given.
# A user with no factors row yields an empty array.
def similar_users_by_followed(user, limit: 10, exclude_followed_by: nil)
  own_factors = Domain::Factors::UserUserFollowToFactors.find_by(user: user)
  return [] if own_factors.nil?

  candidates =
    Domain::NeighborFinder.find_neighbors(own_factors).limit(limit).includes(
      :user,
    )

  if exclude_followed_by
    followed_ids = exclude_followed_by.followed_users.select(:to_id)
    candidates = candidates.where.not(user_id: followed_ids)
  end

  candidates.map(&:user)
end
end

View File

@@ -1,22 +0,0 @@
# typed: true
# Maps public source names ("furaffinity", "e621", "inkbunny") to the names
# of the post model classes that back them.
module SourceHelper
  class << self
    # Fresh Hash of source name => post class name.
    def source_name_to_class_name
      {
        "furaffinity" => "Domain::Fa::Post",
        "e621" => "Domain::E621::Post",
        "inkbunny" => "Domain::Inkbunny::Post",
      }
    end

    # All known source names, in declaration order.
    def all_source_names
      source_name_to_class_name.keys
    end

    # Class names for the given source names; unknown names are dropped.
    def source_names_to_class_names(list)
      mapping = source_name_to_class_name
      list.filter_map { |source| mapping[source] }
    end

    # True when `list`, once sorted, equals the sorted list of all sources.
    def has_all_sources?(list)
      list.sort == all_source_names.sort
    end
  end
end

View File

@@ -10,16 +10,6 @@ class Domain::E621::Job::StaticFileJob < Domain::E621::Job::Base
T.cast(file, Domain::PostFile)
elsif (post = args[:post]) && post.is_a?(Domain::Post::E621Post)
T.must(post.file)
elsif (post = args[:post]) && post.is_a?(Domain::E621::Post)
post =
Domain::Post::E621Post.find_by(e621_id: post.e621_id) ||
fatal_error(
format_tags(
"post with not found",
make_tag("e621_id", post.e621_id),
),
)
T.must(post.file)
else
fatal_error(":file or :post is required")
end

View File

@@ -36,8 +36,6 @@ class Domain::Fa::Job::Base < Scraper::JobBase
post = args[:post]
if post.is_a?(Domain::Post::FaPost)
return post
elsif post.is_a?(Domain::Fa::Post)
return Domain::Post::FaPost.find_by!(fa_id: post.fa_id)
elsif fa_id = args[:fa_id]
if build_post
Domain::Post::FaPost.find_or_initialize_by(fa_id: fa_id)
@@ -58,12 +56,9 @@ class Domain::Fa::Job::Base < Scraper::JobBase
return avatar
elsif user.is_a?(Domain::User::FaUser)
return T.must(user.avatar)
elsif user.is_a?(Domain::Fa::User)
user = Domain::User::FaUser.find_by(url_name: user.url_name)
return T.must(user&.avatar)
else
fatal_error(
"arg 'avatar' must be a Domain::UserAvatar or user must be a Domain::Fa::User",
"arg 'avatar' must be a Domain::UserAvatar or user must be a Domain::User::FaUser",
)
end
end
@@ -74,8 +69,6 @@ class Domain::Fa::Job::Base < Scraper::JobBase
user = args[:user]
if user.is_a?(Domain::User::FaUser)
user
elsif user.is_a?(Domain::Fa::User)
Domain::User::FaUser.find_by!(url_name: user.url_name)
elsif url_name = args[:url_name]
if create_if_missing
user =
@@ -97,7 +90,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
end
else
fatal_error(
"arg 'user' must be a Domain::User::FaUser or Domain::Fa::User, or url_name must be provided",
"arg 'user' must be a Domain::User::FaUser, or url_name must be provided",
)
end
end

View File

@@ -19,13 +19,11 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
return
end
post =
if post.is_a?(Domain::Fa::Post)
Domain::Post::FaPost.find_by!(fa_id: post.fa_id)
elsif post.is_a?(Domain::Post::FaPost)
if post.is_a?(Domain::Post::FaPost)
post
else
fatal_error(
"invalid post model: #{post.class}, expected Domain::Fa::Post or Domain::Post::FaPost",
"invalid post model: #{post.class}, expected Domain::Post::FaPost",
)
end
post.file

View File

@@ -126,6 +126,7 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
)
user.scanned_gallery_at = Time.current
Domain::User.reset_counters(user.id, :user_post_creations)
end
end

View File

@@ -73,6 +73,8 @@ module Domain::Inkbunny::Job
{ ib_post_ids: processor.changed_posts.map(&:ib_id) },
)
end
Domain::User.reset_counters(user.id, :user_post_creations)
ensure
logger.pop_tags
end

View File

@@ -1,4 +1,5 @@
# typed: false
# TODO - add specs for sqlite exporter
class Domain::Fa::SqliteExporter
include HasMeasureDuration
include HasColorLogger

View File

@@ -1,70 +0,0 @@
# typed: false
# Fits a Disco::Recommender on the Domain::Fa::Follow rows and writes the
# resulting per-user factor vectors into Domain::Fa::UserFactor.
class Domain::Fa::UserFactorCalculator
  extend T::Sig
  include HasColorLogger
  include HasMeasureDuration

  # epochs - number of training epochs handed to the recommender
  def initialize(epochs = 20)
    widths = Domain::Fa::UserFactor::FACTORS_WIDTHS
    @recommender = Disco::Recommender.new(factors: widths, epochs: epochs)
    logger.info "epochs=#{epochs.to_s.bold} factors=#{widths.to_s.bold}"
  end

  # Loads every (follower_id, followed_id) pair and fits the recommender,
  # treating the follower as the "user" and the followed account as the "item".
  def fit
    # `measure` is given a proc for its title; it receives the block's result
    # (or a falsy value) and picks the matching message.
    progress_title =
      proc do |rows|
        rows ? "loaded #{rows.length.to_s.bold} follows" : "loading follows"
      end

    follows =
      measure(progress_title) do
        pairs = Domain::Fa::Follow.all.pluck(:follower_id, :followed_id)
        pairs.map do |follower_id, followed_id|
          { user_id: follower_id, item_id: followed_id }
        end
      end

    measure("fit #{follows.length.to_s.bold} follows") do
      @recommender.fit(follows)
    end
  end

  # Persists both factor sets: item factors into `for_followed` and user
  # factors into `for_follower`.
  def write_factors
    measure("#{"for_followed".bold} - done") do
      write_factors_col(:item_ids, :item_factors, :for_followed)
    end
    measure("#{"for_follower".bold} - done") do
      write_factors_col(:user_ids, :user_factors, :for_follower)
    end
  end

  # Upserts one factor column in slices of 20,000 rows.
  #
  # id_list_name     - recommender method returning the ids to write
  # getter_name      - recommender method returning the factors for one id
  # factors_col_name - UserFactor column that receives the vectors
  def write_factors_col(id_list_name, getter_name, factors_col_name)
    written = 0
    ids = @recommender.send(id_list_name)
    column = Domain::Fa::UserFactor.columns_hash[factors_col_name.to_s]
    column_width = column.sql_type_metadata.limit
    logger.info "#{factors_col_name.to_s.bold} - writing #{ids.length.to_s.bold} factors"

    rows =
      ids.map do |user_id|
        vector = @recommender.send(getter_name, user_id)
        # left-pad with zeros up to the column's declared width
        zeros = [0.0] * (column_width - vector.length)
        { :user_id => user_id, factors_col_name => zeros + vector.to_a }
      end

    rows.each_slice(20_000) do |batch|
      written += batch.size
      measure(
        " -> write #{batch.size.to_s.bold} factors - (#{written.to_s.bold} total)",
      ) do
        Domain::Fa::UserFactor.upsert_all(
          batch,
          unique_by: :user_id,
          update_only: factors_col_name,
          returning: :id,
        )
      end
    end
  end
end

File diff suppressed because it is too large Load Diff

View File

@@ -1,56 +0,0 @@
# typed: strict
# Concern for post models that keep a companion IndexedPost row (polymorphic
# `postable`). Including classes must descend from ReduxApplicationRecord and
# provide the abstract accessors declared below.
module HasIndexedPost
extend ActiveSupport::Concern
extend T::Helpers
extend T::Sig
abstract!
requires_ancestor { ReduxApplicationRecord }
# Abstract methods that must be implemented by including class
sig { abstract.returns(T.nilable(IndexedPost)) }
def indexed_post
end
sig { abstract.params(post: T.nilable(IndexedPost)).void }
def indexed_post=(post)
end
sig { abstract.returns(T.nilable(ActiveSupport::TimeWithZone)) }
def created_at
end
sig { abstract.returns(T.nilable(ActiveSupport::TimeWithZone)) }
def posted_at
end
sig { abstract.returns(T::Boolean) }
def posted_at_changed?
end
included do
T.bind(self, T.class_of(ReduxApplicationRecord))
# the IndexedPost is saved with the record (autosave), skipped during
# validation, and destroyed along with it
has_one :indexed_post,
as: :postable,
dependent: :destroy,
validate: false,
autosave: true
before_create :ensure_indexed_post!
# Builds the IndexedPost if there is none yet, copying created_at.
sig { returns(IndexedPost) }
def ensure_indexed_post!
self.indexed_post ||=
IndexedPost.new(created_at: self.created_at, postable: self)
end
before_save :ensure_indexed_post_posted_at!
# Mirrors posted_at onto the IndexedPost whenever posted_at has changed;
# returns nil when it has not.
sig { returns(T.nilable(IndexedPost)) }
def ensure_indexed_post_posted_at!
if self.posted_at_changed?
self.ensure_indexed_post!
T.must(self.indexed_post).tap { |ip| ip.posted_at = self.posted_at }
end
end
end
end

View File

@@ -1,6 +0,0 @@
# typed: strict
# Join record between a Domain::E621::User and a Domain::E621::Post the user
# has favourited; stored in the "domain_e621_favs" table.
class Domain::E621::Fav < ReduxApplicationRecord
self.table_name = "domain_e621_favs"
belongs_to :user, class_name: "Domain::E621::User", inverse_of: :favs
belongs_to :post, class_name: "Domain::E621::Post", inverse_of: :favs
end

View File

@@ -1,97 +0,0 @@
# typed: strict
# An e621 post, stored in "domain_e621_posts". Soft-deletable through
# Discard (`deleted_at`); the default scope hides discarded rows.
class Domain::E621::Post < ReduxApplicationRecord
  include AttrJsonRecordAliases
  include HasIndexedPost
  include Discard::Model

  # Details recorded when downloading the post's file failed.
  class FileError
    include AttrJson::Model
    attr_json :retry_count, :integer
    attr_json :status_code, :integer
    attr_json :log_entry_id, :integer
  end

  self.table_name = "domain_e621_posts"
  self.discard_column = :deleted_at
  default_scope -> { kept }

  # see state_detail for scan_error/file_error
  enum :state, %i[ok scan_error file_error]
  enum :rating, %i[s q e]
  validates_presence_of(:e621_id, :state)

  # Default the state and the JSON-backed collections on new instances.
  after_initialize do
    self.state = :ok unless self.state.present?
    # self.state_detail ||= {}
    self.flags_array ||= []
    self.pools_array ||= []
    self.sources_array ||= []
    self.artists_array ||= []
    # tags_array is a Hash of category => tags (see #tags_array)
    self.tags_array ||= {}
  end

  has_many :taggings, class_name: "Domain::E621::Tagging", inverse_of: :post
  has_many :tags, class_name: "Domain::E621::Tag", through: :taggings

  # If the file was scraped, this is the blob entry that represents it
  # (`optional:` was previously the Symbol :true, which only worked because
  # any truthy value is accepted; it is now the boolean)
  belongs_to :file,
             class_name: "::HttpLogEntry",
             optional: true,
             autosave: true

  # NOTE(review): this association uses `e621_id` as its foreign key, the same
  # column validated above as the post's own id - confirm that is intended.
  belongs_to :parent_e621,
             class_name: "Domain::E621::Post",
             foreign_key: :e621_id,
             optional: true

  has_many :favs,
           class_name: "Domain::E621::Fav",
           inverse_of: :post,
           dependent: :destroy
  has_many :faving_users,
           class_name: "Domain::E621::User",
           through: :favs,
           source: :user

  # When was the post's /posts/<post_id>/favorites pages scanned?
  # Used to identify users with a significant number of favorites, setting
  # their `num_other_favs_cached` attribute
  attr_json :scanned_post_favs_at, :datetime
  attr_json :tags_array, ActiveModel::Type::Value.new
  attr_json :flags_array, :string, array: true
  attr_json :pools_array, :string, array: true
  attr_json :sources_array, :string, array: true
  attr_json :artists_array, :string, array: true
  attr_json :e621_updated_at, :datetime
  attr_json :last_index_page_id, :integer
  attr_json :caused_by_entry_id, :integer
  attr_json :scan_log_entry_id, :integer
  attr_json :index_page_ids, :integer, array: true
  attr_json :prev_md5s, :string, array: true
  attr_json :scan_error, :string
  attr_json :file_error, FileError.to_type

  belongs_to :index_page_http_log_entry,
             class_name: "HttpLogEntry",
             optional: true,
             foreign_key: :last_index_page_id

  # URLs identify posts by their e621 id rather than the primary key.
  sig { returns(String) }
  def to_param
    self.e621_id.to_s
  end

  # Tags grouped by category. A stored value that is not a Hash is wrapped
  # under the "general" key; nil stays nil.
  sig { returns(T.nilable(T::Hash[String, T.untyped])) }
  def tags_array
    ta = super
    return nil if ta.nil?
    ta.is_a?(Hash) ? ta : { "general" => ta }
  end

  # Parsed file URL, or nil when `file_url_str` is blank.
  sig { returns(T.nilable(Addressable::URI)) }
  def file_uri
    Addressable::URI.parse(self.file_url_str) if self.file_url_str.present?
  end
end

View File

@@ -1,5 +0,0 @@
# typed: strict
# An e621 tag, stored in "domain_e621_tags" and linked to posts through
# Domain::E621::Tagging rows.
class Domain::E621::Tag < ReduxApplicationRecord
self.table_name = "domain_e621_tags"
has_many :taggings, class_name: "Domain::E621::Tagging", inverse_of: :tag
end

View File

@@ -1,14 +0,0 @@
# typed: strict
# Join record attaching a Domain::E621::Tag to a Domain::E621::Post under a
# category; stored in "domain_e621_taggings" with a composite primary key.
class Domain::E621::Tagging < ReduxApplicationRecord
self.table_name = "domain_e621_taggings"
self.primary_key = %i[post_id tag_id]
belongs_to :post, class_name: "Domain::E621::Post", inverse_of: :taggings
belongs_to :tag, class_name: "Domain::E621::Tag", inverse_of: :taggings
validates_presence_of :post, :tag
# `prefix: true` prefixes the generated enum helpers with "category_"
enum :category,
%i[general artist copyright character species invalid meta lore],
prefix: true
validates_inclusion_of(:category, in: self.categories.keys)
end

View File

@@ -1,31 +0,0 @@
# typed: strict
# An e621 user, stored in "domain_e621_users", together with bookkeeping for
# scans of the user's favourites.
class Domain::E621::User < ReduxApplicationRecord
  self.table_name = "domain_e621_users"
  include AttrJsonRecordAliases
  include AttrJson::Record::QueryScopes

  # the status is only checked when one has been recorded
  validates_inclusion_of :scanned_favs_status,
                         in: %w[ok error],
                         if: :scanned_favs_status?

  has_many :favs, class_name: "Domain::E621::Fav", inverse_of: :user
  has_many :faved_posts,
           class_name: "Domain::E621::Post",
           through: :favs,
           source: :post

  attr_json :favs_are_hidden, :boolean
  # number of favorites that the user has, derived from scraped html
  # on /posts/<post_id>/favorites?page=<n>
  # Used to find users with a significant number of favorites
  attr_json :num_other_favs_cached, :integer
  attr_json :scanned_favs_status, :string
  attr_json :scanned_favs_at, :datetime

  # The user's name with spaces replaced by underscores, or nil when the
  # user has no name.
  sig { returns(T.nilable(::String)) }
  def url_name
    self.name&.gsub(" ", "_")
  end
end

View File

@@ -1,10 +0,0 @@
# typed: strict
# Join record marking a Domain::Fa::Post as favourited by a Domain::Fa::User;
# stored in "domain_fa_favs" with a composite (user_id, post_id) primary key.
class Domain::Fa::Fav < ReduxApplicationRecord
self.table_name = "domain_fa_favs"
self.primary_key = %i[user_id post_id]
# favs whose `removed` column is false
scope :active, -> { where(removed: false) }
belongs_to :user, class_name: "::Domain::Fa::User"
belongs_to :post, class_name: "::Domain::Fa::Post"
end

View File

@@ -1,8 +0,0 @@
# typed: strict
# Directed follow edge between two Domain::Fa::User rows (follower ->
# followed); stored in "domain_fa_follows" with a composite primary key.
class Domain::Fa::Follow < ReduxApplicationRecord
self.table_name = "domain_fa_follows"
self.primary_key = %i[follower_id followed_id]
belongs_to :follower, class_name: "::Domain::Fa::User"
belongs_to :followed, class_name: "::Domain::Fa::User"
end

View File

@@ -1,233 +0,0 @@
# typed: strict
class Domain::Fa::Post < ReduxApplicationRecord
self.table_name = "domain_fa_posts"
include HasIndexedPost
include Pundit::Authorization
include AttrJsonRecordAliases
enum :state,
[
:ok, # so far so good, post may not yet be scanned or have file downloaded
:removed, # post has been removed
:scan_error, # error scanning post page (see state_detail)
:file_error, # error downloading post file (see state_detail)
]
validates_inclusion_of(:state, in: self.states.keys)
validates_presence_of(:fa_id, :state)
after_initialize { self.state ||= :ok }
belongs_to :creator,
class_name: "::Domain::Fa::User",
inverse_of: :posts,
optional: true,
autosave: true
# If the file was scraped, this is the blob entry that represents it
belongs_to :file, class_name: "::HttpLogEntry", optional: true, autosave: true
has_many :fav_post_joins, class_name: "::Domain::Fa::Fav", inverse_of: :post
has_many :faved_by,
class_name: "::Domain::Fa::User",
through: :fav_post_joins,
source: :user
has_one :disco,
class_name: "::Domain::Fa::PostFactor",
inverse_of: :post,
foreign_key: :post_id,
dependent: :destroy
attr_json :title, :string
attr_json :category, :string
attr_json :theme, :string
attr_json :species, :string
attr_json :gender, :string
attr_json :description, :string
attr_json :keywords, :string, array: true, default: []
attr_json :num_favorites, :integer
attr_json :num_comments, :integer
attr_json :num_views, :integer
attr_json :posted_at, :datetime
attr_json :scanned_at, :datetime
attr_json :last_user_page_id, :integer
attr_json :last_submission_page_id, :integer
attr_json :first_browse_page_id, :integer
attr_json :first_gallery_page_id, :integer
attr_json :first_seen_entry_id, :integer
attr_json :scan_file_error, :string
belongs_to :last_user_page, class_name: "HttpLogEntry", optional: true
belongs_to :last_submission_page, class_name: "HttpLogEntry", optional: true
belongs_to :first_browse_page, class_name: "HttpLogEntry", optional: true
belongs_to :first_gallery_page, class_name: "HttpLogEntry", optional: true
belongs_to :first_seen_entry, class_name: "HttpLogEntry", optional: true
sig { params(id: Integer).returns(T.nilable(Domain::Fa::Post)) }
# Looks a post up by its `fa_id` column; returns nil when no row matches.
def self.find_by_fa_id(id)
where(fa_id: id).first
end
# Same lookup as `find_by_fa_id`, but uses `first!`, which raises instead of
# returning nil when no row matches.
sig { params(id: Integer).returns(Domain::Fa::Post) }
def self.find_by_fa_id!(id)
where(fa_id: id).first!
end
# The post is identified by the string form of its `fa_id`.
sig { returns(String) }
def to_param
self.fa_id.to_s
end
sig { returns(T.nilable(Addressable::URI)) }
# Parses `file_url_str` into an Addressable::URI; nil when the column is unset.
def file_uri
Addressable::URI.parse(self.file_url_str) if self.file_url_str
end
# Stores `uri` in `file_url_str` as a string. A URI without a scheme is
# stored as https; a nil/false `uri` clears the column.
sig { params(uri: T.nilable(T.any(String, Addressable::URI))).void }
def file_uri=(uri)
if uri
uri = Addressable::URI.parse(uri)
uri.scheme = "https" if uri.scheme.blank?
self.file_url_str = uri.to_s
else
self.file_url_str = nil
end
end
sig { void }
# Re-points `file` at the HttpLogEntry matching `file_url_str` when the
# currently attached entry's URI differs from it, then logs one line
# describing what happened. Saves the record only when the entry changed.
def fix_file_by_uri!
# Fragments of the log line, joined with spaces at the end.
parts = []
parts << "[fa_id: #{self.fa_id}]"
# NOTE: these locals shadow the `file_uri` / `file_url_str` readers.
file_uri = self.file&.uri
file_url_str = self.file_url_str
if file_url_str.present? && file_uri.present? &&
(file_uri.to_s != file_url_str)
old_file = self.file
new_file = HttpLogEntry.find_by_uri_host_path(file_url_str)
if old_file == new_file
# The lookup resolved to the entry already attached, although the URI
# strings differ.
parts << "[no change][invariant]"
elsif new_file
self.file = new_file
parts << "[old: #{old_file&.id} / #{old_file&.uri.to_s}]"
parts << "[new: #{self.file&.id} / #{self.file&.uri.to_s}]"
self.save!
else
# No log entry exists for the recorded URL; leave the record untouched.
parts << "[new_file_not_found: #{file_url_str}]"
end
else
# Nothing to reconcile: a value is missing or the two already agree.
parts << "[no change]"
parts << "[file: #{self.file&.id} / #{self.file&.uri.to_s}]"
parts << "[file_url_str: #{self.file_url_str}]"
end
logger.info parts.join(" ")
end
sig { returns(T::Boolean) }
# A post counts as scanned once `file_url_str` is present.
def scanned?
self.file_url_str.present?
end
# The stored `scanned_at` value, falling back to the creation time of the
# `last_submission_page` log entry when no value is stored.
sig { returns(T.nilable(ActiveSupport::TimeWithZone)) }
def scanned_at
# # at some point, `scanned_at` was populated to avoid having to look up the
# # post's `last_submission_page` log entry, but we fall back to that
# # if scanned_at isn't populated yet
# if state_detail["scanned_at"]
# Time.at(state_detail["scanned_at"])
# else
# last_submission_page&.created_at
# end
super || last_submission_page&.created_at
end
# sig { params(time: T.nilable(Time)).void }
# def scanned_at=(time)
# self.state_detail["scanned_at"] = time&.to_i
# end
sig { returns(T.nilable(ActiveSupport::TimeWithZone)) }
# Returns `posted_at` when it is set. Otherwise parses the best-guess
# submission page (see `guess_last_submission_page`) and returns the posted
# date found there, or nil when there is no page body or the page does not
# look like a submission.
def guess_posted_at
  known = posted_at
  return known if known

  page_bytes = guess_last_submission_page&.response_bytes
  return nil unless page_bytes

  page = Domain::Fa::Parser::Page.new(page_bytes)
  page.probably_submission? ? page.submission.posted_date : nil
end
# sig { params(log_entry: T.nilable(HttpLogEntry)).void }
# def last_submission_page=(log_entry)
# self.last_submission_page_id = log_entry&.id
# end
# sig { returns(T.nilable(HttpLogEntry)) }
# def last_submission_page
# HttpLogEntry.find_by(id: self.last_submission_page_id)
# end
sig { returns(T.nilable(HttpLogEntry)) }
# Returns the recorded `last_submission_page` when there is one. Otherwise
# returns the most recently created HttpLogEntry for a 200 response to this
# post's /view/<fa_id> page (with or without a trailing slash, no query
# string), or nil when none exists.
def guess_last_submission_page
  recorded = last_submission_page
  return recorded if recorded

  view_paths = ["/view/#{self.fa_id}/", "/view/#{self.fa_id}"]
  HttpLogEntry
    .where(
      uri_host: "www.furaffinity.net",
      uri_path: view_paths,
      uri_query: nil,
      status_code: 200,
    )
    .order(created_at: :desc)
    .first
end
sig { returns(T.nilable(String)) }
# The stored description, minus a leading boilerplate header when present.
# Returns nil for a nil or blank description.
def description
  raw = super
  return nil if raw.nil? || raw.blank?

  # Hack: when the description has more than three lines and opens with an
  # empty line, a line starting with `<a href=`, and a `<br><br>` line, drop
  # those three lines and return the rest (each line stripped, joined by \n).
  rows = raw.lines.map { |row| row.strip.chomp }
  has_header =
    rows.length > 3 && rows[0] == "" && rows[1].start_with?("<a href=") &&
      rows[2] == "<br><br>"
  has_header ? rows.drop(3).join("\n") : raw
end
sig { returns(T::Boolean) }
# True when a file log entry is attached (`file_id` is present).
def have_file?
self.file_id.present?
end
# Finds the post whose `fa_id` equals `submission.id`, or builds an unsaved
# one. Only a newly built post gets its creator, title and
# `first_seen_entry` filled in. The creator user is found or created (and
# persisted) by `submission.artist_url_name`.
sig do
params(
submission: T.untyped,
first_seen_log_entry: T.nilable(HttpLogEntry),
).returns(Domain::Fa::Post)
end
def self.find_or_initialize_by_submission_parser(
submission,
first_seen_log_entry: nil
)
creator =
Domain::Fa::User.find_or_create_by!(
{ url_name: submission.artist_url_name },
) { |user| user.name = submission.artist }
Domain::Fa::Post.find_or_initialize_by(fa_id: submission.id) do |post|
post.creator = creator
post.title = submission.title
post.first_seen_entry = first_seen_log_entry
end
end
end

View File

@@ -1,10 +0,0 @@
# typed: strict
# Per-post factor vector row in `domain_fa_post_factors`, keyed by `post_id`.
class Domain::Fa::PostFactor < ReduxApplicationRecord
self.table_name = "domain_fa_post_factors"
self.primary_key = "post_id"
belongs_to :post, class_name: "::Domain::Fa::Post"
# NOTE(review): presumably the dimensionality of the factor vector — confirm.
FACTORS_WIDTHS = 8
# Declares `for_favorite` as a nearest-neighbor searchable column.
has_neighbors :for_favorite
end

View File

@@ -1,283 +0,0 @@
# typed: false
class Domain::Fa::User < ReduxApplicationRecord
self.table_name = "domain_fa_users"
include Pundit::Authorization
has_many :posts,
class_name: "::Domain::Fa::Post",
inverse_of: :creator,
foreign_key: :creator_id
has_one :disco,
class_name: "::Domain::Fa::UserFactor",
inverse_of: :user,
foreign_key: :user_id,
dependent: :destroy
has_one :avatar,
class_name: "::Domain::Fa::UserAvatar",
inverse_of: :user,
dependent: :destroy
enum :state,
[
:ok, # so far so good, user may not yet be scanned
:scan_error, # user has been removed or otherwise, see state_detail
]
# Who this user follows (join table)
has_many :follower_joins,
class_name: "::Domain::Fa::Follow",
foreign_key: :follower_id,
inverse_of: :follower,
dependent: :destroy
# Who this user follows (User model)
has_many :follows, through: :follower_joins, source: :followed
# Who follows this user (join table)
has_many :followed_joins,
class_name: "::Domain::Fa::Follow",
foreign_key: :followed_id,
inverse_of: :followed,
dependent: :destroy
# Who follows this user (User model)
has_many :followed_by, through: :followed_joins, source: :follower
has_many :fav_post_joins, class_name: "::Domain::Fa::Fav", inverse_of: :user
has_many :active_fav_post_joins,
-> { where(removed: false) },
class_name: "::Domain::Fa::Fav",
inverse_of: :user
has_many :fav_posts,
class_name: "::Domain::Fa::Post",
through: :fav_post_joins,
source: :post
has_many :active_fav_posts,
class_name: "::Domain::Fa::Post",
through: :active_fav_post_joins,
source: :post
# FA `name` can be up to 30 chars long,
# `url_name` can be longer.
validates_presence_of(:name, :url_name)
# Cross-checks `name` against `url_name` and rejects url_names containing
# uppercase letters or whitespace.
validate do
if name && url_name
expected = self.class.name_to_url_name(name)
# For names of 30+ characters only a prefix match is required, since the
# url_name can be longer than the name.
matches =
if name.length >= 30
url_name.starts_with?(expected)
else
url_name == expected
end
unless matches
errors.add(
:name,
"name '#{name}' does not match url_name, expected #{expected} but was #{url_name}",
)
end
end
if url_name && url_name =~ /[A-Z]/
errors.add(:url_name, "url_name '#{url_name}' contains uppercase")
end
if url_name && url_name =~ /\s/
errors.add(:url_name, "url_name '#{url_name}' contains whitespace")
end
end
# Default the state and the two detail hashes on every instantiated record.
after_initialize do
self.state ||= :ok
self.state_detail ||= {}
self.log_entry_detail ||= {}
end
# Refuse to destroy a user that still has posts.
before_destroy { throw :abort if posts.any? }
# Scan type => how long a previous scan stays fresh (see `scan_due?`).
SCAN_TYPES = {
page: 1.month,
gallery: 1.year,
follows: 1.month,
favs: 1.month,
incremental: 1.month,
}
# Scan type => where its `scanned_<type>_at` timestamp is stored: a real
# column, or an entry inside the `state_detail` hash.
SCAN_FIELD_TYPES = {
page: :column,
gallery: :column,
follows: :column,
favs: :column,
incremental: :state_detail,
}
# For every scan type, define `due_for_<type>_scan?` and
# `time_ago_for_<type>_scan`. For state_detail-backed types, also define the
# `scanned_<type>_at` reader/writer pair.
SCAN_TYPES.keys.each do |scan_type|
define_method(:"due_for_#{scan_type}_scan?") { scan_due?(scan_type) }
define_method(:"time_ago_for_#{scan_type}_scan") do
scanned_ago_in_words(scan_type)
end
# Column-backed types need no explicit reader/writer definitions here.
next unless SCAN_FIELD_TYPES[scan_type] == :state_detail
define_method(:"scanned_#{scan_type}_at") do
get_scanned_at_value(scan_type)
end
define_method(:"scanned_#{scan_type}_at=") do |value|
set_scanned_at_value(scan_type, value)
end
end
DATE_HELPER = Class.new.extend(ActionView::Helpers::DateHelper)
# Human-readable age of the last scan of `scan_type`, e.g. "3 days ago";
# "never" when no timestamp is recorded.
def scanned_ago_in_words(scan_type)
  last_scanned = get_scanned_at_value(scan_type)
  return "never" unless last_scanned

  DATE_HELPER.time_ago_in_words(last_scanned) + " ago"
end
# True when `scan_type` has never been scanned, or was last scanned at or
# before the cutoff `SCAN_TYPES[scan_type].ago`.
# Raises RuntimeError for a scan type missing from SCAN_TYPES.
def scan_due?(scan_type)
  interval = SCAN_TYPES[scan_type]
  raise("invalid scan type '#{scan_type}'") unless interval

  last_scanned = get_scanned_at_value(scan_type)
  return true if last_scanned.nil?

  last_scanned <= interval.ago
end
# Reassigns every post of `other_user` to this user with a single
# `update_all`, then reloads both users' `posts` associations.
# Does nothing (returns nil) when `other_user` is this user.
def take_posts_from(other_user)
  return if other_user == self

  other_user.posts.update_all(creator_id: id)
  other_user.posts.reload
  posts.reload
end
sig { returns(Domain::Fa::UserAvatar) }
# Returns the existing avatar record, creating one inside a transaction when
# the user has none yet.
def ensure_avatar!
self.class.transaction { avatar || create_avatar! }
end
# Finds the user whose `url_name` matches the parser's artist, or builds an
# unsaved one named after the parser's artist.
# Raises ArgumentError unless `submission_parser` is one of the two FA
# submission parser helper types.
def self.find_or_build_from_submission_parser(submission_parser)
  case submission_parser
  when Domain::Fa::Parser::ListedSubmissionParserHelper,
       Domain::Fa::Parser::SubmissionParserHelper
    find_or_initialize_by(url_name: submission_parser.artist_url_name) do |user|
      user.name = submission_parser.artist
    end
  else
    raise ArgumentError
  end
end
# Display names whose url_name does not follow the normal derivation rule.
URL_NAME_EXCEPTIONS = { "Kammiu" => "rammiu" }

# Derives the url_name for a display name: strip surrounding whitespace, then
# use the exception table if the name is listed there; otherwise remove
# underscores and whitespace and downcase.
def self.name_to_url_name(name)
  trimmed = name.strip
  URL_NAME_EXCEPTIONS.fetch(trimmed) do
    trimmed.delete("_").gsub(/\s/, "").downcase
  end
end
# TODO: - write method for getting suggested users to follow
# based on this user
# something like:
# UserFactor.nearest_neighbors(
# :for_followed,
# self.disco.for_follows,
# # should this be euclidean? idk, need to test
# distance: "inner_product"
# )
# exclude self.follows.pluck(:followed_id)
# Find users similar to `self`, based on who `self` follows.
# When `exclude_followed_by` is given, users that user already follows are
# left out of the result.
def similar_users_by_follower(exclude_followed_by: nil)
similar_users_by(:for_follower, exclude_followed_by)
end
# Find users similar to `self`, based on who follows `self`.
# When `exclude_followed_by` is given, users that user already follows are
# left out of the result.
def similar_users_by_followed(exclude_followed_by: nil)
similar_users_by(:for_followed, exclude_followed_by)
end
# Best-effort lookup of an HttpLogEntry for this user's FA profile page.
# Tries, in order:
#   1. the entry id recorded under log_entry_detail["last_user_page_id"]
#      (present on most FA user models),
#   2. the newest logged https request to /user/<url_name>/,
#   3. the newest logged https request to /user/<url_name>
#      (older requests don't end in a trailing slash).
# Returns nil when none of them exists.
def guess_user_page_log_entry
  detail = log_entry_detail
  last_page_id = detail && detail["last_user_page_id"]
  recorded = last_page_id && HttpLogEntry.find_by(id: last_page_id)
  return recorded if recorded

  ["/user/#{url_name}/", "/user/#{url_name}"].each do |path|
    entry =
      HttpLogEntry
        .where(
          uri_scheme: "https",
          uri_host: "www.furaffinity.net",
          uri_path: path,
        )
        .order(created_at: :desc)
        .first
    return entry if entry
  end
  # TODO: - maybe can look for posts as well, those might list an avatar
  nil
end
# The user is identified by `url_name`.
def to_param
url_name
end
private
# Users nearest to this one (euclidean distance) on the `factor_col` vector
# of the `disco` factor record. When `exclude_followed_by` is given, users
# that user already follows are filtered out.
def similar_users_by(factor_col, exclude_followed_by)
  neighbors = disco.nearest_neighbors(factor_col, distance: "euclidean")
  if exclude_followed_by
    already_followed = exclude_followed_by.follows.select(:followed_id)
    neighbors = neighbors.where.not(user_id: already_followed)
  end
  users_from_disco_query(neighbors)
end
# Turns a nearest-neighbor query over the factor table into a User query:
# joins users to their `disco` row, merges in the neighbor query (reduced to
# selecting `user_id`), and selects all user columns plus the last select
# expression of the neighbor query.
# NOTE(review): that last select value is presumably the computed distance
# column — confirm.
def users_from_disco_query(disco_query)
Domain::Fa::User
.select("domain_fa_users.*", disco_query.select_values.last)
.joins(:disco)
.merge(disco_query.reselect(:user_id))
end
# Reads the last-scanned timestamp for `scan_type` from wherever
# SCAN_FIELD_TYPES says it lives: the `scanned_<type>_at` column, or a string
# in `state_detail` parsed in the current time zone (nil when absent).
# Raises RuntimeError for an unknown scan type.
def get_scanned_at_value(scan_type)
  storage = SCAN_FIELD_TYPES[scan_type]
  return send(:"scanned_#{scan_type}_at") if storage == :column
  raise("invalid scan type '#{scan_type}'") unless storage == :state_detail

  raw = state_detail["scanned_#{scan_type}_at"]
  raw ? Time.zone.parse(raw) : nil
end
# Writes the last-scanned timestamp for `scan_type` to wherever
# SCAN_FIELD_TYPES says it lives: the `scanned_<type>_at=` column writer, or
# an ISO 8601 string inside `state_detail`.
#
# A nil `value` clears a state_detail-backed timestamp (stored as nil, which
# the reader treats as "never scanned") instead of raising NoMethodError.
# Raises RuntimeError for an unknown scan type.
def set_scanned_at_value(scan_type, value)
  case SCAN_FIELD_TYPES[scan_type]
  when :column
    send(:"scanned_#{scan_type}_at=", value)
  when :state_detail
    state_detail["scanned_#{scan_type}_at"] = value&.iso8601
  else
    raise("invalid scan type '#{scan_type}'")
  end
end
end

View File

@@ -1,78 +0,0 @@
# typed: false
class Domain::Fa::UserAvatar < ReduxApplicationRecord
self.table_name = "domain_fa_user_avatars"
enum :state,
[
:ok, # got the file, no problem
:download_error, # other error processing the file
:no_file_on_guessed_user_page_error,
:file_not_found, # 404 from server
]
after_initialize do
self.state ||= :ok
self.state_detail ||= {}
end
belongs_to :user, class_name: "::Domain::Fa::User"
belongs_to :file,
foreign_key: :file_sha256,
class_name: "::BlobFile",
optional: true
belongs_to :log_entry, class_name: "::HttpLogEntry", optional: true
# The BlobFile for `file_sha256`, obtained via `BlobFile.migrate_sha256!` and
# memoized after the first successful lookup. Returns nil while
# `file_sha256` is unset.
def file
  return unless file_sha256

  @file_model ||= BlobFile.migrate_sha256!(file_sha256)
end
# Parses `file_url_str` into an Addressable::URI; nil when the column is
# blank.
def file_uri
  Addressable::URI.parse(file_url_str) unless file_url_str.blank?
end

# Stores `uri` (a String or Addressable::URI) in `file_url_str` as a string,
# defaulting a missing scheme to https.
#
# A nil `uri` clears the column, matching Domain::Fa::Post#file_uri=.
# Previously nil raised NoMethodError, because Addressable::URI.parse(nil)
# returns nil.
def file_uri=(uri)
  if uri.nil?
    self.file_url_str = nil
  else
    uri = Addressable::URI.parse(uri) unless uri.is_a?(Addressable::URI)
    uri.scheme = "https" if uri.scheme.blank?
    self.file_url_str = uri.to_s
  end
end
# Delegates to the owning user's `guess_user_page_log_entry`.
def guess_user_page_log_entry
user.guess_user_page_log_entry
end
# Tries to recover the avatar URL from already-logged HTTP responses.
# Returns one of:
#   [:user_page, url]          - found on the user's logged profile page
#   [:post_page, url, fa_id]   - found on a logged submission page of one of
#                                the user's three newest posts that have
#                                state ok/nil and a file URL
#   [:not_found, nil]          - nothing usable was logged
def guess_file_uri_from_hles_with_info
  user_page_entry = guess_user_page_log_entry
  if user_page_entry
    parsed =
      Domain::Fa::Parser::Page.new(
        user_page_entry.response.contents,
        require_logged_in: false,
      )
    thumb_url = parsed.probably_user_page? && parsed.user_page.profile_thumb_url
    return :user_page, thumb_url if thumb_url
  end

  recent_posts =
    user
      .posts
      .where(state: [:ok, nil])
      .where("file_url_str IS NOT NULL")
      .order(created_at: :desc)
      .limit(3)

  recent_posts.each do |post|
    entry = post.guess_last_submission_page
    next unless entry

    parsed = Domain::Fa::Parser::Page.new(entry.response.contents)
    next unless parsed.probably_submission?

    avatar_url = parsed.submission.artist_avatar_url
    return :post_page, avatar_url, post.fa_id if avatar_url
  end

  [:not_found, nil]
end
# Just the URL element (index 1) of `guess_file_uri_from_hles_with_info`;
# nil when nothing was found.
def guess_file_uri_from_hles
guess_file_uri_from_hles_with_info[1]
end
end

View File

@@ -1,10 +0,0 @@
# typed: strict
# Per-user factor vectors stored in `domain_fa_user_factors`.
class Domain::Fa::UserFactor < ReduxApplicationRecord
self.table_name = "domain_fa_user_factors"
belongs_to :user, class_name: "::Domain::Fa::User"
# NOTE(review): presumably the dimensionality of the factor vectors — confirm.
FACTORS_WIDTHS = 16
# Declares the two vector columns as nearest-neighbor searchable.
has_neighbors :for_follower
has_neighbors :for_followed
end

View File

@@ -1,4 +0,0 @@
# typed: strict
# Bare ActiveRecord mapping of the `domain_inkbunny_favs` table; declares no
# associations or validations.
class Domain::Inkbunny::Fav < ReduxApplicationRecord
self.table_name = "domain_inkbunny_favs"
end

View File

@@ -1,28 +0,0 @@
# typed: strict
# A single file attached to an Inkbunny post, stored in
# `domain_inkbunny_files`. Optionally linked to the downloaded blob and to
# the HTTP log entry of the download.
class Domain::Inkbunny::File < ReduxApplicationRecord
  self.table_name = "domain_inkbunny_files"

  belongs_to :post, class_name: "::Domain::Inkbunny::Post", inverse_of: :files
  belongs_to :blob_entry,
             class_name: "::BlobEntry",
             foreign_key: :blob_entry_sha256,
             optional: true
  belongs_to :log_entry, class_name: "::HttpLogEntry", optional: true

  enum :state, %i[ok error]

  # Default the state to :ok and the detail hash to {}.
  after_initialize do
    self.state = :ok if self.state.blank?
    self.state_detail ||= {}
  end

  validates_presence_of(
    %i[ib_file_id file_name url_str ib_created_at file_order],
  )
  # Checksums are only required for files that are not in the error state.
  validates_presence_of(
    %i[md5_initial md5s],
    unless: -> { T.bind(self, Domain::Inkbunny::File).state == "error" },
  )
end

View File

@@ -1,4 +0,0 @@
# typed: strict
# Bare ActiveRecord mapping of the `domain_inkbunny_follows` table; declares
# no associations or validations.
class Domain::Inkbunny::Follow < ReduxApplicationRecord
self.table_name = "domain_inkbunny_follows"
end

View File

@@ -1,12 +0,0 @@
# typed: strict
# An Inkbunny pool (a collection of posts), stored in
# `domain_inkbunny_pools`. Posts are reached through PoolJoin rows.
class Domain::Inkbunny::Pool < ReduxApplicationRecord
self.table_name = "domain_inkbunny_pools"
has_many :pool_joins, class_name: "::Domain::Inkbunny::PoolJoin"
has_many :posts, through: :pool_joins, source: :post
belongs_to :deep_update_log_entry,
class_name: "::HttpLogEntry",
optional: true
# Each Inkbunny pool id may appear only once.
validates :ib_pool_id, presence: true, uniqueness: true
end

View File

@@ -1,17 +0,0 @@
# typed: strict
# Membership of a post in a pool, stored in `domain_inkbunny_pool_joins`.
# NOTE(review): `left_post` / `right_post` presumably point at the
# neighbouring posts within the pool's ordering — confirm.
class Domain::Inkbunny::PoolJoin < ReduxApplicationRecord
self.table_name = "domain_inkbunny_pool_joins"
belongs_to :pool
belongs_to :post
belongs_to :left_post,
class_name: "Domain::Inkbunny::Post",
foreign_key: :left_post_id,
optional: true
belongs_to :right_post,
class_name: "Domain::Inkbunny::Post",
foreign_key: :right_post_id,
optional: true
end

View File

@@ -1,54 +0,0 @@
# typed: strict
# An Inkbunny submission, stored in `domain_inkbunny_posts`. Has a creator,
# any number of files, and pool memberships.
class Domain::Inkbunny::Post < ReduxApplicationRecord
  self.table_name = "domain_inkbunny_posts"

  include HasIndexedPost

  belongs_to :creator,
             class_name: "::Domain::Inkbunny::User",
             inverse_of: :posts
  belongs_to :deep_update_log_entry,
             class_name: "::HttpLogEntry",
             optional: true
  belongs_to :shallow_update_log_entry,
             class_name: "::HttpLogEntry",
             optional: true

  has_many :files, class_name: "::Domain::Inkbunny::File", inverse_of: :post

  enum :state, %i[ok error]
  enum :rating, %i[general mature adult]
  # Values are stored by position in this list, so the order must not change.
  enum :submission_type,
       %i[
         unknown
         picture_pinup
         sketch
         picture_series
         comic
         portfolio
         flash_animation
         flash_interactive
         video_feature
         video_animation
         music_single
         music_album
         writing_document
         character_sheet
         photography
       ]

  has_many :pool_joins, class_name: "::Domain::Inkbunny::PoolJoin"
  has_many :pools, through: :pool_joins, source: :pool

  # Default the state to :ok and both detail hashes to {}.
  after_initialize do
    self.state = :ok if self.state.blank?
    self.state_detail ||= {}
    self.ib_detail_raw ||= {}
  end

  # The post is identified by the string form of its Inkbunny post id.
  sig { returns(String) }
  def to_param
    "#{ib_post_id}"
  end
end

View File

@@ -1,4 +0,0 @@
# typed: strict
# Bare ActiveRecord mapping of the `domain_inkbunny_tags` table; declares no
# associations or validations.
class Domain::Inkbunny::Tag < ReduxApplicationRecord
self.table_name = "domain_inkbunny_tags"
end

View File

@@ -1,4 +0,0 @@
# typed: strict
# Bare ActiveRecord mapping of the `domain_inkbunny_taggings` table; declares
# no associations or validations.
class Domain::Inkbunny::Tagging < ReduxApplicationRecord
self.table_name = "domain_inkbunny_taggings"
end

View File

@@ -1,49 +0,0 @@
# typed: strict
# An Inkbunny user, stored in `domain_inkbunny_users`, together with
# references to the avatar blob and the log entries of the scans that
# produced the record.
class Domain::Inkbunny::User < ReduxApplicationRecord
  self.table_name = "domain_inkbunny_users"

  has_many :posts,
           class_name: "::Domain::Inkbunny::Post",
           inverse_of: :creator,
           foreign_key: :creator_id

  # The avatar blob is referenced by its sha256 rather than a numeric id.
  belongs_to :avatar,
             class_name: "::BlobEntry",
             foreign_key: :avatar_file_sha256,
             primary_key: :sha256,
             optional: true
  belongs_to :avatar_log_entry,
             class_name: "::HttpLogEntry",
             foreign_key: :avatar_file_log_entry_id,
             optional: true
  belongs_to :deep_update_log_entry,
             class_name: "::HttpLogEntry",
             optional: true
  belongs_to :shallow_update_log_entry,
             class_name: "::HttpLogEntry",
             optional: true

  validates_presence_of :ib_user_id, :name

  enum :state, %i[ok error]
  enum :avatar_state, %i[ok not_found error], prefix: :avatar

  # Default the state to :ok and both detail hashes to {}.
  after_initialize do
    self.state = :ok if self.state.blank?
    self.state_detail ||= {}
    self.avatar_state_detail ||= {}
  end

  # True when the gallery has never been scanned or the last scan is more
  # than one month old.
  sig { returns(T::Boolean) }
  def due_for_gallery_scan?
    last_scan = scanned_gallery_at
    return true if last_scan.blank?

    last_scan < 1.month.ago
  end

  # The user is identified by name.
  sig { returns(T.nilable(String)) }
  def to_param
    name
  end
end

View File

@@ -8,11 +8,6 @@ class Domain::User::InkbunnyUser < Domain::User
attr_json :shallow_update_log_entry_id, :integer
attr_json :ib_detail_raw, ActiveModel::Type::Value.new
has_many :posts,
class_name: "::Domain::Inkbunny::Post",
through: :user_post_creations,
source: :post
belongs_to :deep_update_log_entry,
class_name: "::HttpLogEntry",
optional: true

View File

@@ -1,157 +0,0 @@
# typed: strict
class IndexedPost < ReduxApplicationRecord
extend T::Sig
validates_presence_of :postable_id, :postable_type
# Copy `posted_at` from the underlying post when the stored column is empty.
#
# The raw attribute is read via `attributes` because the `posted_at` reader
# on this class falls back to the postable's value and so cannot tell whether
# the column itself is set. The former `self.attributes["posted_at"] = ...`
# line was dropped: `attributes` returns a fresh hash on every call, so that
# assignment never reached the record.
before_validation do
  self.posted_at = postable&.posted_at if self.attributes["posted_at"].nil?
end
# fake a polymorphic association because tapioca doesn't support it
# belongs_to :postable, polymorphic: true, inverse_of: :indexed_post
# has_one :file, through: :postable
sig do
params(
postable:
T.any(Domain::Fa::Post, Domain::E621::Post, Domain::Inkbunny::Post),
).returns(
T.any(Domain::Fa::Post, Domain::E621::Post, Domain::Inkbunny::Post),
)
end
# Hand-rolled writer for the fake polymorphic association: records the
# post's id and class name in `postable_id` / `postable_type`, caches the
# object in @postable, and returns it.
def postable=(postable)
self.postable_id = postable.id
self.postable_type = postable.class.name
# T.let declares the instance variable's type for Sorbet; T.must unwraps the
# nilable so the method returns the non-nil post.
T.must(
@postable =
T.let(
postable,
T.nilable(
T.any(Domain::Fa::Post, Domain::E621::Post, Domain::Inkbunny::Post),
),
),
)
end
sig do
returns(
T.nilable(
T.any(Domain::Fa::Post, Domain::E621::Post, Domain::Inkbunny::Post),
),
)
end
# Hand-rolled reader for the fake polymorphic association: loads (and
# memoizes) the post named by `postable_type` / `postable_id`.
# Yields nil when `postable_id` is nil or the type is not one of the three
# known post classes. `find` raises if the referenced row no longer exists.
def postable
@postable ||=
(id = postable_id) &&
case postable_type
when "Domain::Fa::Post"
Domain::Fa::Post.find(id)
when "Domain::E621::Post"
Domain::E621::Post.find(id)
when "Domain::Inkbunny::Post"
Domain::Inkbunny::Post.find(id)
end
end
sig { returns(T.nilable(String)) }
# Display name of the post's artist, or nil when unknown.
# - FA / Inkbunny: the creator's name.
# - E621: the first entry under the "artist" key when `tags_array` is a
#   Hash. A missing "artist" key yields nil instead of raising
#   NoMethodError; any other `tags_array` shape (including nil) yields nil.
def artist_name
  case post = postable
  when Domain::Fa::Post
    post.creator&.name
  when Domain::E621::Post
    tags = post.tags_array
    tags.is_a?(Hash) ? tags["artist"]&.first : nil
  when Domain::Inkbunny::Post
    post.creator&.name
  end
end
sig { returns(T.nilable(String)) }
# Application path to the artist's user page, or nil when there is none:
# E621 posts never have one, and FA / Inkbunny posts only when a creator is
# attached.
def artist_path
  record = postable
  case record
  when Domain::Fa::Post
    artist = record.creator
    Rails.application.routes.url_helpers.domain_fa_user_path(artist) if artist
  when Domain::E621::Post
    nil
  when Domain::Inkbunny::Post
    artist = record.creator
    if artist
      Rails.application.routes.url_helpers.domain_inkbunny_user_path(artist)
    end
  end
end
sig { returns(T.nilable(String)) }
# Title to display for the post. FA / Inkbunny posts use their own title and
# fall back to "<site> #<id>"; E621 posts always use "E621 #<id>".
# Returns nil when there is no postable.
def title
  record = postable
  case record
  when Domain::Fa::Post then record.title || "FA ##{record.fa_id}"
  when Domain::E621::Post then "E621 ##{record.e621_id}"
  when Domain::Inkbunny::Post then record.title || "IB ##{record.ib_post_id}"
  end
end
sig { returns(T.nilable(ActiveSupport::TimeWithZone)) }
# The stored `posted_at`, falling back to the underlying post's value.
def posted_at
super || postable&.posted_at
end
# SHA-256 of the post's file: the file log entry's response hash for
# FA / E621, or the first file's blob hash for Inkbunny. nil when there is
# no file.
sig { returns(T.nilable(String)) }
def file_sha256
case post = postable
when Domain::Fa::Post, Domain::E621::Post
post.file&.response_sha256
when Domain::Inkbunny::Post
post.files.first&.blob_entry_sha256
end
end
# NOTE: despite the name, this returns the `to_s` of the blob/response
# object (a String), not the object itself.
sig { returns(T.nilable(String)) }
def file_blob_entry
case post = postable
when Domain::Fa::Post, Domain::E621::Post
post.file&.response&.to_s
when Domain::Inkbunny::Post
post.files.first&.blob_entry&.to_s
end
end
sig { returns(T.nilable(String)) }
# Label for the link to the post on its origin site ("FA #<id>",
# "E621 #<id>" or "IB #<id>"); nil when the site id is missing or there is
# no postable.
def external_link_title
  record = postable
  case record
  when Domain::Fa::Post
    number = record.fa_id
    number.present? ? "FA ##{number}" : nil
  when Domain::E621::Post
    number = record.e621_id
    number.present? ? "E621 ##{number}" : nil
  when Domain::Inkbunny::Post
    number = record.ib_post_id
    number.present? ? "IB ##{number}" : nil
  end
end
sig { returns(T.nilable(String)) }
# URL of the post on its origin site; nil when the site id is missing or
# there is no postable.
def external_link_url
  record = postable
  case record
  when Domain::Fa::Post
    number = record.fa_id
    number.present? ? "https://www.furaffinity.net/view/#{number}" : nil
  when Domain::E621::Post
    number = record.e621_id
    number.present? ? "https://e621.net/posts/#{number}" : nil
  when Domain::Inkbunny::Post
    number = record.ib_post_id
    number.present? ? "https://inkbunny.net/s/#{number}" : nil
  end
end
end

View File

@@ -1,20 +0,0 @@
# typed: true
# Pundit policy for Domain::Fa::Post: basic info is public; files and scraper
# metadata are admin-only.
class Domain::Fa::PostPolicy < ApplicationPolicy
def show?
true # Anyone can view the basic post info
end
# Only admins may view the post's file.
def view_file?
is_role_admin?
end
# Only admins may view scraper metadata.
def view_scraper_metadata?
is_role_admin?
end
class Scope < ApplicationPolicy::Scope
def resolve
scope.all # All users can see posts exist in lists
end
end
end

View File

@@ -1,29 +0,0 @@
# typed: true
# Pundit policy for Domain::Fa::User: listing and basic info are public;
# scan/enqueue actions need admin or moderator; scrape timestamps are
# admin-only.
class Domain::Fa::UserPolicy < ApplicationPolicy
def index?
true # Anyone can view the index
end
def show?
true # Anyone can view basic user info
end
# Only admins and moderators can access these actions
def scan_user?
is_role_admin? || is_role_moderator?
end
def enqueue_objects?
is_role_admin? || is_role_moderator?
end
# Only admins may see when the user was last scraped.
def view_scraped_at_timestamps?
is_role_admin?
end
class Scope < ApplicationPolicy::Scope
def resolve
scope.all # All users can see users exist in lists
end
end
end

View File

@@ -1,20 +0,0 @@
# typed: true
# Pundit policy for Domain::Inkbunny::Post: basic info is public; files and
# scraper metadata are admin-only.
class Domain::Inkbunny::PostPolicy < ApplicationPolicy
def show?
true # Anyone can view the basic post info
end
# Only admins may view the post's files.
def view_file?
is_role_admin?
end
# Only admins may view scraper metadata.
def view_scraper_metadata?
is_role_admin?
end
class Scope < ApplicationPolicy::Scope
def resolve
scope.all # All users can see posts exist in lists
end
end
end

View File

@@ -17,16 +17,6 @@
<% case job_arg.value %>
<% when HttpLogEntry %>
<%= render "good_job/arguments/http_log_entry", log_entry: job_arg.value %>
<% when Domain::Fa::Post %>
<%= render "good_job/arguments/domain_fa_post", post: job_arg.value %>
<% when Domain::Fa::User %>
<%= render "good_job/arguments/domain_fa_user", user: job_arg.value %>
<% when Domain::Inkbunny::User %>
<%= render "good_job/arguments/domain_inkbunny_user", user: job_arg.value %>
<% when Domain::Inkbunny::File %>
<%= render "good_job/arguments/domain_inkbunny_file", file: job_arg.value %>
<% when Domain::E621::Post %>
<%= render "good_job/arguments/domain_e621_post", post: job_arg.value %>
<% when Domain::PostFile %>
<%= render "good_job/arguments/domain_post_file", post_file: job_arg.value %>
<% when Domain::Post %>

View File

@@ -1,68 +0,0 @@
<%# Job-argument badge row for a Domain::E621::Post (local: `post`): link to the post, file URL, file log entry with HTTP status, post state, created-at time. %>
<div class="d-flex align-items-center gap-2">
<%= link_to Rails.application.routes.url_helpers.domain_e621_post_path(post),
class: "badge bg-primary",
target: "_blank" do %>
<i class="fa-solid fa-cat me-1"></i>Domain::E621::Post #<%= post.id %>
<% end %>
<div class="d-flex align-items-center ms-auto gap-2">
<% if post.file_url_str.present? %>
<%= link_to post.file_url_str,
class: "badge bg-secondary text-truncate-link",
target: "_blank",
rel: "noopener noreferrer nofollow" do %>
<i class="fa-solid fa-link me-1"></i><%= post.file_url_str %>
<% end %>
<% end %>
<% if post.file.present? %>
<%= link_to Rails.application.routes.url_helpers.log_entry_path(post.file),
class: "badge bg-secondary",
target: "_blank" do %>
<i class="fa-solid fa-file me-1"></i>HttpLogEntry #<%= post.file.id %>
<% end %>
<span
class="badge <%= post.file.status_code.to_i < 400 ? "bg-success" : "bg-danger" %>"
>
<i class="fa-solid fa-signal me-1"></i><%= post.file.status_code %>
</span>
<% else %>
<span class="badge bg-warning text-dark">
<i class="fa-solid fa-file me-1"></i>Not present
</span>
<% end %>
<span
class="badge <%= case post.state
when "ok"
"bg-success"
when "removed"
"bg-danger"
when "scan_error", "file_error"
"bg-warning text-dark"
end %>"
>
<i
class="<%= case post.state
when "ok"
"fa-solid fa-check"
when "removed"
"fa-solid fa-trash"
when "scan_error"
"fa-solid fa-magnifying-glass-exclamation"
when "file_error"
"fa-solid fa-file-circle-exclamation"
end %> me-1"
></i
><%= post.state %>
</span>
<span
class="badge bg-light text-dark"
title="<%= time_ago_in_words(post.created_at) %> ago"
>
<i class="fa-regular fa-clock me-1"></i
><%= post.created_at.strftime("%Y-%m-%d %H:%M:%S") %>
</span>
</div>
</div>

View File

@@ -1,49 +0,0 @@
<%# Job-argument badge row for a Domain::E621::User (local: `user`): external profile link, favorite-scan status, name, cached favorite count, last scan time, hidden-favorites marker. %>
<div class="d-flex align-items-center gap-2">
  <%# External link: carries the same rel attribute the sibling partials use for off-site links. %>
  <%= link_to "https://e621.net/users/#{user.e621_user_id}",
  class: "badge bg-primary",
  target: "_blank",
  rel: "noopener noreferrer nofollow" do %>
    <i class="fa-solid fa-paw me-1"></i>Domain::E621::User #<%= user.id %>
  <% end %>
  <div class="d-flex align-items-center ms-auto gap-2">
    <% if user.scanned_favs_status.present? %>
      <span
        class="badge <%= user.scanned_favs_status == "ok" ? "bg-success" : "bg-warning text-dark" %>"
      >
        <i
          class="<%= if user.scanned_favs_status == "ok"
                       "fa-solid fa-check"
                     else
                       "fa-solid fa-exclamation-triangle"
                     end %> me-1"
        ></i>
        <%= user.scanned_favs_status %>
      </span>
    <% end %>
    <span class="badge bg-secondary">
      <i class="fa-solid fa-user me-1"></i><%= user.name %>
    </span>
    <% if user.num_other_favs_cached.present? %>
      <span class="badge bg-info text-dark">
        <i class="fa-solid fa-heart me-1"></i><%= user.num_other_favs_cached %>
        favs
      </span>
    <% end %>
    <% if user.scanned_favs_at.present? %>
      <span
        class="badge bg-light text-dark"
        title="<%= time_ago_in_words(user.scanned_favs_at) %> ago"
      >
        <i class="fa-regular fa-clock me-1"></i
        ><%= user.scanned_favs_at.strftime("%Y-%m-%d %H:%M:%S") %>
      </span>
    <% end %>
    <% if user.favs_are_hidden %>
      <span class="badge bg-danger">
        <i class="fa-solid fa-eye-slash me-1"></i>Hidden
      </span>
    <% end %>
  </div>
</div>

View File

@@ -1,58 +0,0 @@
<%# Job-argument badge row for a Domain::Fa::Post (local: `post`): link to the post, state, title, creator, posted-at time. %>
<div class="d-flex align-items-center gap-2">
  <%= link_to Rails.application.routes.url_helpers.domain_fa_post_path(post),
  class: "badge bg-primary ",
  target: "_blank" do %>
    <i class="fa-solid fa-paw me-1"></i>Domain::Fa::Post #<%= post.id %>
  <% end %>
  <div class="d-flex align-items-center ms-auto gap-2">
    <span
      class="badge <%= case post.state
                       when "ok"
                         "bg-success"
                       when "removed"
                         "bg-danger"
                       when "scan_error", "file_error"
                         "bg-warning text-dark"
                       end %>"
    >
      <i
        class="<%= case post.state
                   when "ok"
                     "fa-solid fa-check"
                   when "removed"
                     "fa-solid fa-trash"
                   when "scan_error"
                     "fa-solid fa-magnifying-glass-exclamation"
                   when "file_error"
                     "fa-solid fa-file-circle-exclamation"
                   end %> me-1"
      ></i
      ><%= post.state %>
    </span>
    <span class="badge bg-secondary" title="<%= post.title %>">
      <i class="fa-regular fa-image me-1"></i><%= post.title %>
    </span>
    <% if post.creator.present? %>
      <%= link_to Rails.application.routes.url_helpers.domain_fa_user_path(post.creator),
      class: "badge bg-light text-dark" do %>
        <i class="fa-solid fa-user me-1"></i><%= post.creator.url_name %>
      <% end %>
    <% else %>
      <span class="badge bg-light text-dark">
        <i class="fa-solid fa-user me-1"></i>No Creator
      </span>
    <% end %>
    <%# The " ago" suffix is only added when there is a time to describe, so an unknown date no longer reads "(posted at unknown) ago". %>
    <span
      class="badge bg-light text-dark"
      title="<%= post.posted_at ? "#{time_ago_in_words(post.posted_at)} ago" : "(posted at unknown)" %>"
    >
      <i class="fa-regular fa-clock me-1"></i><%= if post.posted_at
        post.posted_at.strftime("%Y-%m-%d %H:%M:%S")
      else
        "(posted at unknown)"
      end %>
    </span>
  </div>
</div>

View File

@@ -1,37 +0,0 @@
<%# Job-argument badge row for a Domain::Fa::User (local: `user`): link to the user, state, url_name, display name (only when it differs from url_name), created-at time. %>
<div class="d-flex align-items-center gap-2">
<%= link_to Rails.application.routes.url_helpers.domain_fa_user_path(user),
class: "badge bg-primary",
target: "_blank" do %>
<i class="fa-solid fa-paw me-1"></i>Domain::Fa::User #<%= user.id %>
<% end %>
<div class="d-flex align-items-center ms-auto gap-2">
<span
class="badge <%= user.state == "ok" ? "bg-success" : "bg-warning text-dark" %>"
>
<i
class="<%= if user.state == "ok"
"fa-solid fa-check"
else
"fa-solid fa-magnifying-glass-exclamation"
end %> me-1"
></i
><%= user.state %>
</span>
<span class="badge bg-secondary">
<i class="fa-solid fa-at me-1"></i><%= user.url_name %>
</span>
<% if user.name.present? && user.name != user.url_name %>
<span class="badge bg-success">
<i class="fa-solid fa-signature me-1"></i><%= user.name %>
</span>
<% end %>
<span
class="badge bg-light text-dark"
title="<%= time_ago_in_words(user.created_at) %> ago"
>
<i class="fa-regular fa-clock me-1"></i
><%= user.created_at.strftime("%Y-%m-%d %H:%M:%S") %>
</span>
</div>
</div>

View File

@@ -1,66 +0,0 @@
<%# Job-argument badge row for a Domain::Inkbunny::File (local: `file`): link to the owning post, file URL, download log entry, file state, created-at time. %>
<div class="d-flex align-items-center gap-2">
  <%= link_to Rails.application.routes.url_helpers.domain_inkbunny_post_path(
    file.post,
  ),
  class: "badge bg-primary",
  target: "_blank" do %>
    <i class="fa-solid fa-paw me-1"></i>Domain::Inkbunny::File #<%= file.id %>
  <% end %>
  <div class="d-flex align-items-center ms-auto gap-2">
    <% if file.url_str.present? %>
      <%= link_to file.url_str,
      class: "badge bg-secondary text-truncate-link",
      target: "_blank",
      rel: "noopener noreferrer nofollow" do %>
        <i class="fa-solid fa-link me-1"></i><%= file.url_str %>
      <% end %>
    <% end %>
    <% if file.log_entry.present? %>
      <%= link_to Rails.application.routes.url_helpers.log_entry_path(file.log_entry),
      class: "badge bg-secondary",
      target: "_blank" do %>
        <i class="fa-solid fa-file me-1"></i>HttpLogEntry
        #<%= file.log_entry.id %>
      <% end %>
    <% else %>
      <!-- be not present -->
      <span class="badge bg-warning text-dark">
        <i class="fa-solid fa-file me-1"></i>Not present
      </span>
    <% end %>
    <%# Domain::Inkbunny::File#state is an enum of ok / error; anything else falls back to a neutral badge. %>
    <span
      class="badge <%= case file.state
                       when "ok"
                         "bg-success"
                       when "error"
                         "bg-danger"
                       else
                         "bg-secondary"
                       end %>"
    >
      <i
        class="<%= case file.state
                   when "ok"
                     "fa-solid fa-check"
                   when "error"
                     "fa-solid fa-file-circle-exclamation"
                   else
                     "fa-solid fa-question"
                   end %> me-1"
      ></i
      ><%= file.state %>
    </span>
    <span
      class="badge bg-light text-dark"
      title="<%= time_ago_in_words(file.created_at) %> ago"
    >
      <i class="fa-regular fa-clock me-1"></i
      ><%= file.created_at.strftime("%Y-%m-%d %H:%M:%S") %>
    </span>
  </div>
</div>

View File

@@ -1,21 +0,0 @@
<%# Summary row for one Domain::Inkbunny::User (`user`): profile link, name badge and creation time. -%>
<% profile_path = Rails.application.routes.url_helpers.domain_inkbunny_user_path(user) -%>
<% created_at = user.created_at -%>
<div class="d-flex align-items-center gap-2">
  <%= link_to profile_path, class: "badge bg-primary", target: "_blank" do %>
    <i class="fa-solid fa-rabbit me-1"></i>Domain::Inkbunny::User
    #<%= user.id %>
  <% end %>
  <div class="d-flex align-items-center ms-auto gap-2">
    <span class="badge bg-secondary">
      <i class="fa-solid fa-at me-1"></i><%= user.name %>
    </span>
    <%# Absolute timestamp in the badge, relative age in the tooltip. %>
    <span
      class="badge bg-light text-dark"
      title="<%= time_ago_in_words(created_at) %> ago"
    >
      <i class="fa-regular fa-clock me-1"></i
      ><%= created_at.strftime("%Y-%m-%d %H:%M:%S") %>
    </span>
  </div>
</div>

View File

@@ -18,7 +18,6 @@ Rails.application.routes.draw do
# API routes nested under the :api and :fa namespaces.
namespace :api do
namespace :fa do
# Explicit `to:` with a leading slash: dispatched to the "domain/users"
# controller's similar_users action rather than one inside the api/fa namespace.
get :similar_users, to: "/domain/users#similar_users"
# Dispatched to the "domain/fa/api" controller's object_statuses action.
get :object_statuses, to: "/domain/fa/api#object_statuses"
end
end

View File

@@ -1,82 +1,4 @@
namespace :fa do
desc "fix FurAffinity user name"
task fix_names: %i[set_logger_stdout environment] do
  # Re-derives each user's display name from their scanned user page and
  # persists it when it differs from the stored one. ENV["start_at"] optionally
  # gives the primary key to start iterating from (defaults to 0).
  first_id = ENV.fetch("start_at", 0).to_i

  Domain::Fa::User.find_each(start: first_id, batch_size: 100) do |user|
    # Users without a scanned user page are skipped silently.
    log_entry = user.guess_user_page_log_entry
    next unless log_entry

    page =
      Domain::Fa::Parser::Page.new(
        log_entry.response.contents,
        require_logged_in: false,
      )
    # So are log entries that do not look like a user page.
    next unless page.probably_user_page?

    previous_name = user.name
    parsed_name = page.user_page.name

    if previous_name == parsed_name
      # One dot per unchanged user as a cheap progress indicator.
      print "."
      next
    end

    user.name = parsed_name
    user.save!
    # Terminate the current run of progress dots before logging.
    puts ""
    Rails.logger.info(
      "updated #{user.id} / #{user.url_name} - #{previous_name} -> #{parsed_name}",
    )
  end
end
desc "dump FurAffinity users into json file, sorted by name"
task dump_users: %i[set_logger_stdout environment] do
  # Writes line-delimited JSON to the file named by ENV["out"]: first a header
  # object mapping every exported column to a human-readable description, then
  # one object per user, ordered by url_name.
  #
  # Raises if ENV["out"] is not set.
  out_file = ENV["out"] || raise("`out` file not specified (json)")

  print "counting users... "
  total = Domain::Fa::User.count
  puts total
  progress = ProgressBar.create(total: total, throttle_rate: 0.2)

  # Keys are the model columns passed to `pluck`; values document each column
  # in the header line of the dump.
  columns = {
    name: "name of the FurAffinity user",
    url_name:
      "identifier used in URLs to refer to the user, e.g. https://www.furaffinity.net/user/test",
    num_submissions:
      "number of submissions the user has made, as per the user page",
    num_comments_recieved:
      "number of comments the user's posts have received, as per the user page",
    num_comments_given:
      "number of comments the user has made on other posts, as per the user page",
    num_journals:
      "number of journals the user has made, as per the user page",
    num_favorites:
      "number of posts the user has favorited, as per the user page",
    num_pageviews:
      "number of pageviews of the user's page, as per the user page",
    registered_at: "when the account was registered, as per the user page",
  }

  # Fix the global ordering up front, then fetch the rows in slices of 100 ids.
  model_ids = Domain::Fa::User.order(url_name: :asc).pluck(:id)

  File.open(out_file, "wt") do |file|
    # The header needs its own line terminator; without it the first user
    # record is glued onto the header line and the file is no longer valid
    # line-delimited JSON.
    file << JSON.dump(columns) << "\n"

    model_ids.each_slice(100) do |ids_batch|
      rows =
        Domain::Fa::User
          .where(id: ids_batch)
          .order(:url_name)
          .pluck(*columns.keys)
      rows.each do |row|
        file << JSON.dump(columns.keys.zip(row).to_h) << "\n"
      end
      progress.progress += ids_batch.size
    end
  end
end
desc "enqueue waiting posts"
task enqueue_waiting_posts: %i[set_logger_stdout environment] do |t, args|
start_at = (ENV["start_at"] || 0).to_i
@@ -132,42 +54,6 @@ namespace :fa do
loop { sleep poll_duration if enqueuer.run_once == :sleep }
end
desc "calculate user follow factors for similar users"
task calc_similar_users: %i[set_logger_stdout environment] do
  # ENV["epochs"] sets the epoch count handed to the calculator (default 20).
  epoch_count = ENV.fetch("epochs", 20).to_i
  # Fit first, then write the resulting factors.
  Domain::Fa::UserFactorCalculator.new(epoch_count).tap(&:fit).write_factors
end
desc "dump user follows to stdout, formatted for mf-train"
task dump_user_follows: %i[set_logger_stdout environment] do
  # Emits one "<follower_id> <followed_id> 1" line per follow on stdout.
  # Status and progress output go to stderr so stdout carries only data.
  follows = Domain::Fa::Follow.all
  total = follows.count
  $stderr.puts "dumping #{total} follows..."

  # print progress bar to stderr
  progress =
    ProgressBar.create(
      total: total,
      format: "%t: %c/%C %B %p%% %a %e",
      output: $stderr,
      throttle_rate: 0.2,
    )

  # Keep stdout buffered (sync = false) so the many small writes below are
  # batched by the IO layer; the buffer is flushed explicitly at the end.
  $stdout.sync = false

  follows.in_batches(of: 100_000) do |batch|
    pairs = batch.pluck(:follower_id, :followed_id)
    pairs.each do |follower_id, followed_id|
      $stdout.puts "#{follower_id} #{followed_id} 1"
    end
    progress.progress += pairs.size
  end

  $stdout.flush
end
desc "run a single browse page job"
task browse_page_job: %i[set_logger_stdout environment] do
Domain::Fa::Job::BrowsePageJob.set(
@@ -235,36 +121,6 @@ namespace :fa do
).perform_later({ url_name: url_name, force_scan: true })
end
desc "fix up posts that have the 'click here' user"
task fix_click_here_user: %i[set_logger_stdout environment] do
  # Reassigns every post owned by the "click here" user to the user whose
  # url_name appears in the post's file path, then enqueues rescans of that
  # user and of the post.
  placeholder = Domain::Fa::User.find_by(url_name: "click here")
  raise unless placeholder

  # All follow-up jobs are enqueued at the same priority (-10).
  enqueue =
    lambda do |job_class, job_args|
      job_class.set(priority: -10).perform_later(job_args)
    end

  placeholder.posts.each do |post|
    uri = post.file_uri || raise("post id #{post.id} does not have file uri")
    raise("must be d.facdn.net") if uri.host != "d.facdn.net"

    # The path segment(s) between /art/ and the numeric directory name the
    # real creator.
    art_path = uri.path
    captured = %r{^/art/(.+)/\d+/}.match(art_path)
    raise("must be /art/<url_name>\\d+: #{art_path}") if captured.nil?

    owner_url_name = captured[1]
    owner = Domain::Fa::User.find_by(url_name: owner_url_name)
    raise("url name #{owner_url_name} doesn't exist") unless owner

    puts "real user: #{owner.name} / #{owner.url_name}"
    post.creator = owner
    post.save!

    enqueue.call(Domain::Fa::Job::UserPageJob, { user: owner })
    enqueue.call(Domain::Fa::Job::UserGalleryJob, { user: owner })
    enqueue.call(Domain::Fa::Job::ScanPostJob, { post: post })
  end
end
task export_to_sqlite: %i[environment set_logger_stdout] do
profile = !!ENV["profile"]
sample = !!ENV["sample"]
@@ -284,14 +140,14 @@ namespace :fa do
exporter.end_profiling! if profile
end
desc "Backfill favs by scanning historical HTTP logs for favorites pages"
task backfill_favs: :environment do
  # Each option comes from the environment variable of the same name and is
  # converted to an Integer; an unset variable is passed through as nil.
  options =
    %w[start_at limit batch_size].to_h do |key|
      [key.to_sym, ENV[key]&.to_i]
    end
  FaBackfillFavs.new(**options).run
end
# desc "Backfill favs by scanning historical HTTP logs for favorites pages"
# task backfill_favs: :environment do
# FaBackfillFavs.new(
# start_at: ENV["start_at"]&.to_i,
# limit: ENV["limit"]&.to_i,
# batch_size: ENV["batch_size"]&.to_i,
# ).run
# end
desc "Enqueue pending favs jobs"
task enqueue_pending_favs: :environment do

View File

@@ -1,63 +0,0 @@
namespace :indexed_posts do
  # Task suffix => model class name. The names stay strings and are resolved
  # inside the task bodies, i.e. only after the :environment prerequisite ran.
  post_models = {
    fa: "Domain::Fa::Post",
    e621: "Domain::E621::Post",
    ib: "Domain::Inkbunny::Post",
  }

  desc "Ensure all Domain::*::Post records have an IndexedPost record"
  task ensure_all_posts: post_models.keys.map { |site| :"ensure_#{site}_posts" }

  post_models.each do |site, model_name|
    desc "Ensure all #{model_name} records have an IndexedPost record"
    task "ensure_#{site}_posts" => %i[environment set_logger_stdout] do
      missing =
        IndexedPostsRake.model_without_indexed_post(
          Object.const_get(model_name),
        )
      IndexedPostsRake.ensure_indexed_post_on(missing)
    end
  end
end
# Helpers behind the indexed_posts rake tasks.
module IndexedPostsRake
  # Returns a relation of `model` records that have no associated indexed_post
  # row (left outer join filtered on a NULL indexed_posts.id).
  def self.model_without_indexed_post(model)
    model.left_outer_joins(:indexed_post).where(indexed_posts: { id: nil })
  end

  # Calls `ensure_indexed_post!` on every record of `relation` and saves both
  # the record and its indexed post. Batches are processed on a two-thread
  # pool, one transaction per batch.
  #
  # Raises the first error hit by any batch once all batches have finished.
  # The thread pool does not propagate exceptions raised inside posted blocks,
  # so without this a failed (rolled back) batch would go unnoticed and the
  # task would appear to succeed.
  def self.ensure_indexed_post_on(relation)
    total = relation.count
    Rails.logger.info(
      "Ensuring indexed post on #{relation.name.bold} (#{total} total to process)",
    )
    progress =
      ProgressBar.create(
        total: total,
        throttle_rate: 0.2,
        format: "%t: %c/%C |%B| %R/s %P%% %e",
      )

    # Bounded queue: when both workers are busy and the queue is full, the
    # producing thread runs the batch itself instead of continuing to load
    # further batches into memory.
    pool =
      Concurrent::FixedThreadPool.new(
        2,
        max_queue: 4,
        fallback_policy: :caller_runs,
      )
    # Guards the progress bar and the shared failure list.
    mutex = Mutex.new
    failures = []

    relation.find_in_batches do |batch|
      pool.post do
        relation.model.transaction do
          batch.each do |post|
            post.ensure_indexed_post!
            post.save!
            post.indexed_post.save!
            mutex.synchronize { progress.increment }
          end
        end
      rescue StandardError => e
        mutex.synchronize { failures << e }
      end
    end

    pool.shutdown
    pool.wait_for_termination

    raise failures.first unless failures.empty?
  end
end

View File

@@ -17,9 +17,6 @@ class ApplicationController
include ::Turbo::FramesHelper
include ::Turbo::IncludesHelper
include ::Turbo::StreamsHelper
include ::ActionView::Helpers::DateHelper
include ::ActionView::Helpers::SanitizeHelper
include ::ActionView::Helpers::RenderingHelper
include ::ActionView::Helpers::CaptureHelper
include ::ActionView::Helpers::OutputSafetyHelper
include ::ActionView::Helpers::TagHelper
@@ -38,17 +35,15 @@ class ApplicationController
include ::Domain::DomainModelHelper
include ::Domain::PostsHelper
include ::Domain::DescriptionsHelper
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::Posts::E621PostsHelper
include ::Domain::Users::FaUsersHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper
include ::SourceHelper
include ::TimestampHelper
include ::UiHelper
include ::DeviseHelper

View File

@@ -14,9 +14,6 @@ class DeviseController
include ::Turbo::FramesHelper
include ::Turbo::IncludesHelper
include ::Turbo::StreamsHelper
include ::ActionView::Helpers::DateHelper
include ::ActionView::Helpers::SanitizeHelper
include ::ActionView::Helpers::RenderingHelper
include ::ActionView::Helpers::CaptureHelper
include ::ActionView::Helpers::OutputSafetyHelper
include ::ActionView::Helpers::TagHelper
@@ -35,17 +32,15 @@ class DeviseController
include ::Domain::DomainModelHelper
include ::Domain::PostsHelper
include ::Domain::DescriptionsHelper
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::Posts::E621PostsHelper
include ::Domain::Users::FaUsersHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper
include ::SourceHelper
include ::TimestampHelper
include ::UiHelper
include ::DeviseHelper

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,26 +0,0 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Domain::E621::Post::FileError`.
# Please instead update this file by running `bin/tapioca dsl Domain::E621::Post::FileError`.
# Sorbet signatures for the generated attribute accessors of
# Domain::E621::Post::FileError: log_entry_id, retry_count and status_code,
# each a nilable Integer with a reader and a writer.
class Domain::E621::Post::FileError
sig { returns(T.nilable(::Integer)) }
def log_entry_id; end
sig { params(value: T.nilable(::Integer)).returns(T.nilable(::Integer)) }
def log_entry_id=(value); end
sig { returns(T.nilable(::Integer)) }
def retry_count; end
sig { params(value: T.nilable(::Integer)).returns(T.nilable(::Integer)) }
def retry_count=(value); end
sig { returns(T.nilable(::Integer)) }
def status_code; end
sig { params(value: T.nilable(::Integer)).returns(T.nilable(::Integer)) }
def status_code=(value); end
end

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,16 +0,0 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Domain::Fa::SqliteExporter`.
# Please instead update this file by running `bin/tapioca dsl Domain::Fa::SqliteExporter`.
# Sorbet signatures declaring `logger` on both instances and the class itself;
# each returns a ColorLogger.
class Domain::Fa::SqliteExporter
sig { returns(ColorLogger) }
def logger; end
class << self
sig { returns(ColorLogger) }
def logger; end
end
end

View File

@@ -1,16 +0,0 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Domain::Fa::SqliteExporter::Inserter`.
# Please instead update this file by running `bin/tapioca dsl Domain::Fa::SqliteExporter::Inserter`.
# Sorbet signatures declaring `logger` on both instances and the class itself;
# each returns a ColorLogger.
class Domain::Fa::SqliteExporter::Inserter
sig { returns(ColorLogger) }
def logger; end
class << self
sig { returns(ColorLogger) }
def logger; end
end
end

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,16 +0,0 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Domain::Fa::UserFactorCalculator`.
# Please instead update this file by running `bin/tapioca dsl Domain::Fa::UserFactorCalculator`.
# Sorbet signatures declaring `logger` on both instances and the class itself;
# each returns a ColorLogger.
class Domain::Fa::UserFactorCalculator
sig { returns(ColorLogger) }
def logger; end
class << self
sig { returns(ColorLogger) }
def logger; end
end
end

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,16 +0,0 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Domain::MigrateToDomain`.
# Please instead update this file by running `bin/tapioca dsl Domain::MigrateToDomain`.
# Sorbet signatures declaring `logger` on both instances and the class itself;
# each returns a ColorLogger.
class Domain::MigrateToDomain
sig { returns(ColorLogger) }
def logger; end
class << self
sig { returns(ColorLogger) }
def logger; end
end
end

View File

@@ -473,12 +473,6 @@ class Domain::PostFile
sig { params(value: T.nilable(::BlobFile)).void }
def blob=(value); end
sig { returns(T::Boolean) }
def blob_changed?; end
sig { returns(T::Boolean) }
def blob_previously_changed?; end
sig { params(args: T.untyped, blk: T.untyped).returns(::BlobFile) }
def build_blob(*args, &blk); end

View File

@@ -1,16 +0,0 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `FaBackfillFavs`.
# Please instead update this file by running `bin/tapioca dsl FaBackfillFavs`.
# Sorbet signatures declaring `logger` on both instances and the class itself;
# each returns a ColorLogger.
class FaBackfillFavs
sig { returns(ColorLogger) }
def logger; end
class << self
sig { returns(ColorLogger) }
def logger; end
end
end

View File

@@ -9,9 +9,6 @@ module GeneratedPathHelpersModule
include ::ActionDispatch::Routing::UrlFor
include ::ActionDispatch::Routing::PolymorphicRoutes
sig { params(args: T.untyped).returns(String) }
def api_fa_object_statuses_path(*args); end
sig { params(args: T.untyped).returns(String) }
def api_fa_similar_users_path(*args); end

View File

@@ -9,9 +9,6 @@ module GeneratedUrlHelpersModule
include ::ActionDispatch::Routing::UrlFor
include ::ActionDispatch::Routing::PolymorphicRoutes
sig { params(args: T.untyped).returns(String) }
def api_fa_object_statuses_url(*args); end
sig { params(args: T.untyped).returns(String) }
def api_fa_similar_users_url(*args); end

File diff suppressed because it is too large Load Diff

View File

@@ -17,9 +17,6 @@ class Rails::ApplicationController
include ::Turbo::FramesHelper
include ::Turbo::IncludesHelper
include ::Turbo::StreamsHelper
include ::ActionView::Helpers::DateHelper
include ::ActionView::Helpers::SanitizeHelper
include ::ActionView::Helpers::RenderingHelper
include ::ActionView::Helpers::CaptureHelper
include ::ActionView::Helpers::OutputSafetyHelper
include ::ActionView::Helpers::TagHelper
@@ -38,17 +35,15 @@ class Rails::ApplicationController
include ::Domain::DomainModelHelper
include ::Domain::PostsHelper
include ::Domain::DescriptionsHelper
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::Posts::E621PostsHelper
include ::Domain::Users::FaUsersHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper
include ::SourceHelper
include ::TimestampHelper
include ::UiHelper
include ::DeviseHelper

View File

@@ -17,9 +17,6 @@ class Rails::Conductor::BaseController
include ::Turbo::FramesHelper
include ::Turbo::IncludesHelper
include ::Turbo::StreamsHelper
include ::ActionView::Helpers::DateHelper
include ::ActionView::Helpers::SanitizeHelper
include ::ActionView::Helpers::RenderingHelper
include ::ActionView::Helpers::CaptureHelper
include ::ActionView::Helpers::OutputSafetyHelper
include ::ActionView::Helpers::TagHelper
@@ -38,17 +35,15 @@ class Rails::Conductor::BaseController
include ::Domain::DomainModelHelper
include ::Domain::PostsHelper
include ::Domain::DescriptionsHelper
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::Posts::E621PostsHelper
include ::Domain::Users::FaUsersHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper
include ::SourceHelper
include ::TimestampHelper
include ::UiHelper
include ::DeviseHelper

View File

@@ -17,9 +17,6 @@ class Rails::HealthController
include ::Turbo::FramesHelper
include ::Turbo::IncludesHelper
include ::Turbo::StreamsHelper
include ::ActionView::Helpers::DateHelper
include ::ActionView::Helpers::SanitizeHelper
include ::ActionView::Helpers::RenderingHelper
include ::ActionView::Helpers::CaptureHelper
include ::ActionView::Helpers::OutputSafetyHelper
include ::ActionView::Helpers::TagHelper
@@ -38,17 +35,15 @@ class Rails::HealthController
include ::Domain::DomainModelHelper
include ::Domain::PostsHelper
include ::Domain::DescriptionsHelper
include ::Domain::E621::PostsHelper
include ::Domain::Fa::PostsHelper
include ::Domain::Fa::UsersHelper
include ::Domain::ModelHelper
include ::Domain::PaginationHelper
include ::Domain::PostGroupsHelper
include ::Domain::Posts::E621PostsHelper
include ::Domain::Users::FaUsersHelper
include ::Domain::VisualSearchHelper
include ::DomainSourceHelper
include ::GoodJobHelper
include ::IpAddressHelper
include ::SourceHelper
include ::TimestampHelper
include ::UiHelper
include ::DeviseHelper

View File

@@ -0,0 +1,16 @@
# typed: true
# DO NOT EDIT MANUALLY
# This is an autogenerated file for dynamic methods in `Scraper::FurArchiverHttpClientConfig`.
# Please instead update this file by running `bin/tapioca dsl Scraper::FurArchiverHttpClientConfig`.
# Sorbet signatures declaring `logger` on both instances and the class itself;
# each returns a ColorLogger.
class Scraper::FurArchiverHttpClientConfig
sig { returns(ColorLogger) }
def logger; end
class << self
sig { returns(ColorLogger) }
def logger; end
end
end

View File

@@ -1,14 +0,0 @@
# typed: false
require "rails_helper"
RSpec.describe Domain::Fa::ApiController, type: :controller do
  describe "POST #object_statuses" do
    # One post id and one user url_name are enough to exercise both halves of
    # the response.
    let(:request_params) { { fa_ids: [123], url_names: ["test"] } }

    it "returns status of posts and users" do
      post :object_statuses, params: request_params

      expect(response).to be_successful
      payload = JSON.parse(response.body)
      expect(payload).to include("posts", "users")
    end
  end

  # TODO: cover the controller's remaining endpoints.
end

View File

@@ -1,13 +0,0 @@
# typed: false
# Factory for Domain::E621::Post with a sequential e621_id, state :ok and
# empty defaults for every detail/collection attribute.
FactoryBot.define do
factory :domain_e621_post, class: "Domain::E621::Post" do
sequence(:e621_id) { |n| n }
state { :ok }
state_detail { {} }
flags_array { [] }
pools_array { [] }
sources_array { [] }
artists_array { [] }
# NOTE(review): defaults to an empty Hash, unlike the other *_array
# attributes above, which default to empty Arrays — confirm this is intended.
tags_array { {} }
end
end

View File

@@ -1,8 +0,0 @@
# typed: false
# Factory for Domain::E621::User with a sequential e621_user_id, a generated
# "user<n>" name and scanned_favs_at left nil.
FactoryBot.define do
factory :domain_e621_user, class: "Domain::E621::User" do
sequence(:e621_user_id) { |n| n }
sequence(:name) { |n| "user#{n}" }
scanned_favs_at { nil }
end
end

View File

@@ -1,15 +0,0 @@
# typed: false
# Factory for Domain::Fa::Post with a sequential fa_id and state :ok.
# Creator and file are opt-in through traits.
FactoryBot.define do
factory :domain_fa_post, class: "Domain::Fa::Post" do
sequence(:fa_id) { |n| n }
state { :ok }
# Attaches a creator built by the :domain_fa_user factory.
trait :with_creator do
association :creator, factory: :domain_fa_user
end
# Attaches a file built by the :http_log_entry factory.
trait :with_file do
association :file, factory: :http_log_entry
end
end
end

View File

@@ -1,12 +0,0 @@
# typed: false
# Factory for Domain::Fa::UserAvatar: belongs to a :domain_fa_user, state :ok,
# a fixed example file URL and an :http_log_entry as its log entry.
FactoryBot.define do
factory :domain_fa_user_avatar, class: "Domain::Fa::UserAvatar" do
association :user, factory: :domain_fa_user
state { :ok }
state_detail { {} }
file_url_str { "https://example.com/avatar.jpg" }
association :log_entry, factory: :http_log_entry
# Sets `file` from the log entry's response after the avatar is built.
# NOTE(review): this looks like it replaces any `file:` value supplied by the
# caller (nil when there is no log entry) — confirm that is intended.
after(:build) { |avatar| avatar.file = avatar.log_entry&.response }
end
end

View File

@@ -1,16 +0,0 @@
# typed: false
# Factory for Domain::Fa::User with a sequential "User <n>" name, a url_name
# derived from that name, state :ok and empty detail hashes.
FactoryBot.define do
factory :domain_fa_user, class: "Domain::Fa::User" do
sequence(:name) { |n| "User #{n}" }
# Derived from `name` through the model's own name_to_url_name conversion.
url_name { Domain::Fa::User.name_to_url_name(name) }
state { :ok }
state_detail { {} }
log_entry_detail { {} }
# After the user is created, also creates a :domain_fa_user_avatar for it,
# passing a freshly created :blob_file as `file`.
trait :with_avatar do
after(:create) do |user|
create(:domain_fa_user_avatar, user: user, file: create(:blob_file))
end
end
end
end

View File

@@ -1,14 +0,0 @@
# typed: false
# Factory for the :indexed_post model. `postable` defaults to a
# :domain_fa_post; the traits swap in a post from another domain.
FactoryBot.define do
factory :indexed_post do
association :postable, factory: :domain_fa_post
trait :with_e621_post do
association :postable, factory: :domain_e621_post
end
# NOTE(review): relies on a :domain_inkbunny_post factory defined elsewhere —
# confirm it exists.
trait :with_inkbunny_post do
association :postable, factory: :domain_inkbunny_post
end
end
end

Some files were not shown because too many files have changed in this diff Show More