format
@@ -12,3 +12,5 @@ rbenv rehash
npm install yarn
bin/rails yarn:install
yarn

yarn add --dev prettier @prettier/plugin-ruby
.vscode/settings.json (vendored)
@@ -10,5 +10,5 @@
},
"[erb]": {
"editor.defaultFormatter": "aliariff.vscode-erb-beautify"
}
},
}
@@ -22,7 +22,7 @@ class ApplicationController < ActionController::Base
"9c38727f-f11d-41de-b775-0effd86d520c" => "xjal",
"e38c568f-a24d-4f26-87f0-dfcd898a359d" => "fyacin",
"41fa1144-d4cd-11ed-afa1-0242ac120002" => "soft_fox_lad",
"9b3cf444-5913-4efb-9935-bf26501232ff" => "syfaro",
"9b3cf444-5913-4efb-9935-bf26501232ff" => "syfaro"
}

def validate_api_token
@@ -1,8 +1,5 @@
class BlobsController < ApplicationController
skip_before_action :validate_api_token,
only: %i[
contents
]
skip_before_action :validate_api_token, only: %i[contents]

def show
sha256 = HexUtil.hex2bin(params[:sha256])
@@ -25,20 +22,26 @@ class BlobsController < ApplicationController
# images, videos, etc
blob_entry = BlobEntryP.find(HexUtil.hex2bin(sha256))
if helpers.is_send_data_content_type?(blob_entry.content_type)
if !thumb.blank? && helpers.is_thumbable_content_type?(blob_entry.content_type)
if !thumb.blank? &&
helpers.is_thumbable_content_type?(blob_entry.content_type)
filename = "thumb-#{thumb}-#{sha256}"
filename = filename[..File.extname(filename).length]
filename += ".jpeg"

width, height = thumb_params(thumb)
image = Vips::Image.thumbnail_buffer(blob_entry.contents, width, height: height)
image =
Vips::Image.thumbnail_buffer(
blob_entry.contents,
width,
height: height
)
resized_image_contents = image.jpegsave_buffer

send_data(
resized_image_contents,
type: "image/jpg",
disposition: "inline",
filename: filename,
filename: filename
)
else
ext = helpers.ext_for_content_type(blob_entry.content_type)
@@ -47,16 +50,17 @@ class BlobsController < ApplicationController
blob_entry.contents,
type: blob_entry.content_type,
disposition: "inline",
filename: "data#{ext}",
filename: "data#{ext}"
)
end
elsif blob_entry.content_type =~ /text\/plain/
elsif blob_entry.content_type =~ %r{text/plain}
render plain: blob_entry.contents
elsif blob_entry.content_type.starts_with? "text/html"
render html: blob_entry.contents.html_safe
elsif blob_entry.content_type.starts_with? "application/json"
pretty_json = JSON.pretty_generate(JSON.parse blob_entry.contents)
render html: "<html><body><pre>#{pretty_json}</pre></body></html>".html_safe
render html:
"<html><body><pre>#{pretty_json}</pre></body></html>".html_safe
else
render plain: "no renderer for #{blob_entry.content_type}"
end
@@ -1,14 +1,8 @@
class Domain::Fa::ApiController < ApplicationController
skip_before_action :verify_authenticity_token,
only: %i[
enqueue_objects
object_statuses
]
only: %i[enqueue_objects object_statuses]

skip_before_action :validate_api_token,
only: %i[
search_user_names
]
skip_before_action :validate_api_token, only: %i[search_user_names]

def search_user_names
name = params[:name]
@@ -25,26 +19,26 @@ class Domain::Fa::ApiController < ApplicationController
fa_ids = (params[:fa_ids] || []).map(&:to_i)
url_names = (params[:url_names] || [])

jobs_async = GoodJob::Job.
select(:id, :queue_name, :serialized_params).
where(queue_name: "manual", finished_at: nil).
where(
[
"(serialized_params->'exception_executions' = '{}')",
"(serialized_params->'exception_executions' is null)",
].join(" OR ")
).load_async
jobs_async =
GoodJob::Job
.select(:id, :queue_name, :serialized_params)
.where(queue_name: "manual", finished_at: nil)
.where(
[
"(serialized_params->'exception_executions' = '{}')",
"(serialized_params->'exception_executions' is null)"
].join(" OR ")
)
.load_async

users_async = Domain::Fa::User.
where(url_name: url_names).
load_async
users_async = Domain::Fa::User.where(url_name: url_names).load_async

fa_id_to_post = Domain::Fa::Post.
includes(:file).
where(fa_id: fa_ids).
map do |post|
[post.fa_id, post]
end.to_h
fa_id_to_post =
Domain::Fa::Post
.includes(:file)
.where(fa_id: fa_ids)
.map { |post| [post.fa_id, post] }
.to_h

posts_response = {}
users_response = {}
@@ -56,7 +50,7 @@ class Domain::Fa::ApiController < ApplicationController
terminal_state: false,
seen_at: time_ago_or_never(post&.created_at),
scanned_at: "never",
downloaded_at: "never",
downloaded_at: "never"
}

if post
@@ -64,7 +58,9 @@ class Domain::Fa::ApiController < ApplicationController
post_response[:scanned_at] = time_ago_or_never(post.scanned_at)

if post.file.present?
post_response[:downloaded_at] = time_ago_or_never(post.file.created_at)
post_response[:downloaded_at] = time_ago_or_never(
post.file.created_at
)
post_response[:state] = "have_file"
post_response[:terminal_state] = true
elsif post.scanned?
@@ -79,9 +75,7 @@ class Domain::Fa::ApiController < ApplicationController
posts_response[fa_id] = post_response
end

url_name_to_user = users_async.map do |user|
[user.url_name, user]
end.to_h
url_name_to_user = users_async.map { |user| [user.url_name, user] }.to_h

url_names.each do |url_name|
user = url_name_to_user[url_name]
@@ -90,7 +84,7 @@ class Domain::Fa::ApiController < ApplicationController
user_response = {
created_at: time_ago_or_never(user.created_at),
scanned_gallery_at: time_ago_or_never(user.scanned_gallery_at),
scanned_page_at: time_ago_or_never(user.scanned_page_at),
scanned_page_at: time_ago_or_never(user.scanned_page_at)
}
states = []
states << "page" unless user.due_for_page_scan?
@@ -103,41 +97,41 @@ class Domain::Fa::ApiController < ApplicationController
user_response[:terminal_state] = true
end
else
user_response = {
state: "not_seen",
terminal_state: false,
}
user_response = { state: "not_seen", terminal_state: false }
end
users_response[url_name] = user_response
end

queue_depths = Hash.new do |hash, key|
hash[key] = 0
end
queue_depths = Hash.new { |hash, key| hash[key] = 0 }

jobs_async.each do |job|
queue_depths[job.serialized_params["job_class"]] += 1
end

queue_depths = queue_depths.map do |key, value|
[key.
delete_prefix("Domain::Fa::Job::").
split("::").
last.
underscore.
delete_suffix("_job").
gsub("_", " "),
value]
end.to_h
queue_depths =
queue_depths
.map do |key, value|
[
key
.delete_prefix("Domain::Fa::Job::")
.split("::")
.last
.underscore
.delete_suffix("_job")
.gsub("_", " "),
value
]
end
.to_h

render json: {
posts: posts_response,
users: users_response,
queues: {
total_depth: queue_depths.values.sum,
depths: queue_depths,
},
}
posts: posts_response,
users: users_response,
queues: {
total_depth: queue_depths.values.sum,
depths: queue_depths
}
}
end

def enqueue_objects
@@ -147,13 +141,18 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
url_names = (params[:url_names] || [])
|
||||
url_names_to_enqueue = Set.new(params[:url_names_to_enqueue] || [])
|
||||
|
||||
fa_id_to_post = Domain::Fa::Post.includes(:file).where(fa_id: fa_ids).map do |post|
|
||||
[post.fa_id, post]
|
||||
end.to_h
|
||||
fa_id_to_post =
|
||||
Domain::Fa::Post
|
||||
.includes(:file)
|
||||
.where(fa_id: fa_ids)
|
||||
.map { |post| [post.fa_id, post] }
|
||||
.to_h
|
||||
|
||||
url_name_to_user = Domain::Fa::User.where(url_name: url_names).map do |user|
|
||||
[user.url_name, user]
|
||||
end.to_h
|
||||
url_name_to_user =
|
||||
Domain::Fa::User
|
||||
.where(url_name: url_names)
|
||||
.map { |user| [user.url_name, user] }
|
||||
.to_h
|
||||
|
||||
fa_ids.each do |fa_id|
|
||||
post = fa_id_to_post[fa_id]
|
||||
@@ -168,11 +167,11 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
enqueue_deferred!
|
||||
|
||||
render json: {
|
||||
post_scans: @enqueue_counts[Domain::Fa::Job::ScanPostJob],
|
||||
post_files: @enqueue_counts[Domain::Fa::Job::ScanFileJob],
|
||||
user_pages: @enqueue_counts[Domain::Fa::Job::UserPageJob],
|
||||
user_galleries: @enqueue_counts[Domain::Fa::Job::UserGalleryJob],
|
||||
}
|
||||
post_scans: @enqueue_counts[Domain::Fa::Job::ScanPostJob],
|
||||
post_files: @enqueue_counts[Domain::Fa::Job::ScanFileJob],
|
||||
user_pages: @enqueue_counts[Domain::Fa::Job::UserPageJob],
|
||||
user_galleries: @enqueue_counts[Domain::Fa::Job::UserGalleryJob]
|
||||
}
|
||||
end
|
||||
|
||||
def similar_users
|
||||
@@ -181,9 +180,10 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
|
||||
user = Domain::Fa::User.find_by(url_name: url_name)
|
||||
if user.nil?
|
||||
render status: 404, json: {
|
||||
render status: 404,
|
||||
json: {
|
||||
error: "user '#{url_name}' not found",
|
||||
error_type: "user_not_found",
|
||||
error_type: "user_not_found"
|
||||
}
|
||||
return
|
||||
end
|
||||
@@ -191,10 +191,12 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
all_similar_users = helpers.similar_users_by_followed(user, limit: 10)
|
||||
|
||||
if all_similar_users.nil?
|
||||
render status: 500, json: {
|
||||
error: "user '#{url_name}' has not had recommendations computed yet",
|
||||
error_type: "recs_not_computed",
|
||||
}
|
||||
render status: 500,
|
||||
json: {
|
||||
error:
|
||||
"user '#{url_name}' has not had recommendations computed yet",
|
||||
error_type: "recs_not_computed"
|
||||
}
|
||||
return
|
||||
end
|
||||
|
||||
@@ -202,83 +204,98 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
|
||||
not_followed_similar_users = nil
|
||||
if exclude_url_name
|
||||
exclude_folowed_by_user = Domain::Fa::User.find_by(url_name: exclude_url_name)
|
||||
not_followed_similar_users = if exclude_folowed_by_user.nil?
|
||||
exclude_folowed_by_user =
|
||||
Domain::Fa::User.find_by(url_name: exclude_url_name)
|
||||
not_followed_similar_users =
|
||||
if exclude_folowed_by_user.nil?
|
||||
# TODO - enqueue a manual UserFollowsJob for this user and have client
|
||||
# re-try the request later
|
||||
{
|
||||
error: "user '#{exclude_url_name}' not found",
|
||||
error_type: "exclude_user_not_found",
|
||||
error_type: "exclude_user_not_found"
|
||||
}
|
||||
elsif exclude_folowed_by_user.scanned_follows_at.nil?
|
||||
{
|
||||
error: "user '#{exclude_url_name}' followers list hasn't been scanned",
|
||||
error_type: "exclude_user_not_scanned",
|
||||
error:
|
||||
"user '#{exclude_url_name}' followers list hasn't been scanned",
|
||||
error_type: "exclude_user_not_scanned"
|
||||
}
|
||||
else
|
||||
users_list_to_similar_list(helpers.similar_users_by_followed(
|
||||
user,
|
||||
limit: 10,
|
||||
exclude_followed_by: exclude_folowed_by_user,
|
||||
))
|
||||
users_list_to_similar_list(
|
||||
helpers.similar_users_by_followed(
|
||||
user,
|
||||
limit: 10,
|
||||
exclude_followed_by: exclude_folowed_by_user
|
||||
)
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
render json: {
|
||||
all: all_similar_users,
|
||||
not_followed: not_followed_similar_users,
|
||||
not_followed: not_followed_similar_users
|
||||
}
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def get_best_user_page_http_log_entry_for(user)
|
||||
for_path = proc { |uri_path|
|
||||
HttpLogEntry.where(
|
||||
uri_scheme: "https",
|
||||
uri_host: "www.furaffinity.net",
|
||||
uri_path: uri_path,
|
||||
).order(created_at: :desc).first&.response
|
||||
}
|
||||
for_path =
|
||||
proc do |uri_path|
|
||||
HttpLogEntry
|
||||
.where(
|
||||
uri_scheme: "https",
|
||||
uri_host: "www.furaffinity.net",
|
||||
uri_path: uri_path
|
||||
)
|
||||
.order(created_at: :desc)
|
||||
.first
|
||||
&.response
|
||||
end
|
||||
|
||||
for_hle_id = proc { |hle_id|
|
||||
hle_id && HttpLogEntry.find_by(id: hle_id)&.response
|
||||
}
|
||||
for_hle_id =
|
||||
proc { |hle_id| hle_id && HttpLogEntry.find_by(id: hle_id)&.response }
|
||||
|
||||
# older versions don't end in a trailing slash
|
||||
hle_id = user.log_entry_detail && user.log_entry_detail["last_user_page_id"]
|
||||
for_hle_id.call(hle_id) ||
|
||||
for_path.call("/user/#{user.url_name}/") ||
|
||||
for_hle_id.call(hle_id) || for_path.call("/user/#{user.url_name}/") ||
|
||||
for_path.call("/user/#{user.url_name}")
|
||||
end
|
||||
|
||||
def defer_post_scan(post, fa_id)
|
||||
if !post || !post.scanned?
|
||||
defer_manual(Domain::Fa::Job::ScanPostJob, {
|
||||
fa_id: fa_id,
|
||||
}, -17)
|
||||
defer_manual(Domain::Fa::Job::ScanPostJob, { fa_id: fa_id }, -17)
|
||||
end
|
||||
|
||||
if post && post.file_uri && !post.file.present?
|
||||
return defer_manual(Domain::Fa::Job::ScanFileJob, {
|
||||
post: post,
|
||||
}, -15, "static_file")
|
||||
return(
|
||||
defer_manual(
|
||||
Domain::Fa::Job::ScanFileJob,
|
||||
{ post: post },
|
||||
-15,
|
||||
"static_file"
|
||||
)
|
||||
)
|
||||
return
|
||||
end
|
||||
end
|
||||
|
||||
def defer_user_scan(user, url_name, highpri)
|
||||
if !user || user.due_for_page_scan?
|
||||
defer_manual(Domain::Fa::Job::UserPageJob, {
|
||||
url_name: url_name,
|
||||
}, highpri ? -16 : -6)
|
||||
defer_manual(
|
||||
Domain::Fa::Job::UserPageJob,
|
||||
{ url_name: url_name },
|
||||
highpri ? -16 : -6
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
if !user || user.due_for_gallery_scan?
|
||||
defer_manual(Domain::Fa::Job::UserGalleryJob, {
|
||||
url_name: url_name,
|
||||
}, highpri ? -14 : -4)
|
||||
defer_manual(
|
||||
Domain::Fa::Job::UserGalleryJob,
|
||||
{ url_name: url_name },
|
||||
highpri ? -14 : -4
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
@@ -312,14 +329,18 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
end
|
||||
|
||||
def users_for_name(name)
|
||||
users = Domain::Fa::User.where([
|
||||
"(name ilike :name) OR (url_name ilike :name)",
|
||||
{ name: "#{ReduxApplicationRecord.sanitize_sql_like(name)}%" },
|
||||
]).
|
||||
includes(:avatar).
|
||||
select(:id, :state, :state_detail, :log_entry_detail, :name, :url_name).
|
||||
order(name: :asc).
|
||||
limit(10)
|
||||
users =
|
||||
Domain::Fa::User
|
||||
.where(
|
||||
[
|
||||
"(name ilike :name) OR (url_name ilike :name)",
|
||||
{ name: "#{ReduxApplicationRecord.sanitize_sql_like(name)}%" }
|
||||
]
|
||||
)
|
||||
.includes(:avatar)
|
||||
.select(:id, :state, :state_detail, :log_entry_detail, :name, :url_name)
|
||||
.order(name: :asc)
|
||||
.limit(10)
|
||||
|
||||
users.map do |user|
|
||||
{
|
||||
@@ -327,7 +348,7 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
name: user.name,
|
||||
url_name: user.url_name,
|
||||
thumb: helpers.fa_user_avatar_path(user, thumb: "64-avatar"),
|
||||
show_path: domain_fa_user_path(user.url_name),
|
||||
show_path: domain_fa_user_path(user.url_name)
|
||||
}
|
||||
end
|
||||
end
|
||||
@@ -335,36 +356,37 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
def users_list_to_similar_list(users_list)
|
||||
users_list.map do |user|
|
||||
profile_thumb_url = user.avatar&.file_uri&.to_s
|
||||
profile_thumb_url || begin
|
||||
profile_page_response = get_best_user_page_http_log_entry_for(user)
|
||||
if profile_page_response
|
||||
parser = Domain::Fa::Parser::Page.new(profile_page_response.contents, require_logged_in: false)
|
||||
profile_thumb_url = parser.user_page.profile_thumb_url
|
||||
else
|
||||
if user.due_for_follows_scan?
|
||||
Domain::Fa::Job::UserFollowsJob.set({
|
||||
priority: -20,
|
||||
}).perform_later({
|
||||
user: user,
|
||||
})
|
||||
end
|
||||
if user.due_for_page_scan?
|
||||
Domain::Fa::Job::UserPageJob.set({
|
||||
priority: -20,
|
||||
}).perform_later({
|
||||
user: user,
|
||||
})
|
||||
profile_thumb_url ||
|
||||
begin
|
||||
profile_page_response = get_best_user_page_http_log_entry_for(user)
|
||||
if profile_page_response
|
||||
parser =
|
||||
Domain::Fa::Parser::Page.new(
|
||||
profile_page_response.contents,
|
||||
require_logged_in: false
|
||||
)
|
||||
profile_thumb_url = parser.user_page.profile_thumb_url
|
||||
else
|
||||
if user.due_for_follows_scan?
|
||||
Domain::Fa::Job::UserFollowsJob.set(
|
||||
{ priority: -20 }
|
||||
).perform_later({ user: user })
|
||||
end
|
||||
if user.due_for_page_scan?
|
||||
Domain::Fa::Job::UserPageJob.set({ priority: -20 }).perform_later(
|
||||
{ user: user }
|
||||
)
|
||||
end
|
||||
end
|
||||
rescue StandardError
|
||||
logger.error("error getting profile_thumb_url: #{$!.message}")
|
||||
end
|
||||
rescue
|
||||
logger.error("error getting profile_thumb_url: #{$!.message}")
|
||||
end
|
||||
|
||||
{
|
||||
name: user.name,
|
||||
url_name: user.url_name,
|
||||
profile_thumb_url: profile_thumb_url,
|
||||
url: "https://www.furaffinity.net/user/#{user.url_name}/",
|
||||
url: "https://www.furaffinity.net/user/#{user.url_name}/"
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,14 +1,8 @@
|
||||
class Domain::Fa::ExportController < ApplicationController
|
||||
skip_before_action :verify_authenticity_token,
|
||||
only: %i[
|
||||
enqueue_objects
|
||||
object_statuses
|
||||
]
|
||||
only: %i[enqueue_objects object_statuses]
|
||||
|
||||
skip_before_action :validate_api_token,
|
||||
only: %i[
|
||||
search_user_names
|
||||
]
|
||||
skip_before_action :validate_api_token, only: %i[search_user_names]
|
||||
|
||||
def user
|
||||
@user = Domain::Fa::User.find_by!(url_name: params[:url_name])
|
||||
|
||||
@@ -1,22 +1,24 @@
|
||||
class Domain::Fa::PostsController < ApplicationController
|
||||
before_action :set_domain_fa_post,
|
||||
only: %i[ show scan_post scan_post ]
|
||||
before_action :set_domain_fa_post, only: %i[show scan_post scan_post]
|
||||
|
||||
skip_before_action :verify_authenticity_token,
|
||||
only: %i[ try_scan_post try_scan_posts ]
|
||||
only: %i[try_scan_post try_scan_posts]
|
||||
|
||||
# GET /domain/fa/posts
|
||||
def index
|
||||
if params[:user_url_name]
|
||||
@user = Domain::Fa::User.find_by(url_name: params[:user_url_name]) || raise("404")
|
||||
@user =
|
||||
Domain::Fa::User.find_by(url_name: params[:user_url_name]) ||
|
||||
raise("404")
|
||||
end
|
||||
relation = @user ? @user.posts : Domain::Fa::Post
|
||||
@posts = relation.
|
||||
includes(:creator, :file).
|
||||
page(params[:page]).
|
||||
per(50).
|
||||
order(fa_id: :desc).
|
||||
without_count
|
||||
@posts =
|
||||
relation
|
||||
.includes(:creator, :file)
|
||||
.page(params[:page])
|
||||
.per(50)
|
||||
.order(fa_id: :desc)
|
||||
.without_count
|
||||
end
|
||||
|
||||
# GET /domain/fa/posts/1
|
||||
@@ -37,38 +39,36 @@ class Domain::Fa::PostsController < ApplicationController
|
||||
enqueued = try_enqueue_post_scan(post, fa_id)
|
||||
|
||||
if post && post.file.present?
|
||||
state_string = "downloaded #{helpers.time_ago_in_words(post.file.created_at, include_seconds: true)} ago"
|
||||
state_string =
|
||||
"downloaded #{helpers.time_ago_in_words(post.file.created_at, include_seconds: true)} ago"
|
||||
elsif post && post.scanned?
|
||||
state_string = "scanned #{helpers.time_ago_in_words(post.scanned_at, include_seconds: true)} ago"
|
||||
state_string =
|
||||
"scanned #{helpers.time_ago_in_words(post.scanned_at, include_seconds: true)} ago"
|
||||
else
|
||||
state_string = []
|
||||
if !post
|
||||
state_string << "not seen"
|
||||
else
|
||||
state_string << "#{post.state}"
|
||||
end
|
||||
!post ? state_string << "not seen" : state_string << "#{post.state}"
|
||||
|
||||
if enqueued
|
||||
state_string << "enqueued"
|
||||
end
|
||||
state_string << "enqueued" if enqueued
|
||||
|
||||
state_string = state_string.join(", ")
|
||||
end
|
||||
|
||||
render json: {
|
||||
enqueued: enqueued,
|
||||
title: post&.title,
|
||||
state: state_string,
|
||||
is_terminal_state: post&.scanned? && post&.file&.present? || false,
|
||||
}
|
||||
enqueued: enqueued,
|
||||
title: post&.title,
|
||||
state: state_string,
|
||||
is_terminal_state: post&.scanned? && post&.file&.present? || false
|
||||
}
|
||||
end
|
||||
|
||||
def try_scan_posts
|
||||
Rails.logger.info "params: #{params.inspect}"
|
||||
fa_ids = params[:fa_ids].map(&:to_i)
|
||||
fa_id_to_post = Domain::Fa::Post.where(fa_id: fa_ids).map do |post|
|
||||
[post.fa_id, post]
|
||||
end.to_h
|
||||
fa_id_to_post =
|
||||
Domain::Fa::Post
|
||||
.where(fa_id: fa_ids)
|
||||
.map { |post| [post.fa_id, post] }
|
||||
.to_h
|
||||
|
||||
response = {}
|
||||
|
||||
@@ -86,7 +86,7 @@ class Domain::Fa::PostsController < ApplicationController
|
||||
|
||||
response[fa_id] = {
|
||||
state: state,
|
||||
enqueued: try_enqueue_post_scan(post, fa_id),
|
||||
enqueued: try_enqueue_post_scan(post, fa_id)
|
||||
}
|
||||
end
|
||||
render json: response
|
||||
@@ -103,21 +103,19 @@ class Domain::Fa::PostsController < ApplicationController
|
||||
|
||||
if !post || !post.scanned?
|
||||
Rails.logger.info "Enqueue scan #{fa_id}"
|
||||
Domain::Fa::Job::ScanPostJob.
|
||||
set(priority: -15, queue: "manual").
|
||||
perform_later({
|
||||
fa_id: fa_id,
|
||||
})
|
||||
Domain::Fa::Job::ScanPostJob.set(
|
||||
priority: -15,
|
||||
queue: "manual"
|
||||
).perform_later({ fa_id: fa_id })
|
||||
return true
|
||||
end
|
||||
|
||||
if post && post.file_uri && !post.file.present?
|
||||
Rails.logger.info "Enqueue file #{fa_id}"
|
||||
Domain::Fa::Job::ScanFileJob.
|
||||
set(priority: -15, queue: "manual").
|
||||
perform_later({
|
||||
post: post,
|
||||
})
|
||||
Domain::Fa::Job::ScanFileJob.set(
|
||||
priority: -15,
|
||||
queue: "manual"
|
||||
).perform_later({ post: post })
|
||||
return true
|
||||
end
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
class Domain::Fa::UsersController < ApplicationController
|
||||
before_action :set_user, only: %i[ show ]
|
||||
skip_before_action :validate_api_token, only: %i[ show ]
|
||||
before_action :set_user, only: %i[show]
|
||||
skip_before_action :validate_api_token, only: %i[show]
|
||||
|
||||
# GET /domain/fa/users or /domain/fa/users.json
|
||||
def index
|
||||
|
||||
@@ -1,14 +1,11 @@
|
||||
class Domain::Twitter::ApiController < ApplicationController
|
||||
skip_before_action :verify_authenticity_token,
|
||||
only: %i[ enqueue_objects ]
|
||||
skip_before_action :verify_authenticity_token, only: %i[enqueue_objects]
|
||||
|
||||
def enqueue_objects
|
||||
@enqueue_counts ||= Hash.new { |h, k| h[k] = 0 }
|
||||
|
||||
names = (params[:names] || [])
|
||||
names.each do |name|
|
||||
defer_user_timeline_scan(name, true)
|
||||
end
|
||||
names.each { |name| defer_user_timeline_scan(name, true) }
|
||||
enqueue_deferred!
|
||||
render json: @enqueue_counts.to_json
|
||||
end
|
||||
@@ -16,9 +13,11 @@ class Domain::Twitter::ApiController < ApplicationController
|
||||
private
|
||||
|
||||
def defer_user_timeline_scan(name, highpri)
|
||||
defer_manual(Domain::Twitter::Job::UserTimelineTweetsJob, {
|
||||
name: name,
|
||||
}, highpri ? -16 : -6)
|
||||
defer_manual(
|
||||
Domain::Twitter::Job::UserTimelineTweetsJob,
|
||||
{ name: name },
|
||||
highpri ? -16 : -6
|
||||
)
|
||||
end
|
||||
|
||||
def defer_manual(klass, args, priority, queue = nil)
|
||||
|
||||
@@ -10,9 +10,11 @@ class LogEntriesController < ApplicationController
|
||||
|
||||
if @uri_filter.path.present?
|
||||
if @uri_filter.query.present?
|
||||
query = query.
|
||||
where("uri_path = ?", @uri_filter.path).
|
||||
where("uri_query like ?", @uri_filter.query + "%")
|
||||
query =
|
||||
query.where("uri_path = ?", @uri_filter.path).where(
|
||||
"uri_query like ?",
|
||||
@uri_filter.query + "%"
|
||||
)
|
||||
else
|
||||
query = query.where("uri_path like ?", @uri_filter.path + "%")
|
||||
end
|
||||
@@ -21,13 +23,14 @@ class LogEntriesController < ApplicationController
|
||||
query = HttpLogEntry
|
||||
end
|
||||
|
||||
@log_entries = query.
|
||||
page(params[:page]).
|
||||
per(50).
|
||||
joins(:response).
|
||||
includes(:response).
|
||||
order(id: :desc).
|
||||
without_count
|
||||
@log_entries =
|
||||
query
|
||||
.page(params[:page])
|
||||
.per(50)
|
||||
.joins(:response)
|
||||
.includes(:response)
|
||||
.order(id: :desc)
|
||||
.without_count
|
||||
|
||||
formats.clear
|
||||
formats << :html
|
||||
@@ -39,44 +42,45 @@ class LogEntriesController < ApplicationController
|
||||
@last_window_count = 0
|
||||
@last_window_bytes = 0
|
||||
@last_window_bytes_stored = 0
|
||||
@content_type_counts = Hash.new do |hash, key|
|
||||
hash[key] = {
|
||||
count: 0,
|
||||
bytes: 0,
|
||||
bytes_stored: 0,
|
||||
}
|
||||
end
|
||||
@content_type_counts =
|
||||
Hash.new do |hash, key|
|
||||
hash[key] = { count: 0, bytes: 0, bytes_stored: 0 }
|
||||
end
|
||||
|
||||
@by_domain_counts = Hash.new do |hash, key|
|
||||
hash[key] = {
|
||||
count: 0,
|
||||
bytes: 0,
|
||||
bytes_stored: 0,
|
||||
}
|
||||
end
|
||||
@by_domain_counts =
|
||||
Hash.new do |hash, key|
|
||||
hash[key] = { count: 0, bytes: 0, bytes_stored: 0 }
|
||||
end
|
||||
|
||||
HttpLogEntry.includes(:response).find_each(batch_size: 100, order: :desc) do |log_entry|
|
||||
break if log_entry.created_at < @time_window.ago
|
||||
@last_window_count += 1
|
||||
@last_window_bytes += log_entry.response.size
|
||||
@last_window_bytes_stored += log_entry.response.bytes_stored
|
||||
content_type = log_entry.content_type.split(";").first
|
||||
HttpLogEntry
|
||||
.includes(:response)
|
||||
.find_each(batch_size: 100, order: :desc) do |log_entry|
|
||||
break if log_entry.created_at < @time_window.ago
|
||||
@last_window_count += 1
|
||||
@last_window_bytes += log_entry.response.size
|
||||
@last_window_bytes_stored += log_entry.response.bytes_stored
|
||||
content_type = log_entry.content_type.split(";").first
|
||||
|
||||
@content_type_counts[content_type][:count] += 1
|
||||
@content_type_counts[content_type][:bytes] += log_entry.response.size
|
||||
@content_type_counts[content_type][:bytes_stored] += log_entry.response.bytes_stored
|
||||
@content_type_counts[content_type][:count] += 1
|
||||
@content_type_counts[content_type][:bytes] += log_entry.response.size
|
||||
@content_type_counts[content_type][
|
||||
:bytes_stored
|
||||
] += log_entry.response.bytes_stored
|
||||
|
||||
@by_domain_counts[log_entry.uri_host][:count] += 1
|
||||
@by_domain_counts[log_entry.uri_host][:bytes] += log_entry.response.size
|
||||
@by_domain_counts[log_entry.uri_host][:bytes_stored] += log_entry.response.bytes_stored
|
||||
end
|
||||
@by_domain_counts[log_entry.uri_host][:count] += 1
|
||||
@by_domain_counts[log_entry.uri_host][:bytes] += log_entry.response.size
|
||||
@by_domain_counts[log_entry.uri_host][
|
||||
:bytes_stored
|
||||
] += log_entry.response.bytes_stored
|
||||
end
|
||||
end
|
||||
|
||||
def show
|
||||
@log_entry = HttpLogEntry.includes(
|
||||
:caused_by_entry,
|
||||
:triggered_entries,
|
||||
response: :base,
|
||||
).find(params[:id])
|
||||
@log_entry =
|
||||
HttpLogEntry.includes(
|
||||
:caused_by_entry,
|
||||
:triggered_entries,
|
||||
response: :base
|
||||
).find(params[:id])
|
||||
end
|
||||
end
|
||||
|
||||
@@ -11,7 +11,7 @@ class UserScriptsController < ApplicationController
|
||||
when "furecs.user.js"
|
||||
send_file(
|
||||
Rails.root.join("user_scripts/furecs.user.js"),
|
||||
type: "application/json",
|
||||
type: "application/json"
|
||||
)
|
||||
else
|
||||
render status: 404, text: "not found"
|
||||
|
||||
@@ -24,11 +24,8 @@ module Domain::Fa::PostsHelper
|
||||
def scanned_and_file_description(post)
|
||||
parts = []
|
||||
if post.scanned?
|
||||
time_ago = if post.scanned_at
|
||||
time_ago_in_words(post.scanned_at)
|
||||
else
|
||||
"(unknown)"
|
||||
end
|
||||
time_ago =
|
||||
(post.scanned_at ? time_ago_in_words(post.scanned_at) : "(unknown)")
|
||||
parts << "Scanned #{time_ago} ago"
|
||||
else
|
||||
parts << "Not scanned"
|
||||
@@ -43,15 +40,15 @@ module Domain::Fa::PostsHelper
|
||||
|
||||
def fa_post_description_sanitized(html)
|
||||
raw Sanitize.fragment(
|
||||
html,
|
||||
elements: %w[br img b i span strong],
|
||||
attributes: {
|
||||
"span" => %w[style],
|
||||
"a" => [],
|
||||
},
|
||||
css: {
|
||||
properties: %w[font-size color],
|
||||
},
|
||||
)
|
||||
html,
|
||||
elements: %w[br img b i span strong],
|
||||
attributes: {
|
||||
"span" => %w[style],
|
||||
"a" => []
|
||||
},
|
||||
css: {
|
||||
properties: %w[font-size color]
|
||||
}
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,11 +1,7 @@
|
||||
module Domain::Fa::UsersHelper
|
||||
def fa_user_avatar_path(user, thumb: nil)
|
||||
if (sha256 = user.avatar&.file_sha256)
|
||||
contents_blob_path(
|
||||
HexUtil.bin2hex(sha256),
|
||||
format: "jpg",
|
||||
thumb: thumb,
|
||||
)
|
||||
contents_blob_path(HexUtil.bin2hex(sha256), format: "jpg", thumb: thumb)
|
||||
else
|
||||
# default / 'not found' avatar image
|
||||
# "/blobs/9080fd4e7e23920eb2dccfe2d86903fc3e748eebb2e5aa8c657bbf6f3d941cdc/contents.jpg"
|
||||
@@ -15,95 +11,97 @@ module Domain::Fa::UsersHelper
|
||||
|
||||
def sanitized_fa_user_profile_html(html)
|
||||
# try to preload all the FA usernames in the profile
|
||||
maybe_url_names = Nokogiri::HTML(html).css("a").flat_map do |node|
|
||||
href = URI.parse(node["href"])
|
||||
right_host = href.host.nil? || href.host == "www.furaffinity.net"
|
||||
right_path = href.path =~ /\/user\/.+/
|
||||
if right_host && right_path
|
||||
[href]
|
||||
else
|
||||
[]
|
||||
end
|
||||
end.map do |href|
|
||||
href.path.split("/")[2]&.downcase
|
||||
end
|
||||
maybe_url_names =
|
||||
Nokogiri
|
||||
.HTML(html)
|
||||
.css("a")
|
||||
.flat_map do |node|
|
||||
href = URI.parse(node["href"])
|
||||
right_host = href.host.nil? || href.host == "www.furaffinity.net"
|
||||
right_path = href.path =~ %r{/user/.+}
|
||||
if right_host && right_path
|
||||
[href]
|
||||
else
|
||||
[]
|
||||
end
|
||||
end
|
||||
.map { |href| href.path.split("/")[2]&.downcase }
|
||||
|
||||
preloaded_users = Domain::Fa::User.
|
||||
where(url_name: maybe_url_names).
|
||||
select(:id, :state, :state_detail, :log_entry_detail, :url_name).
|
||||
joins(:avatar).
|
||||
includes(:avatar).
|
||||
index_by(&:url_name)
|
||||
preloaded_users =
|
||||
Domain::Fa::User
|
||||
.where(url_name: maybe_url_names)
|
||||
.select(:id, :state, :state_detail, :log_entry_detail, :url_name)
|
||||
.joins(:avatar)
|
||||
.includes(:avatar)
|
||||
.index_by(&:url_name)
|
||||
|
||||
raw Sanitize.fragment(
|
||||
html,
|
||||
elements: %w[br img b i span strong],
|
||||
attributes: {
|
||||
"span" => %w[style],
|
||||
"a" => [],
|
||||
},
|
||||
css: {
|
||||
properties: %w[font-size color],
|
||||
},
|
||||
transformers: lambda do |env|
|
||||
return unless env[:node_name] == "a"
|
||||
node = env[:node]
|
||||
href = URI.parse(node["href"])
|
||||
return unless href.host == nil || href.host == "www.furaffinity.net"
|
||||
return unless href.path =~ /\/user\/.+/
|
||||
url_name = href.path.split("/")[2]&.downcase
|
||||
Sanitize.node!(node, {
|
||||
:elements => %w[a],
|
||||
:attributes => {
|
||||
"a" => %w[href],
|
||||
html,
|
||||
elements: %w[br img b i span strong],
|
||||
attributes: {
|
||||
"span" => %w[style],
|
||||
"a" => []
|
||||
},
|
||||
})
|
||||
node["href"] = domain_fa_user_path(url_name)
|
||||
node["class"] =
|
||||
"text-slate-200 underline decoration-slate-200 " +
|
||||
"decoration-dashed decoration-dashed decoration-1"
|
||||
css: {
|
||||
properties: %w[font-size color]
|
||||
},
|
||||
transformers:
|
||||
lambda do |env|
|
||||
return unless env[:node_name] == "a"
|
||||
node = env[:node]
|
||||
href = URI.parse(node["href"])
|
||||
unless href.host == nil || href.host == "www.furaffinity.net"
|
||||
return
|
||||
end
|
||||
return unless href.path =~ %r{/user/.+}
|
||||
url_name = href.path.split("/")[2]&.downcase
|
||||
Sanitize.node!(
|
||||
node,
|
||||
{ elements: %w[a], attributes: { "a" => %w[href] } }
|
||||
)
|
||||
node["href"] = domain_fa_user_path(url_name)
|
||||
node["class"] = "text-slate-200 underline decoration-slate-200 " +
|
||||
"decoration-dashed decoration-dashed decoration-1"
|
||||
|
||||
whitelist = [node]
|
||||
whitelist = [node]
|
||||
|
||||
user = preloaded_users[url_name] || Domain::Fa::User.find_by(url_name: url_name)
|
||||
if user
|
||||
img = Nokogiri::XML::Node.new("img", node.document)
|
||||
img["class"] = "inline w-5"
|
||||
img["src"] = fa_user_avatar_path(user, thumb: "32-avatar")
|
||||
node.prepend_child(img)
|
||||
whitelist << img
|
||||
end
|
||||
user =
|
||||
preloaded_users[url_name] ||
|
||||
Domain::Fa::User.find_by(url_name: url_name)
|
||||
if user
|
||||
img = Nokogiri::XML::Node.new("img", node.document)
|
||||
img["class"] = "inline w-5"
|
||||
img["src"] = fa_user_avatar_path(user, thumb: "32-avatar")
|
||||
node.prepend_child(img)
|
||||
whitelist << img
|
||||
end
|
||||
|
||||
{ :node_allowlist => whitelist }
|
||||
end,
|
||||
)
|
||||
{ node_allowlist: whitelist }
|
||||
end
|
||||
)
|
||||
end
|
||||
|
||||
def similar_users_by_followed(
|
||||
user,
|
||||
limit: 10,
|
||||
exclude_followed_by: nil
|
||||
)
|
||||
def similar_users_by_followed(user, limit: 10, exclude_followed_by: nil)
|
||||
if user.disco.nil?
|
||||
nil
|
||||
else
|
||||
ReduxApplicationRecord.connection.execute("SET ivfflat.probes = 32")
|
||||
user.similar_users_by_followed(
|
||||
exclude_followed_by: exclude_followed_by,
|
||||
exclude_followed_by: exclude_followed_by
|
||||
).limit(limit)
|
||||
end
|
||||
end
|
||||
|
||||
def fa_user_account_status(user)
|
||||
log_entry_id = user.log_entry_detail["last_user_page_id"]
|
||||
if log_entry_id.nil?
|
||||
return nil
|
||||
end
|
||||
return nil if log_entry_id.nil?
|
||||
log_entry = HttpLogEntry.find(log_entry_id)
|
||||
parser = Domain::Fa::Parser::Page.new(log_entry.response.contents, require_logged_in: false)
|
||||
unless parser.probably_user_page?
|
||||
return nil
|
||||
end
|
||||
parser =
|
||||
Domain::Fa::Parser::Page.new(
|
||||
log_entry.response.contents,
|
||||
require_logged_in: false
|
||||
)
|
||||
return nil unless parser.probably_user_page?
|
||||
parser.user_page.account_status
|
||||
end
|
||||
end
|
||||
|
||||
@@ -7,21 +7,23 @@ module LogEntriesHelper
|
||||
|
||||
def ext_for_content_type(content_type)
|
||||
case content_type
|
||||
when "image/jpeg" then "jpeg"
|
||||
when "image/jpg" then "jpg"
|
||||
when "image/png" then "png"
|
||||
when "image/gif" then "gif"
|
||||
else nil
|
||||
when "image/jpeg"
|
||||
"jpeg"
|
||||
when "image/jpg"
|
||||
"jpg"
|
||||
when "image/png"
|
||||
"png"
|
||||
when "image/gif"
|
||||
"gif"
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
def is_renderable_image_type?(content_type)
|
||||
[
|
||||
"image/jpeg",
|
||||
"image/jpg",
|
||||
"image/png",
|
||||
"image/gif",
|
||||
].any? { |ct| content_type.starts_with?(ct) }
|
||||
%w[image/jpeg image/jpg image/png image/gif].any? do |ct|
|
||||
content_type.starts_with?(ct)
|
||||
end
|
||||
end
|
||||
|
||||
def is_thumbable_content_type?(content_type)
|
||||
@@ -29,12 +31,10 @@ module LogEntriesHelper
|
||||
end
|
||||
|
||||
def is_renderable_video_type?(content_type)
|
||||
[
|
||||
"video/mp4",
|
||||
].any? { |ct| content_type.starts_with?(ct) }
|
||||
["video/mp4"].any? { |ct| content_type.starts_with?(ct) }
|
||||
end
|
||||
|
||||
def is_flash_content_type?(content_type)
|
||||
content_type =~ /application\/x-shockwave-flash/
|
||||
content_type =~ %r{application/x-shockwave-flash}
|
||||
end
|
||||
end
|
||||
|
||||
@@ -5,9 +5,11 @@ class ApplicationJob < ActiveJob::Base
|
||||
retry_on(
|
||||
StandardError,
|
||||
wait: :exponentially_longer,
|
||||
attempts: Float::INFINITY,
|
||||
attempts: Float::INFINITY
|
||||
) do |job, exception|
|
||||
job.logger.error("error: #{exception.message}\n#{exception.backtrace.join("\n")}")
|
||||
job.logger.error(
|
||||
"error: #{exception.message}\n#{exception.backtrace.join("\n")}"
|
||||
)
|
||||
end
|
||||
|
||||
# Automatically retry jobs that encountered a deadlock
|
||||
|
||||
@@ -7,13 +7,16 @@ module Domain::E621::Job
|
||||
|
||||
def perform(args)
|
||||
@caused_by_entry = args[:caused_by_entry]
|
||||
response = http_client.get(
|
||||
"https://e621.net/posts.json",
|
||||
caused_by_entry: @caused_by_entry,
|
||||
)
|
||||
response =
|
||||
http_client.get(
|
||||
"https://e621.net/posts.json",
|
||||
caused_by_entry: @caused_by_entry
|
||||
)
|
||||
|
||||
if response.status_code != 200
|
||||
fatal_error("non 200 response for /posts.json: #{response.status_code.to_s.underline}")
|
||||
fatal_error(
|
||||
"non 200 response for /posts.json: #{response.status_code.to_s.underline}"
|
||||
)
|
||||
end
|
||||
@log_entry = response.log_entry
|
||||
|
||||
@@ -22,22 +25,26 @@ module Domain::E621::Job
|
||||
fatal_error("no posts in response: HLE #{@log_entry.id}}")
|
||||
end
|
||||
|
||||
e621_id_to_post = Domain::E621::Post.where(
|
||||
e621_id: json["posts"].map { |post_json| post_json["id"] },
|
||||
).includes(taggings: :tag).index_by(&:e621_id)
|
||||
e621_id_to_post =
|
||||
Domain::E621::Post
|
||||
.where(e621_id: json["posts"].map { |post_json| post_json["id"] })
|
||||
.includes(taggings: :tag)
|
||||
.index_by(&:e621_id)
|
||||
|
||||
@num_updated = 0
|
||||
@num_created = 0
|
||||
@num_seen = 0
|
||||
|
||||
all_tag_names = Set.new json["posts"].map { |post_json|
|
||||
tag_and_cat_for_json(post_json)
|
||||
}.flatten.map(&:name)
|
||||
all_tag_names =
|
||||
Set.new json["posts"]
|
||||
.map { |post_json| tag_and_cat_for_json(post_json) }
|
||||
.flatten
|
||||
.map(&:name)
|
||||
|
||||
@name_to_tag_id, missing_tags =
|
||||
Domain::E621::TagUtil.tag_names_to_id_map(
|
||||
all_tag_names,
|
||||
posts: e621_id_to_post.values,
|
||||
posts: e621_id_to_post.values
|
||||
)
|
||||
@name_to_tag_id.merge!(
|
||||
Domain::E621::TagUtil.create_tags_from_names(missing_tags)
|
||||
@@ -46,18 +53,20 @@ module Domain::E621::Job
|
||||
json["posts"].each do |post_json|
|
||||
@num_seen += 1
|
||||
e621_id = post_json["id"]
|
||||
post = e621_id_to_post[e621_id] || begin
|
||||
@num_created += 1
|
||||
Domain::E621::Post.new({ e621_id: e621_id })
|
||||
end
|
||||
post =
|
||||
e621_id_to_post[e621_id] ||
|
||||
begin
|
||||
@num_created += 1
|
||||
Domain::E621::Post.new({ e621_id: e621_id })
|
||||
end
|
||||
logger.prefix = proc { "[e621_id #{post.e621_id.to_s.bold}]" }
|
||||
if update_post!(post, post_json)
|
||||
@num_updated += 1
|
||||
end
|
||||
@num_updated += 1 if update_post!(post, post_json)
|
||||
end
|
||||
|
||||
logger.prefix = nil
|
||||
logger.info("#{@num_updated} updated, #{@num_created} created, #{@num_seen} seen")
|
||||
logger.info(
|
||||
"#{@num_updated} updated, #{@num_created} created, #{@num_seen} seen"
|
||||
)
|
||||
end
|
||||
|
||||
private
|
||||
@@ -79,7 +88,7 @@ module Domain::E621::Job
|
||||
post.state_detail["prev_md5s"] ||= []
|
||||
post.state_detail["prev_md5s"] << {
|
||||
"md5" => post.md5,
|
||||
"file_id" => post.file_id,
|
||||
"file_id" => post.file_id
|
||||
}
|
||||
post.file = nil
|
||||
file_changed = true
|
||||
@@ -111,14 +120,16 @@ module Domain::E621::Job
|
||||
Domain::E621::TagUtil.update_tags_on_post_with_cat(
|
||||
post,
|
||||
tag_and_cat_for_json(post_json),
|
||||
@name_to_tag_id,
|
||||
@name_to_tag_id
|
||||
)
|
||||
end
|
||||
|
||||
defer_job(Domain::E621::Job::StaticFileJob, {
|
||||
post: post,
|
||||
caused_by_entry: @log_entry,
|
||||
}) if new_record || file_changed
|
||||
if new_record || file_changed
|
||||
defer_job(
|
||||
Domain::E621::Job::StaticFileJob,
|
||||
{ post: post, caused_by_entry: @log_entry }
|
||||
)
|
||||
end
|
||||
|
||||
true
|
||||
end
|
||||
@@ -126,11 +137,13 @@ module Domain::E621::Job
|
||||
TAG_CATEGORIES = %w[general species character copyright artist lore meta]
|
||||
|
||||
def tag_and_cat_for_json(post_json)
|
||||
TAG_CATEGORIES.map do |tc|
|
||||
post_json["tags"][tc].map do |name|
|
||||
TagAndCategory.new(name, "cat_#{tc}")
|
||||
TAG_CATEGORIES
|
||||
.map do |tc|
|
||||
post_json["tags"][tc].map do |name|
|
||||
TagAndCategory.new(name, "cat_#{tc}")
|
||||
end
|
||||
end
|
||||
end.flatten
|
||||
.flatten
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -12,27 +12,23 @@ module Domain::E621::Job
|
||||
file_url_str = post.file_url_str
|
||||
if file_url_str.blank?
|
||||
logger.warn("post has no file_url_str, enqueueing for scan")
|
||||
defer_job(Domain::E621::Job::ScanPostJob, {
|
||||
post: post,
|
||||
caused_by_entry: caused_by_entry,
|
||||
})
|
||||
defer_job(
|
||||
Domain::E621::Job::ScanPostJob,
|
||||
{ post: post, caused_by_entry: caused_by_entry }
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
if post.state == "file_error"
|
||||
retry_count = post.state_detail&.
|
||||
[]("file_error")&.
|
||||
[]("retry_count") || 0
|
||||
retry_count =
|
||||
post.state_detail&.[]("file_error")&.[]("retry_count") || 0
|
||||
if retry_count >= 3
|
||||
logger.error("file has been retried 3 times, giving up")
|
||||
return
|
||||
end
|
||||
end
|
||||
|
||||
response = http_client.get(
|
||||
file_url_str,
|
||||
caused_by_entry: caused_by_entry,
|
||||
)
|
||||
response = http_client.get(file_url_str, caused_by_entry: caused_by_entry)
|
||||
|
||||
if response.status_code != 200
|
||||
post.state = :file_error
|
||||
|
||||
@@ -12,7 +12,8 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
@caused_by_entry = args[:caused_by_entry]
|
||||
|
||||
if build_user
|
||||
@user = find_or_build_user_from_args(args, caused_by_entry: @caused_by_entry)
|
||||
@user =
|
||||
find_or_build_user_from_args(args, caused_by_entry: @caused_by_entry)
|
||||
else
|
||||
@user = find_user_from_args(args)
|
||||
end
|
||||
@@ -32,32 +33,42 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
end
|
||||
|
||||
def find_or_build_user_from_args(args, caused_by_entry: nil)
|
||||
find_user_from_args(args) || begin
|
||||
url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
|
||||
user = Domain::Fa::User.new
|
||||
user.url_name = url_name
|
||||
user.name = url_name
|
||||
user.state_detail ||= {}
|
||||
user.state_detail["first_seen_entry"] = caused_by_entry.id if caused_by_entry
|
||||
user
|
||||
end
|
||||
find_user_from_args(args) ||
|
||||
begin
|
||||
url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
|
||||
user = Domain::Fa::User.new
|
||||
user.url_name = url_name
|
||||
user.name = url_name
|
||||
user.state_detail ||= {}
|
||||
user.state_detail[
|
||||
"first_seen_entry"
|
||||
] = caused_by_entry.id if caused_by_entry
|
||||
user
|
||||
end
|
||||
end
|
||||
|
||||
def find_user_from_args(args)
|
||||
args[:user] || begin
|
||||
fatal_error("arg 'url_name' is required if arg 'user' is nil") if args[:url_name].blank?
|
||||
url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
|
||||
Domain::Fa::User.find_by(url_name: url_name)
|
||||
end
|
||||
args[:user] ||
|
||||
begin
|
||||
if args[:url_name].blank?
|
||||
fatal_error("arg 'url_name' is required if arg 'user' is nil")
|
||||
end
|
||||
url_name = Domain::Fa::User.name_to_url_name(args[:url_name])
|
||||
Domain::Fa::User.find_by(url_name: url_name)
|
||||
end
|
||||
end
|
||||
|
||||
def user_due_for_scan?(scan_type)
|
||||
unless @user.scan_due?(scan_type)
|
||||
if @force_scan
|
||||
logger.warn("scanned #{@user.scanned_ago_in_words(scan_type).bold} - force scanning")
|
||||
logger.warn(
|
||||
"scanned #{@user.scanned_ago_in_words(scan_type).bold} - force scanning"
|
||||
)
|
||||
return true
|
||||
else
|
||||
logger.warn("scanned #{@user.scanned_ago_in_words(scan_type).bold} - skipping")
|
||||
logger.warn(
|
||||
"scanned #{@user.scanned_ago_in_words(scan_type).bold} - skipping"
|
||||
)
|
||||
return false
|
||||
end
|
||||
end
|
||||
@@ -65,11 +76,7 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
return true
|
||||
end
|
||||
|
||||
ListingsPageScanStats = Struct.new(
|
||||
:new_seen,
|
||||
:total_seen,
|
||||
:last_was_new,
|
||||
)
|
||||
ListingsPageScanStats = Struct.new(:new_seen, :total_seen, :last_was_new)
|
||||
|
||||
def update_and_enqueue_posts_from_listings_page(
|
||||
job_type,
|
||||
@@ -106,15 +113,16 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
min_fa_id = [max_fa_id - continue_for, 0].max
|
||||
fa_ids_to_manually_enqueue = Set.new(min_fa_id..max_fa_id)
|
||||
fa_ids_to_manually_enqueue.subtract(fa_ids)
|
||||
existing = Domain::Fa::Post.where("fa_id >= ? AND fa_id <= ?", min_fa_id, max_fa_id).pluck(:fa_id)
|
||||
existing =
|
||||
Domain::Fa::Post.where(
|
||||
"fa_id >= ? AND fa_id <= ?",
|
||||
min_fa_id,
|
||||
max_fa_id
|
||||
).pluck(:fa_id)
|
||||
fa_ids_to_manually_enqueue.subtract(existing)
|
||||
end
|
||||
|
||||
page_desc = if page_desc
|
||||
"page #{page_desc.to_s.bold}"
|
||||
else
|
||||
"page"
|
||||
end
|
||||
page_desc = (page_desc ? "page #{page_desc.to_s.bold}" : "page")
|
||||
|
||||
listing_page_stats = ListingsPageScanStats.new(0, 0, false)
|
||||
submissions.each do |submission|
|
||||
@@ -123,13 +131,20 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
listing_page_stats.new_seen += 1 if post.new_record?
|
||||
listing_page_stats.total_seen += 1
|
||||
|
||||
update_and_save_post_from_listings_page(job_type, post, submission, caused_by_entry)
|
||||
enqueue_user_scan(
|
||||
post.creator,
|
||||
caused_by_entry,
|
||||
enqueue_page_scan: enqueue_page_scan,
|
||||
enqueue_gallery_scan: enqueue_gallery_scan,
|
||||
) if post.creator
|
||||
update_and_save_post_from_listings_page(
|
||||
job_type,
|
||||
post,
|
||||
submission,
|
||||
caused_by_entry
|
||||
)
|
||||
if post.creator
|
||||
enqueue_user_scan(
|
||||
post.creator,
|
||||
caused_by_entry,
|
||||
enqueue_page_scan: enqueue_page_scan,
|
||||
enqueue_gallery_scan: enqueue_gallery_scan
|
||||
)
|
||||
end
|
||||
|
||||
case post.state.to_sym
|
||||
when :ok
|
||||
@@ -164,7 +179,12 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
listing_page_stats
|
||||
end
|
||||
|
||||
def update_and_save_post_from_listings_page(job_type, post, submission, caused_by_entry)
|
||||
def update_and_save_post_from_listings_page(
|
||||
job_type,
|
||||
post,
|
||||
submission,
|
||||
caused_by_entry
|
||||
)
|
||||
if job_type == :browse_page
|
||||
post.log_entry_detail["first_browse_page_id"] ||= caused_by_entry.id
|
||||
elsif job_type == :gallery_page
|
||||
@@ -173,9 +193,11 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
fatal_error("unhandled job_type: #{job_type}")
|
||||
end
|
||||
|
||||
post.creator ||= Domain::Fa::User.find_or_build_from_submission_parser(submission)
|
||||
post.creator ||=
|
||||
Domain::Fa::User.find_or_build_from_submission_parser(submission)
|
||||
post.title = submission.title || fatal_error("blank title")
|
||||
post.thumbnail_uri = submission.thumb_path || fatal_error("blank thumb_path")
|
||||
post.thumbnail_uri =
|
||||
submission.thumb_path || fatal_error("blank thumb_path")
|
||||
post.save!
|
||||
end
|
||||
|
||||
@@ -188,16 +210,17 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
@users_enqueued_for_page_scan ||= Set.new
|
||||
@users_enqueued_for_gallery_scan ||= Set.new
|
||||
|
||||
args = if user.persisted?
|
||||
{ user: user }
|
||||
else
|
||||
unless user.url_name
|
||||
logger.warn "user does not have a url name and is not persisted, skipping (#{user.name})"
|
||||
return
|
||||
end
|
||||
args =
|
||||
if user.persisted?
|
||||
{ user: user }
|
||||
else
|
||||
unless user.url_name
|
||||
logger.warn "user does not have a url name and is not persisted, skipping (#{user.name})"
|
||||
return
|
||||
end
|
||||
|
||||
{ url_name: user.url_name }
|
||||
end.merge({ caused_by_entry: caused_by_entry })
|
||||
{ url_name: user.url_name }
|
||||
end.merge({ caused_by_entry: caused_by_entry })
|
||||
|
||||
if enqueue_page_scan && @users_enqueued_for_page_scan.add?(user.url_name)
|
||||
if user.due_for_page_scan?
|
||||
@@ -209,7 +232,8 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
end
|
||||
end
|
||||
|
||||
if enqueue_gallery_scan && @users_enqueued_for_gallery_scan.add?(user.url_name)
|
||||
if enqueue_gallery_scan &&
|
||||
@users_enqueued_for_gallery_scan.add?(user.url_name)
|
||||
if user.due_for_gallery_scan?
|
||||
logger.info(
|
||||
"enqueue user gallery job for #{user.url_name.bold}, " +
|
||||
@@ -222,9 +246,12 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
|
||||
def normalize_enqueue_pri(enqueue_pri)
|
||||
case enqueue_pri
|
||||
when :low then -5
|
||||
when :high then -15
|
||||
else -10
|
||||
when :low
|
||||
-5
|
||||
when :high
|
||||
-15
|
||||
else
|
||||
-10
|
||||
end
|
||||
end
|
||||
|
||||
@@ -234,10 +261,11 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
|
||||
if @posts_enqueued_for_scan.add?(fa_id)
|
||||
logger.info "enqueue post scan for fa_id #{fa_id}"
|
||||
defer_job(Domain::Fa::Job::ScanPostJob, {
|
||||
fa_id: fa_id,
|
||||
caused_by_entry: caused_by_entry,
|
||||
}, { priority: enqueue_pri })
|
||||
defer_job(
|
||||
Domain::Fa::Job::ScanPostJob,
|
||||
{ fa_id: fa_id, caused_by_entry: caused_by_entry },
|
||||
{ priority: enqueue_pri }
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -249,16 +277,18 @@ class Domain::Fa::Job::Base < Scraper::JobBase
|
||||
fa_id_str = (post.fa_id || "(nil)").to_s.bold
|
||||
if !post.scanned?
|
||||
logger.info "enqueue post scan for fa_id #{fa_id_str}"
|
||||
defer_job(Domain::Fa::Job::ScanPostJob, {
|
||||
post: post,
|
||||
caused_by_entry: caused_by_entry,
|
||||
}, { priority: enqueue_pri })
|
||||
defer_job(
|
||||
Domain::Fa::Job::ScanPostJob,
|
||||
{ post: post, caused_by_entry: caused_by_entry },
|
||||
{ priority: enqueue_pri }
|
||||
)
|
||||
elsif !post.have_file?
|
||||
logger.info "enqueue file scan for fa_id #{fa_id_str}"
|
||||
defer_job(Domain::Fa::Job::ScanFileJob, {
|
||||
post: post,
|
||||
caused_by_entry: caused_by_entry,
|
||||
}, { priority: enqueue_pri })
|
||||
defer_job(
|
||||
Domain::Fa::Job::ScanFileJob,
|
||||
{ post: post, caused_by_entry: caused_by_entry },
|
||||
{ priority: enqueue_pri }
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -11,15 +11,16 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
|
||||
@first_job_entry = nil
|
||||
init_from_args!(args, build_user: false)
|
||||
@full_scan = !!args[:full_scan]
|
||||
@user || begin
|
||||
Domain::Fa::Job::UserPageJob.perform_later({
|
||||
url_name: args[:url_name],
|
||||
caused_by_entry: best_caused_by_entry,
|
||||
})
|
||||
fatal_error("user does not exist: #{args}")
|
||||
end
|
||||
@user ||
|
||||
begin
|
||||
Domain::Fa::Job::UserPageJob.perform_later(
|
||||
{ url_name: args[:url_name], caused_by_entry: best_caused_by_entry }
|
||||
)
|
||||
fatal_error("user does not exist: #{args}")
|
||||
end
|
||||
|
||||
logger.prefix = "[#{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
|
||||
logger.prefix =
|
||||
"[#{(@user.url_name || @user.name).bold} / #{@user.state.bold}]"
|
||||
return unless user_due_for_scan?(:favs)
|
||||
|
||||
@page_number = 0
|
||||
@@ -50,33 +51,38 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
|
||||
|
||||
to_add = nil
|
||||
to_remove = nil
|
||||
measure(proc { |jobs|
|
||||
"add #{to_add.size.to_s.bold} favs, " +
|
||||
"remove #{to_remove.size.to_s.bold} favs"
|
||||
}) do
|
||||
measure(
|
||||
proc do |jobs|
|
||||
"add #{to_add.size.to_s.bold} favs, " +
|
||||
"remove #{to_remove.size.to_s.bold} favs"
|
||||
end
|
||||
) do
|
||||
to_remove = existing_faved_ids - @seen_post_ids
|
||||
to_add = @seen_post_ids - existing_faved_ids
|
||||
end
|
||||
|
||||
measure(proc {
|
||||
"updated favs list to #{@user.fav_post_joins.count.to_s.bold} posts"
|
||||
}) do
|
||||
measure(
|
||||
proc do
|
||||
"updated favs list to #{@user.fav_post_joins.count.to_s.bold} posts"
|
||||
end
|
||||
) do
|
||||
ReduxApplicationRecord.transaction do
|
||||
if to_remove.any?
|
||||
@user.fav_post_joins.where(post_id: to_remove).delete_all
|
||||
end
|
||||
|
||||
slice_size = if to_add.size <= 2500
|
||||
slice_size =
|
||||
if to_add.size <= 2500
|
||||
100
|
||||
else
|
||||
1000
|
||||
end
|
||||
|
||||
to_add.each_slice(slice_size) do |slice|
|
||||
@user.fav_post_joins.insert_all!(slice.map do |id|
|
{ post_id: id }
end)
end if to_add.any?
if to_add.any?
to_add.each_slice(slice_size) do |slice|
@user.fav_post_joins.insert_all!(slice.map { |id| { post_id: id } })
end
end

@user.scanned_favs_at = Time.now
@user.save!
@@ -89,7 +95,8 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
def scan_page
ret = nil

url = if @page_id
url =
if @page_id
"https://www.furaffinity.net/favorites/#{@user.url_name}/#{@page_id}/next"
else
"https://www.furaffinity.net/favorites/#{@user.url_name}/"
@@ -99,11 +106,14 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
if response.status_code != 200
fatal_error(
"http #{response.status_code.to_s.red.bold}, " +
"log entry #{response.log_entry.id.to_s.bold}"
"log entry #{response.log_entry.id.to_s.bold}"
)
end

if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(@user, response)
if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
@user,
response
)
logger.error("account disabled / not found, abort")
return :stop
end
@@ -116,35 +126,38 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
@total_items_seen += submissions.length

posts_to_create_hashes = []
measure(proc {
"page #{@page_number.to_s.bold} - " +
"#{submissions.length.to_s.bold} posts on page, " +
"created #{posts_to_create_hashes.size.to_s.bold}"
}) do
existing_fa_id_to_post_id = Domain::Fa::Post.where(
fa_id: submissions.map(&:id),
).pluck(:fa_id, :id).to_h

posts_to_create_hashes = submissions.reject do |submission|
existing_fa_id_to_post_id[submission.id]
end.map do |submission|
Domain::Fa::Post.hash_from_submission_parser_helper(
submission,
first_seen_log_entry: response.log_entry,
)
measure(
proc do
"page #{@page_number.to_s.bold} - " +
"#{submissions.length.to_s.bold} posts on page, " +
"created #{posts_to_create_hashes.size.to_s.bold}"
end
) do
existing_fa_id_to_post_id =
Domain::Fa::Post
.where(fa_id: submissions.map(&:id))
.pluck(:fa_id, :id)
.to_h

posts_to_create_hashes =
submissions
.reject { |submission| existing_fa_id_to_post_id[submission.id] }
.map do |submission|
Domain::Fa::Post.hash_from_submission_parser_helper(
submission,
first_seen_log_entry: response.log_entry
)
end

created_post_ids = []
created_post_ids = Domain::Fa::Post.insert_all!(
posts_to_create_hashes,
returning: %i[id fa_id],
).map do |row|
row["id"]
end unless posts_to_create_hashes.empty?
created_post_ids =
Domain::Fa::Post
.insert_all!(posts_to_create_hashes, returning: %i[id fa_id])
.map { |row| row["id"] } unless posts_to_create_hashes.empty?

enqueue_new_post_scan_jobs(posts_to_create_hashes.map do |hash|
hash[:fa_id]
end)
enqueue_new_post_scan_jobs(
posts_to_create_hashes.map { |hash| hash[:fa_id] }
)

@last_page_post_ids = Set.new
created_post_ids.each do |id|
@@ -163,10 +176,9 @@ class Domain::Fa::Job::FavsJob < Domain::Fa::Job::Base
def enqueue_new_post_scan_jobs(fa_ids)
bulk_enqueue_jobs do
fa_ids.each do |fa_id|
Domain::Fa::Job::ScanPostJob.perform_later({
fa_id: fa_id,
caused_by_entry: best_caused_by_entry,
})
Domain::Fa::Job::ScanPostJob.perform_later(
{ fa_id: fa_id, caused_by_entry: best_caused_by_entry }
)
# sleep 100000
end
end

@@ -11,28 +11,36 @@ class Domain::Fa::Job::HomePageJob < Domain::Fa::Job::Base
@total_num_posts_seen = 0
scan_home_page

logger.info("finished, #{@total_num_new_posts_seen.to_s.bold} new, #{@total_num_posts_seen.to_s.bold} total posts")
logger.info(
"finished, #{@total_num_new_posts_seen.to_s.bold} new, #{@total_num_posts_seen.to_s.bold} total posts"
)
end

private

def scan_home_page
url = "https://www.furaffinity.net/"
response = http_client.get(url, caused_by_entry: @first_entry || @caused_by_entry)
response =
http_client.get(url, caused_by_entry: @first_entry || @caused_by_entry)
log_entry = response.log_entry
@first_entry ||= log_entry

if response.status_code != 200
fatal_error("non 200 response for /: #{response.status_code.to_s.underline}")
fatal_error(
"non 200 response for /: #{response.status_code.to_s.underline}"
)
end

page = Domain::Fa::Parser::Page.new(response.body)
listing_page_stats = update_and_enqueue_posts_from_listings_page(
:browse_page, page, log_entry,
enqueue_posts_pri: :high,
page_desc: "HomePage",
continue_for: @continue_for,
)
listing_page_stats =
update_and_enqueue_posts_from_listings_page(
:browse_page,
page,
log_entry,
enqueue_posts_pri: :high,
page_desc: "HomePage",
continue_for: @continue_for
)

@total_num_new_posts_seen += listing_page_stats.new_seen
@total_num_posts_seen += listing_page_stats.total_seen

@@ -9,10 +9,12 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base

if @post.nil?
logger.error "no post model - fa_id: #{args[:fa_id]}, enqueue scan"
defer_job(Domain::Fa::Job::ScanPostJob, {
fa_id: args[:fa_id],
caused_by_entry: @caused_by_entry,
}) if args[:fa_id]
if args[:fa_id]
defer_job(
Domain::Fa::Job::ScanPostJob,
{ fa_id: args[:fa_id], caused_by_entry: @caused_by_entry }
)
end
return
end

@@ -39,7 +41,8 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
is_unresolvable_host = false
is_unresolvable_host ||= file_uri_host == "d9.facdn.net"
uri_tld = file_uri_host.split(".").last
is_unresolvable_host ||= uri_tld.length >= 6 && file_uri_host.start_with?("d.facdn.net")
is_unresolvable_host ||=
uri_tld.length >= 6 && file_uri_host.start_with?("d.facdn.net")

if is_unresolvable_host
logger.error("host is #{file_uri_host}, which will not resolve")
@@ -62,10 +65,8 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
private

def download_file
response = http_client.get(
@post.file_uri.to_s,
caused_by_entry: @caused_by_entry,
)
response =
http_client.get(@post.file_uri.to_s, caused_by_entry: @caused_by_entry)

if response.status_code == 404
@post.state_detail["404_count"] ||= 0
@@ -82,13 +83,13 @@ class Domain::Fa::Job::ScanFileJob < Domain::Fa::Job::Base
end

if response.status_code != 200
defer_job(Domain::Fa::Job::ScanPostJob, {
post: @post,
caused_by_entry: response.log_entry,
force_scan: true,
})
defer_job(
Domain::Fa::Job::ScanPostJob,
{ post: @post, caused_by_entry: response.log_entry, force_scan: true }
)

err_msg = "error downloading - log entry #{response.log_entry.id} / status code #{response.status_code}"
err_msg =
"error downloading - log entry #{response.log_entry.id} / status code #{response.status_code}"
@post.save!

if response.status_code == 404 && @post.state == "removed"

@@ -3,13 +3,16 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
ignore_signature_args :caused_by_entry

def perform(args)
@post = args[:post] || begin
Domain::Fa::Post.find_or_initialize_by(fa_id: args[:fa_id])
end
@post =
args[:post] ||
begin
Domain::Fa::Post.find_or_initialize_by(fa_id: args[:fa_id])
end

@caused_by_entry = args[:caused_by_entry]
@force_scan = !!args[:force_scan]
logger.prefix = proc { "[fa_id #{@post.fa_id.to_s.bold} / #{@post.state.bold}]" }
logger.prefix =
proc { "[fa_id #{@post.fa_id.to_s.bold} / #{@post.state.bold}]" }

if @post.state == "ok" && !@post.scanned? || @force_scan
if (@post.state_detail["404_count"] || 0) > 2
@@ -22,33 +25,36 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base

if (@post.state == "ok" && @post.file_uri && @post.file.nil?) || @force_scan
logger.info("enqueue file job (#{self.priority})")
defer_job(Domain::Fa::Job::ScanFileJob, {
post: @post,
caused_by_entry: @submission_entry || @caused_by_entry,
}, { priority: self.priority })
defer_job(
Domain::Fa::Job::ScanFileJob,
{ post: @post, caused_by_entry: @submission_entry || @caused_by_entry },
{ priority: self.priority }
)
end

enqueue_user_scan(@post.creator, @submission_entry) if @post.creator
ensure
enqueue_jobs_from_found_links(
@submission_entry,
suppress_jobs: [{
job: self.class,
fa_id: @post.fa_id,
}],
) if @submission_entry && @submission_entry.status_code == 200
if @submission_entry && @submission_entry.status_code == 200
enqueue_jobs_from_found_links(
@submission_entry,
suppress_jobs: [{ job: self.class, fa_id: @post.fa_id }]
)
end
logger.info "finished post scan"
end

private

def scan_post
response = http_client.get(
"https://www.furaffinity.net/view/#{@post.fa_id}/",
caused_by_entry: @caused_by_entry,
)
response =
http_client.get(
"https://www.furaffinity.net/view/#{@post.fa_id}/",
caused_by_entry: @caused_by_entry
)
if response.status_code != 200
fatal_error("error scanning fa_id #{@post.fa_id}: #{response.status_code}, log entry #{response.log_entry.id}")
fatal_error(
"error scanning fa_id #{@post.fa_id}: #{response.status_code}, log entry #{response.log_entry.id}"
)
end

@submission_entry = response.log_entry
@@ -62,7 +68,8 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base
end

unless page.probably_submission?
if response.body =~ /The page you are trying to reach is currently pending deletion/
if response.body =~
/The page you are trying to reach is currently pending deletion/
logger.error("post is pending deletion")
@post.state = :removed
@post.save!
@@ -80,9 +87,15 @@ class Domain::Fa::Job::ScanPostJob < Domain::Fa::Job::Base

@post.last_submission_page = @submission_entry
@post.title = submission.title
@post.creator = Domain::Fa::User.find_or_build_from_submission_parser(submission)
@post.creator =
Domain::Fa::User.find_or_build_from_submission_parser(submission)
@post.category = submission.category
@post.description = submission.description_html.encode("UTF-8", :invalid => :replace, :undef => :replace)
@post.description =
submission.description_html.encode(
"UTF-8",
invalid: :replace,
undef: :replace
)
@post.keywords = submission.keywords_array
@post.file_uri = submission.full_res_img
@post.theme = submission.theme

@@ -5,15 +5,13 @@ module Domain::Fa::Job
missing = fa_ids - posts.map(&:fa_id)
missing.each do |fa_id|
post = Domain::Fa::Post.create!(fa_id: fa_id)
Domain::Fa::Job::ScanPostJob.perform_later({
post: post, caused_by_entry: caused_by_entry,
})
Domain::Fa::Job::ScanPostJob.perform_later(
{ post: post, caused_by_entry: caused_by_entry }
)
posts << post
end
posts = posts.index_by(&:fa_id)
fa_ids.map do |fa_id|
posts[fa_id]
end
fa_ids.map { |fa_id| posts[fa_id] }
end
end
end

@@ -3,14 +3,16 @@ module Domain::Fa::Job
|
||||
DISABLED_PAGE_PATTERNS = [
|
||||
/User ".+" has voluntarily disabled access/,
|
||||
/User ".+" was not found in our database./,
|
||||
/The page you are trying to reach is currently pending deletion/,
|
||||
/The page you are trying to reach is currently pending deletion/
|
||||
]
|
||||
|
||||
def self.user_disabled_or_not_found?(user, response)
|
||||
if DISABLED_PAGE_PATTERNS.any? { |pattern| response.body =~ pattern }
|
||||
user.state = :scan_error
|
||||
user.state_detail ||= {}
|
||||
user.state_detail["scan_error"] = "account disabled or not found, see last_scanned_page_id"
|
||||
user.state_detail[
|
||||
"scan_error"
|
||||
] = "account disabled or not found, see last_scanned_page_id"
|
||||
user.state_detail["last_scanned_page_id"] = response.log_entry.id
|
||||
user.save!
|
||||
true
|
||||
@@ -21,46 +23,49 @@ module Domain::Fa::Job
|
||||
|
||||
def self.check_disabled_or_not_found(user, response)
|
||||
if response.status_code != 200
|
||||
return [:fatal, {
|
||||
message: "http #{response.status_code}, log entry #{response.log_entry.id}",
|
||||
}]
|
||||
return [
|
||||
:fatal,
|
||||
{
|
||||
message:
|
||||
"http #{response.status_code}, log entry #{response.log_entry.id}"
|
||||
}
|
||||
]
|
||||
end
|
||||
|
||||
page = Domain::Fa::Parser::Page.new(response.body, require_logged_in: false)
|
||||
if page.probably_user_page?
|
||||
return [:ok, { page: page }]
|
||||
end
|
||||
page =
|
||||
Domain::Fa::Parser::Page.new(response.body, require_logged_in: false)
|
||||
return :ok, { page: page } if page.probably_user_page?
|
||||
|
||||
if response.body =~ /has voluntarily disabled access/
|
||||
user.state = :scan_error
|
||||
user.state_detail = {
|
||||
scan_error: "(user scan) user has disabled account, see last_user_page_id",
|
||||
last_user_page_id: response.log_entry.id,
|
||||
scan_error:
|
||||
"(user scan) user has disabled account, see last_user_page_id",
|
||||
last_user_page_id: response.log_entry.id
|
||||
}
|
||||
try_name = /User "(.+)" has voluntarily disabled/.match(response.body)
|
||||
user.name ||= try_name && try_name[1] || user.url_name
|
||||
user.save!
|
||||
return [:stop, {
|
||||
message: "account disabled",
|
||||
}]
|
||||
return :stop, { message: "account disabled" }
|
||||
end
|
||||
|
||||
if response.body =~ /This user cannot be found./ || response.body =~ /The page you are trying to reach is currently pending deletion/
|
||||
if response.body =~ /This user cannot be found./ ||
|
||||
response.body =~
|
||||
/The page you are trying to reach is currently pending deletion/
|
||||
user.state = :scan_error
|
||||
user.state_detail = {
|
||||
scan_error: "(user scan) user was not found, see last_user_page_id",
|
||||
last_user_page_id: response.log_entry.id,
|
||||
last_user_page_id: response.log_entry.id
|
||||
}
|
||||
user.name ||= user.url_name
|
||||
user.save!
|
||||
return [:stop, {
|
||||
message: "account not found",
|
||||
}]
|
||||
return :stop, { message: "account not found" }
|
||||
end
|
||||
|
||||
return [:fatal, {
|
||||
message: "not a user page - log entry #{response.log_entry.id}",
|
||||
}]
|
||||
return [
|
||||
:fatal,
|
||||
{ message: "not a user page - log entry #{response.log_entry.id}" }
|
||||
]
|
||||
end
|
||||
|
||||
def self.update_user_fields_from_page(user, page, response)
|
||||
@@ -73,30 +78,35 @@ module Domain::Fa::Job
|
||||
user.num_comments_given = user_page.num_comments_given
|
||||
user.num_journals = user_page.num_journals
|
||||
user.num_favorites = user_page.num_favorites
|
||||
user.profile_html = user_page.profile_html.encode("UTF-8", :invalid => :replace, :undef => :replace)
|
||||
user.profile_html =
|
||||
user_page.profile_html.encode(
|
||||
"UTF-8",
|
||||
invalid: :replace,
|
||||
undef: :replace
|
||||
)
|
||||
user.log_entry_detail["last_user_page_id"] = response.log_entry.id
|
||||
|
||||
avatar = user.avatar_or_create
|
||||
user.avatar.file_uri = user_page.profile_thumb_url
|
||||
if user.avatar.changed?
|
||||
user.avatar.save!
|
||||
Domain::Fa::Job::UserAvatarJob.perform_later({
|
||||
user: user,
|
||||
caused_by_entry: response.log_entry,
|
||||
})
|
||||
Domain::Fa::Job::UserAvatarJob.perform_later(
|
||||
{ user: user, caused_by_entry: response.log_entry }
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
# names is an array of structs - [Struct(:name, :url_name)]
|
||||
def self.find_or_create_by_names(names, caused_by_entry: nil)
|
||||
users = Domain::Fa::User.where(url_name: names.map(&:url_name)).to_a
|
||||
missing = names.reject { |name| users.any? { |u| u.url_name == name.url_name } }
|
||||
missing =
|
||||
names.reject { |name| users.any? { |u| u.url_name == name.url_name } }
|
||||
missing.each do |name|
|
||||
user = Domain::Fa::User.create!(url_name: name.url_name, name: name.name)
|
||||
Domain::Fa::Job::UserPageJob.perform_later({
|
||||
user: user,
|
||||
caused_by_entry: caused_by_entry,
|
||||
})
|
||||
user =
|
||||
Domain::Fa::User.create!(url_name: name.url_name, name: name.name)
|
||||
Domain::Fa::Job::UserPageJob.perform_later(
|
||||
{ user: user, caused_by_entry: caused_by_entry }
|
||||
)
|
||||
users << user
|
||||
end
|
||||
users
|
||||
|
||||
@@ -6,10 +6,15 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
|
||||
init_from_args!(args, build_user: false)
|
||||
@user || raise("user must exist")
|
||||
@avatar = @user.avatar_or_create
|
||||
logger.prefix = proc { "[avatar #{@avatar.id.to_s.bold} / user #{@user.url_name.to_s.bold}]" }
|
||||
logger.prefix =
|
||||
proc do
|
||||
"[avatar #{@avatar.id.to_s.bold} / user #{@user.url_name.to_s.bold}]"
|
||||
end
|
||||
|
||||
if @avatar.file_sha256 && !@force_scan
|
||||
logger.warn("downloaded #{time_ago_in_words(@avatar.downloaded_file_at)}, skipping")
|
||||
logger.warn(
|
||||
"downloaded #{time_ago_in_words(@avatar.downloaded_file_at)}, skipping"
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
@@ -20,10 +25,10 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
|
||||
|
||||
if @avatar.file_uri.blank?
|
||||
if @user.due_for_page_scan?
|
||||
defer_job(Domain::Fa::Job::UserPageJob, {
|
||||
user: @user,
|
||||
caused_by_entry: @caused_by_entry,
|
||||
})
|
||||
defer_job(
|
||||
Domain::Fa::Job::UserPageJob,
|
||||
{ user: @user, caused_by_entry: @caused_by_entry }
|
||||
)
|
||||
logger.error("no file uri, scanning user page")
|
||||
else
|
||||
logger.error("no file uri")
|
||||
@@ -31,10 +36,8 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
|
||||
return
|
||||
end
|
||||
|
||||
response = http_client.get(
|
||||
@avatar.file_uri.to_s,
|
||||
caused_by_entry: @caused_by_entry,
|
||||
)
|
||||
response =
|
||||
http_client.get(@avatar.file_uri.to_s, caused_by_entry: @caused_by_entry)
|
||||
@avatar.log_entry = response.log_entry
|
||||
|
||||
if [200, 404].include?(response.status_code)
|
||||
@@ -49,8 +52,12 @@ class Domain::Fa::Job::UserAvatarJob < Domain::Fa::Job::Base
|
||||
logger.info("downloaded avatar file")
|
||||
else
|
||||
@avatar.state = :download_error
|
||||
@avatar.state_detail["download_error"] = "http status #{response.status_code}"
|
||||
fatal_error("http #{response.status_code}, log entry #{response.log_entry.id}")
|
||||
@avatar.state_detail[
|
||||
"download_error"
|
||||
] = "http status #{response.status_code}"
|
||||
fatal_error(
|
||||
"http #{response.status_code}, log entry #{response.log_entry.id}"
|
||||
)
|
||||
end
|
||||
ensure
|
||||
@avatar.save! if @avatar
|
||||
|
||||
@@ -14,12 +14,14 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
init_from_args!(args)
|
||||
@first_job_entry = nil
|
||||
if !@user.due_for_follows_scan? && !@force_scan
|
||||
logger.warn("scanned #{time_ago_in_words(@user.scanned_follows_at)}, skipping")
|
||||
logger.warn(
|
||||
"scanned #{time_ago_in_words(@user.scanned_follows_at)}, skipping"
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
# buggy (sentinal) user
|
||||
return if @user.id == 117552 && @user.url_name == "click here"
|
||||
return if @user.id == 117_552 && @user.url_name == "click here"
|
||||
|
||||
@page_number = 1
|
||||
@total_follows_seen = 0
|
||||
@@ -35,25 +37,29 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
|
||||
to_add = nil
|
||||
to_remove = nil
|
||||
measure(proc { |jobs|
|
||||
"add #{to_add.size.to_s.bold} follows, " +
|
||||
"remove #{to_remove.size.to_s.bold} follows"
|
||||
}) do
|
||||
measure(
|
||||
proc do |jobs|
|
||||
"add #{to_add.size.to_s.bold} follows, " +
|
||||
"remove #{to_remove.size.to_s.bold} follows"
|
||||
end
|
||||
) do
|
||||
existing_followed_ids = Set.new(@user.follower_joins.pluck(:followed_id))
|
||||
to_remove = existing_followed_ids - @scanned_followed_ids
|
||||
to_add = @scanned_followed_ids - existing_followed_ids
|
||||
end
|
||||
|
||||
measure(proc {
|
||||
"updated follows list to #{@user.follows.count.to_s.bold} users"
|
||||
}) do
|
||||
measure(
|
||||
proc { "updated follows list to #{@user.follows.count.to_s.bold} users" }
|
||||
) do
|
||||
ReduxApplicationRecord.transaction do
|
||||
if to_remove.any?
|
||||
@user.follower_joins.where(followed_id: to_remove).delete_all
|
||||
end
|
||||
@user.follower_joins.insert_all!(to_add.map do |id|
|
||||
{ followed_id: id }
|
||||
end) if to_add.any?
|
||||
if to_add.any?
|
||||
@user.follower_joins.insert_all!(
|
||||
to_add.map { |id| { followed_id: id } }
|
||||
)
|
||||
end
|
||||
@user.scanned_follows_at = Time.now
|
||||
@user.save!
|
||||
end
|
||||
@@ -61,10 +67,9 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
|
||||
if @created_user
|
||||
logger.info("user was new record, enqueue page scan job")
|
||||
Domain::Fa::Job::UserPageJob.perform_later({
|
||||
user: @user,
|
||||
caused_by_entry: best_caused_by_entry,
|
||||
})
|
||||
Domain::Fa::Job::UserPageJob.perform_later(
|
||||
{ user: @user, caused_by_entry: best_caused_by_entry }
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -73,7 +78,8 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
def scan_follows_page
|
||||
ret = nil
|
||||
|
||||
url = if @page_number > 1
|
||||
url =
|
||||
if @page_number > 1
|
||||
"https://www.furaffinity.net/watchlist/by/#{@user.url_name}/#{@page_number}/?"
|
||||
else
|
||||
"https://www.furaffinity.net/watchlist/by/#{@user.url_name}/"
|
||||
@@ -83,7 +89,7 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
if response.status_code != 200
|
||||
fatal_error(
|
||||
"http #{response.status_code.to_s.red.bold}, " +
|
||||
"log entry #{response.log_entry.id.to_s.bold}"
|
||||
"log entry #{response.log_entry.id.to_s.bold}"
|
||||
)
|
||||
end
|
||||
|
||||
@@ -96,7 +102,9 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
end
|
||||
|
||||
if user_list.last.url_name == @last_in_user_list
|
||||
logger.info("page #{@page_number.to_s.bold} saw same user as last page, break")
|
||||
logger.info(
|
||||
"page #{@page_number.to_s.bold} saw same user as last page, break"
|
||||
)
|
||||
return :break
|
||||
end
|
||||
|
||||
@@ -107,43 +115,49 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
@total_follows_seen += user_list.length
|
||||
|
||||
users_to_create_hashes = []
|
||||
followed_user_ids = measure(proc {
|
||||
"page #{@page_number.to_s.bold} - " +
|
||||
"#{user_list.length.to_s.bold} users on page, " +
|
||||
"created #{users_to_create_hashes.size.to_s.bold}"
|
||||
}) do
|
||||
existing_url_name_to_id = Domain::Fa::User.where(
|
||||
url_name: user_list.map(&:url_name),
|
||||
).pluck(:id, :url_name).map do |id, url_name|
|
||||
[url_name, id]
|
||||
end.to_h
|
||||
followed_user_ids =
|
||||
measure(
|
||||
proc do
|
||||
"page #{@page_number.to_s.bold} - " +
|
||||
"#{user_list.length.to_s.bold} users on page, " +
|
||||
"created #{users_to_create_hashes.size.to_s.bold}"
|
||||
end
|
||||
) do
|
||||
existing_url_name_to_id =
|
||||
Domain::Fa::User
|
||||
.where(url_name: user_list.map(&:url_name))
|
||||
.pluck(:id, :url_name)
|
||||
.map { |id, url_name| [url_name, id] }
|
||||
.to_h
|
||||
|
||||
users_to_create_hashes = user_list.reject do |user|
|
||||
existing_url_name_to_id[user.url_name]
|
||||
end.map do |user|
|
||||
{
|
||||
url_name: user.url_name,
|
||||
name: user.name,
|
||||
state_detail: { "first_seen_entry" => response.log_entry.id },
|
||||
}
|
||||
users_to_create_hashes =
|
||||
user_list
|
||||
.reject { |user| existing_url_name_to_id[user.url_name] }
|
||||
.map do |user|
|
||||
{
|
||||
url_name: user.url_name,
|
||||
name: user.name,
|
||||
state_detail: {
|
||||
"first_seen_entry" => response.log_entry.id
|
||||
}
|
||||
}
|
||||
end
|
||||
|
||||
created_user_ids =
|
||||
Domain::Fa::User
|
||||
.upsert_all(
|
||||
users_to_create_hashes,
|
||||
unique_by: :url_name,
|
||||
update_only: :url_name,
|
||||
returning: %i[id url_name]
|
||||
)
|
||||
.map { |row| row["id"] } unless users_to_create_hashes.empty?
|
||||
|
||||
enqueue_new_user_pagescan_jobs(users_to_create_hashes)
|
||||
(created_user_ids || []) + existing_url_name_to_id.values
|
||||
end
|
||||
|
||||
created_user_ids = Domain::Fa::User.upsert_all(
|
||||
users_to_create_hashes,
|
||||
unique_by: :url_name,
|
||||
update_only: :url_name,
|
||||
returning: %i[id url_name],
|
||||
).map do |row|
|
||||
row["id"]
|
||||
end unless users_to_create_hashes.empty?
|
||||
|
||||
enqueue_new_user_pagescan_jobs(users_to_create_hashes)
|
||||
(created_user_ids || []) + existing_url_name_to_id.values
|
||||
end
|
||||
|
||||
followed_user_ids.each do |user_id|
|
||||
@scanned_followed_ids.add(user_id)
|
||||
end
|
||||
followed_user_ids.each { |user_id| @scanned_followed_ids.add(user_id) }
|
||||
|
||||
ret
|
||||
end
|
||||
@@ -151,10 +165,12 @@ class Domain::Fa::Job::UserFollowsJob < Domain::Fa::Job::Base
|
||||
def enqueue_new_user_pagescan_jobs(user_hashes)
|
||||
bulk_enqueue_jobs do
|
||||
user_hashes.each do |user_hash|
|
||||
Domain::Fa::Job::UserPageJob.perform_later({
|
||||
url_name: user_hash[:url_name],
|
||||
caused_by_entry: best_caused_by_entry,
|
||||
})
|
||||
Domain::Fa::Job::UserPageJob.perform_later(
|
||||
{
|
||||
url_name: user_hash[:url_name],
|
||||
caused_by_entry: best_caused_by_entry
|
||||
}
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -13,27 +13,31 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
|
||||
end
|
||||
|
||||
# buggy (sentinal) user
|
||||
return if @user.id == 117552 && @user.url_name == "click here"
|
||||
return if @user.id == 117_552 && @user.url_name == "click here"
|
||||
|
||||
@go_until_end = @user.scanned_gallery_at.nil?
|
||||
@first_gallery_page_entry = nil
|
||||
@max_page_number = MAX_PAGE_NUMBER
|
||||
if @user.num_submissions && @user.scanned_page_at && @user.scanned_page_at > 3.days.ago
|
||||
if @user.num_submissions && @user.scanned_page_at &&
|
||||
@user.scanned_page_at > 3.days.ago
|
||||
@max_page_number = (@user.num_submissions * 72) + 3
|
||||
end
|
||||
|
||||
if !@user.due_for_gallery_scan? && !@force_scan
|
||||
logger.warn("gallery scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping")
|
||||
logger.warn(
|
||||
"gallery scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping"
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
@visited = Set.new
|
||||
@folders = Set.new(
|
||||
[
|
||||
{ href: "/gallery/#{@user.url_name}/", title: "Main Gallery" },
|
||||
{ href: "/scraps/#{@user.url_name}/", title: "Scraps" },
|
||||
]
|
||||
)
|
||||
@folders =
|
||||
Set.new(
|
||||
[
|
||||
{ href: "/gallery/#{@user.url_name}/", title: "Main Gallery" },
|
||||
{ href: "/scraps/#{@user.url_name}/", title: "Scraps" }
|
||||
]
|
||||
)
|
||||
|
||||
until (@folders - @visited).empty?
|
||||
folder = (@folders - @visited).first
|
||||
@@ -41,7 +45,9 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
|
||||
break if scan_folder(folder) == :break
|
||||
end
|
||||
|
||||
@user.log_entry_detail["last_gallery_page_id"] = @first_gallery_page_entry&.id
|
||||
@user.log_entry_detail[
|
||||
"last_gallery_page_id"
|
||||
] = @first_gallery_page_entry&.id
|
||||
@user.scanned_gallery_at = Time.now
|
||||
@user.save!
|
||||
end
|
||||
@@ -54,23 +60,31 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
|
||||
total_num_posts_seen = 0
|
||||
while true
|
||||
if page_number >= @max_page_number
|
||||
fatal_error("hit max #{page_number}) pages, num submissions: #{@user.num_submissions}")
|
||||
fatal_error(
|
||||
"hit max #{page_number}) pages, num submissions: #{@user.num_submissions}"
|
||||
)
|
||||
end
|
||||
|
||||
folder_href = folder[:href]
|
||||
folder_href += "/" unless folder_href.end_with?("/")
|
||||
folder_href = "/" + folder_href unless folder_href.start_with?("/")
|
||||
page_url = "https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
|
||||
page_url =
|
||||
"https://www.furaffinity.net#{folder_href}#{page_number}?perpage=72"
|
||||
response = http_client.get(page_url, caused_by_entry: @caused_by_entry)
|
||||
|
||||
@first_gallery_page_entry ||= response.log_entry
|
||||
@caused_by_entry = @first_gallery_page_entry
|
||||
|
||||
if response.status_code != 200
|
||||
fatal_error("http #{response.status_code}, log entry #{response.log_entry.id}")
|
||||
fatal_error(
|
||||
"http #{response.status_code}, log entry #{response.log_entry.id}"
|
||||
)
|
||||
end
|
||||
|
||||
if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(@user, response)
|
||||
if Domain::Fa::Job::ScanUserUtils.user_disabled_or_not_found?(
|
||||
@user,
|
||||
response
|
||||
)
|
||||
logger.error("account disabled / not found, abort")
|
||||
return :break
|
||||
end
|
||||
@@ -80,19 +94,20 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
|
||||
# newly instantiated users don't have a name yet, but can derive it from the gallery page
|
||||
@user.name ||= page.user_page.name || @user.url_name
|
||||
|
||||
listing_page_stats = update_and_enqueue_posts_from_listings_page(
|
||||
:gallery_page, page, response.log_entry,
|
||||
enqueue_posts_pri: :low,
|
||||
enqueue_gallery_scan: false,
|
||||
enqueue_page_scan: false,
|
||||
page_desc: "#{folder[:title]}@#{page_number}",
|
||||
)
|
||||
listing_page_stats =
|
||||
update_and_enqueue_posts_from_listings_page(
|
||||
:gallery_page,
|
||||
page,
|
||||
response.log_entry,
|
||||
enqueue_posts_pri: :low,
|
||||
enqueue_gallery_scan: false,
|
||||
enqueue_page_scan: false,
|
||||
page_desc: "#{folder[:title]}@#{page_number}"
|
||||
)
|
||||
total_num_new_posts_seen += listing_page_stats.new_seen
|
||||
total_num_posts_seen += listing_page_stats.total_seen
|
||||
|
||||
page.submission_folders.each do |sf|
|
||||
@folders.add?(sf)
|
||||
end if @force_scan
|
||||
page.submission_folders.each { |sf| @folders.add?(sf) } if @force_scan
|
||||
|
||||
page_number += 1
|
||||
break if listing_page_stats.new_seen == 0 && !@go_until_end
|
||||
@@ -101,12 +116,11 @@ class Domain::Fa::Job::UserGalleryJob < Domain::Fa::Job::Base
|
||||
|
||||
logger.info "folder `#{folder[:title].bold}` complete - #{total_num_new_posts_seen.to_s.bold} new, #{total_num_posts_seen.to_s.bold} total"
|
||||
ensure
|
||||
enqueue_jobs_from_found_links(
|
||||
response.log_entry,
|
||||
suppress_jobs: [{
|
||||
job: self.class,
|
||||
url_name: @user.url_name,
|
||||
}],
|
||||
) if response && response.status_code == 200
|
||||
if response && response.status_code == 200
|
||||
enqueue_jobs_from_found_links(
|
||||
response.log_entry,
|
||||
suppress_jobs: [{ job: self.class, url_name: @user.url_name }]
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -6,8 +6,9 @@ module Domain::Fa::Job
|
||||
def perform(args)
|
||||
init_from_args!(args)
|
||||
# buggy (sentinal) user
|
||||
return if @user.id == 117552 && @user.url_name == "click here"
|
||||
logger.prefix = proc { "[ #{@user.id.to_s.bold} / #{@user.url_name.bold} ]" }
|
||||
return if @user.id == 117_552 && @user.url_name == "click here"
|
||||
logger.prefix =
|
||||
proc { "[ #{@user.id.to_s.bold} / #{@user.url_name.bold} ]" }
|
||||
|
||||
# this is similar to a user page job, and will update the user page
|
||||
# however, it will incrementally update user favs & follows / following:
|
||||
@@ -18,18 +19,24 @@ module Domain::Fa::Job
|
||||
# and add new follows.
|
||||
|
||||
if !@user.due_for_incremental_scan? && !@force_scan
|
||||
logger.warn("scanned #{@user.time_ago_for_incremental_scan.bold}, skipping")
|
||||
logger.warn(
|
||||
"scanned #{@user.time_ago_for_incremental_scan.bold}, skipping"
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
response = http_client.get(
|
||||
"https://www.furaffinity.net/user/#{@user.url_name}/",
|
||||
caused_by_entry: @caused_by_entry,
|
||||
)
|
||||
response =
|
||||
http_client.get(
|
||||
"https://www.furaffinity.net/user/#{@user.url_name}/",
|
||||
caused_by_entry: @caused_by_entry
|
||||
)
|
||||
@log_entry = response.log_entry
|
||||
|
||||
ret, opts = Domain::Fa::Job::ScanUserUtils.
|
||||
check_disabled_or_not_found(@user, response)
|
||||
ret, opts =
|
||||
Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(
|
||||
@user,
|
||||
response
|
||||
)
|
||||
case ret
|
||||
when :ok
|
||||
page = opts[:page]
|
||||
@@ -40,8 +47,11 @@ module Domain::Fa::Job
|
||||
fatal_error(opts[:message])
|
||||
end
|
||||
|
||||
Domain::Fa::Job::ScanUserUtils.
|
||||
update_user_fields_from_page(@user, page, response)
|
||||
Domain::Fa::Job::ScanUserUtils.update_user_fields_from_page(
|
||||
@user,
|
||||
page,
|
||||
response
|
||||
)
|
||||
|
||||
check_favs(@user, page.user_page.recent_fav_fa_ids)
|
||||
check_watchers(@user, page.user_page.recent_watchers)
|
||||
@@ -52,21 +62,24 @@ module Domain::Fa::Job
|
||||
@user.save!
|
||||
logger.info "completed page scan"
|
||||
ensure
|
||||
enqueue_jobs_from_found_links(
|
||||
response.log_entry,
|
||||
suppress_jobs: [{
|
||||
job: self.class,
|
||||
url_name: @user.url_name,
|
||||
}],
|
||||
) if response && response.status_code == 200
|
||||
if response && response.status_code == 200
|
||||
enqueue_jobs_from_found_links(
|
||||
response.log_entry,
|
||||
suppress_jobs: [{ job: self.class, url_name: @user.url_name }]
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
def check_favs(user, recent_fav_fa_ids)
|
||||
recent_fav_posts = Domain::Fa::Job::ScanPostUtils.
|
||||
find_or_create_by_fa_ids(recent_fav_fa_ids, caused_by_entry: @log_entry)
|
||||
recent_fav_posts =
|
||||
Domain::Fa::Job::ScanPostUtils.find_or_create_by_fa_ids(
|
||||
recent_fav_fa_ids,
|
||||
caused_by_entry: @log_entry
|
||||
)
|
||||
recent_fav_post_ids = recent_fav_posts.map(&:id)
|
||||
|
||||
existing_fav_post_ids = user.fav_post_joins.where(post_id: recent_fav_post_ids).pluck(:post_id)
|
||||
existing_fav_post_ids =
|
||||
user.fav_post_joins.where(post_id: recent_fav_post_ids).pluck(:post_id)
|
||||
missing_fav_post_ids = recent_fav_post_ids - existing_fav_post_ids
|
||||
if missing_fav_post_ids.empty?
|
||||
logger.info("no new favs for user")
|
||||
@@ -77,17 +90,19 @@ module Domain::Fa::Job
|
||||
num_missing = missing_fav_post_ids.size
|
||||
if num_missing >= 0
|
||||
logger.info("add #{num_missing.to_s.bold} new favs for user")
|
||||
@user.fav_post_joins.insert_all!(missing_fav_post_ids.map do |post_id|
|
||||
{ post_id: post_id }
|
||||
end)
|
||||
@user.fav_post_joins.insert_all!(
|
||||
missing_fav_post_ids.map { |post_id| { post_id: post_id } }
|
||||
)
|
||||
end
|
||||
|
||||
if missing_fav_post_ids.include? recent_fav_post_ids.last
|
||||
logger.info("last fav is new (#{num_missing.to_s.bold} missing), enqueue full favs scan")
|
||||
defer_job(Domain::Fa::Job::FavsJob, {
|
||||
user: user,
|
||||
caused_by_entry: @log_entry,
|
||||
})
|
||||
logger.info(
|
||||
"last fav is new (#{num_missing.to_s.bold} missing), enqueue full favs scan"
|
||||
)
|
||||
defer_job(
|
||||
Domain::Fa::Job::FavsJob,
|
||||
{ user: user, caused_by_entry: @log_entry }
|
||||
)
|
||||
else
|
||||
@user.scanned_favs_at = Time.now
|
||||
end
|
||||
@@ -98,8 +113,13 @@ module Domain::Fa::Job
|
||||
# TODO - may be useful to have a separate 'scan full followed by' job
|
||||
# to handle users who are watched by a large number of others
|
||||
def check_watchers(user, recent_watchers)
|
||||
recent_models = Domain::Fa::Job::ScanUserUtils.find_or_create_by_names(recent_watchers)
|
||||
existing = user.followed_joins.where(follower_id: recent_models.map(&:id)).pluck(:follower_id)
|
||||
recent_models =
|
||||
Domain::Fa::Job::ScanUserUtils.find_or_create_by_names(recent_watchers)
|
||||
existing =
|
||||
user
|
||||
.followed_joins
|
||||
.where(follower_id: recent_models.map(&:id))
|
||||
.pluck(:follower_id)
|
||||
missing = recent_models.reject { |w| existing.include? w.id }
|
||||
if missing.empty?
|
||||
logger.info("no new watchers")
|
||||
@@ -107,15 +127,20 @@ module Domain::Fa::Job
|
||||
end
|
||||
|
||||
num_missing = missing.size
|
||||
user.followed_joins.insert_all!(missing.map do |watcher|
|
||||
{ follower_id: watcher.id }
|
||||
end)
|
||||
user.followed_joins.insert_all!(
|
||||
missing.map { |watcher| { follower_id: watcher.id } }
|
||||
)
|
||||
logger.info("added #{num_missing.to_s.bold} new watchers")
|
||||
end
|
||||
|
||||
def check_watching(user, recent_watching)
|
||||
recent_models = Domain::Fa::Job::ScanUserUtils.find_or_create_by_names(recent_watching)
|
||||
existing = user.follower_joins.where(followed_id: recent_models.map(&:id)).pluck(:followed_id)
|
||||
recent_models =
|
||||
Domain::Fa::Job::ScanUserUtils.find_or_create_by_names(recent_watching)
|
||||
existing =
|
||||
user
|
||||
.follower_joins
|
||||
.where(followed_id: recent_models.map(&:id))
|
||||
.pluck(:followed_id)
|
||||
missing = recent_models.reject { |w| existing.include? w.id }
|
||||
if missing.empty?
|
||||
logger.info("no new users watched")
|
||||
@@ -124,17 +149,16 @@ module Domain::Fa::Job
|
||||
end
|
||||
|
||||
num_missing = missing.size
|
||||
user.follower_joins.insert_all!(missing.map do |watcher|
|
||||
{ followed_id: watcher.id }
|
||||
end)
|
||||
user.follower_joins.insert_all!(
|
||||
missing.map { |watcher| { followed_id: watcher.id } }
|
||||
)
|
||||
logger.info("added #{num_missing.to_s.bold} new users watched")
|
||||
|
||||
if missing.any? { |w| w.url_name == recent_watching.last.url_name }
|
||||
logger.info("last user watched is new, enqueue full follows scan")
|
||||
Domain::Fa::Job::UserFollowsJob.perform_later({
|
||||
user: user,
|
||||
caused_by_entry: @log_entry,
|
||||
})
|
||||
Domain::Fa::Job::UserFollowsJob.perform_later(
|
||||
{ user: user, caused_by_entry: @log_entry }
|
||||
)
|
||||
else
|
||||
@user.scanned_follows_at = Time.now
|
||||
end
|
||||
|
||||
@@ -6,20 +6,26 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
|
||||
init_from_args!(args)
|
||||
|
||||
# buggy (sentinal) user
|
||||
return if @user.id == 117552 && @user.url_name == "click here"
|
||||
return if @user.id == 117_552 && @user.url_name == "click here"
|
||||
|
||||
if !@user.due_for_page_scan? && !@force_scan
|
||||
logger.warn("scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping")
|
||||
logger.warn(
|
||||
"scanned #{time_ago_in_words(@user.scanned_page_at)}, skipping"
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
response = http_client.get(
|
||||
"https://www.furaffinity.net/user/#{@user.url_name}/",
|
||||
caused_by_entry: @caused_by_entry,
|
||||
)
|
||||
response =
|
||||
http_client.get(
|
||||
"https://www.furaffinity.net/user/#{@user.url_name}/",
|
||||
caused_by_entry: @caused_by_entry
|
||||
)
|
||||
|
||||
ret, opts = Domain::Fa::Job::ScanUserUtils.
|
||||
check_disabled_or_not_found(@user, response)
|
||||
ret, opts =
|
||||
Domain::Fa::Job::ScanUserUtils.check_disabled_or_not_found(
|
||||
@user,
|
||||
response
|
||||
)
|
||||
case ret
|
||||
when :ok
|
||||
page = opts[:page]
|
||||
@@ -30,18 +36,20 @@ class Domain::Fa::Job::UserPageJob < Domain::Fa::Job::Base
|
||||
fatal_error(opts[:message])
|
||||
end
|
||||
|
||||
Domain::Fa::Job::ScanUserUtils.
|
||||
update_user_fields_from_page(@user, page, response)
|
||||
Domain::Fa::Job::ScanUserUtils.update_user_fields_from_page(
|
||||
@user,
|
||||
page,
|
||||
response
|
||||
)
|
||||
@user.scanned_page_at = Time.now
|
||||
@user.save!
|
||||
logger.info "completed page scan"
|
||||
ensure
|
||||
enqueue_jobs_from_found_links(
|
||||
response.log_entry,
|
||||
suppress_jobs: [{
|
||||
job: self.class,
|
||||
url_name: @user.url_name,
|
||||
}],
|
||||
) if response && response.status_code == 200
|
||||
if response && response.status_code == 200
|
||||
enqueue_jobs_from_found_links(
|
||||
response.log_entry,
|
||||
suppress_jobs: [{ job: self.class, url_name: @user.url_name }]
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -3,27 +3,22 @@ module Domain::Inkbunny::Job
|
||||
def perform(args)
|
||||
file = args[:file] || fatal_error("file is required")
|
||||
caused_by_entry = args[:caused_by_entry]
|
||||
logger.prefix = proc {
|
||||
"[#{file.id.to_s.bold} / " +
|
||||
"#{file.ib_file_id.to_s.bold} / " +
|
||||
"#{file.state.to_s.bold}] "
|
||||
}
|
||||
logger.prefix =
|
||||
proc do
|
||||
"[#{file.id.to_s.bold} / " + "#{file.ib_file_id.to_s.bold} / " +
|
||||
"#{file.state.to_s.bold}] "
|
||||
end
|
||||
|
||||
url_str = file.url_str
|
||||
if file.state == "error"
|
||||
retry_count = file.state_detail&.
|
||||
[]("error")&.
|
||||
[]("retry_count") || 0
|
||||
retry_count = file.state_detail&.[]("error")&.[]("retry_count") || 0
|
||||
if retry_count >= 3
|
||||
logger.error("file has been retried 3 times, giving up")
|
||||
return
|
||||
end
|
||||
end
|
||||
|
||||
response = http_client.get(
|
||||
url_str,
|
||||
caused_by_entry: caused_by_entry,
|
||||
)
|
||||
response = http_client.get(url_str, caused_by_entry: caused_by_entry)
|
||||
|
||||
if response.status_code != 200
|
||||
file.state = :error
|
||||
|
||||
@@ -1,8 +1,13 @@
|
||||
module Domain::Inkbunny::Job
|
||||
class LatestPostsJob < Base
|
||||
def perform(args)
|
||||
url = "https://inkbunny.net/api_search.php?orderby=create_datetime&keywords=no&title=no&description=no"
|
||||
@api_search_response = http_client.post(url, caused_by_entry: @first_browse_page_entry || @caused_by_entry)
|
||||
url =
|
||||
"https://inkbunny.net/api_search.php?orderby=create_datetime&keywords=no&title=no&description=no"
|
||||
@api_search_response =
|
||||
http_client.post(
|
||||
url,
|
||||
caused_by_entry: @first_browse_page_entry || @caused_by_entry
|
||||
)
|
||||
if @api_search_response.status_code != 200
|
||||
fatal_error("api_search failed: #{@api_search_response.status_code}")
|
||||
end
|
||||
@@ -14,11 +19,13 @@ module Domain::Inkbunny::Job
|
||||
|
||||
def handle_search_response(api_search_json)
|
||||
ib_submission_jsons = api_search_json["submissions"]
|
||||
ib_submission_ids = ib_submission_jsons.map { |j| j["submission_id"]&.to_i }
|
||||
@ib_post_id_to_model = Domain::Inkbunny::Post
|
||||
.where(ib_post_id: ib_submission_ids)
|
||||
.includes(:files, :creator)
|
||||
.index_by(&:ib_post_id)
|
||||
ib_submission_ids =
|
||||
ib_submission_jsons.map { |j| j["submission_id"]&.to_i }
|
||||
@ib_post_id_to_model =
|
||||
Domain::Inkbunny::Post
|
||||
.where(ib_post_id: ib_submission_ids)
|
||||
.includes(:files, :creator)
|
||||
.index_by(&:ib_post_id)
|
||||
|
||||
new_posts = []
|
||||
users = []
|
||||
@@ -26,15 +33,12 @@ module Domain::Inkbunny::Job
|
||||
ib_submission_jsons.each do |submission_json|
|
||||
ib_post_id = submission_json["submission_id"]&.to_i
|
||||
unless @ib_post_id_to_model[ib_post_id]
|
||||
post = Domain::Inkbunny::Post.new({
|
||||
ib_post_id: ib_post_id,
|
||||
})
|
||||
post = Domain::Inkbunny::Post.new({ ib_post_id: ib_post_id })
|
||||
|
||||
user = Domain::Inkbunny::User.find_or_initialize_by({
|
||||
ib_user_id: submission_json["user_id"].to_i,
|
||||
}) do |user|
|
||||
user.name = submission_json["username"]
|
||||
end
|
||||
user =
|
||||
Domain::Inkbunny::User.find_or_initialize_by(
|
||||
{ ib_user_id: submission_json["user_id"].to_i }
|
||||
) { |user| user.name = submission_json["username"] }
|
||||
user.save!
|
||||
post.creator = user
|
||||
new_posts << post
|
||||
@@ -43,9 +47,7 @@ module Domain::Inkbunny::Job
|
||||
end
|
||||
|
||||
Domain::Inkbunny::Post.transaction do
|
||||
users.select do |user|
|
||||
user.new_record? || user.changed?
|
||||
end.each(&:save!)
|
||||
users.select { |user| user.new_record? || user.changed? }.each(&:save!)
|
||||
new_posts.each(&:save!)
|
||||
end
|
||||
|
||||
@@ -53,7 +55,8 @@ module Domain::Inkbunny::Job
|
||||
needs_deep_update_posts = []
|
||||
Domain::Inkbunny::Post.transaction do
|
||||
ib_submission_jsons.each do |submission_json|
|
||||
needs_deep_update, post = shallow_update_post_from_submission_json(submission_json)
|
||||
needs_deep_update, post =
|
||||
shallow_update_post_from_submission_json(submission_json)
|
||||
needs_deep_update_posts << post if needs_deep_update
|
||||
end
|
||||
end
|
||||
@@ -67,15 +70,16 @@ module Domain::Inkbunny::Job
|
||||
|
||||
if needs_deep_update_posts.any?
|
||||
ids_list = needs_deep_update_posts.map(&:ib_post_id).join(",")
|
||||
url = "https://inkbunny.net/api_submissions.php?" +
|
||||
"submission_ids=#{ids_list}" +
|
||||
"&show_description=yes&show_writing=yes&show_pools=yes"
|
||||
@api_submissions_response = http_client.get(
|
||||
url,
|
||||
caused_by_entry: @api_search_response.log_entry,
|
||||
)
|
||||
url =
|
||||
"https://inkbunny.net/api_submissions.php?" +
|
||||
"submission_ids=#{ids_list}" +
|
||||
"&show_description=yes&show_writing=yes&show_pools=yes"
|
||||
@api_submissions_response =
|
||||
http_client.get(url, caused_by_entry: @api_search_response.log_entry)
|
||||
if @api_submissions_response.status_code != 200
|
||||
fatal_error("api_submissions failed: #{@api_submissions_response.status_code}")
|
||||
fatal_error(
|
||||
"api_submissions failed: #{@api_submissions_response.status_code}"
|
||||
)
|
||||
end
|
||||
api_submissions_json = JSON.parse(@api_submissions_response.body)
|
||||
api_submissions_json["submissions"].each do |submission_json|
|
||||
@@ -96,9 +100,9 @@ module Domain::Inkbunny::Job
|
||||
post.rating = json["rating_id"]&.to_i
|
||||
post.submission_type = json["submission_type_id"]&.to_i
|
||||
post.ib_detail_raw = json
|
||||
needs_deep_update = post.last_file_updated_at_changed? ||
|
||||
post.num_files_changed? ||
|
||||
post.files.count != post.num_files
|
||||
needs_deep_update =
|
||||
post.last_file_updated_at_changed? || post.num_files_changed? ||
|
||||
post.files.count != post.num_files
|
||||
post.save!
|
||||
[needs_deep_update, post]
|
||||
end
|
||||
@@ -122,41 +126,45 @@ module Domain::Inkbunny::Job
|
||||
next if post_files_by_md5[md5_initial]
|
||||
|
||||
md5_full = file_json["full_file_md5"]
|
||||
file = post.files.create!({
|
||||
ib_file_id: file_json["file_id"]&.to_i,
|
||||
ib_created_at: Time.parse(file_json["create_datetime"]),
|
||||
file_order: file_json["submission_file_order"]&.to_i,
|
||||
ib_detail_raw: file_json,
|
||||
file_name: file_json["file_name"],
|
||||
url_str: file_json["file_url_full"],
|
||||
md5_initial: md5_initial,
|
||||
md5_full: md5_full,
|
||||
md5s: {
|
||||
"initial_file_md5": md5_initial,
|
||||
"full_file_md5": file_json["full_file_md5"],
|
||||
"large_file_md5": file_json["large_file_md5"],
|
||||
"small_file_md5": file_json["small_file_md5"],
|
||||
"thumbnail_md5": file_json["thumbnail_md5"],
|
||||
},
|
||||
})
|
||||
file =
|
||||
post.files.create!(
|
||||
{
|
||||
ib_file_id: file_json["file_id"]&.to_i,
|
||||
ib_created_at: Time.parse(file_json["create_datetime"]),
|
||||
file_order: file_json["submission_file_order"]&.to_i,
|
||||
ib_detail_raw: file_json,
|
||||
file_name: file_json["file_name"],
|
||||
url_str: file_json["file_url_full"],
|
||||
md5_initial: md5_initial,
|
||||
md5_full: md5_full,
|
||||
md5s: {
|
||||
initial_file_md5: md5_initial,
|
||||
full_file_md5: file_json["full_file_md5"],
|
||||
large_file_md5: file_json["large_file_md5"],
|
||||
small_file_md5: file_json["small_file_md5"],
|
||||
thumbnail_md5: file_json["thumbnail_md5"]
|
||||
}
|
||||
}
|
||||
)
|
||||
logger.info "[post #{post.ib_post_id.to_s.bold}]: " +
|
||||
"new file #{file.ib_file_id.to_s.bold} - #{file.file_name.black.bold}"
|
||||
|
||||
defer_job(Domain::Inkbunny::Job::FileJob, {
|
||||
file: file,
|
||||
caused_by_entry: @api_submissions_response.log_entry,
|
||||
})
|
||||
defer_job(
|
||||
Domain::Inkbunny::Job::FileJob,
|
||||
{ file: file, caused_by_entry: @api_submissions_response.log_entry }
|
||||
)
|
||||
end
|
||||
post.save!
|
||||
end
|
||||
|
||||
def post_for_json(submission_json)
|
||||
post_id = submission_json["submission_id"]&.to_i || fatal_error(
|
||||
"submission_id not found in submission_json: #{submission_json.keys.join(", ")}"
|
||||
)
|
||||
@ib_post_id_to_model[post_id] || fatal_error(
|
||||
"post not found for ib_post_id #{post_id}"
|
||||
)
|
||||
post_id =
|
||||
submission_json["submission_id"]&.to_i ||
|
||||
fatal_error(
|
||||
"submission_id not found in submission_json: #{submission_json.keys.join(", ")}"
|
||||
)
|
||||
@ib_post_id_to_model[post_id] ||
|
||||
fatal_error("post not found for ib_post_id #{post_id}")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -7,17 +7,16 @@ class Domain::Twitter::Job::MediaJob < Domain::Twitter::Job::TwitterJobBase
|
||||
@media = args[:media]
|
||||
@caused_by_entry = args[:caused_by_entry]
|
||||
|
||||
logger.prefix = "[tweet #{@media.tweet_id.to_s.bold} / media #{@media.id.bold}]"
|
||||
logger.prefix =
|
||||
"[tweet #{@media.tweet_id.to_s.bold} / media #{@media.id.bold}]"
|
||||
|
||||
if @media.file
|
||||
@logger.warn("already have file, skipping")
|
||||
return
|
||||
end
|
||||
|
||||
response = http_client.get(
|
||||
@media.url_str,
|
||||
caused_by_entry: @caused_by_entry,
|
||||
)
|
||||
response =
|
||||
http_client.get(@media.url_str, caused_by_entry: @caused_by_entry)
|
||||
|
||||
logger.debug "#{HexUtil.humansize(response.log_entry.response.size)} / " +
|
||||
"#{response.log_entry.content_type} / " +
|
||||
@@ -27,9 +26,8 @@ class Domain::Twitter::Job::MediaJob < Domain::Twitter::Job::TwitterJobBase
|
||||
if [403, 404].include?(status_code)
|
||||
@media.inc_error_status_count(status_code)
|
||||
@media.state = "error"
|
||||
@media.state_detail = ({
|
||||
status_code_error: "got #{status_code} too many times, bailing",
|
||||
})
|
||||
@media.state_detail =
|
||||
({ status_code_error: "got #{status_code} too many times, bailing" })
|
||||
@media.save!
|
||||
return
|
||||
end
|
||||
|
||||
@@ -18,23 +18,30 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
|
||||
|
||||
logger.info("start tweet timeline scan (force: #{@force_scan.to_s.bold})")
|
||||
|
||||
if @user.state == "error" && @user.tweets_protected_error_proxies.include?(@proxy_name)
|
||||
fatal_error("abort scan, this proxy (#{@proxy_name}) is in error proxies: #{@user.tweets_protected_error_proxies}")
|
||||
if @user.state == "error" &&
|
||||
@user.tweets_protected_error_proxies.include?(@proxy_name)
|
||||
fatal_error(
|
||||
"abort scan, this proxy (#{@proxy_name}) is in error proxies: #{@user.tweets_protected_error_proxies}"
|
||||
)
|
||||
end
|
||||
|
||||
if !@force_scan && !@user.due_for_timeline_tweets_scan?
|
||||
logger.warn("scanned #{time_ago_in_words(@user.scanned_timeline_at)}, skipping")
|
||||
logger.warn(
|
||||
"scanned #{time_ago_in_words(@user.scanned_timeline_at)}, skipping"
|
||||
)
|
||||
return
|
||||
end
|
||||
|
||||
gallery_dl_client.start_twitter_user(
|
||||
@name || @user.name, caused_by_entry: @caused_by_entry,
|
||||
@name || @user.name,
|
||||
caused_by_entry: @caused_by_entry
|
||||
)
|
||||
|
||||
while true
|
||||
event = gallery_dl_client.next_message(
|
||||
caused_by_entry: @first_twitter_caused_by || @caused_by_entry,
|
||||
)
|
||||
event =
|
||||
gallery_dl_client.next_message(
|
||||
caused_by_entry: @first_twitter_caused_by || @caused_by_entry
|
||||
)
|
||||
fatal_error("nil event from gallery_dl_client") if event.nil?
|
||||
|
||||
case event
|
||||
@@ -56,11 +63,13 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
|
||||
end
|
||||
end
|
||||
|
||||
logger.info([
|
||||
"created #{@num_created_tweets.to_s.bold} tweets",
|
||||
"scanned #{@num_scanned_tweets.to_s.bold} tweets",
|
||||
"created #{@num_created_medias.to_s.bold} medias",
|
||||
].join(", "))
|
||||
logger.info(
|
||||
[
|
||||
"created #{@num_created_tweets.to_s.bold} tweets",
|
||||
"scanned #{@num_scanned_tweets.to_s.bold} tweets",
|
||||
"created #{@num_created_medias.to_s.bold} medias"
|
||||
].join(", ")
|
||||
)
|
||||
|
||||
@user.scanned_timeline_at = Time.now
|
||||
@user.state = "ok"
|
||||
@@ -89,7 +98,11 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
|
||||
|
||||
def maybe_extract_user_info(http_event)
|
||||
return unless http_event.response_code == 200
|
||||
return unless http_event.response_headers[:'content-type'].starts_with?("application/json")
|
||||
unless http_event.response_headers[:"content-type"].starts_with?(
|
||||
"application/json"
|
||||
)
|
||||
return
|
||||
end
|
||||
json = JSON.parse(http_event.body)
|
||||
return unless json
|
||||
typename = json.dig("data", "user", "result", "__typename")
|
||||
@@ -131,15 +144,19 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
|
||||
return nil
|
||||
end
|
||||
|
||||
logger.info("+ tweet (#{@num_created_tweets.to_s.bold}) #{tweet_hash[:id].to_s.bold}")
|
||||
logger.info(
|
||||
"+ tweet (#{@num_created_tweets.to_s.bold}) #{tweet_hash[:id].to_s.bold}"
|
||||
)
|
||||
|
||||
Domain::Twitter::Tweet.new({
|
||||
id: tweet_hash[:id],
|
||||
author: @user,
|
||||
content: tweet_hash[:content],
|
||||
reply_to_tweet_id: tweet_hash[:reply_to],
|
||||
tweeted_at: Time.at(tweet_hash[:date]),
|
||||
}).save!
|
||||
Domain::Twitter::Tweet.new(
|
||||
{
|
||||
id: tweet_hash[:id],
|
||||
author: @user,
|
||||
content: tweet_hash[:content],
|
||||
reply_to_tweet_id: tweet_hash[:reply_to],
|
||||
tweeted_at: Time.at(tweet_hash[:date])
|
||||
}
|
||||
).save!
|
||||
|
||||
@num_created_tweets += 1
|
||||
end
|
||||
@@ -152,23 +169,29 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
|
||||
return
|
||||
end
|
||||
|
||||
logger.info("+ media (#{@num_created_medias.to_s.bold}) #{media_event.filename.bold}")
|
||||
logger.info(
|
||||
"+ media (#{@num_created_medias.to_s.bold}) #{media_event.filename.bold}"
|
||||
)
|
||||
|
||||
media = Domain::Twitter::Media.new({
|
||||
id: media_event.filename,
|
||||
tweet_id: media_event.tweet_id,
|
||||
url_str: media_event.file_url,
|
||||
})
|
||||
media =
|
||||
Domain::Twitter::Media.new(
|
||||
{
|
||||
id: media_event.filename,
|
||||
tweet_id: media_event.tweet_id,
|
||||
url_str: media_event.file_url
|
||||
}
|
||||
)
|
||||
media.save!
|
||||
@num_created_medias += 1
|
||||
enqueue_media_file(media)
|
||||
end
|
||||
|
||||
def enqueue_media_file(media)
|
||||
defer_job(Domain::Twitter::Job::MediaJob, {
|
||||
media: media || raise,
|
||||
caused_by_entry: @first_twitter_caused_by,
|
||||
}, { priority: self.priority })
|
||||
defer_job(
|
||||
Domain::Twitter::Job::MediaJob,
|
||||
{ media: media || raise, caused_by_entry: @first_twitter_caused_by },
|
||||
{ priority: self.priority }
|
||||
)
|
||||
end
|
||||
|
||||
def update_user_from_tweet(tweet_event)
|
||||
@@ -176,7 +199,9 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
|
||||
|
||||
author = tweet_event.author
|
||||
raise("no tw_id on user") unless @user.tw_id
|
||||
raise("tw_id mismatch: #{@user.tw_id} != #{author[:id]}") unless @user.tw_id == author[:id]
|
||||
unless @user.tw_id == author[:id]
|
||||
raise("tw_id mismatch: #{@user.tw_id} != #{author[:id]}")
|
||||
end
|
||||
|
||||
@user.name = author[:name]
|
||||
@user.nick = author[:nick]
|
||||
|
||||
@@ -1,25 +1,19 @@
|
||||
class DbSampler
|
||||
SCHEMA = {
|
||||
::Domain::Fa::User => [
|
||||
:avatar,
|
||||
:posts,
|
||||
:disco,
|
||||
:follower_joins,
|
||||
:fav_post_joins,
|
||||
],
|
||||
::Domain::Fa::UserAvatar => [:file, :log_entry],
|
||||
::Domain::Fa::Post => [:file, :creator],
|
||||
::Domain::Fa::Follow => [:follower, :followed],
::Domain::Fa::Fav => [:user, :post],
::Domain::Fa::User => %i[avatar posts disco follower_joins fav_post_joins],
::Domain::Fa::UserAvatar => %i[file log_entry],
::Domain::Fa::Post => %i[file creator],
::Domain::Fa::Follow => %i[follower followed],
::Domain::Fa::Fav => %i[user post],
::Domain::Fa::UserFactor => [],
::BlobEntryP => [:base],
::HttpLogEntry => [
:request_headers,
:response_headers,
:response,
:caused_by_entry,
::HttpLogEntry => %i[
request_headers
response_headers
response
caused_by_entry
],
::HttpLogEntryHeader => [],
::HttpLogEntryHeader => []
}

def initialize(file)
@@ -28,13 +22,18 @@ class DbSampler
end

def export(url_names)
Domain::Fa::User.includes({
avatar: [:file, :log_entry],
follower_joins: [:follower, :followed],
posts: { file: :response },
}).where(url_name: url_names).each do |user|
handle_model(user, 0, 0)
end
Domain::Fa::User
.includes(
{
avatar: %i[file log_entry],
follower_joins: %i[follower followed],
posts: {
file: :response
}
}
)
.where(url_name: url_names)
.each { |user| handle_model(user, 0, 0) }
end

def import
@@ -57,7 +56,7 @@ class DbSampler
ReduxApplicationRecord.transaction do
deferred.each do |model|
import_model(model)
rescue
rescue StandardError
end
end
end
@@ -82,9 +81,12 @@ class DbSampler
$stderr.puts("skipped existing #{model_id(model)}")
else
model2 = model.class.new
model.attribute_names.map(&:to_sym).each do |attr|
model2.write_attribute(attr, model.read_attribute(attr))
end
model
.attribute_names
.map(&:to_sym)
.each do |attr|
model2.write_attribute(attr, model.read_attribute(attr))
end
model2.save(validate: false)
$stderr.puts("imported #{model_id(model)}")
end
@@ -99,15 +101,13 @@ class DbSampler
assocs = SCHEMA[model.class] || raise("invalid: #{model.class.name}")
assocs.each do |assoc|
if user_depth > 1
next unless [:avatar, :disco].include?(assoc)
next unless %i[avatar disco].include?(assoc)
end

model2 = model.send(assoc)
next unless model2
if model2.respond_to? :each
model2.each do |model3|
handle_model(model3, level + 1, user_depth)
end
model2.each { |model3| handle_model(model3, level + 1, user_depth) }
else
handle_model(model2, level + 1, user_depth)
end

@@ -1,11 +1,7 @@
require "csv"

class Domain::E621::CsvPostImporter < LegacyImport::BulkImportJob
def initialize(
csv_path:,
start_at:,
limit: nil
)
def initialize(csv_path:, start_at:, limit: nil)
logger.info "loading..."
@csv_file = CSV.new(File.open(csv_path, "r+"), headers: true)
logger.info "loaded"
@@ -57,12 +53,9 @@ class Domain::E621::CsvPostImporter < LegacyImport::BulkImportJob

logger.prefix = proc { "[e621_id #{e621_id.to_s.bold}]" }

post = Domain::E621::Post.find_by({
e621_id: e621_id,
}) || Domain::E621::Post.new({
e621_id: e621_id,
md5: md5,
})
post =
Domain::E621::Post.find_by({ e621_id: e621_id }) ||
Domain::E621::Post.new({ e621_id: e621_id, md5: md5 })

if post.md5 && post.md5 != md5
logger.error("md5 mismatch, skipping")
@@ -81,7 +74,8 @@ class Domain::E621::CsvPostImporter < LegacyImport::BulkImportJob
end

file_ext = row["file_ext"]
post.file_url_str = "https://static1.e621.net/data/#{md5[0...2]}/#{md5[2...4]}/#{md5}.#{file_ext}"
post.file_url_str =
"https://static1.e621.net/data/#{md5[0...2]}/#{md5[2...4]}/#{md5}.#{file_ext}"
post.description = row["description"]
post.rating = row["rating"]
post.score = row["score"].to_i
@@ -101,10 +95,7 @@ class Domain::E621::CsvPostImporter < LegacyImport::BulkImportJob
post.tags_array = row["tag_string"].split(/[\n\s]/).map(&:strip).sort

name_to_tag_id, missing_tags =
Domain::E621::TagUtil.tag_names_to_id_map(
post.tags_array,
posts: [post],
)
Domain::E621::TagUtil.tag_names_to_id_map(post.tags_array, posts: [post])
name_to_tag_id.merge!(
Domain::E621::TagUtil.create_tags_from_names(missing_tags)
)
@@ -115,13 +106,11 @@ class Domain::E621::CsvPostImporter < LegacyImport::BulkImportJob
Domain::E621::TagUtil.update_tags_on_post(
post,
post.tags_array,
name_to_tag_id,
name_to_tag_id
)
end

Domain::E621::Job::StaticFileJob.perform_later({
post: post,
}) if new_record
Domain::E621::Job::StaticFileJob.perform_later({ post: post }) if new_record

logger.info("updated post (new? #{new_record ? "yes" : "no"})")

@@ -8,15 +8,13 @@ module Domain::E621::TagUtil
tags = Domain::E621::Tag.where(name: tag_names)
name_to_tag_id = tags.map { |tag| [tag.name, tag.id] }.to_h
posts.each do |post|
post.tags.each do |tag|
name_to_tag_id[tag.name] = tag.id
end
post.tags.each { |tag| name_to_tag_id[tag.name] = tag.id }
end
[
# main tag map
name_to_tag_id,
# missing tags
tag_names - tags.map(&:name),
tag_names - tags.map(&:name)
]
end

@@ -24,18 +22,16 @@ module Domain::E621::TagUtil
return {} unless tag_names.any?

logger.info("creating #{tag_names.size.to_s.bold} missing tags")
upsert_hashes = tag_names.map do |name|
{ name: name }
end
upsert_hashes = tag_names.map { |name| { name: name } }
name_to_tag_id = {}
Domain::E621::Tag.upsert_all(
upsert_hashes,
unique_by: :name,
update_only: :name,
returning: %i[id name],
).each do |row|
name_to_tag_id[row["name"]] = row["id"]
end
Domain::E621::Tag
.upsert_all(
upsert_hashes,
unique_by: :name,
update_only: :name,
returning: %i[id name]
)
.each { |row| name_to_tag_id[row["name"]] = row["id"] }
name_to_tag_id
end

@@ -63,43 +59,30 @@ module Domain::E621::TagUtil
raise("post must be persisted") if post.new_record?

if to_remove.any?
to_remove_ids = to_remove.
map { |tacos|
name = if tacos.is_a?(TagAndCategory)
tacos.name
else
tacos
end
name_to_tag_id[name] || raise(
"invariant: #{name} not in id map"
)
}
to_remove_ids =
to_remove.map do |tacos|
name = (tacos.is_a?(TagAndCategory) ? tacos.name : tacos)
name_to_tag_id[name] || raise("invariant: #{name} not in id map")
end

post.
taggings.
where(tag_id: to_remove_ids).
delete_all
post.taggings.where(tag_id: to_remove_ids).delete_all
end

post.taggings.insert_all!(to_add.map do |tacos|
name = tag_and_cat_or_str_to_name(tacos)
id = name_to_tag_id[name] || raise(
"invariant: #{name} not in id map"
)
post.taggings.insert_all!(
to_add.map do |tacos|
name = tag_and_cat_or_str_to_name(tacos)
id = name_to_tag_id[name] || raise("invariant: #{name} not in id map")

if tacos.is_a?(TagAndCategory)
{ tag_id: id, category: tacos.category }
else
{ tag_id: id, category: "cat_general" }
if tacos.is_a?(TagAndCategory)
{ tag_id: id, category: tacos.category }
else
{ tag_id: id, category: "cat_general" }
end
end
end) if to_add.any?
) if to_add.any?
end

def self.tag_and_cat_or_str_to_name(tacos)
if tacos.is_a?(TagAndCategory)
tacos.name
else
tacos
end
tacos.is_a?(TagAndCategory) ? tacos.name : tacos
end
end

@@ -3,20 +3,19 @@ class Domain::Fa::FactorCalculator

def initialize(epochs = 20)
factors = Domain::Fa::UserFactor::FACTORS_WIDTHS
@recommender = Disco::Recommender.new(
factors: factors,
epochs: epochs,
)
@recommender = Disco::Recommender.new(factors: factors, epochs: epochs)
logger.info "epochs=#{epochs.to_s.bold} factors=#{factors.to_s.bold}"
end

def fit
logger.info "loading follow rows..."
dataset = measure(proc { |r| "loaded #{r.length.to_s.bold} follows" }) do
Domain::Fa::Follow.all.pluck(:follower_id, :followed_id).map do |id1, id2|
{ user_id: id1, item_id: id2 }
dataset =
measure(proc { |r| "loaded #{r.length.to_s.bold} follows" }) do
Domain::Fa::Follow
.all
.pluck(:follower_id, :followed_id)
.map { |id1, id2| { user_id: id1, item_id: id2 } }
end
end

measure("fit #{dataset.length.to_s.bold} follows") do
@recommender.fit(dataset)
@@ -25,8 +24,10 @@ class Domain::Fa::FactorCalculator

def write_factors
total = 0
for_followed_width = Domain::Fa::UserFactor.native_factor_width("for_followed")
for_follower_width = Domain::Fa::UserFactor.native_factor_width("for_follower")
for_followed_width =
Domain::Fa::UserFactor.native_factor_width("for_followed")
for_follower_width =
Domain::Fa::UserFactor.native_factor_width("for_follower")

measure("#{"for_followed".bold} - done") do
write_factors_col(:item_ids, :item_factors, :for_followed)
@@ -39,30 +40,31 @@ class Domain::Fa::FactorCalculator
def write_factors_col(id_list_name, getter_name, factors_col_name)
total = 0
id_list = @recommender.send(id_list_name)
native_col_width = Domain::Fa::UserFactor.
columns_hash[factors_col_name.to_s].
sql_type_metadata.
limit
native_col_width =
Domain::Fa::UserFactor.columns_hash[
factors_col_name.to_s
].sql_type_metadata.limit

logger.info "#{factors_col_name.to_s.bold} - writing #{id_list.length.to_s.bold} factors"

id_list.map do |user_id|
factors = @recommender.send(getter_name, user_id)
padding = [0.0] * (native_col_width - factors.length)
{
user_id: user_id,
factors_col_name => padding + factors.to_a,
}
end.each_slice(20000) do |chunk|
total += chunk.size
measure(" -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)") do
Domain::Fa::UserFactor.upsert_all(
chunk,
unique_by: :user_id,
update_only: factors_col_name,
returning: :id,
)
id_list
.map do |user_id|
factors = @recommender.send(getter_name, user_id)
padding = [0.0] * (native_col_width - factors.length)
{ :user_id => user_id, factors_col_name => padding + factors.to_a }
end
.each_slice(20_000) do |chunk|
total += chunk.size
measure(
" -> wrote chunk of #{chunk.size.to_s.bold} - (#{total.to_s.bold} total)"
) do
Domain::Fa::UserFactor.upsert_all(
chunk,
unique_by: :user_id,
update_only: factors_col_name,
returning: :id
)
end
end
end
end
end

@@ -2,17 +2,20 @@ module Domain::Fa::HasCountFailedInQueue
extend ActiveSupport::Concern
included do
def count_failed_in_queue(queue_name)
GoodJob::Job.where(
queue_name: queue_name,
finished_at: nil,
performed_at: nil,
error: nil,
).where(
[
"(serialized_params->'exception_executions' = '{}')",
"(serialized_params->'exception_executions' is null)",
].join(" OR ")
).count
GoodJob::Job
.where(
queue_name: queue_name,
finished_at: nil,
performed_at: nil,
error: nil
)
.where(
[
"(serialized_params->'exception_executions' = '{}')",
"(serialized_params->'exception_executions' is null)"
].join(" OR ")
)
.count
end
end
end

@@ -8,11 +8,12 @@ class Domain::Fa::Parser::ListedSubmissionParserHelper
end

def id
@id ||= /\/view\/(\d+)/.match(view_path).try(:[], 1).try(:to_i)
@id ||= %r{/view/(\d+)}.match(view_path).try(:[], 1).try(:to_i)
end

def artist
@artist ||= if !@new_parse_mode
@artist ||=
if !@new_parse_mode
@elem.css("small a").first.try(:text)
else
@elem.css("figcaption p").last.css("a").text
@@ -20,7 +21,8 @@ class Domain::Fa::Parser::ListedSubmissionParserHelper
end

def artist_user_page_path
@artist_user_page_path ||= if !@new_parse_mode
@artist_user_page_path ||=
if !@new_parse_mode
@elem.css("small a").first.try(:[], "href")
else
@elem.css("figcaption p").last.css("a").first["href"]
@@ -32,7 +34,8 @@ class Domain::Fa::Parser::ListedSubmissionParserHelper
end

def title
@title ||= if !@new_parse_mode
@title ||=
if !@new_parse_mode
@elem.css("> span").first["title"]
else
@elem.css("figcaption p").first.css("a").first["title"]
@@ -40,7 +43,8 @@ class Domain::Fa::Parser::ListedSubmissionParserHelper
end

def view_path
@view_path ||= if !@new_parse_mode
@view_path ||=
if !@new_parse_mode
@elem.css("u s a").first["href"]
else
@elem.css("b u a").first["href"]
@@ -48,7 +52,8 @@ class Domain::Fa::Parser::ListedSubmissionParserHelper
end

def thumb_path
@thumb_path ||= if !@new_parse_mode
@thumb_path ||=
if !@new_parse_mode
@elem.css("u s a img").first["src"]
else
@elem.css("b u a img").first["src"]

@@ -11,16 +11,19 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
VERSION_2 = :redux

def initialize(page_html, require_logged_in: true)
@page = if page_html.is_a? Nokogiri::HTML::Document
@page =
if page_html.is_a? Nokogiri::HTML::Document
page_html
else
phtml = page_html.delete("\u0000")
@phtml = phtml
Nokogiri::HTML(phtml)
Nokogiri.HTML(phtml)
end
@page_version = if @page.css("link[href='/themes/beta/img/favicon.ico']").first
@page_version =
if @page.css("link[href='/themes/beta/img/favicon.ico']").first
VERSION_2
elsif @page.css(".submission-list section").first || @page.css("meta[property='twitter:site']").first
elsif @page.css(".submission-list section").first ||
@page.css("meta[property='twitter:site']").first
VERSION_1
else
VERSION_0
@@ -40,19 +43,18 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
def submission_not_found?
# the username elem is never shown on a "not found" page
return false if logged_in_user_elem
not_found_text = "The submission you are trying to find is not in our database"
not_found_text =
"The submission you are trying to find is not in our database"

case @page_version
when VERSION_2
@page.css("body .section-body")&.
first&.
text&.
include?(not_found_text)
@page.css("body .section-body")&.first&.text&.include?(not_found_text)
else
@page.css("table.maintable td.alt1 font[size=1]")&.
first&.
text&.
include?(not_found_text)
@page
.css("table.maintable td.alt1 font[size=1]")
&.first
&.text
&.include?(not_found_text)
end
end

@@ -61,31 +63,41 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
end

def submissions_parsed
@submissions_hashes ||= submission_elems.map do |elem|
Domain::Fa::Parser::ListedSubmissionParserHelper.new(elem, @page_version)
end
@submissions_hashes ||=
submission_elems.map do |elem|
Domain::Fa::Parser::ListedSubmissionParserHelper.new(
elem,
@page_version
)
end
end

def favorites_next_button_id
button = @page.css(".gallery-section .pagination a.button.right").first
if button
href = button["href"]
match = /\/favorites\/.+\/(\d+)\/next\/?/.match(href)
match = %r{/favorites/.+/(\d+)/next/?}.match(href)
raise("invalid favs button uri #{href}") unless match
match[1].to_i
end
end

def submission_folders
@submission_folders ||= @page.css(".folder-list a.dotted").map do |folder_link|
{ href: folder_link["href"], title: folder_link.text }
end
@submission_folders ||=
@page
.css(".folder-list a.dotted")
.map do |folder_link|
{ href: folder_link["href"], title: folder_link.text }
end
end

def submission_elems
@submission_elems ||= case @page_version
when VERSION_0 then @page.css(".t-image")
when VERSION_1 then @page.css(".submission-list > .gallery > figure")
@submission_elems ||=
case @page_version
when VERSION_0
@page.css(".t-image")
when VERSION_1
@page.css(".submission-list > .gallery > figure")
when VERSION_2
[
# user gallery pages
@@ -98,11 +110,10 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
"#gallery-frontpage-submissions > figure",
"#gallery-frontpage-writing > figure",
"#gallery-frontpage-music > figure",
"#gallery-frontpage-crafts > figure",
].lazy.map do |css|
@page.css(css)
end.reject(&:empty?).to_a.flatten
else unimplemented_version!
"#gallery-frontpage-crafts > figure"
].lazy.map { |css| @page.css(css) }.reject(&:empty?).to_a.flatten
else
unimplemented_version!
end
end

@@ -118,7 +129,8 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
end

def logged_in_user_elem
@logged_in_user_elem ||= case @page_version
@logged_in_user_elem ||=
case @page_version
when VERSION_0, VERSION_1
@page.css("li a#my-username").first
when VERSION_2
@@ -132,17 +144,29 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
# center.flow seems to wrap all lists of submissions
case @page_version
when VERSION_0
first_matching_css(@page, [".submission-list", "td > section.gallery"]) ? true : false
if first_matching_css(@page, [".submission-list", "td > section.gallery"])
true
else
false
end
when VERSION_1
(@page.css(".submission-list").first) ? true : false
when VERSION_2
first_matching_css(@page, [
".submission-list",
"#gallery-browse",
"#gallery-favorites",
"#gallery-frontpage-submissions",
]) ? true : false
else unimplemented_version!
if first_matching_css(
@page,
%w[
.submission-list
#gallery-browse
#gallery-favorites
#gallery-frontpage-submissions
]
)
true
else
false
end
else
unimplemented_version!
end
end

@@ -166,12 +190,18 @@ class Domain::Fa::Parser::Page < Domain::Fa::Parser::Base
@page.css("#submissionImg").first ? true : false
when VERSION_2
@page.css(".submission-content").first ? true : false
else unimplemented_version!
else
unimplemented_version!
end
end

def submission
@submission ||= Domain::Fa::Parser::SubmissionParserHelper.new(@page, @phtml, @page_version)
@submission ||=
Domain::Fa::Parser::SubmissionParserHelper.new(
@page,
@phtml,
@page_version
)
end

def user_list
@@ -189,7 +219,5 @@ def elem_after_text_match(children, regex)
end

def elem_idx_after_text_match(children, regex)
children.find_index do |child|
child.text.match(regex)
end
children.find_index { |child| child.text.match(regex) }
end

@@ -11,7 +11,8 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base

def id
# @elem.css("form[name=myform]").first['action'].split("/").last.to_i
@id ||= begin
@id ||=
begin
elem = @elem.css("meta[property='og:url']").first
elem["content"].split("/").reject(&:empty?).last.to_i if elem
end
@@ -28,28 +29,33 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
@elem.css("#page-submission td.cat b").first.text.strip
when VERSION_2
@elem.css(".submission-title p").first.text.strip
else unimplemented_version!
else
unimplemented_version!
end
end

def artist
# @elem.css(".cat a").first.text.strip
@artist ||= case @page_version
@artist ||=
case @page_version
when VERSION_0, VERSION_1
@elem.css("#page-submission table.maintable td.cat a").first.text.strip
when VERSION_2
@elem.css(".submission-id-sub-container a")&.first&.text&.strip
else unimplemented_version!
else
unimplemented_version!
end
end

def artist_user_page_path
@artist_user_page_path ||= case @page_version
@artist_user_page_path ||=
case @page_version
when VERSION_2
@elem.css(".submission-id-sub-container a")&.first["href"]
when VERSION_0, VERSION_1
@elem.css("table[align=center] td.cat a")&.first["href"]
else unimplemented_version!
else
unimplemented_version!
end
end

@@ -58,12 +64,14 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
end

def artist_avatar_url
@artist_avatar_url ||= case @page_version
@artist_avatar_url ||=
case @page_version
when VERSION_2
@elem.css(".submission-user-icon.avatar")&.first&.[]("src")
when VERSION_0, VERSION_1
@elem.css("a img.avatar")&.first&.[]("src")
else unimplemented_version!
else
unimplemented_version!
end
end

@@ -75,7 +83,8 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
@elem.css("#page-submission td.alt1 .maintable tr .alt1").last.inner_html
when VERSION_2
@elem.css(".submission-description").first.inner_html
else unimplemented_version!
else
unimplemented_version!
end
end

@@ -87,15 +96,19 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
when VERSION_1
@elem.css("#page-submission div b a")[1]["href"].strip
when VERSION_2
@elem.css("a.button.standard.mobile-fix").find do |elem|
elem.text.strip == "Download"
end["href"]
else unimplemented_version!
@elem
.css("a.button.standard.mobile-fix")
.find { |elem| elem.text.strip == "Download" }[
"href"
]
else
unimplemented_version!
end
end

def posted_date
@posted_date ||= case @page_version
@posted_date ||=
case @page_version
# when VERSION_0
# info_child(6)["title"].strip
when VERSION_0, VERSION_1
@@ -123,110 +136,138 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
else
raise("unable to determine rating")
end
else unimplemented_version!
else
unimplemented_version!
end
end

def category
@category ||= case @page_version
@category ||=
case @page_version
when VERSION_0, VERSION_1
elem_after_text_match(info_children, /Category/).text.strip
when VERSION_2
category_full_str_redux&.split(" / ")&.first&.strip
else unimplemented_version!
else
unimplemented_version!
end
end

def theme
@theme ||= case @page_version
@theme ||=
case @page_version
when VERSION_0, VERSION_1
elem_after_text_match(info_children, /Theme/).text.strip
when VERSION_2
category_full_str_redux&.split(" / ")&.last&.strip
else unimplemented_version!
else
unimplemented_version!
end
end

# FA started combining "Category / Theme" string into one
def category_full_str_redux
@category_full_str_redux ||= case @page_version
@category_full_str_redux ||=
case @page_version
when VERSION_2
info_text_value_redux("Category")
else unimplemented_version!
else
unimplemented_version!
end
end

def species
@species ||= case @page_version
@species ||=
case @page_version
when VERSION_0, VERSION_1
elem_after_text_match(info_children, /Species/).try(:text).try(:strip)
when VERSION_2
info_text_value_redux("Species")
else unimplemented_version!
else
unimplemented_version!
end
end

def gender
@gender ||= case @page_version
@gender ||=
case @page_version
when VERSION_0, VERSION_1
elem_after_text_match(info_children, /Gender/).try(:text).try(:strip)
when VERSION_2
info_text_value_redux("Gender")
else unimplemented_version!
else
unimplemented_version!
end
end

def num_favorites
@num_favorites ||= case @page_version
@num_favorites ||=
case @page_version
when VERSION_0, VERSION_1
elem_after_text_match(info_children, /Favorites/).text.strip.to_i
when VERSION_2
stats_container_redux.css(".favorites .font-large").first.text.strip.to_i
else unimplemented_version!
stats_container_redux
.css(".favorites .font-large")
.first
.text
.strip
.to_i
else
unimplemented_version!
end
end

def num_comments
@num_comments ||= case @page_version
@num_comments ||=
case @page_version
when VERSION_0, VERSION_1
elem_after_text_match(info_children, /Comments/).text.strip.to_i
when VERSION_2
stats_container_redux.css(".comments .font-large").first.text.strip.to_i
else unimplemented_version!
else
unimplemented_version!
end
end

def num_views
@num_views ||= case @page_version
@num_views ||=
case @page_version
when VERSION_0, VERSION_1
elem_after_text_match(info_children, /Views/).text.strip.to_i
when VERSION_2
stats_container_redux.css(".views .font-large").first.text.strip.to_i
else unimplemented_version!
else
unimplemented_version!
end
end

def resolution_str
@resolution_str ||= case @page_version
@resolution_str ||=
case @page_version
when VERSION_0
elem_after_text_match(info_children, /Resolution/).try(:text).try(:strip)
elem_after_text_match(info_children, /Resolution/).try(:text).try(
:strip
)
when VERSION_1
idx = elem_idx_after_text_match(info_children, /Resolution/)
info_children[idx + 1].try(:text).try(:strip)
when VERSION_2
parts = info_text_value_redux("Size").split(" ")
parts.first + "x" + parts.last
else unimplemented_version!
else
unimplemented_version!
end
end

def keywords_array
@keywords_array ||= case @page_version
@keywords_array ||=
case @page_version
when VERSION_0, VERSION_1
@elem.css("#keywords a").map(&:text)
when VERSION_2
@elem.css(".tags-row .tags a").map(&:text).map(&:strip)
else unimplemented_version!
else
unimplemented_version!
end
end

@@ -241,12 +282,14 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
end

def information_elem
@information_elem ||= case @page_version
@information_elem ||=
case @page_version
# when VERSION_0
# @elem.css("td td td td td td.alt1[align=left]").first
when VERSION_0, VERSION_1
@elem.css("#page-submission td td td td.alt1[align=left]").first
else unimplemented_version!
else
unimplemented_version!
end
end

@@ -255,10 +298,14 @@ class Domain::Fa::Parser::SubmissionParserHelper < Domain::Fa::Parser::Base
end

def info_text_value_redux(info_section)
info_text_elem_redux.
css(".highlight").
find { |e| e.text == info_section }&.
parent&.children&.slice(1..)&.text&.strip
info_text_elem_redux
.css(".highlight")
.find { |e| e.text == info_section }
&.parent
&.children
&.slice(1..)
&.text
&.strip
end

def stats_container_redux
|
||||
@@ -1,15 +1,10 @@
|
||||
class Domain::Fa::Parser::UserListParserHelper
|
||||
User = Struct.new(
|
||||
:name,
|
||||
:url_name,
|
||||
:href,
|
||||
keyword_init: true,
|
||||
)
|
||||
User = Struct.new(:name, :url_name, :href, keyword_init: true)
|
||||
|
||||
def self.user_list(page)
|
||||
page.css(".watch-list .watch-list-items").map do |elem|
|
||||
watch_list_item_to_user_struct(elem)
|
||||
end
|
||||
page
|
||||
.css(".watch-list .watch-list-items")
|
||||
.map { |elem| watch_list_item_to_user_struct(elem) }
|
||||
end
|
||||
|
||||
private
|
||||
@@ -21,10 +16,6 @@ class Domain::Fa::Parser::UserListParserHelper
|
||||
# strip off '/user/' prefix
|
||||
url_name = href.split("/").reject(&:blank?).last
|
||||
|
||||
User.new(
|
||||
name: link.text,
|
||||
url_name: url_name,
|
||||
href: href,
|
||||
)
|
||||
User.new(name: link.text, url_name: url_name, href: href)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -13,13 +13,15 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
"~", # regular user
|
||||
"!", # suspended
|
||||
"-", # banned (permanent)
|
||||
"@", # admin
|
||||
"∞", # deceased
|
||||
"@", # admin
|
||||
"∞" # deceased
|
||||
]
|
||||
|
||||
def name
|
||||
@name ||= begin
|
||||
elem = case @page_version
|
||||
@name ||=
|
||||
begin
|
||||
elem =
|
||||
case @page_version
|
||||
when VERSION_0, VERSION_1
|
||||
elem = @elem.css(".addpad.lead b")
|
||||
when VERSION_2
|
||||
@@ -28,16 +30,15 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
unimplemented_version!
|
||||
end
|
||||
|
||||
name = elem&.
|
||||
first&.
|
||||
text&.
|
||||
strip
|
||||
name = elem&.first&.text&.strip
|
||||
|
||||
if name
|
||||
if @elem.css("userpage-nav-header img.userIcon.type-admin").first
|
||||
# in newer versions of FA user pages, admins have no '@' prefix,
|
||||
# but rather an indicator image icon
|
||||
raise("invalid prefix for admin user name: #{name}") if PREFIXES.include?(name[0])
|
||||
if PREFIXES.include?(name[0])
|
||||
raise("invalid prefix for admin user name: #{name}")
|
||||
end
|
||||
name
|
||||
elsif PREFIXES.include?(name[0])
|
||||
name[1..]
|
||||
@@ -54,27 +55,31 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
end
|
||||
|
||||
def account_status
|
||||
@account_status ||= begin
|
||||
@account_status ||=
|
||||
begin
|
||||
if @elem.css("userpage-nav-header img.userIcon.type-admin").first
|
||||
:admin
|
||||
else
|
||||
elem = case @page_version
|
||||
elem =
|
||||
case @page_version
|
||||
when VERSION_2
|
||||
@elem.css("userpage-nav-user-details username")
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
name = elem&.
|
||||
first&.
|
||||
text&.
|
||||
strip || ""
|
||||
name = elem&.first&.text&.strip || ""
|
||||
|
||||
case name[0]
|
||||
when "~" then :active
|
||||
when "!" then :suspended
|
||||
when "-" then :banned
|
||||
when "∞" then :deceased
|
||||
else nil
|
||||
when "~"
|
||||
:active
|
||||
when "!"
|
||||
:suspended
|
||||
when "-"
|
||||
:banned
|
||||
when "∞"
|
||||
:deceased
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -85,29 +90,38 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
end
|
||||
|
||||
def artist_type
|
||||
@artist_type ||= elem_after_text_match(main_about.children, /Type/).try(:text).try(:strip)
|
||||
@artist_type ||=
|
||||
elem_after_text_match(main_about.children, /Type/).try(:text).try(:strip)
|
||||
end
|
||||
|
||||
def profile_thumb_url
|
||||
@profile_thumb_url ||= case @page_version
|
||||
@profile_thumb_url ||=
|
||||
case @page_version
|
||||
when VERSION_0
|
||||
@elem.css(".addpad.alt1 a img.avatar").first.try(:[], "src")
|
||||
when VERSION_2
|
||||
@elem.css("userpage-nav-avatar a.current img").first.try(:[], "src")
|
||||
else unimplemented_version!
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
end
|
||||
|
||||
def registered_since
|
||||
@registered_since ||= case @page_version
|
||||
@registered_since ||=
|
||||
case @page_version
|
||||
when VERSION_0, VERSION_1
|
||||
elem_after_text_match(main_about.children, /Registered/).text.strip
|
||||
when VERSION_2
|
||||
date_str = @elem.css("username span").find do |elem|
|
||||
elem&.text&.strip == "Registered:"
|
||||
end&.next_sibling&.text&.strip
|
||||
date_str =
|
||||
@elem
|
||||
.css("username span")
|
||||
.find { |elem| elem&.text&.strip == "Registered:" }
|
||||
&.next_sibling
|
||||
&.text
|
||||
&.strip
|
||||
DateTime.parse(date_str) if date_str
|
||||
else unimplemented_version!
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
end
|
||||
|
||||
@@ -144,23 +158,29 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
end
|
||||
|
||||
def recent_fav_fa_ids
|
||||
@recent_favs ||= case @page_version
|
||||
@recent_favs ||=
|
||||
case @page_version
|
||||
when VERSION_2
|
||||
@elem.css("#gallery-latest-favorites").first&.css("figure a")&.map do |elem|
|
||||
href = elem["href"]
|
||||
/\/view\/(\d+)/.match(href)[1]&.to_i || raise(
|
||||
"invalid url: #{href}"
|
||||
)
|
||||
end || []
|
||||
else unimplemented_version!
|
||||
@elem
|
||||
.css("#gallery-latest-favorites")
|
||||
.first
|
||||
&.css("figure a")
|
||||
&.map do |elem|
|
||||
href = elem["href"]
|
||||
%r{/view/(\d+)}.match(href)[1]&.to_i ||
|
||||
raise("invalid url: #{href}")
|
||||
end || []
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
end
|
||||
|
||||
RecentUser = Struct.new(:name, :url_name) do
|
||||
def to_a
|
||||
[name, url_name]
|
||||
RecentUser =
|
||||
Struct.new(:name, :url_name) do
|
||||
def to_a
|
||||
[name, url_name]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def recent_watchers
|
||||
@recent_watchers ||= recent_users_for_section("Recent Watchers")
|
||||
@@ -175,54 +195,48 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
def recent_users_for_section(section_name)
|
||||
case @page_version
|
||||
when VERSION_2
|
||||
section_elem = @elem.css(".userpage-section-left").find do |elem|
|
||||
elem.css(".section-header h2")&.first&.text&.strip == section_name
|
||||
end
|
||||
section_elem =
|
||||
@elem
|
||||
.css(".userpage-section-left")
|
||||
.find do |elem|
|
||||
elem.css(".section-header h2")&.first&.text&.strip == section_name
|
||||
end
|
||||
|
||||
section_elem = section_elem.css(".section-body").first
|
||||
section_elem.css("a").map do |link_elem|
|
||||
href = link_elem["href"]
|
||||
url_name = /\/user\/(.+)\//.match(href)&.[](1) || raise(
|
||||
"invalid url: #{href}"
|
||||
)
|
||||
name = link_elem.css(".artist_name").first.text.strip
|
||||
RecentUser.new(name, url_name)
|
||||
end
|
||||
else unimplemented_version!
|
||||
section_elem
|
||||
.css("a")
|
||||
.map do |link_elem|
|
||||
href = link_elem["href"]
|
||||
url_name =
|
||||
%r{/user/(.+)/}.match(href)&.[](1) || raise("invalid url: #{href}")
|
||||
name = link_elem.css(".artist_name").first.text.strip
|
||||
RecentUser.new(name, url_name)
|
||||
end
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
end
|
||||
|
||||
def stat_value(legacy_name, redux_idx)
|
||||
legacy_map = if false # old mode?
|
||||
{
|
||||
pvs: 2,
|
||||
subs: 5,
|
||||
crec: 8,
|
||||
cgiv: 11,
|
||||
njr: 14,
|
||||
nfav: 17,
|
||||
}
|
||||
legacy_map =
|
||||
if false # old mode?
|
||||
{ pvs: 2, subs: 5, crec: 8, cgiv: 11, njr: 14, nfav: 17 }
|
||||
else
|
||||
{
|
||||
pvs: 2,
|
||||
subs: 6,
|
||||
crec: 10,
|
||||
cgiv: 14,
|
||||
njr: 18,
|
||||
nfav: 22,
|
||||
}
|
||||
{ pvs: 2, subs: 6, crec: 10, cgiv: 14, njr: 18, nfav: 22 }
|
||||
end
|
||||
|
||||
value = case @page_version
|
||||
value =
|
||||
case @page_version
|
||||
when VERSION_0, VERSION_1
|
||||
statistics.children[legacy_map[legacy_name] || raise].text.strip.to_i
|
||||
when VERSION_2
|
||||
statistics.css(".highlight")[redux_idx]&.next_sibling&.text&.strip&.to_i
|
||||
else unimplemented_version!
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
|
||||
# FA databases can be a little weird
|
||||
if value >= (2 ** 32 - 1)
|
||||
if value >= (2**32 - 1)
|
||||
0
|
||||
else
|
||||
value
|
||||
@@ -230,24 +244,30 @@ class Domain::Fa::Parser::UserPageHelper < Domain::Fa::Parser::Base
|
||||
end
|
||||
|
||||
def statistics
|
||||
@statistics ||= case @page_version
|
||||
@statistics ||=
|
||||
case @page_version
|
||||
when VERSION_0, VERSION_1
|
||||
@elem.css(".ldot table tr:last-child td").first
|
||||
when VERSION_2
|
||||
@elem.css(".userpage-layout-right-col-content .userpage-section-right").find do |child|
|
||||
child.css(".section-header h2")&.first&.text&.strip == "Stats"
|
||||
end
|
||||
else unimplemented_version!
|
||||
@elem
|
||||
.css(".userpage-layout-right-col-content .userpage-section-right")
|
||||
.find do |child|
|
||||
child.css(".section-header h2")&.first&.text&.strip == "Stats"
|
||||
end
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
end
|
||||
|
||||
def main_about
|
||||
@main_about ||= case @page_version
|
||||
@main_about ||=
|
||||
case @page_version
|
||||
when VERSION_0, VERSION_1
|
||||
@elem.css(".alt1.addpad .ldot").first
|
||||
when VERSION_2
|
||||
@elem.css(".section-body.userpage-profile").first
|
||||
else unimplemented_version!
|
||||
else
|
||||
unimplemented_version!
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -4,42 +4,52 @@ class Domain::Fa::PostEnqueuer
|
||||
include HasMeasureDuration
|
||||
include Domain::Fa::HasCountFailedInQueue
|
||||
|
||||
def initialize(reverse_scan_holes:, start_at:, low_water_mark:, high_water_mark:)
|
||||
def initialize(
|
||||
reverse_scan_holes:,
|
||||
start_at:,
|
||||
low_water_mark:,
|
||||
high_water_mark:
|
||||
)
|
||||
@low_water_mark = low_water_mark
|
||||
@high_water_mark = high_water_mark
|
||||
raise if @high_water_mark <= @low_water_mark
|
||||
@post_iterator = Enumerator.new do |e|
|
||||
if reverse_scan_holes
|
||||
while start_at > 0
|
||||
if !Domain::Fa::Post.exists?(fa_id: start_at)
|
||||
e << [nil, start_at, nil]
|
||||
@post_iterator =
|
||||
Enumerator.new do |e|
|
||||
if reverse_scan_holes
|
||||
while start_at > 0
|
||||
if !Domain::Fa::Post.exists?(fa_id: start_at)
|
||||
e << [nil, start_at, nil]
|
||||
end
|
||||
start_at -= 1
|
||||
end
|
||||
start_at -= 1
|
||||
end
|
||||
else
|
||||
Domain::Fa::Post.
|
||||
where("id >= ?", start_at).
|
||||
where("file_id is null").
|
||||
where(state: "ok").
|
||||
pluck_each(:id, :fa_id, :file_url_str) do |p|
|
||||
e << p
|
||||
else
|
||||
Domain::Fa::Post
|
||||
.where("id >= ?", start_at)
|
||||
.where("file_id is null")
|
||||
.where(state: "ok")
|
||||
.pluck_each(:id, :fa_id, :file_url_str) { |p| e << p }
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def run_once
|
||||
already_enqueued = enqueued_count
|
||||
if already_enqueued <= @low_water_mark
|
||||
to_enqueue = @high_water_mark - already_enqueued
|
||||
logger.info("enqueuing #{to_enqueue.to_s.bold} more posts - #{already_enqueued.to_s.bold} already enqueued")
|
||||
rows = measure(proc { |p| "gather #{p.length.to_s.bold} posts to enqueue" }) do
|
||||
to_enqueue.times.map do
|
||||
@post_iterator.next
|
||||
rescue StopIteration
|
||||
nil
|
||||
end.reject(&:nil?)
|
||||
end
|
||||
logger.info(
|
||||
"enqueuing #{to_enqueue.to_s.bold} more posts - #{already_enqueued.to_s.bold} already enqueued"
|
||||
)
|
||||
rows =
|
||||
measure(proc { |p| "gather #{p.length.to_s.bold} posts to enqueue" }) do
|
||||
to_enqueue
|
||||
.times
|
||||
.map do
|
||||
@post_iterator.next
|
||||
rescue StopIteration
|
||||
nil
|
||||
end
|
||||
.reject(&:nil?)
|
||||
end
|
||||
|
||||
measure("enqueue jobs") do
|
||||
rows.each do |post_id, fa_id, file_url_str|
|
||||
@@ -72,6 +82,6 @@ class Domain::Fa::PostEnqueuer
|
||||
private
|
||||
|
||||
def enqueued_count
|
||||
count_failed_in_queue(["static_file", "fa_post"])
|
||||
count_failed_in_queue(%w[static_file fa_post])
|
||||
end
|
||||
end
|
||||
|
||||
@@ -8,41 +8,35 @@ class Domain::Fa::SqliteExporter
|
||||
end
|
||||
|
||||
def run
|
||||
measure("created tables") do
|
||||
migrate
|
||||
measure("created tables") { migrate }
|
||||
|
||||
measure("drop indexes") { drop_indexes }
|
||||
|
||||
if dump_table?(:users)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa users" }) do
|
||||
dump_fa_users
|
||||
end
|
||||
end
|
||||
|
||||
measure("drop indexes") do
|
||||
drop_indexes
|
||||
if dump_table?(:follows)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa follows" }) do
|
||||
dump_fa_follows
|
||||
end
|
||||
end
|
||||
|
||||
measure(proc do |num|
|
||||
"dumped #{num&.to_s&.bold} fa users"
|
||||
end) do
|
||||
dump_fa_users
|
||||
end if dump_table?(:users)
|
||||
|
||||
measure(proc do |num|
|
||||
"dumped #{num&.to_s&.bold} fa follows"
|
||||
end) do
|
||||
dump_fa_follows
|
||||
end if dump_table?(:follows)
|
||||
|
||||
measure(proc do |num|
|
||||
"dumped #{num&.to_s&.bold} fa favs"
|
||||
end) do
|
||||
dump_fa_favs
|
||||
end if dump_table?(:favs)
|
||||
|
||||
measure(proc do |num|
|
||||
"dumped #{num&.to_s&.bold} fa posts"
|
||||
end) do
|
||||
dump_fa_posts
|
||||
end if dump_table?(:posts)
|
||||
|
||||
measure("created indexes") do
|
||||
create_indexes
|
||||
if dump_table?(:favs)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa favs" }) do
|
||||
dump_fa_favs
|
||||
end
|
||||
end
|
||||
|
||||
if dump_table?(:posts)
|
||||
measure(proc { |num| "dumped #{num&.to_s&.bold} fa posts" }) do
|
||||
dump_fa_posts
|
||||
end
|
||||
end
|
||||
|
||||
measure("created indexes") { create_indexes }
|
||||
ensure
|
||||
@db.close
|
||||
end
|
||||
@@ -110,24 +104,20 @@ class Domain::Fa::SqliteExporter
|
||||
["fa_favs", "user_id", false],
|
||||
["fa_favs", "post_id", false],
|
||||
["fa_posts", "creator_id", false],
|
||||
["fa_posts", "fa_id", true],
|
||||
["fa_posts", "fa_id", true]
|
||||
]
|
||||
|
||||
def create_indexes
|
||||
@db.execute_batch2(INDEXES.map do |table, col, unique|
|
||||
<<-SQL
|
||||
@db.execute_batch2(INDEXES.map { |table, col, unique| <<-SQL }.join("\n"))
|
||||
create #{unique ? "unique" : ""} index if not exists #{col}_on_#{table}
|
||||
on #{table} (#{col});
|
||||
SQL
|
||||
end.join("\n"))
|
||||
end
|
||||
|
||||
def drop_indexes
|
||||
@db.execute_batch2(INDEXES.map do |table, col, unique|
|
||||
<<-SQL
|
||||
@db.execute_batch2(INDEXES.map { |table, col, unique| <<-SQL }.join("\n"))
|
||||
drop index if exists #{col}_on_#{table};
|
||||
SQL
|
||||
end.join("\n"))
|
||||
end
|
||||
|
||||
def dump_fa_users
|
||||
@@ -135,12 +125,10 @@ class Domain::Fa::SqliteExporter
|
||||
model: Domain::Fa::User,
|
||||
table: "fa_users",
|
||||
columns: %w[id url_name name num_favorites registered_at],
|
||||
batch_size: 512,
|
||||
batch_size: 512
|
||||
) do |batch|
|
||||
# format registered_at
|
||||
batch.each do |row|
|
||||
row[4] = row[4]&.iso8601
|
||||
end
|
||||
batch.each { |row| row[4] = row[4]&.iso8601 }
|
||||
end
|
||||
end
|
||||
|
||||
@@ -148,8 +136,8 @@ class Domain::Fa::SqliteExporter
|
||||
dump_table_common(
|
||||
model: Domain::Fa::Follow,
|
||||
table: "fa_follows",
|
||||
columns: ["id", "follower_id", "followed_id"],
|
||||
batch_size: 4096,
|
||||
columns: %w[id follower_id followed_id],
|
||||
batch_size: 4096
|
||||
)
|
||||
end
|
||||
|
||||
@@ -157,8 +145,8 @@ class Domain::Fa::SqliteExporter
|
||||
dump_table_common(
|
||||
model: Domain::Fa::Fav,
|
||||
table: "fa_favs",
|
||||
columns: ["id", "user_id", "post_id"],
|
||||
batch_size: 4096,
|
||||
columns: %w[id user_id post_id],
|
||||
batch_size: 4096
|
||||
)
|
||||
end
|
||||
|
||||
@@ -167,16 +155,19 @@ class Domain::Fa::SqliteExporter
|
||||
model: Domain::Fa::Post.where("file_url_str is not null"),
|
||||
table: "fa_posts",
|
||||
columns: %w[
|
||||
id fa_id title creator_id
|
||||
num_views num_comments num_favorites
|
||||
id
|
||||
fa_id
|
||||
title
|
||||
creator_id
|
||||
num_views
|
||||
num_comments
|
||||
num_favorites
|
||||
posted_at
|
||||
],
|
||||
batch_size: 4096,
|
||||
batch_size: 4096
|
||||
) do |batch|
|
||||
# format posted_at
|
||||
batch.each do |row|
|
||||
row[7] = row[7]&.iso8601
|
||||
end
|
||||
batch.each { |row| row[7] = row[7]&.iso8601 }
|
||||
end
|
||||
end
|
||||
|
||||
@@ -186,7 +177,9 @@ class Domain::Fa::SqliteExporter
|
||||
num_models = 0
|
||||
start_id = max_id(table, "id")
|
||||
start_id += 1 if start_id
|
||||
logger.info("dumping #{table.bold}, start at #{(start_id || "first").to_s.bold}...")
|
||||
logger.info(
|
||||
"dumping #{table.bold}, start at #{(start_id || "first").to_s.bold}..."
|
||||
)
|
||||
inserter = create_inserter(batch_size, table, columns)
|
||||
|
||||
load_duration = 0.0
|
||||
@@ -198,7 +191,12 @@ class Domain::Fa::SqliteExporter
|
||||
|
||||
@db.transaction
|
||||
|
||||
pluck_rows(model, columns, start_id: start_id, batch_size: batch_size) do |rows|
|
||||
pluck_rows(
|
||||
model,
|
||||
columns,
|
||||
start_id: start_id,
|
||||
batch_size: batch_size
|
||||
) do |rows|
|
||||
load_duration += Time.now - load_start
|
||||
|
||||
map_start = Time.now
|
||||
@@ -265,11 +263,7 @@ class Domain::Fa::SqliteExporter
|
||||
colss = colss[@bulk_size...]
|
||||
end
|
||||
|
||||
if colss.any?
|
||||
colss.each do |col|
|
||||
insert_single(col)
|
||||
end
|
||||
end
|
||||
colss.each { |col| insert_single(col) } if colss.any?
|
||||
end
|
||||
|
||||
private
|
||||
@@ -301,9 +295,7 @@ class Domain::Fa::SqliteExporter
|
||||
|
||||
def bind_bulk(stmt, binds)
|
||||
# stmt.bind_params(binds)
|
||||
binds.each do |arr|
|
||||
bind_single(stmt, arr)
|
||||
end
|
||||
binds.each { |arr| bind_single(stmt, arr) }
|
||||
end
|
||||
|
||||
def dimensionality!
|
||||
@@ -320,7 +312,10 @@ class Domain::Fa::SqliteExporter
|
||||
|
||||
relation = relation.all unless relation.is_a?(ActiveRecord::Relation)
|
||||
relation = relation.where("id >= ?", start_id) if start_id
|
||||
relation.pluck_in_batches(*cols.map(&:to_sym), batch_size: batch_size) do |batch|
|
||||
relation.pluck_in_batches(
|
||||
*cols.map(&:to_sym),
|
||||
batch_size: batch_size
|
||||
) do |batch|
|
||||
yield batch
|
||||
|
||||
num_models += batch.size
|
||||
@@ -349,9 +344,7 @@ class Domain::Fa::SqliteExporter
|
||||
|
||||
def dump_table?(table)
|
||||
ret = @tables.include?(:all) || @tables.include?(table)
|
||||
if !ret
|
||||
logger.info("skipping #{table.to_s.bold}...")
|
||||
end
|
||||
logger.info("skipping #{table.to_s.bold}...") if !ret
|
||||
ret
|
||||
end
|
||||
end
|
||||
|
||||
@@ -13,16 +13,17 @@ class Domain::Fa::UserAvatarFixer < LegacyImport::BulkImportJob
|
||||
@processed = 0
|
||||
|
||||
if @url_name
|
||||
user = Domain::Fa::User.find_by(url_name: @url_name) || raise("user not found")
|
||||
user =
|
||||
Domain::Fa::User.find_by(url_name: @url_name) || raise("user not found")
|
||||
process_avatar(user.avatar)
|
||||
else
|
||||
Domain::Fa::UserAvatar.
|
||||
where(state: "no_file_on_guessed_user_page_error").
|
||||
find_each(start: @start_at, batch_size: 5) do |avatar|
|
||||
@processed += 1
|
||||
break if @limit && @processed > @limit
|
||||
process_avatar(avatar)
|
||||
end
|
||||
Domain::Fa::UserAvatar
|
||||
.where(state: "no_file_on_guessed_user_page_error")
|
||||
.find_each(start: @start_at, batch_size: 5) do |avatar|
|
||||
@processed += 1
|
||||
break if @limit && @processed > @limit
|
||||
process_avatar(avatar)
|
||||
end
|
||||
end
|
||||
|
||||
@processed
|
||||
@@ -32,7 +33,10 @@ class Domain::Fa::UserAvatarFixer < LegacyImport::BulkImportJob
|
||||
|
||||
def process_avatar(avatar)
|
||||
user = avatar.user
|
||||
logger.prefix = proc { "[avatar #{avatar.id.to_s.bold}, user #{user.url_name.to_s.bold}]" }
|
||||
logger.prefix =
|
||||
proc do
|
||||
"[avatar #{avatar.id.to_s.bold}, user #{user.url_name.to_s.bold}]"
|
||||
end
|
||||
|
||||
logger.info("guessing...")
|
||||
|
||||
|
||||
@@ -8,30 +8,31 @@ class Domain::Fa::UserEnqueuer
|
||||
@low_water_mark = low_water_mark
|
||||
@high_water_mark = high_water_mark
|
||||
raise if @high_water_mark <= @low_water_mark
|
||||
@user_iterator = Enumerator.new do |e|
|
||||
Domain::Fa::User.
|
||||
where("id >= ?", start_at).
|
||||
find_each do |user|
|
||||
e << user
|
||||
@user_iterator =
|
||||
Enumerator.new do |e|
|
||||
Domain::Fa::User
|
||||
.where("id >= ?", start_at)
|
||||
.find_each { |user| e << user }
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def run_once
|
||||
already_enqueued = enqueued_count
|
||||
if already_enqueued <= @low_water_mark
|
||||
to_enqueue = @high_water_mark - already_enqueued
|
||||
logger.info("enqueuing #{to_enqueue.to_s.bold} more users - #{already_enqueued.to_s.bold} already enqueued")
|
||||
rows = measure(proc { |p| "gather #{p.length.to_s.bold} users to enqueue" }) do
|
||||
to_enqueue.times.map do
|
||||
@user_iterator.next
|
||||
logger.info(
|
||||
"enqueuing #{to_enqueue.to_s.bold} more users - #{already_enqueued.to_s.bold} already enqueued"
|
||||
)
|
||||
rows =
|
||||
measure(proc { |p| "gather #{p.length.to_s.bold} users to enqueue" }) do
|
||||
to_enqueue.times.map { @user_iterator.next }
|
||||
end
|
||||
end
|
||||
measure("enqueue jobs") do
|
||||
rows.each do |user|
|
||||
types = []
|
||||
if user.state == "ok"
|
||||
if user.due_for_favs_scan? || user.due_for_page_scan? || user.due_for_follows_scan?
|
||||
if user.due_for_favs_scan? || user.due_for_page_scan? ||
|
||||
user.due_for_follows_scan?
|
||||
Domain::Fa::Job::UserIncrementalJob.perform_later({ user: user })
|
||||
types << "incremental"
|
||||
end
|
||||
@@ -82,16 +83,14 @@ class Domain::Fa::UserEnqueuer
|
||||
return SpecUtil.enqueued_jobs(Domain::Fa::Job::UserFollowsJob).count
|
||||
end
|
||||
|
||||
[
|
||||
"fa_post",
|
||||
"fa_user_avatar",
|
||||
"fa_user_favs",
|
||||
"fa_user_follows",
|
||||
"fa_user_gallery",
|
||||
"fa_user_page",
|
||||
"static_file",
|
||||
].map do |queue_name|
|
||||
count_failed_in_queue(queue_name)
|
||||
end.max
|
||||
%w[
|
||||
fa_post
|
||||
fa_user_avatar
|
||||
fa_user_favs
|
||||
fa_user_follows
|
||||
fa_user_gallery
|
||||
fa_user_page
|
||||
static_file
|
||||
].map { |queue_name| count_failed_in_queue(queue_name) }.max
|
||||
end
|
||||
end
|
||||
|
||||
@@ -2,56 +2,63 @@ class ForkFuture
|
||||
def initialize(&block)
|
||||
read, write = ::IO.pipe
|
||||
@read = read
|
||||
pid = ::Process.fork do
|
||||
start = Time.now
|
||||
read.close
|
||||
begin
|
||||
result = block.call
|
||||
rescue
|
||||
result = RuntimeError.new([$!.message, $!.backtrace])
|
||||
pid =
|
||||
::Process.fork do
|
||||
start = Time.now
|
||||
read.close
|
||||
begin
|
||||
result = block.call
|
||||
rescue StandardError
|
||||
result = RuntimeError.new([$!.message, $!.backtrace])
|
||||
end
|
||||
duration = Time.now - start
|
||||
::Marshal.dump({ duration: duration, result: result }, write)
|
||||
::Process.exit!(true)
|
||||
end
|
||||
duration = Time.now - start
|
||||
::Marshal.dump({
|
||||
duration: duration,
|
||||
result: result,
|
||||
}, write)
|
||||
::Process.exit!(true)
|
||||
end
|
||||
write.close
|
||||
end
|
||||
|
||||
def self.parallel_map(num_processes, enumerator, &block)
|
||||
ForkFuture.each_slice_impl(num_processes, enumerator).map do |slice|
|
||||
ForkFuture.new do
|
||||
slice.map(&block)
|
||||
end
|
||||
end.to_a.map(&:join).flatten(1)
|
||||
ForkFuture
|
||||
.each_slice_impl(num_processes, enumerator)
|
||||
.map { |slice| ForkFuture.new { slice.map(&block) } }
|
||||
.to_a
|
||||
.map(&:join)
|
||||
.flatten(1)
|
||||
end
|
||||
|
||||
def self.parallel_each(num_processes, enumerator, &block)
|
||||
ForkFuture.each_slice_impl(num_processes, enumerator).map do |slice|
|
||||
ForkFuture.new do
|
||||
slice.each(&block)
|
||||
nil
|
||||
ForkFuture
|
||||
.each_slice_impl(num_processes, enumerator)
|
||||
.map do |slice|
|
||||
ForkFuture.new do
|
||||
slice.each(&block)
|
||||
nil
|
||||
end
|
||||
end
|
||||
end.to_a.map(&:join)
|
||||
.to_a
|
||||
.map(&:join)
|
||||
end
|
||||
|
||||
def self.parallel_each_slice(num_processes, enumerator, &block)
|
||||
ForkFuture.each_slice_impl(num_processes, enumerator).map do |slice|
|
||||
ForkFuture.new do
|
||||
block.call(slice)
|
||||
nil
|
||||
ForkFuture
|
||||
.each_slice_impl(num_processes, enumerator)
|
||||
.map do |slice|
|
||||
ForkFuture.new do
|
||||
block.call(slice)
|
||||
nil
|
||||
end
|
||||
end
|
||||
end.to_a.map(&:join)
|
||||
.to_a
|
||||
.map(&:join)
|
||||
end
|
||||
|
||||
def self.parallel_map_slice(num_processes, enumerator, &block)
|
||||
ForkFuture.each_slice_impl(num_processes, enumerator).map do |slice|
|
||||
ForkFuture.new do
|
||||
block.call(slice)
|
||||
end
|
||||
end.to_a.map(&:join)
|
||||
ForkFuture
|
||||
.each_slice_impl(num_processes, enumerator)
|
||||
.map { |slice| ForkFuture.new { block.call(slice) } }
|
||||
.to_a
|
||||
.map(&:join)
|
||||
end
|
||||
|
||||
def join
|
||||
@@ -77,7 +84,8 @@ class ForkFuture
|
||||
end
|
||||
|
||||
def wait!
|
||||
@result ||= begin
|
||||
@result ||=
|
||||
begin
|
||||
result_buffer = @read.read
|
||||
@read.close
|
||||
::Marshal.load(result_buffer)
|
||||
|
||||
@@ -7,17 +7,18 @@ module HasBulkEnqueueJobs
|
||||
old_limit = Scraper::JobBase.good_job_concurrency_config[:total_limit]
|
||||
Scraper::JobBase.good_job_concurrency_config[:total_limit] = nil
|
||||
|
||||
key_to_job = GoodJob::Bulk.capture(&block).map do |job|
|
||||
[job.good_job_concurrency_key, job]
|
||||
end.to_h
|
||||
key_to_job =
|
||||
GoodJob::Bulk
|
||||
.capture(&block)
|
||||
.map { |job| [job.good_job_concurrency_key, job] }
|
||||
.to_h
|
||||
|
||||
ReduxApplicationRecord.transaction do
|
||||
existing_keys = GoodJob::Job.
|
||||
where(concurrency_key: key_to_job.keys).
|
||||
pluck(:concurrency_key)
|
||||
existing_keys.each do |key|
|
||||
key_to_job.delete(key)
|
||||
end
|
||||
existing_keys =
|
||||
GoodJob::Job.where(concurrency_key: key_to_job.keys).pluck(
|
||||
:concurrency_key
|
||||
)
|
||||
existing_keys.each { |key| key_to_job.delete(key) }
|
||||
GoodJob::Bulk.enqueue(key_to_job.values)
|
||||
end
|
||||
ensure
|
||||
|
||||
@@ -10,9 +10,7 @@ module HasColorLogger
|
||||
end
|
||||
|
||||
included do
|
||||
define_method(:logger) do
|
||||
@logger ||= ColorLogger.make(sink, self)
|
||||
end
|
||||
define_method(:logger) { @logger ||= ColorLogger.make(sink, self) }
|
||||
|
||||
define_singleton_method(:logger) do
|
||||
@logger ||= ColorLogger.make(sink, self.name)
|
||||
@@ -23,7 +21,5 @@ module HasColorLogger
|
||||
|
||||
# by default, write to stdout
|
||||
extend ActiveSupport::Concern
|
||||
included do
|
||||
include HasColorLogger[$stdout]
|
||||
end
|
||||
included { include HasColorLogger[$stdout] }
|
||||
end
|
||||
|
||||
@@ -9,8 +9,8 @@ class HexUtil
|
||||
units = %w[B KiB MiB GiB TiB Pib EiB ZiB]
|
||||
return "0.0 B" if size == 0
|
||||
exp = (Math.log(size) / Math.log(1024)).to_i
|
||||
exp += 1 if (size.to_f / 1024 ** exp >= 1024 - 0.05)
|
||||
exp += 1 if (size.to_f / 1024**exp >= 1024 - 0.05)
|
||||
exp = units.size - 1 if exp > units.size - 1
|
||||
"%.1f %s" % [size.to_f / 1024 ** exp, units[exp]]
|
||||
"%.1f %s" % [size.to_f / 1024**exp, units[exp]]
|
||||
end
|
||||
end
|
||||
|
||||
@@ -45,15 +45,15 @@ class LegacyImport::AdaptiveCache
|
||||
@candidates.insert(idx, new_entry)
|
||||
end
|
||||
|
||||
while @candidates.size > @max_size
|
||||
@candidates.pop
|
||||
end
|
||||
@candidates.pop while @candidates.size > @max_size
|
||||
end
|
||||
|
||||
def to_s
|
||||
@candidates.map do |entry|
|
||||
" - #{entry.score.round(1)} score, id #{entry.id} - #{entry.extra}"
|
||||
end.join("\n")
|
||||
@candidates
|
||||
.map do |entry|
|
||||
" - #{entry.score.round(1)} score, id #{entry.id} - #{entry.extra}"
|
||||
end
|
||||
.join("\n")
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
@@ -21,15 +21,22 @@ class LegacyImport::E621LegacyPostImporter < LegacyImport::BulkImportJob
|
||||
query = ::Legacy::E621::Post.includes(:blob_entry, { taggings: :tag })
|
||||
# finish = @start_at + (2 * 32 * 32)
|
||||
finish = nil
|
||||
query.find_in_batches(start: @start_at, finish: finish, batch_size: @batch_size * @forks) do |batch|
|
||||
query.find_in_batches(
|
||||
start: @start_at,
|
||||
finish: finish,
|
||||
batch_size: @batch_size * @forks
|
||||
) do |batch|
|
||||
last_id = batch.last&.id
|
||||
|
||||
if @forks <= 1
|
||||
progress += import_e621_posts(batch)
|
||||
else
|
||||
progress += ForkFuture.parallel_map_slice(@forks, batch) do |fork_batch|
|
||||
import_e621_posts(fork_batch)
|
||||
end.sum
|
||||
progress +=
|
||||
ForkFuture
|
||||
.parallel_map_slice(@forks, batch) do |fork_batch|
|
||||
import_e621_posts(fork_batch)
|
||||
end
|
||||
.sum
|
||||
end
|
||||
|
||||
rate = progress.to_f / (Time.now - @start_time)
|
||||
@@ -44,11 +51,13 @@ class LegacyImport::E621LegacyPostImporter < LegacyImport::BulkImportJob
|
||||
def import_e621_posts(legacy_posts)
|
||||
progress = 0
|
||||
|
||||
skip_posts_ids = Set.new(
|
||||
::Domain::E621::Post.select(:e621_id).
|
||||
where(e621_id: legacy_posts.map(&:e621_id)).
|
||||
pluck(:e621_id)
|
||||
)
|
||||
skip_posts_ids =
|
||||
Set.new(
|
||||
::Domain::E621::Post
|
||||
.select(:e621_id)
|
||||
.where(e621_id: legacy_posts.map(&:e621_id))
|
||||
.pluck(:e621_id)
|
||||
)
|
||||
|
||||
legacy_posts.reject! do |legacy_post|
|
||||
skip_posts_ids.include?(legacy_post.e621_id)
|
||||
@@ -60,12 +69,14 @@ class LegacyImport::E621LegacyPostImporter < LegacyImport::BulkImportJob
|
||||
ReduxApplicationRecord.transaction do
|
||||
post = ::Domain::E621::Post.find_or_build_from_legacy(legacy_post)
|
||||
unless post.valid?
|
||||
raise("error building post #{post.id} / #{post.e621_id}: #{post.errors.full_messages}")
|
||||
raise(
|
||||
"error building post #{post.id} / #{post.e621_id}: #{post.errors.full_messages}"
|
||||
)
|
||||
end
|
||||
post.save!
|
||||
progress += 1
|
||||
end
|
||||
rescue
|
||||
rescue StandardError
|
||||
retries += 1
|
||||
sleep 0.1 and retry if retries < 3
|
||||
raise
|
||||
|
||||
@@ -27,9 +27,12 @@ class LegacyImport::FaPostImporter < LegacyImport::BulkImportJob
|
||||
if @forks <= 1
|
||||
progress += import_fa_posts(batch)
|
||||
else
|
||||
progress += ForkFuture.parallel_map_slice(@forks, batch) do |fork_batch|
|
||||
import_fa_posts(fork_batch)
|
||||
end.sum
|
||||
progress +=
|
||||
ForkFuture
|
||||
.parallel_map_slice(@forks, batch) do |fork_batch|
|
||||
import_fa_posts(fork_batch)
|
||||
end
|
||||
.sum
|
||||
end
|
||||
|
||||
rate = progress.to_f / (Time.now - @start_time)
|
||||
@@ -42,12 +45,14 @@ class LegacyImport::FaPostImporter < LegacyImport::BulkImportJob
|
||||
|
||||
def import_fa_posts(legacy_posts)
|
||||
progress = 0
|
||||
skip_posts_ids = Set.new(
|
||||
::Domain::Fa::Post.select(:fa_id, :creator_id).
|
||||
where(fa_id: legacy_posts.map(&:fa_id)).
|
||||
where("creator_id is not null").
|
||||
pluck(:fa_id)
|
||||
)
|
||||
skip_posts_ids =
|
||||
Set.new(
|
||||
::Domain::Fa::Post
|
||||
.select(:fa_id, :creator_id)
|
||||
.where(fa_id: legacy_posts.map(&:fa_id))
|
||||
.where("creator_id is not null")
|
||||
.pluck(:fa_id)
|
||||
)
|
||||
|
||||
legacy_posts.reject! do |legacy_post|
|
||||
skip_posts_ids.include?(legacy_post.fa_id)
|
||||
@@ -59,12 +64,14 @@ class LegacyImport::FaPostImporter < LegacyImport::BulkImportJob
|
||||
ReduxApplicationRecord.transaction do
|
||||
post = ::Domain::Fa::Post.find_or_build_from_legacy(legacy_post)
|
||||
unless post.valid?
|
||||
raise(" !! error building post #{post.id} / #{post.fa_id}: #{post.errors.full_messages}")
|
||||
raise(
|
||||
" !! error building post #{post.id} / #{post.fa_id}: #{post.errors.full_messages}"
|
||||
)
|
||||
end
|
||||
post.save!
|
||||
progress += 1
|
||||
end
|
||||
rescue
|
||||
rescue StandardError
|
||||
retries += 1
|
||||
sleep 0.1 and retry if retries < 3
|
||||
raise
|
||||
|
||||
@@ -14,9 +14,10 @@ class LegacyImport::HttpLogEntryBulkImporter
|
||||
|
||||
# key is content_type|domain
|
||||
# value is the adaptive cache
|
||||
@blob_entry_cache = Hash.new do |hash, key|
|
||||
hash[key] = LegacyImport::AdaptiveCache.new(cache_size, 1.0, 0.1)
|
||||
end
|
||||
@blob_entry_cache =
|
||||
Hash.new do |hash, key|
|
||||
hash[key] = LegacyImport::AdaptiveCache.new(cache_size, 1.0, 0.1)
|
||||
end
|
||||
end
|
||||
|
||||
def run
|
||||
@@ -28,23 +29,24 @@ class LegacyImport::HttpLogEntryBulkImporter
|
||||
start_at = Time.now
|
||||
|
||||
last_model_id = nil
|
||||
stats_printer = Thread.new do
|
||||
Thread.current.name = "stats-printer"
|
||||
i = 0
|
||||
loop do
|
||||
sleep 3
|
||||
duration = Time.now - start_at
|
||||
rate = @insert_stats.http_entries_inserted / duration
|
||||
hr
|
||||
puts "insert stats: #{@insert_stats} - " +
|
||||
"#{rate.round(2)}/sec (last id: #{last_model_id})"
|
||||
i += 1
|
||||
if i % 5 == 0
|
||||
stats_printer =
|
||||
Thread.new do
|
||||
Thread.current.name = "stats-printer"
|
||||
i = 0
|
||||
loop do
|
||||
sleep 3
|
||||
duration = Time.now - start_at
|
||||
rate = @insert_stats.http_entries_inserted / duration
|
||||
hr
|
||||
dump_timings
|
||||
puts "insert stats: #{@insert_stats} - " +
|
||||
"#{rate.round(2)}/sec (last id: #{last_model_id})"
|
||||
i += 1
|
||||
if i % 5 == 0
|
||||
hr
|
||||
dump_timings
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
query = Legacy::HttpLogEntry.order(id: :asc)
|
||||
|
||||
@@ -52,7 +54,7 @@ class LegacyImport::HttpLogEntryBulkImporter
|
||||
query.find_in_batches(
|
||||
batch_size: @batch_size,
|
||||
start: @start_id,
|
||||
finish: @end_id,
|
||||
finish: @end_id
|
||||
) do |legacy_models|
|
||||
@timings.finish :bulk_load
|
||||
import_legacy_models(legacy_models)
|
||||
@@ -98,68 +100,82 @@ class LegacyImport::HttpLogEntryBulkImporter
|
||||
private
|
||||
|
||||
def dump_timings
|
||||
tp(@timings.entries.map do |entry|
|
||||
{
|
||||
key: entry[:key],
|
||||
duration: "#{entry[:key_secs].round(1)} sec",
|
||||
percent: "#{(100 * entry[:proportion]).round(1)}%".rjust(5),
|
||||
}
|
||||
end)
|
||||
tp(
|
||||
@timings.entries.map do |entry|
|
||||
{
|
||||
key: entry[:key],
|
||||
duration: "#{entry[:key_secs].round(1)} sec",
|
||||
percent: "#{(100 * entry[:proportion]).round(1)}%".rjust(5)
|
||||
}
|
||||
end
|
||||
)
|
||||
end
|
||||
|
||||
def import_legacy_models(legacy_models)
|
||||
@timings.start :lookup_existing_http
|
||||
already_exist_ids = ::HttpLogEntry.
|
||||
where(id: legacy_models.map(&:id)).
|
||||
pluck(:id).
|
||||
to_set
|
||||
already_exist_ids =
|
||||
::HttpLogEntry.where(id: legacy_models.map(&:id)).pluck(:id).to_set
|
||||
@timings.finish :lookup_existing_http
|
||||
|
||||
# ignore the models which have no stored content (for now)
|
||||
@timings.start :reject_empty_legacy
|
||||
legacy_models = ForkFuture.parallel_map(@fork_amount, legacy_models) do |legacy_model|
|
||||
next nil if already_exist_ids.include?(legacy_model.id)
|
||||
begin
|
||||
next nil if legacy_model.response_body.nil?
|
||||
rescue
|
||||
puts "legacy model #{legacy_model.id} (#{legacy_model.full_path}): error reading response body"
|
||||
next nil
|
||||
end
|
||||
legacy_models =
|
||||
ForkFuture.parallel_map(@fork_amount, legacy_models) do |legacy_model|
|
||||
next nil if already_exist_ids.include?(legacy_model.id)
|
||||
begin
|
||||
next nil if legacy_model.response_body.nil?
|
||||
rescue StandardError
|
||||
puts "legacy model #{legacy_model.id} (#{legacy_model.full_path}): error reading response body"
|
||||
next nil
|
||||
end
|
||||
|
||||
# legacy model now has response body loaded
|
||||
legacy_model
|
||||
end
|
||||
# legacy model now has response body loaded
|
||||
legacy_model
|
||||
end
|
||||
legacy_models.reject!(&:nil?)
|
||||
@timings.finish :reject_empty_legacy
|
||||
|
||||
blob_creation_future = ForkFuture.new do
|
||||
bulk_import_blob_entries(legacy_models)
|
||||
end
|
||||
header_creation_future = ForkFuture.new do
|
||||
bulk_import_headers(legacy_models)
|
||||
end
|
||||
blob_creation_future =
|
||||
ForkFuture.new { bulk_import_blob_entries(legacy_models) }
|
||||
header_creation_future =
|
||||
ForkFuture.new { bulk_import_headers(legacy_models) }
|
||||
|
||||
insert_stats, timings, cache_ops, legacy_model_id_to_response_sha256 = blob_creation_future.join
|
||||
insert_stats, timings, cache_ops, legacy_model_id_to_response_sha256 =
|
||||
blob_creation_future.join
|
||||
@insert_stats.merge!(insert_stats)
|
||||
@timings.merge!(timings)
|
||||
cache_ops.each do |op|
|
||||
@blob_entry_cache[op[0]].send(op[1], *op[2..])
|
||||
end
|
||||
cache_ops.each { |op| @blob_entry_cache[op[0]].send(op[1], *op[2..]) }
|
||||
|
||||
insert_stats, timings, legacy_model_id_to_header_sha256s, header_sha256_to_header_id = header_creation_future.join
|
||||
insert_stats,
|
||||
timings,
|
||||
legacy_model_id_to_header_sha256s,
|
||||
header_sha256_to_header_id =
|
||||
header_creation_future.join
|
||||
@insert_stats.merge!(insert_stats)
|
||||
@timings.merge!(timings)
|
||||
|
||||
@timings.start :build_new_https
|
||||
http_models = legacy_models.map do |legacy_model|
|
||||
request_headers_id = header_sha256_to_header_id[legacy_model_id_to_header_sha256s[legacy_model.id][:req_sha256]]
|
||||
response_headers_id = header_sha256_to_header_id[legacy_model_id_to_header_sha256s[legacy_model.id][:res_sha256]]
|
||||
response_sha256 = legacy_model_id_to_response_sha256[legacy_model.id]
|
||||
request_headers_id || raise("no request header id")
|
||||
response_headers_id || raise("no response header id")
|
||||
response_sha256 || raise("no response sha256")
|
||||
build_http_log_entry(legacy_model, request_headers_id, response_headers_id, response_sha256)
|
||||
end
|
||||
http_models =
|
||||
legacy_models.map do |legacy_model|
|
||||
request_headers_id =
|
||||
header_sha256_to_header_id[
|
||||
legacy_model_id_to_header_sha256s[legacy_model.id][:req_sha256]
|
||||
]
|
||||
response_headers_id =
|
||||
header_sha256_to_header_id[
|
||||
legacy_model_id_to_header_sha256s[legacy_model.id][:res_sha256]
|
||||
]
|
||||
response_sha256 = legacy_model_id_to_response_sha256[legacy_model.id]
|
||||
request_headers_id || raise("no request header id")
|
||||
response_headers_id || raise("no response header id")
|
||||
response_sha256 || raise("no response sha256")
|
||||
build_http_log_entry(
|
||||
legacy_model,
|
||||
request_headers_id,
|
||||
response_headers_id,
|
||||
response_sha256
|
||||
)
|
||||
end
|
||||
@timings.finish :build_new_https
|
||||
|
||||
@timings.start :insert_new_https
|
||||
@@ -179,78 +195,113 @@ class LegacyImport::HttpLogEntryBulkImporter
|
||||
# compute all blob entries for the legacy models, removing duplicates
|
||||
timings.start :lookup_existing_bes
|
||||
|
||||
legacy_model_id_to_response_sha256 = legacy_models.map do |m|
|
||||
[m.id, Digest::SHA256.digest(m.response_body)]
|
||||
end.to_h
|
||||
legacy_model_id_to_response_sha256 =
|
||||
legacy_models
|
||||
.map { |m| [m.id, Digest::SHA256.digest(m.response_body)] }
|
||||
.to_h
|
||||
|
||||
sha256_to_existing_blob_entry = ::BlobEntryP.where(sha256: legacy_model_id_to_response_sha256.values).map do |be|
|
||||
[be.sha256, be]
|
||||
end.to_h
|
||||
sha256_to_existing_blob_entry =
|
||||
::BlobEntryP
|
||||
.where(sha256: legacy_model_id_to_response_sha256.values)
|
||||
.map { |be| [be.sha256, be] }
|
||||
.to_h
|
||||
timings.finish :lookup_existing_bes
|
||||
|
||||
timings.start :build_new_bes
|
||||
blob_entries_to_insert = ForkFuture.parallel_map(@fork_amount, legacy_models) do |legacy_model|
|
||||
sha256 = legacy_model_id_to_response_sha256[legacy_model.id] || raise
|
||||
next nil if sha256_to_existing_blob_entry[sha256]
|
||||
blob_entries_to_insert =
|
||||
ForkFuture
|
||||
.parallel_map(@fork_amount, legacy_models) do |legacy_model|
|
||||
sha256 = legacy_model_id_to_response_sha256[legacy_model.id] || raise
|
||||
next nil if sha256_to_existing_blob_entry[sha256]
|
||||
|
||||
content_type = legacy_model.content_type
|
||||
cache_key = "#{legacy_model.host}|#{content_type}"
|
||||
cache = @blob_entry_cache[cache_key]
|
||||
content_type = legacy_model.content_type
|
||||
cache_key = "#{legacy_model.host}|#{content_type}"
|
||||
cache = @blob_entry_cache[cache_key]
|
||||
|
||||
# N% chance (if we're not at cache capacity) to not supply any candidates,
|
||||
# to give new entries in the cache a chance to replace poor performing ones
|
||||
candidates = if cache.at_capacity? # && rand(0..100) >= 5
|
||||
cache.candidates
|
||||
else
|
||||
[]
|
||||
# N% chance (if we're not at cache capacity) to not supply any candidates,
|
||||
# to give new entries in the cache a chance to replace poor performing ones
|
||||
candidates =
|
||||
if cache.at_capacity? # && rand(0..100) >= 5
|
||||
cache.candidates
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
blob_entry =
|
||||
::BlobEntryP.build_record(
|
||||
content_type: content_type,
|
||||
sha256: sha256,
|
||||
contents: legacy_model.response_body,
|
||||
candidates: candidates
|
||||
)
|
||||
|
||||
# reward the base if it was used, if not, insert this blob into the
|
||||
# cache so it'll be a future candidate (unless it's not a new model)
|
||||
# cache keys are hex encoded for easier viewing / debugging
|
||||
cache_op = nil
|
||||
if !blob_entry.persisted? && @cache_size > 0
|
||||
if blob_entry.base_sha256
|
||||
cache_op = [
|
||||
cache_key,
|
||||
:reward,
|
||||
HexUtil.bin2hex(blob_entry.base_sha256)[0..8]
|
||||
]
|
||||
else
|
||||
cache_op = [
|
||||
cache_key,
|
||||
:insert,
|
||||
HexUtil.bin2hex(blob_entry.sha256)[0..8],
|
||||
blob_entry,
|
||||
legacy_model.full_path
|
||||
]
|
||||
end
|
||||
end
|
||||
|
||||
blob_entry.valid? ||
|
||||
raise(
|
||||
"invalid blob entry (legacy model id #{legacy_model.id}): #{blob_entry.errors.full_messages}"
|
||||
)
|
||||
cache.send(cache_op[1], *cache_op[2..]) if cache_op
|
||||
[blob_entry, cache_op]
|
||||
end
|
||||
|
||||
blob_entry = ::BlobEntryP.build_record(
|
||||
content_type: content_type,
|
||||
sha256: sha256,
|
||||
contents: legacy_model.response_body,
|
||||
candidates: candidates,
|
||||
)
|
||||
|
||||
# reward the base if it was used, if not, insert this blob into the
|
||||
# cache so it'll be a future candidate (unless it's not a new model)
|
||||
# cache keys are hex encoded for easier viewing / debugging
|
||||
cache_op = nil
|
||||
if !blob_entry.persisted? && @cache_size > 0
|
||||
if blob_entry.base_sha256
|
||||
cache_op = [cache_key, :reward, HexUtil.bin2hex(blob_entry.base_sha256)[0..8]]
|
||||
else
|
||||
cache_op = [cache_key, :insert, HexUtil.bin2hex(blob_entry.sha256)[0..8], blob_entry, legacy_model.full_path]
|
||||
.reject(&:nil?)
|
||||
.map do |pair|
|
||||
blob_entry = pair[0]
|
||||
cache_op = pair[1]
|
||||
merged_cache_ops << cache_op if cache_op
|
||||
blob_entry
|
||||
end
|
||||
end
|
||||
|
||||
blob_entry.valid? || raise("invalid blob entry (legacy model id #{legacy_model.id}): #{blob_entry.errors.full_messages}")
|
||||
cache.send(cache_op[1], *cache_op[2..]) if cache_op
|
||||
[blob_entry, cache_op]
|
||||
end.reject(&:nil?).map do |pair|
|
||||
blob_entry = pair[0]
|
||||
cache_op = pair[1]
|
||||
merged_cache_ops << cache_op if cache_op
|
||||
blob_entry
|
||||
end.uniq do |blob_entry|
|
||||
blob_entry.sha256
|
||||
end
|
||||
.uniq { |blob_entry| blob_entry.sha256 }
|
||||
timings.finish :build_new_bes
|
||||
|
||||
# bulk-insert all the new blob entries
|
||||
timings.start :insert_new_bes
|
||||
slice_size = [(blob_entries_to_insert.size.to_f / @fork_amount).ceil, 1].max
|
||||
blob_entries_to_insert.each_slice(slice_size).map do |slice|
|
||||
ForkFuture.new do
|
||||
BlobEntryP.insert_all!(slice.map(&:to_bulk_insert_hash)) if slice.any?
|
||||
end
|
||||
end.to_a.map(&:join) if blob_entries_to_insert.any?
|
||||
if blob_entries_to_insert.any?
|
||||
blob_entries_to_insert
|
||||
.each_slice(slice_size)
|
||||
.map do |slice|
|
||||
ForkFuture.new do
|
||||
if slice.any?
|
||||
BlobEntryP.insert_all!(slice.map(&:to_bulk_insert_hash))
|
||||
end
|
||||
end
|
||||
end
|
||||
.to_a
|
||||
.map(&:join)
|
||||
end
|
||||
insert_stats.blob_entries_inserted += blob_entries_to_insert.size
|
||||
insert_stats.bytes_length += blob_entries_to_insert.map(&:contents).map(&:size).sum
|
||||
insert_stats.bytes_length +=
|
||||
blob_entries_to_insert.map(&:contents).map(&:size).sum
|
||||
insert_stats.bytes_stored += blob_entries_to_insert.map(&:bytes_stored).sum
|
||||
timings.finish :insert_new_bes
|
||||
|
||||
[insert_stats, timings, merged_cache_ops, legacy_model_id_to_response_sha256]
|
||||
[
|
||||
insert_stats,
|
||||
timings,
|
||||
merged_cache_ops,
|
||||
legacy_model_id_to_response_sha256
|
||||
]
|
||||
end
|
||||
|
||||
def bulk_import_headers(legacy_models)
|
||||
@@ -260,75 +311,95 @@ class LegacyImport::HttpLogEntryBulkImporter
|
||||
timings.start :build_new_headers
|
||||
header_sha256_to_header_model = {}
|
||||
legacy_model_id_to_header_sha256s =
|
||||
ForkFuture.parallel_map(@fork_amount / 2, legacy_models) do |legacy_model|
|
||||
req_headers = ::HttpLogEntryHeader.build_record(headers: legacy_model.req_headers)
|
||||
res_headers = ::HttpLogEntryHeader.build_record(headers: legacy_model.res_headers)
|
||||
[legacy_model.id, {
|
||||
req: req_headers,
|
||||
res: res_headers,
|
||||
}]
|
||||
end.map do |pair|
|
||||
legacy_model_id = pair[0]
|
||||
req_headers = pair[1][:req]
|
||||
res_headers = pair[1][:res]
|
||||
header_sha256_to_header_model[req_headers.sha256] = req_headers
|
||||
header_sha256_to_header_model[res_headers.sha256] = res_headers
|
||||
[legacy_model_id, {
|
||||
req_sha256: req_headers.sha256,
|
||||
res_sha256: res_headers.sha256,
|
||||
}]
|
||||
end.to_h
|
||||
ForkFuture
|
||||
.parallel_map(@fork_amount / 2, legacy_models) do |legacy_model|
|
||||
req_headers =
|
||||
::HttpLogEntryHeader.build_record(headers: legacy_model.req_headers)
|
||||
res_headers =
|
||||
::HttpLogEntryHeader.build_record(headers: legacy_model.res_headers)
|
||||
[legacy_model.id, { req: req_headers, res: res_headers }]
|
||||
end
|
||||
.map do |pair|
|
||||
legacy_model_id = pair[0]
|
||||
req_headers = pair[1][:req]
|
||||
res_headers = pair[1][:res]
|
||||
header_sha256_to_header_model[req_headers.sha256] = req_headers
|
||||
header_sha256_to_header_model[res_headers.sha256] = res_headers
|
||||
[
|
||||
legacy_model_id,
|
||||
{ req_sha256: req_headers.sha256, res_sha256: res_headers.sha256 }
|
||||
]
|
||||
end
|
||||
.to_h
|
||||
timings.finish :build_new_headers
|
||||
|
||||
# excluding existing headers, and bulk-insert the new headers
|
||||
timings.start :insert_new_headers
|
||||
header_sha256_to_header_id = ::HttpLogEntryHeader.where(sha256: header_sha256_to_header_model.keys).map do |model|
|
||||
[model.sha256, model.id]
|
||||
end.to_h
|
||||
header_sha256_to_header_id =
|
||||
::HttpLogEntryHeader
|
||||
.where(sha256: header_sha256_to_header_model.keys)
|
||||
.map { |model| [model.sha256, model.id] }
|
||||
.to_h
|
||||
|
||||
headers_to_insert = header_sha256_to_header_model.map do |sha256, header_model|
|
||||
next nil if header_sha256_to_header_id[sha256]
|
||||
header_model.valid? || raise("invalid header models")
|
||||
header_model
|
||||
end.reject(&:nil?).uniq do |header_model|
|
||||
header_model.sha256
|
||||
end
|
||||
headers_to_insert =
|
||||
header_sha256_to_header_model
|
||||
.map do |sha256, header_model|
|
||||
next nil if header_sha256_to_header_id[sha256]
|
||||
header_model.valid? || raise("invalid header models")
|
||||
header_model
|
||||
end
|
||||
.reject(&:nil?)
|
||||
.uniq { |header_model| header_model.sha256 }
|
||||
|
||||
::HttpLogEntryHeader.insert_all!(
|
||||
headers_to_insert.map(&:to_bulk_insert_hash),
|
||||
returning: [:id, :sha256],
|
||||
).rows.each do |row|
|
||||
id, sha256 = row
|
||||
# rails does not deserialize the returned sha256 - we have to do that ourselves
|
||||
# postgres prefixes hex-encoded binaries with "\x", must strip that first
|
||||
raise("invariant") unless sha256[0..1] == "\\x"
|
||||
sha256 = ::HexUtil.hex2bin(sha256[2..])
|
||||
header_sha256_to_header_id[sha256] = id
|
||||
end if headers_to_insert.any?
|
||||
::HttpLogEntryHeader
|
||||
.insert_all!(
|
||||
headers_to_insert.map(&:to_bulk_insert_hash),
|
||||
returning: %i[id sha256]
|
||||
)
|
||||
.rows
|
||||
.each do |row|
|
||||
id, sha256 = row
|
||||
# rails does not deserialize the returned sha256 - we have to do that ourselves
|
||||
# postgres prefixes hex-encoded binaries with "\x", must strip that first
|
||||
raise("invariant") unless sha256[0..1] == "\\x"
|
||||
sha256 = ::HexUtil.hex2bin(sha256[2..])
|
||||
header_sha256_to_header_id[sha256] = id
|
||||
end if headers_to_insert.any?
|
||||
insert_stats.header_entries_inserted += headers_to_insert.size
|
||||
timings.finish :insert_new_headers
|
||||
|
||||
[insert_stats, timings, legacy_model_id_to_header_sha256s, header_sha256_to_header_id]
|
||||
[
|
||||
insert_stats,
|
||||
timings,
|
||||
legacy_model_id_to_header_sha256s,
|
||||
header_sha256_to_header_id
|
||||
]
|
||||
end
|
||||
|
||||
def build_http_log_entry(legacy_model, request_headers_id, response_headers_id, response_sha256)
|
||||
model = ::HttpLogEntry.new(
|
||||
id: legacy_model.id,
|
||||
uri_scheme: legacy_model.scheme,
|
||||
uri_host: legacy_model.host,
|
||||
uri_path: legacy_model.path,
|
||||
uri_query: legacy_model.query,
|
||||
verb: legacy_model.verb,
|
||||
content_type: legacy_model.content_type,
|
||||
status_code: legacy_model.status,
|
||||
response_time_ms: legacy_model.response_time,
|
||||
request_headers_id: request_headers_id,
|
||||
response_headers_id: response_headers_id,
|
||||
response_sha256: response_sha256,
|
||||
requested_at: legacy_model.requested_at,
|
||||
created_at: legacy_model.created_at,
|
||||
updated_at: legacy_model.updated_at,
|
||||
)
|
||||
def build_http_log_entry(
|
||||
legacy_model,
|
||||
request_headers_id,
|
||||
response_headers_id,
|
||||
response_sha256
|
||||
)
|
||||
model =
|
||||
::HttpLogEntry.new(
|
||||
id: legacy_model.id,
|
||||
uri_scheme: legacy_model.scheme,
|
||||
uri_host: legacy_model.host,
|
||||
uri_path: legacy_model.path,
|
||||
uri_query: legacy_model.query,
|
||||
verb: legacy_model.verb,
|
||||
content_type: legacy_model.content_type,
|
||||
status_code: legacy_model.status,
|
||||
response_time_ms: legacy_model.response_time,
|
||||
request_headers_id: request_headers_id,
|
||||
response_headers_id: response_headers_id,
|
||||
response_sha256: response_sha256,
|
||||
requested_at: legacy_model.requested_at,
|
||||
created_at: legacy_model.created_at,
|
||||
updated_at: legacy_model.updated_at
|
||||
)
|
||||
model
|
||||
end
|
||||
|
||||
@@ -336,102 +407,102 @@ class LegacyImport::HttpLogEntryBulkImporter
|
||||
puts "-" * 40
|
||||
end
|
||||
|
||||
Timings = Struct.new(
|
||||
:keys,
|
||||
:totals
|
||||
) do
|
||||
def initialize
|
||||
@start_at = Time.now
|
||||
self.keys = []
|
||||
self.totals = {}
|
||||
end
|
||||
Timings =
|
||||
Struct.new(:keys, :totals) do
|
||||
def initialize
|
||||
@start_at = Time.now
|
||||
self.keys = []
|
||||
self.totals = {}
|
||||
end
|
||||
|
||||
def merge!(other)
|
||||
raise if other.nil?
|
||||
other.keys.each do |key|
|
||||
def merge!(other)
|
||||
raise if other.nil?
|
||||
other.keys.each do |key|
|
||||
self.keys << key unless self.keys.include?(key)
|
||||
self.entry_for(key)[:secs] += other.totals[key][:secs]
|
||||
end
|
||||
end
|
||||
|
||||
def start(key)
|
||||
self.keys << key unless self.keys.include?(key)
|
||||
self.entry_for(key)[:secs] += other.totals[key][:secs]
|
||||
entry = self.entry_for(key)
|
||||
raise("#{key} already started") if entry[:started]
|
||||
entry[:started] = Time.now
|
||||
end
|
||||
|
||||
def finish(key)
|
||||
entry = self.totals[key]
|
||||
raise("#{key} does not exist") unless entry
|
||||
started = entry[:started]
|
||||
entry[:started] = nil
|
||||
raise("#{key} not started") unless started
|
||||
entry[:secs] += Time.now - started
|
||||
end
|
||||
|
||||
def entries
|
||||
total_secs = Time.now - @start_at
|
||||
total_measured_secs = self.totals.values.map { |e| e[:secs] }.sum
|
||||
self
|
||||
.keys
|
||||
.map do |key|
|
||||
key_secs = self.totals[key][:secs]
|
||||
{
|
||||
key: key,
|
||||
key_secs: key_secs,
|
||||
proportion: key_secs / total_measured_secs
|
||||
}
|
||||
end
|
||||
.chain(
|
||||
[
|
||||
{
|
||||
key: :measured_total,
|
||||
key_secs: total_measured_secs,
|
||||
proportion: total_measured_secs / total_secs
|
||||
},
|
||||
{ key: :actual_total, key_secs: total_secs, proportion: 1.0 }
|
||||
]
|
||||
)
|
||||
end
|
||||
|
||||
def entry_for(key)
|
||||
self.totals[key] ||= { started: nil, secs: 0.0 }
|
||||
end
|
||||
end
|
||||
|
||||
def start(key)
|
||||
self.keys << key unless self.keys.include?(key)
|
||||
entry = self.entry_for(key)
|
||||
raise("#{key} already started") if entry[:started]
|
||||
entry[:started] = Time.now
|
||||
end
|
||||
InsertStats =
|
||||
Struct.new(
|
||||
:http_entries_inserted,
|
||||
:blob_entries_inserted,
|
||||
:header_entries_inserted,
|
||||
:bytes_stored,
|
||||
:bytes_length
|
||||
) do
|
||||
def initialize
|
||||
self.http_entries_inserted = 0
|
||||
self.blob_entries_inserted = 0
|
||||
self.header_entries_inserted = 0
|
||||
self.bytes_stored = 0
|
||||
self.bytes_length = 0
|
||||
end
|
||||
|
||||
def finish(key)
|
||||
entry = self.totals[key]
|
||||
raise("#{key} does not exist") unless entry
|
||||
started = entry[:started]
|
||||
entry[:started] = nil
|
||||
raise("#{key} not started") unless started
|
||||
entry[:secs] += Time.now - started
|
||||
end
|
||||
def merge!(other)
|
||||
self.http_entries_inserted += other.http_entries_inserted
|
||||
self.blob_entries_inserted += other.blob_entries_inserted
|
||||
self.header_entries_inserted += other.header_entries_inserted
|
||||
self.bytes_stored += other.bytes_stored
|
||||
self.bytes_length += other.bytes_length
|
||||
end
|
||||
|
||||
def entries
|
||||
total_secs = Time.now - @start_at
|
||||
total_measured_secs = self.totals.values.map { |e| e[:secs] }.sum
|
||||
self.keys.map do |key|
|
||||
key_secs = self.totals[key][:secs]
|
||||
{
|
||||
key: key,
|
||||
key_secs: key_secs,
|
||||
proportion: key_secs / total_measured_secs,
|
||||
}
|
||||
end.chain([
|
||||
{
|
||||
key: :measured_total,
|
||||
key_secs: total_measured_secs,
|
||||
proportion: total_measured_secs / total_secs,
|
||||
},
|
||||
{
|
||||
key: :actual_total,
|
||||
key_secs: total_secs,
|
||||
proportion: 1.0,
|
||||
},
|
||||
])
|
||||
end
|
||||
def to_s
|
||||
ratio = self.bytes_stored.to_f / self.bytes_length
|
||||
[
|
||||
"+#{self.http_entries_inserted} requests, +#{self.blob_entries_inserted} blobs, +#{self.header_entries_inserted} headers",
|
||||
"size ratio: #{ratio.round(2)} - #{self.class.humansize(self.bytes_stored)}/#{self.class.humansize(self.bytes_length)}"
|
||||
].join("\n")
|
||||
end
|
||||
|
||||
def entry_for(key)
|
||||
self.totals[key] ||= { started: nil, secs: 0.0 }
|
||||
def self.humansize(size)
|
||||
HexUtil.humansize(size)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
InsertStats = Struct.new(
|
||||
:http_entries_inserted,
|
||||
:blob_entries_inserted,
|
||||
:header_entries_inserted,
|
||||
:bytes_stored,
|
||||
:bytes_length,
|
||||
) do
|
||||
def initialize
|
||||
self.http_entries_inserted = 0
|
||||
self.blob_entries_inserted = 0
|
||||
self.header_entries_inserted = 0
|
||||
self.bytes_stored = 0
|
||||
self.bytes_length = 0
|
||||
end
|
||||
|
||||
def merge!(other)
|
||||
self.http_entries_inserted += other.http_entries_inserted
|
||||
self.blob_entries_inserted += other.blob_entries_inserted
|
||||
self.header_entries_inserted += other.header_entries_inserted
|
||||
self.bytes_stored += other.bytes_stored
|
||||
self.bytes_length += other.bytes_length
|
||||
end
|
||||
|
||||
def to_s
|
||||
ratio = self.bytes_stored.to_f / self.bytes_length
|
||||
[
|
||||
"+#{self.http_entries_inserted} requests, +#{self.blob_entries_inserted} blobs, +#{self.header_entries_inserted} headers",
|
||||
"size ratio: #{ratio.round(2)} - #{self.class.humansize(self.bytes_stored)}/#{self.class.humansize(self.bytes_length)}",
|
||||
].join("\n")
|
||||
end
|
||||
|
||||
def self.humansize(size)
|
||||
HexUtil.humansize(size)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -6,7 +6,8 @@ module LiteTrail::ActiveRecordClassMethods
|
||||
)
|
||||
self_class = self
|
||||
|
||||
versions_table_name = if separate_versions_table.is_a?(String)
|
||||
versions_table_name =
|
||||
if separate_versions_table.is_a?(String)
|
||||
separate_versions_table
|
||||
elsif separate_versions_table == true
|
||||
self.table_name.singularize + "_versions"
|
||||
@@ -14,14 +15,16 @@ module LiteTrail::ActiveRecordClassMethods
|
||||
nil
|
||||
end
|
||||
|
||||
lite_trail_class = if versions_table_name.nil?
|
||||
lite_trail_class =
|
||||
if versions_table_name.nil?
|
||||
::LiteTrail::Version
|
||||
else
|
||||
# separate table defined, use that, no need for polymorphism
|
||||
klass = Class.new(::LiteTrail::AbstractVersion) do
|
||||
self.table_name = versions_table_name
|
||||
belongs_to :item, class_name: self_class.to_s
|
||||
end
|
||||
klass =
|
||||
Class.new(::LiteTrail::AbstractVersion) do
|
||||
self.table_name = versions_table_name
|
||||
belongs_to :item, class_name: self_class.to_s
|
||||
end
|
||||
|
||||
# "foo_bar_versions" => define "LiteTrail::PerTable::FooBarVersions"
|
||||
LiteTrail::PerTable.const_set(versions_table_name.camelize, klass)
|
||||
@@ -33,7 +36,7 @@ module LiteTrail::ActiveRecordClassMethods
|
||||
class_attribute :lite_trail_options
|
||||
self.lite_trail_options = {
|
||||
schema_version: schema_version,
|
||||
map_attribute: map_attribute,
|
||||
map_attribute: map_attribute
|
||||
}
|
||||
|
||||
if !separate_versions_table
|
||||
@@ -62,9 +65,13 @@ module LiteTrail::ActiveRecordClassMethods
|
||||
map_attribute.each do |attr_name, mapper|
|
||||
if changes[attr_name]
|
||||
# value before the update
|
||||
changes[attr_name][0] = mapper.map_to(changes[attr_name][0]) if changes[attr_name][0]
|
||||
changes[attr_name][0] = mapper.map_to(
|
||||
changes[attr_name][0]
|
||||
) if changes[attr_name][0]
|
||||
# value after the update
|
||||
changes[attr_name][1] = mapper.map_to(changes[attr_name][1]) if changes[attr_name][1]
|
||||
changes[attr_name][1] = mapper.map_to(
|
||||
changes[attr_name][1]
|
||||
) if changes[attr_name][1]
|
||||
end
|
||||
end if map_attribute
|
||||
|
||||
@@ -74,18 +81,18 @@ module LiteTrail::ActiveRecordClassMethods
|
||||
model_updated_at = Time.now
|
||||
end
|
||||
|
||||
self.versions << lite_trail_class.new({
|
||||
event: "update",
|
||||
item: self,
|
||||
schema_version: schema_version,
|
||||
diff: changes,
|
||||
created_at: model_updated_at,
|
||||
})
|
||||
self.versions << lite_trail_class.new(
|
||||
{
|
||||
event: "update",
|
||||
item: self,
|
||||
schema_version: schema_version,
|
||||
diff: changes,
|
||||
created_at: model_updated_at
|
||||
}
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
after_save do
|
||||
self.versions.filter(&:new_record?).each(&:save!)
|
||||
end
|
||||
after_save { self.versions.filter(&:new_record?).each(&:save!) }
|
||||
end
|
||||
end
|
||||
|
||||
@@ -11,6 +11,9 @@ module LiteTrail::MigrationExtensions
|
||||
t.datetime :created_at, null: false
|
||||
end
|
||||
|
||||
add_foreign_key versions_table_name, table_name, column: :item_id, validate: true
|
||||
add_foreign_key versions_table_name,
|
||||
table_name,
|
||||
column: :item_id,
|
||||
validate: true
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,19 +1,12 @@
|
||||
class Scraper::E621HttpClientConfig < Scraper::HttpClientConfig
|
||||
DEFAULT_ALLOWED_DOMAINS = [
|
||||
"e621.net",
|
||||
"*.e621.net",
|
||||
]
|
||||
DEFAULT_ALLOWED_DOMAINS = %w[e621.net *.e621.net]
|
||||
|
||||
def cookies
|
||||
[]
|
||||
end
|
||||
|
||||
def ratelimit
|
||||
[
|
||||
["static1.e621.net", 1],
|
||||
["facdn.net", 2],
|
||||
["*", 0.25],
|
||||
]
|
||||
[["static1.e621.net", 1], ["facdn.net", 2], ["*", 0.25]]
|
||||
end
|
||||
|
||||
def allowed_domains
|
||||
|
||||
@@ -3,32 +3,31 @@ class Scraper::GalleryDlClient
|
||||
|
||||
StartEvent = Struct.new(:url, :extractor)
|
||||
FinishEvent = Struct.new(:ignore)
|
||||
HttpRequestEvent = Struct.new(
|
||||
:method,
|
||||
:url,
|
||||
:kwargs,
|
||||
:requested_at,
|
||||
:request_headers,
|
||||
:response_headers,
|
||||
:response_code,
|
||||
:response_time_ms,
|
||||
:body,
|
||||
:log_entry,
|
||||
keyword_init: true,
|
||||
)
|
||||
TweetEvent = Struct.new(
|
||||
:tweet,
|
||||
:author
|
||||
)
|
||||
TweetMediaEvent = Struct.new(
|
||||
:tweet_id,
|
||||
:file_url,
|
||||
:filename,
|
||||
:media_num,
|
||||
:extension,
|
||||
:height,
|
||||
:width,
|
||||
)
|
||||
HttpRequestEvent =
|
||||
Struct.new(
|
||||
:method,
|
||||
:url,
|
||||
:kwargs,
|
||||
:requested_at,
|
||||
:request_headers,
|
||||
:response_headers,
|
||||
:response_code,
|
||||
:response_time_ms,
|
||||
:body,
|
||||
:log_entry,
|
||||
keyword_init: true
|
||||
)
|
||||
TweetEvent = Struct.new(:tweet, :author)
|
||||
TweetMediaEvent =
|
||||
Struct.new(
|
||||
:tweet_id,
|
||||
:file_url,
|
||||
:filename,
|
||||
:media_num,
|
||||
:extension,
|
||||
:height,
|
||||
:width
|
||||
)
|
||||
|
||||
def initialize(name, host)
|
||||
name || raise("no name provided")
|
||||
@@ -37,14 +36,19 @@ class Scraper::GalleryDlClient
|
||||
@performed_by = name
|
||||
@client = Ripcord::Client.new(host)
|
||||
@max_cache_size = 8
|
||||
@blob_entry_cache = Hash.new do |hash, key|
|
||||
hash[key] = LegacyImport::AdaptiveCache.new(@max_cache_size, 1.0, 0.1)
|
||||
end
|
||||
@blob_entry_cache =
|
||||
Hash.new do |hash, key|
|
||||
hash[key] = LegacyImport::AdaptiveCache.new(@max_cache_size, 1.0, 0.1)
|
||||
end
|
||||
end
|
||||
|
||||
def start_twitter_user(username, caused_by_entry: nil)
|
||||
@token = SecureRandom.uuid
|
||||
rpc = @client.call("start_user", [@token, "https://twitter.com/#{username}/tweets"])
|
||||
rpc =
|
||||
@client.call(
|
||||
"start_user",
|
||||
[@token, "https://twitter.com/#{username}/tweets"]
|
||||
)
|
||||
raise rpc_error_str(rpc) unless rpc.successful?
|
||||
decode_message(rpc.result, caused_by_entry)
|
||||
end
|
||||
@@ -66,25 +70,29 @@ class Scraper::GalleryDlClient
|
||||
raise("token mismatch: #{token} != #{@token}") if token != @token
|
||||
|
||||
case response[:event]
|
||||
when "start" then StartEvent.new(response[:url], response[:extractor])
|
||||
when "finish" then FinishEvent.new(nil)
|
||||
when "start"
|
||||
StartEvent.new(response[:url], response[:extractor])
|
||||
when "finish"
|
||||
FinishEvent.new(nil)
|
||||
when "http_request"
|
||||
http_request = response[:http_request]
|
||||
event = HttpRequestEvent.new(
|
||||
method: http_request[:method],
|
||||
url: http_request[:url],
|
||||
kwargs: http_request[:kwargs],
|
||||
requested_at: Time.at(http_request[:requested_at]),
|
||||
request_headers: http_request[:kwargs][:headers],
|
||||
response_headers: http_request[:response_headers],
|
||||
response_code: http_request[:status_code],
|
||||
response_time_ms: (http_request[:duration] * 1000).to_i,
|
||||
body: Base64.decode64(http_request[:content_base64]),
|
||||
log_entry: nil,
|
||||
)
|
||||
event =
|
||||
HttpRequestEvent.new(
|
||||
method: http_request[:method],
|
||||
url: http_request[:url],
|
||||
kwargs: http_request[:kwargs],
|
||||
requested_at: Time.at(http_request[:requested_at]),
|
||||
request_headers: http_request[:kwargs][:headers],
|
||||
response_headers: http_request[:response_headers],
|
||||
response_code: http_request[:status_code],
|
||||
response_time_ms: (http_request[:duration] * 1000).to_i,
|
||||
body: Base64.decode64(http_request[:content_base64]),
|
||||
log_entry: nil
|
||||
)
|
||||
log_and_set_http_request_event(event, caused_by_entry)
|
||||
event
|
||||
when "tweet" then TweetEvent.new(response[:tweet], response[:author])
|
||||
when "tweet"
|
||||
TweetEvent.new(response[:tweet], response[:author])
|
||||
when "tweet_media"
|
||||
media = response[:media]
|
||||
TweetMediaEvent.new(
|
||||
@@ -94,7 +102,7 @@ class Scraper::GalleryDlClient
|
||||
media[:media_num],
|
||||
media[:extension],
|
||||
media[:height],
|
||||
media[:width],
|
||||
media[:width]
|
||||
)
|
||||
end
|
||||
end
|
||||
@@ -102,14 +110,15 @@ class Scraper::GalleryDlClient
|
||||
def log_and_set_http_request_event(http_event, caused_by_entry)
|
||||
request_headers = http_event.request_headers
|
||||
response_headers = http_event.response_headers
|
||||
content_type = response_headers[:"Content-Type"] ||
|
||||
response_headers[:"content-type"] ||
|
||||
raise("no content type provided: #{response_headers}")
|
||||
content_type =
|
||||
response_headers[:"Content-Type"] || response_headers[:"content-type"] ||
|
||||
raise("no content type provided: #{response_headers}")
|
||||
|
||||
url = http_event.url
|
||||
uri = Addressable::URI.parse(url)
|
||||
|
||||
if http_event.kwargs && http_event.kwargs[:params] && http_event.kwargs[:params][:variables]
|
||||
if http_event.kwargs && http_event.kwargs[:params] &&
|
||||
http_event.kwargs[:params][:variables]
|
||||
uri.query = JSON.parse(http_event.kwargs[:params][:variables]).to_query
|
||||
end
|
||||
|
||||
@@ -117,38 +126,47 @@ class Scraper::GalleryDlClient
|
||||
|
||||
cache_key = "#{uri.host}|#{content_type}"
|
||||
blob_entry_cache = @blob_entry_cache[cache_key]
|
||||
candidates = if blob_entry_cache.at_capacity? && rand(0..100) >= 5
|
||||
candidates =
|
||||
if blob_entry_cache.at_capacity? && rand(0..100) >= 5
|
||||
blob_entry_cache.candidates
|
||||
else
|
||||
[]
|
||||
end
|
||||
candidates << caused_by_entry.response if caused_by_entry&.response
|
||||
candidates << caused_by_entry.response.base if caused_by_entry&.response&.base
|
||||
if caused_by_entry&.response&.base
|
||||
candidates << caused_by_entry.response.base
|
||||
end
|
||||
|
||||
retries = 0
|
||||
begin
|
||||
response_blob_entry = BlobEntryP.find_or_build(
|
||||
content_type: content_type,
|
||||
contents: http_event.body,
|
||||
candidates: candidates,
|
||||
)
|
||||
response_blob_entry =
|
||||
BlobEntryP.find_or_build(
|
||||
content_type: content_type,
|
||||
contents: http_event.body,
|
||||
candidates: candidates
|
||||
)
|
||||
|
||||
log_entry = HttpLogEntry.new({
|
||||
uri: url,
|
||||
verb: http_event.method.downcase,
|
||||
content_type: content_type,
|
||||
status_code: http_event.response_code,
|
||||
request_headers: HttpLogEntryHeader.find_or_build(headers: request_headers),
|
||||
response_headers: HttpLogEntryHeader.find_or_build(headers: response_headers),
|
||||
response: response_blob_entry,
|
||||
response_time_ms: http_event.response_time_ms,
|
||||
requested_at: http_event.requested_at,
|
||||
caused_by_entry: caused_by_entry,
|
||||
performed_by: @performed_by,
|
||||
})
|
||||
log_entry =
|
||||
HttpLogEntry.new(
|
||||
{
|
||||
uri: url,
|
||||
verb: http_event.method.downcase,
|
||||
content_type: content_type,
|
||||
status_code: http_event.response_code,
|
||||
request_headers:
|
||||
HttpLogEntryHeader.find_or_build(headers: request_headers),
|
||||
response_headers:
|
||||
HttpLogEntryHeader.find_or_build(headers: response_headers),
|
||||
response: response_blob_entry,
|
||||
response_time_ms: http_event.response_time_ms,
|
||||
requested_at: http_event.requested_at,
|
||||
caused_by_entry: caused_by_entry,
|
||||
performed_by: @performed_by
|
||||
}
|
||||
)
|
||||
|
||||
log_entry.save!
|
||||
rescue
|
||||
rescue StandardError
|
||||
retries += 1
|
||||
retry if retries < 2
|
||||
raise
|
||||
@@ -158,10 +176,14 @@ class Scraper::GalleryDlClient
|
||||
http_event.log_entry = log_entry
|
||||
|
||||
if response_blob_entry.base_sha256
|
||||
blob_entry_cache.reward(HexUtil.bin2hex(response_blob_entry.base_sha256)[0..8])
|
||||
blob_entry_cache.reward(
|
||||
HexUtil.bin2hex(response_blob_entry.base_sha256)[0..8]
|
||||
)
|
||||
else
|
||||
blob_entry_cache.insert(
|
||||
HexUtil.bin2hex(response_blob_entry.sha256)[0..8], response_blob_entry, url
|
||||
HexUtil.bin2hex(response_blob_entry.sha256)[0..8],
|
||||
response_blob_entry,
|
||||
url
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,13 +1,11 @@
|
||||
class Scraper::InkbunnyHttpClientConfig < Scraper::HttpClientConfig
|
||||
DEFAULT_ALLOWED_DOMAINS = [
|
||||
"inkbunny.net",
|
||||
"*.ib.metapix.net",
|
||||
]
|
||||
DEFAULT_ALLOWED_DOMAINS = %w[inkbunny.net *.ib.metapix.net]
|
||||
|
||||
def do_login(performer)
|
||||
sid_model = Domain::Inkbunny::GlobalState.find_or_create_by(
|
||||
key: "#{performer.name}-sid",
|
||||
)
|
||||
sid_model =
|
||||
Domain::Inkbunny::GlobalState.find_or_create_by(
|
||||
key: "#{performer.name}-sid"
|
||||
)
|
||||
sid_model.with_lock do
|
||||
if sid_model.value.blank?
|
||||
sid_model.value = do_ib_login(performer)
|
||||
@@ -28,13 +26,12 @@ class Scraper::InkbunnyHttpClientConfig < Scraper::HttpClientConfig
|
||||
def scrub_stored_uri(uri)
|
||||
if uri.path == "/api_login.php"
|
||||
uri = uri.dup
|
||||
new_query_ar = URI.decode_www_form(uri.query || "").map do |k, v|
|
||||
if ["username", "password"].include?(k)
|
||||
[k, "*****"]
|
||||
else
|
||||
[k, v]
|
||||
end
|
||||
end
|
||||
new_query_ar =
|
||||
URI
|
||||
.decode_www_form(uri.query || "")
|
||||
.map do |k, v|
|
||||
%w[username password].include?(k) ? [k, "*****"] : [k, v]
|
||||
end
|
||||
uri.query = URI.encode_www_form(new_query_ar)
|
||||
end
|
||||
uri
|
||||
@@ -44,10 +41,7 @@ class Scraper::InkbunnyHttpClientConfig < Scraper::HttpClientConfig
|
||||
end
|
||||
|
||||
def ratelimit
|
||||
[
|
||||
["inkbunny.net", 2],
|
||||
["*.ib.metapix.net", 1],
|
||||
]
|
||||
[["inkbunny.net", 2], ["*.ib.metapix.net", 1]]
|
||||
end
|
||||
|
||||
def allowed_domains
|
||||
@@ -61,23 +55,29 @@ class Scraper::InkbunnyHttpClientConfig < Scraper::HttpClientConfig
|
||||
private
|
||||
|
||||
def do_ib_login(performer)
|
||||
username = Domain::Inkbunny::GlobalState.find_by(key: "#{performer.name}-username")&.value || raise(
|
||||
"missing inkbunny username in global state"
|
||||
)
|
||||
password = Domain::Inkbunny::GlobalState.find_by(key: "#{performer.name}-password")&.value || raise(
|
||||
"missing inkbunny password in global state"
|
||||
)
|
||||
uri = "https://inkbunny.net/api_login.php?username=#{username}&password=#{password}"
|
||||
username =
|
||||
Domain::Inkbunny::GlobalState.find_by(
|
||||
key: "#{performer.name}-username"
|
||||
)&.value || raise("missing inkbunny username in global state")
|
||||
password =
|
||||
Domain::Inkbunny::GlobalState.find_by(
|
||||
key: "#{performer.name}-password"
|
||||
)&.value || raise("missing inkbunny password in global state")
|
||||
uri =
|
||||
"https://inkbunny.net/api_login.php?username=#{username}&password=#{password}"
|
||||
logger.info("logging in to inkbunny as #{username}...")
|
||||
response = performer.do_request(:post, uri, {})
|
||||
if response.response_code != 200
|
||||
message = "inkbunny login failed: #{response.response_code} #{response.body}"
|
||||
message =
|
||||
"inkbunny login failed: #{response.response_code} #{response.body}"
|
||||
logger.error(message)
|
||||
raise message
|
||||
end
|
||||
sid = JSON.parse(response.body)["sid"] || raise(
|
||||
"inkbunny login failed: no sid in response (#{response.body[0..1000]})"
|
||||
)
|
||||
sid =
|
||||
JSON.parse(response.body)["sid"] ||
|
||||
raise(
|
||||
"inkbunny login failed: no sid in response (#{response.body[0..1000]})"
|
||||
)
|
||||
logger.info("logged in to inkbunny as #{username}: #{sid}")
|
||||
sid
|
||||
end
|
||||
|
||||
@@ -3,68 +3,64 @@ class Scraper::LinkFinder
|
||||
|
||||
def initialize(origin_host, document)
|
||||
@origin_host = origin_host
|
||||
@document = document.encode("UTF-8", :invalid => :replace, :undef => :replace)
|
||||
@document = document.encode("UTF-8", invalid: :replace, undef: :replace)
|
||||
end
|
||||
|
||||
RAW_REGEXES = [
|
||||
URI.regexp,
|
||||
/(?:(?:https?:\/\/)?(?:[-\w\.]+)\.\w+(?:\/(?:[\w\-\/_\.…]*(?:\?\S+)?)?))/,
|
||||
%r{(?:(?:https?://)?(?:[-\w\.]+)\.\w+(?:/(?:[\w\-/_\.…]*(?:\?\S+)?)?))}
|
||||
]
|
||||
|
||||
def find_links
|
||||
from_text_uris = RAW_REGEXES.map do |regex|
|
||||
@document.scan(regex)
|
||||
end.flatten.reject(&:blank?).reject do |str|
|
||||
str.include?("…")
|
||||
end.map do |str|
|
||||
uri = Addressable::URI.heuristic_parse(str)
|
||||
if uri.host
|
||||
uri
|
||||
else
|
||||
nil
|
||||
end
|
||||
rescue
|
||||
logger.warn("invalid parsed uri detected - #{str}")
|
||||
nil
|
||||
end.reject(&:blank?)
|
||||
from_text_uris =
|
||||
RAW_REGEXES
|
||||
.map { |regex| @document.scan(regex) }
|
||||
.flatten
|
||||
.reject(&:blank?)
|
||||
.reject { |str| str.include?("…") }
|
||||
.map do |str|
|
||||
uri = Addressable::URI.heuristic_parse(str)
|
||||
uri.host ? uri : nil
|
||||
rescue StandardError
|
||||
logger.warn("invalid parsed uri detected - #{str}")
|
||||
nil
|
||||
end
|
||||
.reject(&:blank?)
|
||||
|
||||
parsed_uris = Nokogiri::HTML(@document).css("a").map do |link|
|
||||
uri = Addressable::URI.parse(link[:href])
|
||||
uri.host ||= @origin_host
|
||||
uri
|
||||
rescue
|
||||
logger.warn("invalid parsed uri detected - #{link[:href]}")
|
||||
nil
|
||||
end.reject(&:blank?)
|
||||
parsed_uris =
|
||||
Nokogiri
|
||||
.HTML(@document)
|
||||
.css("a")
|
||||
.map do |link|
|
||||
uri = Addressable::URI.parse(link[:href])
|
||||
uri.host ||= @origin_host
|
||||
uri
|
||||
rescue StandardError
|
||||
logger.warn("invalid parsed uri detected - #{link[:href]}")
|
||||
nil
|
||||
end
|
||||
.reject(&:blank?)
|
||||
|
||||
all_uris = from_text_uris + parsed_uris
|
||||
all_uris.map do |uri|
|
||||
if uri.host.end_with?("furaffinity.net")
|
||||
process_fa(uri)
|
||||
elsif uri.host.end_with?("e621.net")
|
||||
process_e621(uri)
|
||||
elsif uri.host.end_with?("twitter.com")
|
||||
process_twitter(uri)
|
||||
all_uris
|
||||
.map do |uri|
|
||||
if uri.host.end_with?("furaffinity.net")
|
||||
process_fa(uri)
|
||||
elsif uri.host.end_with?("e621.net")
|
||||
process_e621(uri)
|
||||
elsif uri.host.end_with?("twitter.com")
|
||||
process_twitter(uri)
|
||||
end
|
||||
end
|
||||
end.
|
||||
reject(&:nil?).
|
||||
uniq
|
||||
.reject(&:nil?)
|
||||
.uniq
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
FA_USER_PREFIXES = [
|
||||
"/user/",
|
||||
"/gallery/",
|
||||
"/scraps/",
|
||||
"/journals/",
|
||||
"/favorites/",
|
||||
]
|
||||
FA_USER_PREFIXES = %w[/user/ /gallery/ /scraps/ /journals/ /favorites/]
|
||||
|
||||
FA_POST_PREFIXES = [
|
||||
"/view/",
|
||||
"/full/",
|
||||
]
|
||||
FA_POST_PREFIXES = %w[/view/ /full/]
|
||||
|
||||
def process_fa(uri)
|
||||
if prefixed_with?(FA_USER_PREFIXES, uri.path)
|
||||
@@ -80,21 +76,21 @@ class Scraper::LinkFinder
|
||||
nil
|
||||
end
|
||||
|
||||
TWITTER_IGNORE_FIRST_PATH_PART = [
|
||||
"messages",
|
||||
"explore",
|
||||
"home",
|
||||
"i",
|
||||
"notifications",
|
||||
"privacy",
|
||||
"search",
|
||||
"tos",
|
||||
TWITTER_IGNORE_FIRST_PATH_PART = %w[
|
||||
messages
|
||||
explore
|
||||
home
|
||||
i
|
||||
notifications
|
||||
privacy
|
||||
search
|
||||
tos
|
||||
]
|
||||
|
||||
TWITTER_IGNORE_HOSTS = [
|
||||
"business.twitter.com",
|
||||
"help.twitter.com",
|
||||
"support.twitter.com",
|
||||
TWITTER_IGNORE_HOSTS = %w[
|
||||
business.twitter.com
|
||||
help.twitter.com
|
||||
support.twitter.com
|
||||
]
|
||||
|
||||
def process_twitter(uri)
|
||||
@@ -106,9 +102,7 @@ class Scraper::LinkFinder
|
||||
end
|
||||
|
||||
def prefixed_with?(prefixes, path)
|
||||
prefixes.any? do |prefix|
|
||||
path.start_with?(prefix)
|
||||
end
|
||||
prefixes.any? { |prefix| path.start_with?(prefix) }
|
||||
end
|
||||
|
||||
def path_parts(path)
|
||||
|
||||
@@ -1,18 +1,12 @@
|
||||
class Scraper::TwitterHttpClientConfig < Scraper::HttpClientConfig
|
||||
DEFAULT_ALLOWED_DOMAINS = [
|
||||
"*.twimg.com",
|
||||
"ipinfo.io",
|
||||
]
|
||||
DEFAULT_ALLOWED_DOMAINS = %w[*.twimg.com ipinfo.io]
|
||||
|
||||
def cookies
|
||||
[]
|
||||
end
|
||||
|
||||
def ratelimit
|
||||
[
|
||||
["*.twimg.com", 0.5],
|
||||
["*", 1],
|
||||
]
|
||||
[["*.twimg.com", 0.5], ["*", 1]]
|
||||
end
|
||||
|
||||
def allowed_domains
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
class VpnOnlyRouteConstraint
|
||||
def matches?(request)
|
||||
if request.params[:force_vpn_off] == '1'
|
||||
if request.params[:force_vpn_off] == "1"
|
||||
false
|
||||
elsif Rails.env.test?
|
||||
true
|
||||
elsif Rails.env.development? || Rails.env.staging?
|
||||
request.ip == '127.0.0.1' || request.ip == '::1'
|
||||
request.ip == "127.0.0.1" || request.ip == "::1"
|
||||
elsif Rails.env.production?
|
||||
# curtus IP on vpn
|
||||
request.ip == '10.200.0.3'
|
||||
request.ip == "10.200.0.3"
|
||||
else
|
||||
false
|
||||
end
|
||||
|
||||
@@ -5,12 +5,15 @@ class BlobEntryP < ReduxApplicationRecord
|
||||
before_destroy { raise ActiveRecord::ReadOnlyRecord }
|
||||
|
||||
self.primary_key = :sha256
|
||||
EMPTY_FILE_SHA256 = HexUtil.hex2bin("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855")
|
||||
EMPTY_FILE_SHA256 =
|
||||
HexUtil.hex2bin(
|
||||
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
|
||||
)
|
||||
|
||||
belongs_to :base,
|
||||
optional: true,
|
||||
foreign_key: :base_sha256,
|
||||
class_name: "::BlobEntryP"
|
||||
optional: true,
|
||||
foreign_key: :base_sha256,
|
||||
class_name: "::BlobEntryP"
|
||||
|
||||
after_create do
|
||||
actual_sha256 = Digest::SHA256.digest(contents)
|
||||
@@ -21,17 +24,19 @@ class BlobEntryP < ReduxApplicationRecord
|
||||
@base_model ||= super || self.class.ensure(base_sha256) if base_sha256
|
||||
end
|
||||
|
||||
validates_presence_of(
|
||||
:sha256,
|
||||
:content_type,
|
||||
:size
|
||||
)
|
||||
validates :contents, length: { minimum: 0, allow_nil: false, message: "can't be nil" }
|
||||
validates_presence_of(:sha256, :content_type, :size)
|
||||
validates :contents,
|
||||
length: {
|
||||
minimum: 0,
|
||||
allow_nil: false,
|
||||
message: "can't be nil"
|
||||
}
|
||||
validates :sha256, length: { is: 32 }
|
||||
validates :base_sha256, length: { is: 32 }, if: :base_sha256
|
||||
|
||||
def self.ensure(sha256)
|
||||
find_by(sha256: sha256) || raise("blob #{HexUtil.bin2hex(sha256)} does not exist")
|
||||
find_by(sha256: sha256) ||
|
||||
raise("blob #{HexUtil.bin2hex(sha256)} does not exist")
|
||||
end
|
||||
|
||||
def sha256_hex
|
||||
@@ -39,13 +44,10 @@ class BlobEntryP < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
def contents
|
||||
@contents ||= begin
|
||||
@contents ||=
|
||||
begin
|
||||
contents_raw = self.read_attribute(:contents)
|
||||
if self.base
|
||||
XDiff.patch(self.base.contents, contents_raw)
|
||||
else
|
||||
contents_raw
|
||||
end
|
||||
self.base ? XDiff.patch(self.base.contents, contents_raw) : contents_raw
|
||||
end
|
||||
end
|
||||
|
||||
@@ -55,47 +57,48 @@ class BlobEntryP < ReduxApplicationRecord
|
||||
|
||||
def self.find_or_build(content_type:, contents:, candidates: [])
|
||||
sha256 = Digest::SHA256.digest(contents)
|
||||
BlobEntryP.find_by(sha256: sha256) || begin
|
||||
build_record(
|
||||
content_type: content_type,
|
||||
sha256: sha256,
|
||||
contents: contents,
|
||||
candidates: candidates,
|
||||
)
|
||||
end
|
||||
BlobEntryP.find_by(sha256: sha256) ||
|
||||
begin
|
||||
build_record(
|
||||
content_type: content_type,
|
||||
sha256: sha256,
|
||||
contents: contents,
|
||||
candidates: candidates
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
DIFFABLE_CONTENT_TYPES = [
|
||||
/text\/html/,
|
||||
/text\/plain/,
|
||||
/application\/json/,
|
||||
]
|
||||
DIFFABLE_CONTENT_TYPES = [%r{text/html}, %r{text/plain}, %r{application/json}]
|
||||
|
||||
def self.build_record(content_type:, sha256: nil, contents:, candidates: [])
|
||||
sha256 ||= Digest::SHA256.digest(contents)
|
||||
record = self.new(sha256: sha256, content_type: content_type, size: contents.size)
|
||||
record =
|
||||
self.new(sha256: sha256, content_type: content_type, size: contents.size)
|
||||
|
||||
smallest_patch_size = nil
|
||||
smallest_patch = nil
|
||||
smallest_candidate = nil
|
||||
|
||||
candidates.map do |candidate|
|
||||
# only consider candidates with the same content type (may relax this later)
|
||||
next nil if candidate.content_type != content_type
|
||||
# only consider candidates who themselves aren't patch-based
|
||||
next nil unless candidate.base.nil?
|
||||
# only consider diffable content types
|
||||
next nil unless DIFFABLE_CONTENT_TYPES.any? { |ct| content_type =~ ct }
|
||||
candidates
|
||||
.map do |candidate|
|
||||
# only consider candidates with the same content type (may relax this later)
|
||||
next nil if candidate.content_type != content_type
|
||||
# only consider candidates who themselves aren't patch-based
|
||||
next nil unless candidate.base.nil?
|
||||
# only consider diffable content types
|
||||
next nil unless DIFFABLE_CONTENT_TYPES.any? { |ct| content_type =~ ct }
|
||||
|
||||
[candidate, XDiff.diff(candidate.contents, contents)]
|
||||
end.reject(&:nil?).each do |pair|
|
||||
candidate, patch = pair
|
||||
if smallest_patch_size.nil? || patch.size < smallest_patch_size
|
||||
smallest_patch_size = patch.size
|
||||
smallest_patch = patch
|
||||
smallest_candidate = candidate
|
||||
[candidate, XDiff.diff(candidate.contents, contents)]
|
||||
end
|
||||
.reject(&:nil?)
|
||||
.each do |pair|
|
||||
candidate, patch = pair
|
||||
if smallest_patch_size.nil? || patch.size < smallest_patch_size
|
||||
smallest_patch_size = patch.size
|
||||
smallest_patch = patch
|
||||
smallest_candidate = candidate
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# only use a patch if it's <= 60% the original content size
|
||||
if smallest_patch_size && smallest_patch_size <= (contents.size * 0.6)
|
||||
@@ -106,9 +109,7 @@ class BlobEntryP < ReduxApplicationRecord
|
||||
record.contents = contents
|
||||
end
|
||||
|
||||
if record.contents != contents
|
||||
raise RuntimeError.new("invariant!")
|
||||
end
|
||||
raise RuntimeError.new("invariant!") if record.contents != contents
|
||||
|
||||
record
|
||||
end
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
module ImmutableModel
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
included do
|
||||
before_update { raise ActiveRecord::ReadOnlyRecord }
|
||||
end
|
||||
included { before_update { raise ActiveRecord::ReadOnlyRecord } }
|
||||
end
|
||||
|
||||
@@ -10,10 +10,7 @@ class Domain::E621::Post < ReduxApplicationRecord
|
||||
enum state: %i[ok scan_error file_error]
|
||||
enum rating: %i[s q e]
|
||||
|
||||
validates_presence_of(
|
||||
:e621_id,
|
||||
:state,
|
||||
)
|
||||
validates_presence_of(:e621_id, :state)
|
||||
after_initialize do
|
||||
self.state ||= :ok
|
||||
self.state_detail ||= {}
|
||||
@@ -24,12 +21,9 @@ class Domain::E621::Post < ReduxApplicationRecord
|
||||
self.tags_array ||= []
|
||||
end
|
||||
|
||||
has_many :taggings,
|
||||
class_name: "Domain::E621::Tagging"
|
||||
has_many :taggings, class_name: "Domain::E621::Tagging"
|
||||
|
||||
has_many :tags,
|
||||
class_name: "Domain::E621::Tag",
|
||||
through: :taggings
|
||||
has_many :tags, class_name: "Domain::E621::Tag", through: :taggings
|
||||
|
||||
# If the file was scraped, this is the blob entry that represents it
|
||||
belongs_to :file,
|
||||
@@ -42,67 +36,67 @@ class Domain::E621::Post < ReduxApplicationRecord
|
||||
foreign_key: :e621_id,
|
||||
optional: true
|
||||
|
||||
SKIP_MISMATCH_LEGACY_IDS = Set.new([
|
||||
836414,
|
||||
1070178,
|
||||
])
|
||||
SKIP_MISMATCH_LEGACY_IDS = Set.new([836_414, 1_070_178])
|
||||
|
||||
def self.find_or_build_from_legacy(legacy_model)
|
||||
model = self.find_by(e621_id: legacy_model.e621_id)
|
||||
return model if model
|
||||
model = self.new({
|
||||
state: :ok,
|
||||
file_url_str: legacy_model.file_url,
|
||||
rating: legacy_model.rating,
|
||||
sources_array: legacy_model.sources,
|
||||
tags_array: legacy_model.tags.map(&:value),
|
||||
artists_array: legacy_model.artists || [],
|
||||
})
|
||||
model =
|
||||
self.new(
|
||||
{
|
||||
state: :ok,
|
||||
file_url_str: legacy_model.file_url,
|
||||
rating: legacy_model.rating,
|
||||
sources_array: legacy_model.sources,
|
||||
tags_array: legacy_model.tags.map(&:value),
|
||||
artists_array: legacy_model.artists || []
|
||||
}
|
||||
)
|
||||
|
||||
if legacy_model.e621_status != "active"
|
||||
model.flags_array << legacy_model.e621_status
|
||||
model.flags_array.uniq!
|
||||
end
|
||||
|
||||
[
|
||||
:e621_id,
|
||||
:md5,
|
||||
:description,
|
||||
:score,
|
||||
:created_at,
|
||||
].each do |attr|
|
||||
%i[e621_id md5 description score created_at].each do |attr|
|
||||
model.send(:"#{attr}=", legacy_model.send(attr))
|
||||
end
|
||||
|
||||
http_log_entries = ::HttpLogEntry.where(
|
||||
uri_host: model.file_uri.host,
|
||||
uri_path: model.file_uri.path,
|
||||
)
|
||||
http_log_entries =
|
||||
::HttpLogEntry.where(
|
||||
uri_host: model.file_uri.host,
|
||||
uri_path: model.file_uri.path
|
||||
)
|
||||
http_log_entry = http_log_entries.first
|
||||
|
||||
if !http_log_entry && legacy_model.blob_entry
|
||||
legacy_hles = ::Legacy::HttpLogEntry.where(
|
||||
host: model.file_uri.host,
|
||||
path: model.file_uri.path,
|
||||
)
|
||||
legacy_hles =
|
||||
::Legacy::HttpLogEntry.where(
|
||||
host: model.file_uri.host,
|
||||
path: model.file_uri.path
|
||||
)
|
||||
legacy_hle = legacy_hles.first
|
||||
|
||||
if legacy_hle
|
||||
http_log_entry = ::HttpLogEntry.build_from_legacy(legacy_hle)
|
||||
else
|
||||
http_log_entry = ::HttpLogEntry.new({
|
||||
uri: model.file_uri || raise,
|
||||
status_code: 200,
|
||||
verb: "get",
|
||||
response_time_ms: -1,
|
||||
requested_at: Time.now,
|
||||
request_headers: ::HttpLogEntryHeader.empty,
|
||||
response_headers: ::HttpLogEntryHeader.empty,
|
||||
performed_by: "legacy",
|
||||
})
|
||||
http_log_entry =
|
||||
::HttpLogEntry.new(
|
||||
{
|
||||
uri: model.file_uri || raise,
|
||||
status_code: 200,
|
||||
verb: "get",
|
||||
response_time_ms: -1,
|
||||
requested_at: Time.now,
|
||||
request_headers: ::HttpLogEntryHeader.empty,
|
||||
response_headers: ::HttpLogEntryHeader.empty,
|
||||
performed_by: "legacy"
|
||||
}
|
||||
)
|
||||
end
|
||||
|
||||
http_log_entry.response ||= ::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
|
||||
http_log_entry.response ||=
|
||||
::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
|
||||
blob_entry = http_log_entry.response
|
||||
|
||||
if blob_entry && http_log_entry
|
||||
|
||||
@@ -1,19 +1,17 @@
|
||||
class Domain::E621::Tagging < ReduxApplicationRecord
|
||||
self.table_name = "domain_e621_taggings"
|
||||
belongs_to :post,
|
||||
class_name: "Domain::E621::Post"
|
||||
belongs_to :tag,
|
||||
class_name: "Domain::E621::Tag"
|
||||
belongs_to :post, class_name: "Domain::E621::Post"
|
||||
belongs_to :tag, class_name: "Domain::E621::Tag"
|
||||
|
||||
enum category: [
|
||||
:cat_general,
|
||||
:cat_artist,
|
||||
:cat_copyright,
|
||||
:cat_character,
|
||||
:cat_species,
|
||||
:cat_invalid,
|
||||
:cat_meta,
|
||||
:cat_lore,
|
||||
]
|
||||
enum category: %i[
|
||||
cat_general
|
||||
cat_artist
|
||||
cat_copyright
|
||||
cat_character
|
||||
cat_species
|
||||
cat_invalid
|
||||
cat_meta
|
||||
cat_lore
|
||||
]
|
||||
validates_inclusion_of(:category, in: self.categories.keys)
|
||||
end
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
class Domain::Fa::Fav < ReduxApplicationRecord
|
||||
self.table_name = "domain_fa_favs"
|
||||
|
||||
belongs_to :user,
|
||||
class_name: "::Domain::Fa::User"
|
||||
belongs_to :post,
|
||||
class_name: "::Domain::Fa::Post"
|
||||
belongs_to :user, class_name: "::Domain::Fa::User"
|
||||
belongs_to :post, class_name: "::Domain::Fa::Post"
|
||||
end
|
||||
|
||||
@@ -2,8 +2,6 @@ class Domain::Fa::Follow < ReduxApplicationRecord
|
||||
self.table_name = "domain_fa_follows"
|
||||
self.primary_keys = :follower_id, :followed_id
|
||||
|
||||
belongs_to :follower,
|
||||
class_name: "::Domain::Fa::User"
|
||||
belongs_to :followed,
|
||||
class_name: "::Domain::Fa::User"
|
||||
belongs_to :follower, class_name: "::Domain::Fa::User"
|
||||
belongs_to :followed, class_name: "::Domain::Fa::User"
|
||||
end
|
||||
|
||||
@@ -4,16 +4,17 @@ class Domain::Fa::Post < ReduxApplicationRecord
|
||||
has_lite_trail(
|
||||
schema_version: 1,
|
||||
map_attribute: {
|
||||
file_sha256: ::Sha256AttributeMapper,
|
||||
},
|
||||
file_sha256: ::Sha256AttributeMapper
|
||||
}
|
||||
)
|
||||
|
||||
enum :state, [
|
||||
:ok, # so far so good, post may not yet be scanned or have file downloaded
|
||||
:removed, # post has been removed
|
||||
:scan_error, # error scanning post page (see state_detail)
|
||||
:file_error, # error downloading post file (see state_detail)
|
||||
]
|
||||
enum :state,
|
||||
[
|
||||
:ok, # so far so good, post may not yet be scanned or have file downloaded
|
||||
:removed, # post has been removed
|
||||
:scan_error, # error scanning post page (see state_detail)
|
||||
:file_error # error downloading post file (see state_detail)
|
||||
]
|
||||
validates_inclusion_of(:state, in: self.states.keys)
|
||||
after_initialize do
|
||||
self.state_detail ||= {}
|
||||
@@ -27,20 +28,18 @@ class Domain::Fa::Post < ReduxApplicationRecord
|
||||
validates_presence_of(:fa_id, :state)
|
||||
|
||||
belongs_to :creator,
|
||||
class_name: "::Domain::Fa::User",
|
||||
inverse_of: :posts,
|
||||
optional: true,
|
||||
autosave: true
|
||||
class_name: "::Domain::Fa::User",
|
||||
inverse_of: :posts,
|
||||
optional: true,
|
||||
autosave: true
|
||||
|
||||
# If the file was scraped, this is the blob entry that represents it
|
||||
belongs_to :file,
|
||||
class_name: "::HttpLogEntry",
|
||||
optional: :true,
|
||||
autosave: true
|
||||
class_name: "::HttpLogEntry",
|
||||
optional: :true,
|
||||
autosave: true
|
||||
|
||||
has_many :fav_post_joins,
|
||||
class_name: "::Domain::Fa::Fav",
|
||||
inverse_of: :post
|
||||
has_many :fav_post_joins, class_name: "::Domain::Fa::Fav", inverse_of: :post
|
||||
|
||||
has_many :faved_by,
|
||||
class_name: "::Domain::Fa::User",
|
||||
@@ -95,22 +94,23 @@ class Domain::Fa::Post < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
def scanned_at=(time)
|
||||
unless time.is_a?(Time)
|
||||
raise ArgumentError("time must be Time, was #{time.class}")
|
||||
end unless time.nil?
|
||||
unless time.nil?
|
||||
unless time.is_a?(Time)
|
||||
raise ArgumentError("time must be Time, was #{time.class}")
|
||||
end
|
||||
end
|
||||
self.state_detail["scanned_at"] = time&.to_i
|
||||
end
|
||||
|
||||
def posted_at
|
||||
pa = super
|
||||
return pa if pa
|
||||
@posted_at ||= begin
|
||||
@posted_at ||=
|
||||
begin
|
||||
contents = guess_last_submission_page&.response&.contents
|
||||
if contents
|
||||
parser = Domain::Fa::Parser::Page.new(contents)
|
||||
if parser.probably_submission?
|
||||
parser.submission.posted_date
|
||||
end
|
||||
parser.submission.posted_date if parser.probably_submission?
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -124,26 +124,32 @@ class Domain::Fa::Post < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
def guess_last_submission_page
|
||||
last_submission_page || begin
|
||||
HttpLogEntry.where(
|
||||
uri_host: "www.furaffinity.net",
|
||||
uri_path: ["/view/#{self.fa_id}/", "/view/#{self.fa_id}"],
|
||||
uri_query: nil,
|
||||
status_code: 200,
|
||||
).order(created_at: :desc).first
|
||||
end
|
||||
last_submission_page ||
|
||||
begin
|
||||
HttpLogEntry
|
||||
.where(
|
||||
uri_host: "www.furaffinity.net",
|
||||
uri_path: ["/view/#{self.fa_id}/", "/view/#{self.fa_id}"],
|
||||
uri_query: nil,
|
||||
status_code: 200
|
||||
)
|
||||
.order(created_at: :desc)
|
||||
.first
|
||||
end
|
||||
end
|
||||
|
||||
def have_file?
|
||||
self.file_id.present?
|
||||
end
|
||||
|
||||
def self.hash_from_submission_parser_helper(submission, first_seen_log_entry: nil)
|
||||
creator = Domain::Fa::User.find_or_create_by({
|
||||
url_name: submission.artist_url_name,
|
||||
}) do |user|
|
||||
user.name = submission.artist
|
||||
end
|
||||
def self.hash_from_submission_parser_helper(
|
||||
submission,
|
||||
first_seen_log_entry: nil
|
||||
)
|
||||
creator =
|
||||
Domain::Fa::User.find_or_create_by(
|
||||
{ url_name: submission.artist_url_name }
|
||||
) { |user| user.name = submission.artist }
|
||||
|
||||
{
|
||||
fa_id: submission.id,
|
||||
@@ -151,8 +157,8 @@ class Domain::Fa::Post < ReduxApplicationRecord
|
||||
title: submission.title,
|
||||
state_detail: {
|
||||
"first_seen_entry" => first_seen_log_entry&.id,
|
||||
"thumbnail_url_str" => submission.thumb_path,
|
||||
},
|
||||
"thumbnail_url_str" => submission.thumb_path
|
||||
}
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
@@ -4,15 +4,16 @@ class Domain::Fa::UserAvatar < ReduxApplicationRecord
|
||||
schema_version: 1,
|
||||
separate_versions_table: true,
|
||||
map_attribute: {
|
||||
file_sha256: ::Sha256AttributeMapper,
|
||||
},
|
||||
file_sha256: ::Sha256AttributeMapper
|
||||
}
|
||||
)
|
||||
|
||||
enum :state, [
|
||||
enum :state,
|
||||
[
|
||||
:ok, # got the file, no problem
|
||||
:download_error, # other error processing the file
|
||||
:no_file_on_guessed_user_page_error,
|
||||
:file_not_found, # 404 from server
|
||||
:file_not_found # 404 from server
|
||||
]
|
||||
after_initialize do
|
||||
self.state ||= :ok
|
||||
@@ -20,16 +21,17 @@ class Domain::Fa::UserAvatar < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
belongs_to :user, class_name: "::Domain::Fa::User"
|
||||
belongs_to :file, foreign_key: :file_sha256, class_name: "::BlobEntryP", optional: true
|
||||
belongs_to :file,
|
||||
foreign_key: :file_sha256,
|
||||
class_name: "::BlobEntryP",
|
||||
optional: true
|
||||
belongs_to :log_entry, class_name: "::HttpLogEntry", optional: true
|
||||
|
||||
def file
|
||||
@file_model ||= BlobEntryP.ensure(file_sha256) if file_sha256
|
||||
end
|
||||
|
||||
before_validation do
|
||||
file_uri = Addressable::URI.parse(file_url_str)
|
||||
end
|
||||
before_validation { file_uri = Addressable::URI.parse(file_url_str) }
|
||||
|
||||
def file_uri
|
||||
Addressable::URI.parse(file_url_str) unless file_url_str.blank?
|
||||
@@ -48,25 +50,30 @@ class Domain::Fa::UserAvatar < ReduxApplicationRecord
|
||||
def guess_file_uri_from_hles_with_info
|
||||
hle = guess_user_page_log_entry
|
||||
if hle
|
||||
page = Domain::Fa::Parser::Page.new(hle.response.contents, require_logged_in: false)
|
||||
page =
|
||||
Domain::Fa::Parser::Page.new(
|
||||
hle.response.contents,
|
||||
require_logged_in: false
|
||||
)
|
||||
if page.probably_user_page? && (url = page.user_page.profile_thumb_url)
|
||||
return [:user_page, url]
|
||||
return :user_page, url
|
||||
end
|
||||
end
|
||||
|
||||
posts = user.
|
||||
posts.
|
||||
where(state: [:ok, nil]).
|
||||
where("file_url_str IS NOT NULL").
|
||||
order(created_at: :desc).
|
||||
limit(3)
|
||||
posts =
|
||||
user
|
||||
.posts
|
||||
.where(state: [:ok, nil])
|
||||
.where("file_url_str IS NOT NULL")
|
||||
.order(created_at: :desc)
|
||||
.limit(3)
|
||||
|
||||
for post in posts
|
||||
if (hle = post.guess_last_submission_page)
|
||||
page = Domain::Fa::Parser::Page.new(hle.response.contents)
|
||||
next unless page.probably_submission?
|
||||
url = page.submission.artist_avatar_url
|
||||
return [:post_page, url, post.fa_id] if url
|
||||
return :post_page, url, post.fa_id if url
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -1,18 +1,14 @@
|
||||
class Domain::Inkbunny::File < ReduxApplicationRecord
|
||||
self.table_name = "domain_inkbunny_files"
|
||||
|
||||
belongs_to :post,
|
||||
class_name: "::Domain::Inkbunny::Post",
|
||||
inverse_of: :files
|
||||
belongs_to :post, class_name: "::Domain::Inkbunny::Post", inverse_of: :files
|
||||
|
||||
belongs_to :blob_entry,
|
||||
class_name: "::BlobEntryP",
|
||||
foreign_key: :blob_entry_sha256,
|
||||
optional: true
|
||||
|
||||
belongs_to :log_entry,
|
||||
class_name: "::HttpLogEntry",
|
||||
optional: true
|
||||
belongs_to :log_entry, class_name: "::HttpLogEntry", optional: true
|
||||
|
||||
enum :state, %i[ok error]
|
||||
after_initialize do
|
||||
@@ -20,9 +16,16 @@ class Domain::Inkbunny::File < ReduxApplicationRecord
|
||||
self.state_detail ||= {}
|
||||
end
|
||||
|
||||
validates_presence_of(%i[
|
||||
ib_file_id file_name url_str
|
||||
ib_created_at file_order
|
||||
md5_initial md5_full md5s
|
||||
])
|
||||
validates_presence_of(
|
||||
%i[
|
||||
ib_file_id
|
||||
file_name
|
||||
url_str
|
||||
ib_created_at
|
||||
file_order
|
||||
md5_initial
|
||||
md5_full
|
||||
md5s
|
||||
]
|
||||
)
|
||||
end
|
||||
|
||||
@@ -5,29 +5,28 @@ class Domain::Inkbunny::Post < ReduxApplicationRecord
|
||||
class_name: "::Domain::Inkbunny::User",
|
||||
inverse_of: :posts
|
||||
|
||||
has_many :files,
|
||||
class_name: "::Domain::Inkbunny::File",
|
||||
inverse_of: :post
|
||||
has_many :files, class_name: "::Domain::Inkbunny::File", inverse_of: :post
|
||||
|
||||
enum :state, %i[ok error]
|
||||
enum :rating, %i[general mature adult]
|
||||
enum :submission_type, %i[
|
||||
unknown
|
||||
picture_pinup
|
||||
sketch
|
||||
picture_series
|
||||
comic
|
||||
portfolio
|
||||
flash_animation
|
||||
flash_interactive
|
||||
video_feature
|
||||
video_animation
|
||||
music_single
|
||||
music_album
|
||||
writing_document
|
||||
character_sheet
|
||||
photography
|
||||
]
|
||||
enum :submission_type,
|
||||
%i[
|
||||
unknown
|
||||
picture_pinup
|
||||
sketch
|
||||
picture_series
|
||||
comic
|
||||
portfolio
|
||||
flash_animation
|
||||
flash_interactive
|
||||
video_feature
|
||||
video_animation
|
||||
music_single
|
||||
music_album
|
||||
writing_document
|
||||
character_sheet
|
||||
photography
|
||||
]
|
||||
|
||||
after_initialize do
|
||||
self.state ||= :ok
|
||||
|
||||
@@ -6,9 +6,7 @@ class Domain::Twitter::Tweet < ReduxApplicationRecord
|
||||
foreign_key: :author_id,
|
||||
primary_key: :tw_id
|
||||
|
||||
has_many :medias,
|
||||
class_name: "Domain::Twitter::Media",
|
||||
foreign_key: :tweet_id
|
||||
has_many :medias, class_name: "Domain::Twitter::Media", foreign_key: :tweet_id
|
||||
|
||||
enum state: %i[ok error]
|
||||
after_initialize do
|
||||
|
||||
@@ -10,29 +10,28 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
dedipath-1
|
||||
direct-gdl
|
||||
serverhost-1
|
||||
], _prefix: true
|
||||
],
|
||||
_prefix: true
|
||||
|
||||
belongs_to :response,
|
||||
foreign_key: :response_sha256,
|
||||
class_name: "::BlobEntryP",
|
||||
autosave: true
|
||||
foreign_key: :response_sha256,
|
||||
class_name: "::BlobEntryP",
|
||||
autosave: true
|
||||
|
||||
belongs_to :request_headers,
|
||||
class_name: "::HttpLogEntryHeader"
|
||||
belongs_to :request_headers, class_name: "::HttpLogEntryHeader"
|
||||
|
||||
belongs_to :response_headers,
|
||||
class_name: "::HttpLogEntryHeader"
|
||||
belongs_to :response_headers, class_name: "::HttpLogEntryHeader"
|
||||
|
||||
validates :response_sha256, length: { is: 32 }
|
||||
|
||||
belongs_to :caused_by_entry,
|
||||
class_name: "::HttpLogEntry",
|
||||
foreign_key: :caused_by_id,
|
||||
optional: true
|
||||
class_name: "::HttpLogEntry",
|
||||
foreign_key: :caused_by_id,
|
||||
optional: true
|
||||
|
||||
has_many :triggered_entries,
|
||||
class_name: "::HttpLogEntry",
|
||||
foreign_key: :caused_by_id
|
||||
class_name: "::HttpLogEntry",
|
||||
foreign_key: :caused_by_id
|
||||
|
||||
validates_presence_of(
|
||||
:uri_scheme,
|
||||
@@ -48,25 +47,23 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
|
||||
def self.find_by_uri_host_path(uri)
|
||||
uri = Addressable::URI.parse(uri) unless uri.is_a?(Addressable::URI)
|
||||
find_by(
|
||||
uri_host: uri.host,
|
||||
uri_path: uri.path,
|
||||
)
|
||||
find_by(uri_host: uri.host, uri_path: uri.path)
|
||||
end
|
||||
|
||||
def self.build_from_legacy(legacy_model)
|
||||
response_body = legacy_model.response_body
|
||||
can_reconstruct_be =
|
||||
response_body.nil? &&
|
||||
legacy_model.parent_log_entry_id.nil? &&
|
||||
legacy_model.resp_body.present? &&
|
||||
legacy_model.blob_entry.present?
|
||||
response_body.nil? && legacy_model.parent_log_entry_id.nil? &&
|
||||
legacy_model.resp_body.present? && legacy_model.blob_entry.present?
|
||||
|
||||
if can_reconstruct_be
|
||||
blob_entry = ::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
|
||||
blob_entry =
|
||||
::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
|
||||
blob_sha256 = HexUtil.hex2bin(legacy_model.resp_body)
|
||||
unless blob_entry.sha256 == blob_sha256
|
||||
raise("mismatch for legacy http entry #{legacy_model.id} / legacy blob entry #{legacy_model.blob_entry.id}")
|
||||
raise(
|
||||
"mismatch for legacy http entry #{legacy_model.id} / legacy blob entry #{legacy_model.blob_entry.id}"
|
||||
)
|
||||
end
|
||||
else
|
||||
blob_entry = nil
|
||||
@@ -76,20 +73,24 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
uri.scheme ||= "https"
|
||||
uri.path ||= "/"
|
||||
|
||||
::HttpLogEntry.new({
|
||||
verb: legacy_model.verb,
|
||||
uri: uri,
|
||||
content_type: legacy_model.content_type,
|
||||
status_code: legacy_model.status,
|
||||
response_time_ms: legacy_model.response_time,
|
||||
request_headers: ::HttpLogEntryHeader.find_or_build(headers: legacy_model.req_headers),
|
||||
response_headers: ::HttpLogEntryHeader.find_or_build(headers: legacy_model.res_headers),
|
||||
response: blob_entry,
|
||||
requested_at: legacy_model.requested_at,
|
||||
created_at: legacy_model.created_at,
|
||||
updated_at: legacy_model.updated_at,
|
||||
performed_by: "legacy",
|
||||
})
|
||||
::HttpLogEntry.new(
|
||||
{
|
||||
verb: legacy_model.verb,
|
||||
uri: uri,
|
||||
content_type: legacy_model.content_type,
|
||||
status_code: legacy_model.status,
|
||||
response_time_ms: legacy_model.response_time,
|
||||
request_headers:
|
||||
::HttpLogEntryHeader.find_or_build(headers: legacy_model.req_headers),
|
||||
response_headers:
|
||||
::HttpLogEntryHeader.find_or_build(headers: legacy_model.res_headers),
|
||||
response: blob_entry,
|
||||
requested_at: legacy_model.requested_at,
|
||||
created_at: legacy_model.created_at,
|
||||
updated_at: legacy_model.updated_at,
|
||||
performed_by: "legacy"
|
||||
}
|
||||
)
|
||||
end
|
||||
|
||||
def uri=(uri)
|
||||
@@ -133,7 +134,7 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
response_sha256: self.response_sha256,
|
||||
requested_at: self.requested_at,
|
||||
created_at: self.created_at,
|
||||
updated_at: self.updated_at,
|
||||
updated_at: self.updated_at
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
@@ -27,10 +27,7 @@ class HttpLogEntryHeader < ReduxApplicationRecord
|
||||
|
||||
headers = headers.sort.to_h
|
||||
sha256 = Digest::SHA256.digest(headers.to_s)
|
||||
HttpLogEntryHeader.new(
|
||||
sha256: sha256,
|
||||
headers: headers,
|
||||
)
|
||||
HttpLogEntryHeader.new(sha256: sha256, headers: headers)
|
||||
end
|
||||
|
||||
def self.find_or_build(headers:)
|
||||
@@ -49,16 +46,11 @@ class HttpLogEntryHeader < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
def self.scrub_datetime_header(hash, key)
|
||||
if hash[key]
|
||||
hash[key].gsub!(/\d\d:\d\d:\d\d/, "(scrubbed)")
|
||||
end
|
||||
hash[key].gsub!(/\d\d:\d\d:\d\d/, "(scrubbed)") if hash[key]
|
||||
end
|
||||
|
||||
def to_bulk_insert_hash
|
||||
{
|
||||
sha256: sha256,
|
||||
headers: headers,
|
||||
}
|
||||
{ sha256: sha256, headers: headers }
|
||||
end
|
||||
|
||||
def self.empty
|
||||
|
||||
@@ -46,16 +46,19 @@ class Legacy::BlobEntry < LegacyApplicationRecord
|
||||
unless File.exist?(file_path)
|
||||
found = false
|
||||
(2..5).each do |depth|
|
||||
path = File.join(
|
||||
Legacy::SConfig.blob_static_dir,
|
||||
self.class.file_path_at_depth(sha256: sha256, depth: depth)
|
||||
)
|
||||
path =
|
||||
File.join(
|
||||
Legacy::SConfig.blob_static_dir,
|
||||
self.class.file_path_at_depth(sha256: sha256, depth: depth)
|
||||
)
|
||||
next unless File.exist?(path)
|
||||
|
||||
self.dir_depth = depth
|
||||
save!
|
||||
found = true
|
||||
Legacy::SConfig.logger.warn("found fixed path at #{depth} for BE id #{id}")
|
||||
Legacy::SConfig.logger.warn(
|
||||
"found fixed path at #{depth} for BE id #{id}"
|
||||
)
|
||||
break
|
||||
end
|
||||
|
||||
@@ -69,9 +72,9 @@ class Legacy::BlobEntry < LegacyApplicationRecord
|
||||
# generate something like sha256[0]/sha256[1]/sha256
|
||||
raise("invalid sha256: #{sha256}") unless sha256.length == hash_length
|
||||
|
||||
parts = (0...depth).map do |idx|
|
||||
sha256[(idx * stride)...((idx + 1) * stride)]
|
||||
end + [sha256]
|
||||
parts =
|
||||
(0...depth).map { |idx| sha256[(idx * stride)...((idx + 1) * stride)] } +
|
||||
[sha256]
|
||||
File.join(*parts)
|
||||
end
|
||||
|
||||
@@ -90,28 +93,31 @@ class Legacy::BlobEntry < LegacyApplicationRecord
|
||||
def self.create_from_blob(blob:, opts: {})
|
||||
sha256 = Digest::SHA256.hexdigest blob
|
||||
|
||||
write_out = lambda { |be, _contents|
|
||||
dir = File.dirname be.file_path
|
||||
FileUtils.mkdir_p dir
|
||||
f = File.open(be.file_path, "wb")
|
||||
begin
|
||||
f.write(blob)
|
||||
f.fsync
|
||||
write_out =
|
||||
lambda do |be, _contents|
|
||||
dir = File.dirname be.file_path
|
||||
FileUtils.mkdir_p dir
|
||||
f = File.open(be.file_path, "wb")
|
||||
begin
|
||||
f.write(blob)
|
||||
f.fsync
|
||||
|
||||
unless File.exist?(be.file_path)
|
||||
raise("error ensuring blob exists for #{be.id}")
|
||||
unless File.exist?(be.file_path)
|
||||
raise("error ensuring blob exists for #{be.id}")
|
||||
end
|
||||
ensure
|
||||
f.close
|
||||
end
|
||||
ensure
|
||||
f.close
|
||||
end
|
||||
}
|
||||
|
||||
be = nil
|
||||
::Legacy::BlobEntry.transaction do
|
||||
be = ::Legacy::BlobEntry.find_by(sha256: sha256)
|
||||
if be && !be.ensure_file_path
|
||||
# correct directory depth as well
|
||||
Legacy::SConfig.logger.warn("file doesn't exist for #{be.id}, writing again...")
|
||||
Legacy::SConfig.logger.warn(
|
||||
"file doesn't exist for #{be.id}, writing again..."
|
||||
)
|
||||
write_out.call(be, blob)
|
||||
elsif !be
|
||||
new_be = ::Legacy::BlobEntry.new(opts.merge(sha256: sha256))
|
||||
|
||||
@@ -31,42 +31,49 @@
|
||||
class Legacy::E621::Post < LegacyApplicationRecord
|
||||
self.table_name = "e621_posts"
|
||||
|
||||
validates_presence_of :e621_id, :md5, :author, :file_url, :file_ext, :rating, :tags_string, :status, :score
|
||||
validates_presence_of :e621_id,
|
||||
:md5,
|
||||
:author,
|
||||
:file_url,
|
||||
:file_ext,
|
||||
:rating,
|
||||
:tags_string,
|
||||
:status,
|
||||
:score
|
||||
validates_uniqueness_of :md5, :e621_id
|
||||
|
||||
serialize :sources, Array
|
||||
serialize :artists, Array
|
||||
|
||||
belongs_to :blob_entry,
|
||||
class_name: "Legacy::BlobEntry"
|
||||
belongs_to :blob_entry, class_name: "Legacy::BlobEntry"
|
||||
|
||||
# just inserted into db: :not_processed
|
||||
# we've checked for the existance of its file on the
|
||||
# disk and it isn't there: :should_download
|
||||
# we've made an attempt to download its file: :processed
|
||||
enum status: %i[not_processed should_download processed processed_404 processed_err]
|
||||
validates_inclusion_of :status,
|
||||
in: statuses.keys
|
||||
enum status: %i[
|
||||
not_processed
|
||||
should_download
|
||||
processed
|
||||
processed_404
|
||||
processed_err
|
||||
]
|
||||
validates_inclusion_of :status, in: statuses.keys
|
||||
|
||||
has_many :taggings, class_name: "Legacy::E621::Tagging"
|
||||
has_many :tags, through: :taggings
|
||||
|
||||
enum rating: %i[s q e]
|
||||
validates_inclusion_of :rating,
|
||||
in: ratings.keys
|
||||
validates_inclusion_of :rating, in: ratings.keys
|
||||
|
||||
def file_relative_path
|
||||
base = File.basename(file_url)
|
||||
"#{base[0]}/#{base[1]}/#{base}"
|
||||
end
|
||||
|
||||
before_validation do
|
||||
self.file_ext ||= File.extname(file_path)[1..-1]
|
||||
end
|
||||
before_validation { self.file_ext ||= File.extname(file_path)[1..-1] }
|
||||
|
||||
before_destroy do
|
||||
blob_entry.dec_refcount
|
||||
end
|
||||
before_destroy { blob_entry.dec_refcount }
|
||||
|
||||
def file_path
|
||||
File.join SConfig.e621_static_dir, file_relative_path
|
||||
@@ -75,32 +82,43 @@ class Legacy::E621::Post < LegacyApplicationRecord
|
||||
def resized_file_path(style)
|
||||
raise("no md5") unless md5
|
||||
|
||||
hashed_path = Legacy::BlobEntry.file_path_at_depth(
|
||||
sha256: md5,
|
||||
depth: 4, stride: 2, hash_length: 32,
|
||||
)
|
||||
File.join SConfig.e621_data_dir, "resized", style.to_s, (hashed_path + "." + file_ext)
|
||||
hashed_path =
|
||||
Legacy::BlobEntry.file_path_at_depth(
|
||||
sha256: md5,
|
||||
depth: 4,
|
||||
stride: 2,
|
||||
hash_length: 32
|
||||
)
|
||||
File.join SConfig.e621_data_dir,
|
||||
"resized",
|
||||
style.to_s,
|
||||
(hashed_path + "." + file_ext)
|
||||
end
|
||||
|
||||
FASource = Struct.new(:type, :id, :url)
|
||||
|
||||
def fa_sources
|
||||
self.sources.flatten.map do |source|
|
||||
if matches = /furaffinity.net\/view\/(\d+)/.match(source)
|
||||
fa_id = matches[1]
|
||||
FASource.new(:post, fa_id.to_i, source)
|
||||
elsif matches = /furaffinity.net\/(gallery|user)\/([^\/]+)/.match(source)
|
||||
url_name = FA::User.name_to_url_name(matches[2])
|
||||
FASource.new(:user, url_name, source)
|
||||
else
|
||||
nil
|
||||
self
|
||||
.sources
|
||||
.flatten
|
||||
.map do |source|
|
||||
if matches = %r{furaffinity.net/view/(\d+)}.match(source)
|
||||
fa_id = matches[1]
|
||||
FASource.new(:post, fa_id.to_i, source)
|
||||
elsif matches = %r{furaffinity.net/(gallery|user)/([^/]+)}.match(source)
|
||||
url_name = FA::User.name_to_url_name(matches[2])
|
||||
FASource.new(:user, url_name, source)
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
end.reject(&:nil?)
|
||||
.reject(&:nil?)
|
||||
end
|
||||
|
||||
def update_taggings(assume_total_overwrite: false)
|
||||
tags_string_split = tags_string.split(/\s+/).map(&:strip).reject(&:blank?)
|
||||
tags_arr = Legacy::E621::Tag.where(value: tags_string_split).select(:id, :value).to_a
|
||||
tags_arr =
|
||||
Legacy::E621::Tag.where(value: tags_string_split).select(:id, :value).to_a
|
||||
|
||||
missing = Set.new(tags_string_split) - Set.new(tags_arr.map(&:value))
|
||||
|
||||
|
||||
@@ -46,12 +46,12 @@ class Legacy::Fa::Post < LegacyApplicationRecord
|
||||
serialize :in_folders, Array
|
||||
|
||||
enum state: [
|
||||
:seen_listing, # have seen a reference to this post on a listing page
|
||||
:scanned_submission, # have scanned the actual submission page
|
||||
:scan_error, # error scanning the submission page
|
||||
:have_static, # have the static asset associated with the page
|
||||
:static_error,
|
||||
] # error getting the static asset
|
||||
:seen_listing, # have seen a reference to this post on a listing page
|
||||
:scanned_submission, # have scanned the actual submission page
|
||||
:scan_error, # error scanning the submission page
|
||||
:have_static, # have the static asset associated with the page
|
||||
:static_error
|
||||
] # error getting the static asset
|
||||
validates_inclusion_of :state, in: Legacy::Fa::Post.states.keys
|
||||
|
||||
serialize :state_error
|
||||
@@ -60,10 +60,10 @@ class Legacy::Fa::Post < LegacyApplicationRecord
|
||||
belongs_to :creator, class_name: "::Legacy::Fa::User"
|
||||
belongs_to :blob_entry, class_name: "::Legacy::BlobEntry"
|
||||
|
||||
belongs_to :submission_page_log_entry,
|
||||
class_name: "::Legacy::HttpLogEntry"
|
||||
belongs_to :submission_page_log_entry, class_name: "::Legacy::HttpLogEntry"
|
||||
|
||||
belongs_to :description_ref, nil,
|
||||
belongs_to :description_ref,
|
||||
nil,
|
||||
class_name: "::Legacy::Fa::PostDescription",
|
||||
foreign_key: :description_id,
|
||||
inverse_of: :fa_post
|
||||
@@ -86,7 +86,12 @@ class Legacy::Fa::Post < LegacyApplicationRecord
|
||||
|
||||
name = ensure_creator_name_filename
|
||||
file = self.class.file_name_filter(File.basename(file_url))
|
||||
File.join(name, file).encode(Encoding.find("UTF-8"), invalid: :replace, undef: :replace, replace: "")
|
||||
File.join(name, file).encode(
|
||||
Encoding.find("UTF-8"),
|
||||
invalid: :replace,
|
||||
undef: :replace,
|
||||
replace: ""
|
||||
)
|
||||
end
|
||||
|
||||
def file_path
|
||||
@@ -102,11 +107,17 @@ class Legacy::Fa::Post < LegacyApplicationRecord
|
||||
def resized_file_path(style)
|
||||
raise("no fa_id") unless fa_id
|
||||
|
||||
hashed_path = Legacy::BlobEntry.file_path_at_depth(
|
||||
sha256: Digest::SHA256.hexdigest(fa_id.to_s),
|
||||
depth: 4, stride: 2,
|
||||
)
|
||||
File.join Legacy::SConfig.fa_data_dir, "static", "resized", style.to_s, (hashed_path + file_ext)
|
||||
hashed_path =
|
||||
Legacy::BlobEntry.file_path_at_depth(
|
||||
sha256: Digest::SHA256.hexdigest(fa_id.to_s),
|
||||
depth: 4,
|
||||
stride: 2
|
||||
)
|
||||
File.join Legacy::SConfig.fa_data_dir,
|
||||
"static",
|
||||
"resized",
|
||||
style.to_s,
|
||||
(hashed_path + file_ext)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
@@ -13,7 +13,5 @@
|
||||
class Legacy::Fa::PostDescription < LegacyApplicationRecord
|
||||
self.table_name = "fa_post_descriptions"
|
||||
|
||||
has_one :fa_post,
|
||||
class_name: "Legacy::Fa::Post",
|
||||
foreign_key: :description_id
|
||||
has_one :fa_post, class_name: "Legacy::Fa::Post", foreign_key: :description_id
|
||||
end
|
||||
|
||||
@@ -29,22 +29,17 @@
|
||||
class Legacy::Fa::User < LegacyApplicationRecord
|
||||
self.table_name = "fa_users"
|
||||
|
||||
validates :name,
|
||||
uniqueness: true,
|
||||
presence: true
|
||||
validates :name, uniqueness: true, presence: true
|
||||
|
||||
validates :url_name,
|
||||
uniqueness: true,
|
||||
presence: true
|
||||
validates :url_name, uniqueness: true, presence: true
|
||||
|
||||
has_many :posts, class_name: "Legacy::Fa::Post",
|
||||
foreign_key: :creator_name,
|
||||
primary_key: :name
|
||||
has_many :posts,
|
||||
class_name: "Legacy::Fa::Post",
|
||||
foreign_key: :creator_name,
|
||||
primary_key: :name
|
||||
|
||||
# if present, this user is being watched
|
||||
has_one :watched_user,
|
||||
foreign_key: :user_id,
|
||||
inverse_of: :user
|
||||
has_one :watched_user, foreign_key: :user_id, inverse_of: :user
|
||||
|
||||
belongs_to :user_page_log_entry, class_name: "Legacy::Cache::HttpLogEntry"
|
||||
|
||||
|
||||
@@ -74,20 +74,20 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
# native: use the native LogStore server to store the entry
|
||||
enum diff_type: %i[text binary native]
|
||||
validates_inclusion_of :diff_type, in: Legacy::HttpLogEntry.diff_types.keys
|
||||
after_initialize do
|
||||
self.diff_type = "native" if new_record?
|
||||
end
|
||||
after_initialize { self.diff_type = "native" if new_record? }
|
||||
|
||||
# out of line req/response headers
|
||||
belongs_to :req_headers_ref,
|
||||
foreign_key: :req_headers_id, class_name: "Legacy::HttpLogEntryHeader"
|
||||
foreign_key: :req_headers_id,
|
||||
class_name: "Legacy::HttpLogEntryHeader"
|
||||
belongs_to :res_headers_ref,
|
||||
foreign_key: :res_headers_id, class_name: "Legacy::HttpLogEntryHeader"
|
||||
foreign_key: :res_headers_id,
|
||||
class_name: "Legacy::HttpLogEntryHeader"
|
||||
|
||||
belongs_to :native_blob_entry,
|
||||
foreign_key: :native_blob_entry_sha256,
|
||||
primary_key: :key,
|
||||
class_name: "::LogStoreSstEntry"
|
||||
foreign_key: :native_blob_entry_sha256,
|
||||
primary_key: :key,
|
||||
class_name: "::LogStoreSstEntry"
|
||||
|
||||
attr_accessor :can_force_update
|
||||
before_update do
|
||||
@@ -137,25 +137,15 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
|
||||
def req_headers
|
||||
ref = req_headers_ref
|
||||
if ref
|
||||
ref.headers
|
||||
else
|
||||
{}
|
||||
end
|
||||
ref ? ref.headers : {}
|
||||
end
|
||||
|
||||
def res_headers
|
||||
ref = res_headers_ref
|
||||
if ref
|
||||
ref.headers
|
||||
else
|
||||
{}
|
||||
end
|
||||
ref ? ref.headers : {}
|
||||
end
|
||||
|
||||
before_create do
|
||||
self.requested_at ||= DateTime.now
|
||||
end
|
||||
before_create { self.requested_at ||= DateTime.now }
|
||||
|
||||
def body_stored?
|
||||
!!(blob_entry_id || parent_log_entry_id || resp_body)
|
||||
@@ -177,20 +167,31 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
raise("legacy can't write")
|
||||
|
||||
# try and find a good HttpLogEntry to diff this against
|
||||
candidate_keys = if !opts[:skip_find_candidates]
|
||||
Legacy::HttpLogEntry.where(
|
||||
host: host,
|
||||
path: path,
|
||||
diff_type: self.class.diff_types["native"],
|
||||
).limit(5).to_a.map(&:resp_body).reject(&:nil?).reject(&:empty?)
|
||||
candidate_keys =
|
||||
if !opts[:skip_find_candidates]
|
||||
Legacy::HttpLogEntry
|
||||
.where(
|
||||
host: host,
|
||||
path: path,
|
||||
diff_type: self.class.diff_types["native"]
|
||||
)
|
||||
.limit(5)
|
||||
.to_a
|
||||
.map(&:resp_body)
|
||||
.reject(&:nil?)
|
||||
.reject(&:empty?)
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
SConfig.with_log_store_client do |lsc|
|
||||
ret = lsc.put_binary(
|
||||
LogStore::PutBinaryArgs.new(hint_hashes: candidate_keys, contents: body_string)
|
||||
)
|
||||
ret =
|
||||
lsc.put_binary(
|
||||
LogStore::PutBinaryArgs.new(
|
||||
hint_hashes: candidate_keys,
|
||||
contents: body_string
|
||||
)
|
||||
)
|
||||
self.resp_body = ret.key
|
||||
end
|
||||
|
||||
@@ -202,23 +203,24 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
return set_response_body_native(body_string, opts) if diff_type == "native"
|
||||
|
||||
# try and find a good HttpLogEntry to diff this against
|
||||
candidate_entries = Legacy::HttpLogEntry.where(
|
||||
host: host,
|
||||
path: path,
|
||||
parent_log_entry_id: nil,
|
||||
).limit(3).to_a
|
||||
candidate_entries =
|
||||
Legacy::HttpLogEntry
|
||||
.where(host: host, path: path, parent_log_entry_id: nil)
|
||||
.limit(3)
|
||||
.to_a
|
||||
|
||||
# add or remove trailing slash to each of the paths
|
||||
hint_paths = opts[:similar_content_path_hints] || []
|
||||
hint_paths += hint_paths.map do |p|
|
||||
if p == "/"
|
||||
p
|
||||
elsif p[-1] == "/"
|
||||
p[0..-2]
|
||||
else
|
||||
p + "/"
|
||||
hint_paths +=
|
||||
hint_paths.map do |p|
|
||||
if p == "/"
|
||||
p
|
||||
elsif p[-1] == "/"
|
||||
p[0..-2]
|
||||
else
|
||||
p + "/"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
body_string = body_string.force_encoding("UTF-8")
|
||||
|
||||
@@ -228,15 +230,17 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
|
||||
if body_string.valid_encoding?
|
||||
if hint_paths.any?
|
||||
candidate_entries += Legacy::HttpLogEntry.where(
|
||||
host: host,
|
||||
path: hint_paths,
|
||||
parent_log_entry_id: nil,
|
||||
).limit(50).to_a
|
||||
candidate_entries +=
|
||||
Legacy::HttpLogEntry
|
||||
.where(host: host, path: hint_paths, parent_log_entry_id: nil)
|
||||
.limit(50)
|
||||
.to_a
|
||||
end
|
||||
|
||||
SConfig.logger.info("Comparing against #{candidate_entries.length} " \
|
||||
"candidates: #{candidate_entries.map(&:path).join(", ")}")
|
||||
SConfig.logger.info(
|
||||
"Comparing against #{candidate_entries.length} " \
|
||||
"candidates: #{candidate_entries.map(&:path).join(", ")}"
|
||||
)
|
||||
candidate_entries.each do |ce|
|
||||
SConfig.logger.info "Comparing diff against HLE (#{ce.id}: #{ce.path})"
|
||||
ce_body = ce.response_body
|
||||
@@ -246,14 +250,19 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
end
|
||||
|
||||
ce_diff = self.class.get_diff(ce_body, body_string, diff_type)
|
||||
if (diff_type == "text") && (/^Binary files .+ and .+ differ/ =~ ce_diff)
|
||||
SConfig.logger.warn("diff detected HLE #{ce.id} was a binary, skipping...")
|
||||
if (diff_type == "text") &&
|
||||
(/^Binary files .+ and .+ differ/ =~ ce_diff)
|
||||
SConfig.logger.warn(
|
||||
"diff detected HLE #{ce.id} was a binary, skipping..."
|
||||
)
|
||||
next
|
||||
end
|
||||
|
||||
# verify we can reconstruct the original body string
|
||||
if self.class.apply_patch(ce_body, ce_diff, diff_type) != body_string
|
||||
SConfig.logger.error("couldn't succesfully apply patch to get orig...")
|
||||
SConfig.logger.error(
|
||||
"couldn't succesfully apply patch to get orig..."
|
||||
)
|
||||
next
|
||||
end
|
||||
|
||||
@@ -273,13 +282,13 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
if use_string.length < ce_use_string.length
|
||||
SConfig.logger.info(
|
||||
"Previous config was still smaller (#{use_string.length} vs" \
|
||||
" #{ce_use_string.length} bytes)"
|
||||
" #{ce_use_string.length} bytes)"
|
||||
)
|
||||
next
|
||||
else
|
||||
SConfig.logger.info(
|
||||
"HLE (#{ce.id}) is good candidate: #{ce_use_string.length} bytes " \
|
||||
"(gz: #{ce_gzipped})"
|
||||
"(gz: #{ce_gzipped})"
|
||||
)
|
||||
end
|
||||
|
||||
@@ -298,7 +307,13 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
self.resp_body = use_string
|
||||
SConfig.logger.info "Storing data interally"
|
||||
else
|
||||
self.blob_entry = Legacy::BlobEntry.create_from_blob(blob: use_string, opts: { dir_depth: 4 })
|
||||
self.blob_entry =
|
||||
Legacy::BlobEntry.create_from_blob(
|
||||
blob: use_string,
|
||||
opts: {
|
||||
dir_depth: 4
|
||||
}
|
||||
)
|
||||
blob_entry.inc_refcount
|
||||
SConfig.logger.info "Storing data in blob entry #{blob_entry.id}..."
|
||||
end
|
||||
@@ -311,7 +326,7 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
total_bytes = body_string.length
|
||||
SConfig.logger.info(
|
||||
"Stored #{stored_bytes}/#{total_bytes} bytes" \
|
||||
" (#{(stored_bytes.to_f / total_bytes.to_f * 100.0).round(1)}\% of original)"
|
||||
" (#{(stored_bytes.to_f / total_bytes.to_f * 100.0).round(1)}\% of original)"
|
||||
)
|
||||
|
||||
response_body
|
||||
@@ -325,14 +340,17 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
|
||||
# -> string
|
||||
def response_body
|
||||
@response_body ||= begin
|
||||
@response_body ||=
|
||||
begin
|
||||
return response_body_native if diff_type == "native"
|
||||
|
||||
our_string = if blob_entry
|
||||
our_string =
|
||||
if blob_entry
|
||||
path = blob_entry.ensure_file_path
|
||||
unless path
|
||||
raise NoBEPathException, "no path for blob entry " \
|
||||
"#{blob_entry_id} (HLE id: #{id}) (#{blob_entry.file_path})"
|
||||
raise NoBEPathException,
|
||||
"no path for blob entry " \
|
||||
"#{blob_entry_id} (HLE id: #{id}) (#{blob_entry.file_path})"
|
||||
end
|
||||
File.read(path)
|
||||
else
|
||||
@@ -346,7 +364,11 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
# our_string = our_string.force_encoding("UTF-8")
|
||||
|
||||
if parent_log_entry
|
||||
self.class.apply_patch(parent_log_entry.response_body, our_string, diff_type)
|
||||
self.class.apply_patch(
|
||||
parent_log_entry.response_body,
|
||||
our_string,
|
||||
diff_type
|
||||
)
|
||||
else
|
||||
our_string
|
||||
end
|
||||
@@ -360,12 +382,17 @@ class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
# new:
|
||||
@response_body_native ||=
|
||||
self.native_blob_entry&.patched_value ||
|
||||
::FlatSstEntry.find_by_hex_key(resp_body)&.contents ||
|
||||
::LogStoreSstEntry.find_by_hex_key(resp_body)&.patched_value
|
||||
::FlatSstEntry.find_by_hex_key(resp_body)&.contents ||
|
||||
::LogStoreSstEntry.find_by_hex_key(resp_body)&.patched_value
|
||||
end
|
||||
|
||||
def self.encode_str(str)
|
||||
str.encode(Encoding.find("UTF-8"), invalid: :replace, undef: :replace, replace: "")
|
||||
str.encode(
|
||||
Encoding.find("UTF-8"),
|
||||
invalid: :replace,
|
||||
undef: :replace,
|
||||
replace: ""
|
||||
)
|
||||
end
|
||||
|
||||
def self.gunzip(data)
|
||||
|
||||
@@ -77,21 +77,28 @@ class Legacy::SConfig
|
||||
end
|
||||
|
||||
def self.logger
|
||||
@@logger ||= begin
|
||||
@@logger ||=
|
||||
begin
|
||||
l = Logger.new(STDOUT)
|
||||
l.level = Logger::INFO
|
||||
l.datetime_format = "%Y-%m-%d %H:%M:%S"
|
||||
l.formatter = proc do |sev, datetime, _prog, msg|
|
||||
color = case sev
|
||||
when "INFO" then :blue
|
||||
when "ERROR" then :red
|
||||
when "DEBUG" then :yellow
|
||||
else :white
|
||||
end
|
||||
l.formatter =
|
||||
proc do |sev, datetime, _prog, msg|
|
||||
color =
|
||||
case sev
|
||||
when "INFO"
|
||||
:blue
|
||||
when "ERROR"
|
||||
:red
|
||||
when "DEBUG"
|
||||
:yellow
|
||||
else
|
||||
:white
|
||||
end
|
||||
|
||||
date_format = datetime.strftime("%Y-%m-%d %H:%M:%S")
|
||||
"[#{date_format}] #{sev.ljust(5).send(color)}: #{msg}\n"
|
||||
end
|
||||
date_format = datetime.strftime("%Y-%m-%d %H:%M:%S")
|
||||
"[#{date_format}] #{sev.ljust(5).send(color)}: #{msg}\n"
|
||||
end
|
||||
l
|
||||
end
|
||||
end
|
||||
|
||||
@@ -6,16 +6,20 @@ class LiteTrail::AbstractVersion < ReduxApplicationRecord
|
||||
versions_arr = item.versions
|
||||
self_idx = versions_arr.find_index(self)
|
||||
if self_idx == nil
|
||||
raise("item.versions (#{item.item_type}/#{item.item_id}) does not contain self: #{self.id}")
|
||||
raise(
|
||||
"item.versions (#{item.item_type}/#{item.item_id}) does not contain self: #{self.id}"
|
||||
)
|
||||
end
|
||||
|
||||
model = self.item.dup
|
||||
|
||||
# unapply versions in reverse order
|
||||
(versions_arr.length - 1).downto(self_idx).each do |idx|
|
||||
version = versions_arr[idx]
|
||||
version._unapply(model)
|
||||
end
|
||||
(versions_arr.length - 1)
|
||||
.downto(self_idx)
|
||||
.each do |idx|
|
||||
version = versions_arr[idx]
|
||||
version._unapply(model)
|
||||
end
|
||||
|
||||
model
|
||||
end
|
||||
@@ -31,14 +35,20 @@ class LiteTrail::AbstractVersion < ReduxApplicationRecord
|
||||
|
||||
attr_name_sym = attr_name.to_sym
|
||||
if mapper_config[attr_name_sym]
|
||||
attr_before = mapper_config[attr_name_sym].map_from(attr_before) if attr_before
|
||||
attr_after = mapper_config[attr_name_sym].map_from(attr_after) if attr_after
|
||||
attr_before =
|
||||
mapper_config[attr_name_sym].map_from(attr_before) if attr_before
|
||||
attr_after =
|
||||
mapper_config[attr_name_sym].map_from(attr_after) if attr_after
|
||||
end
|
||||
|
||||
# sanity check - but ignore updated_at due to rounding issues
|
||||
if model.send(attr_name.to_sym) != attr_after
|
||||
raise("expected #{attr_name} to be #{attr_after}, was #{item_attributes[attr_name]}")
|
||||
end if attr_name_sym != :updated_at
|
||||
if attr_name_sym != :updated_at
|
||||
if model.send(attr_name.to_sym) != attr_after
|
||||
raise(
|
||||
"expected #{attr_name} to be #{attr_after}, was #{item_attributes[attr_name]}"
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
model.send(:"#{attr_name}=", attr_before)
|
||||
end
|
||||
|
||||
@@ -11,10 +11,10 @@ class LogStoreSstEntry < ReduxApplicationRecord
|
||||
# contents - bytea
|
||||
|
||||
belongs_to :base,
|
||||
foreign_key: :base_key,
|
||||
primary_key: :key,
|
||||
class_name: "::LogStoreSstEntry",
|
||||
optional: true
|
||||
foreign_key: :base_key,
|
||||
primary_key: :key,
|
||||
class_name: "::LogStoreSstEntry",
|
||||
optional: true
|
||||
|
||||
def hex_key
|
||||
self.class.bin2hex(self.key)
|
||||
@@ -24,13 +24,16 @@ class LogStoreSstEntry < ReduxApplicationRecord
|
||||
if contents
|
||||
contents_digest = Digest::SHA256.digest(contents)
|
||||
if contents_digest != key
|
||||
raise RuntimeError("digest mismatch: #{HexUtil.bin2hex(contents_digest)} != #{hex_key}")
|
||||
raise RuntimeError(
|
||||
"digest mismatch: #{HexUtil.bin2hex(contents_digest)} != #{hex_key}"
|
||||
)
|
||||
end
|
||||
|
||||
return contents
|
||||
end
|
||||
|
||||
@patched_value ||= begin
|
||||
@patched_value ||=
|
||||
begin
|
||||
if base
|
||||
# format is:
|
||||
# 0..4 - version
|
||||
@@ -42,7 +45,9 @@ class LogStoreSstEntry < ReduxApplicationRecord
|
||||
parent_value = base.patched_value
|
||||
patched_value = XDiff.patch(parent_value, patch_value)
|
||||
if patched_value.length != value_length
|
||||
raise RuntimeError.new("length mismatch: #{patched_value.length} != #{value_length}")
|
||||
raise RuntimeError.new(
|
||||
"length mismatch: #{patched_value.length} != #{value_length}"
|
||||
)
|
||||
end
|
||||
else
|
||||
# format is:
|
||||
@@ -54,7 +59,9 @@ class LogStoreSstEntry < ReduxApplicationRecord
|
||||
|
||||
value_digest = Digest::SHA256.digest(patched_value)
|
||||
if value_digest != key
|
||||
raise RuntimeError("digest mismatch: #{HexUtil.bin2hex(value_digest)} != #{hex_key}")
|
||||
raise RuntimeError(
|
||||
"digest mismatch: #{HexUtil.bin2hex(value_digest)} != #{hex_key}"
|
||||
)
|
||||
end
|
||||
|
||||
patched_value
|
||||
@@ -82,11 +89,7 @@ class LogStoreSstEntry < ReduxApplicationRecord
|
||||
end
|
||||
|
||||
def parent_key
|
||||
if has_parent?
|
||||
self.value[12...44]
|
||||
else
|
||||
nil
|
||||
end
|
||||
has_parent? ? self.value[12...44] : nil
|
||||
end
|
||||
|
||||
def self.find_by_hex_key(hex_key)
|
||||
|
||||
@@ -27,7 +27,7 @@ Rails.application.configure do
|
||||
|
||||
config.cache_store = :memory_store
|
||||
config.public_file_server.headers = {
|
||||
"Cache-Control" => "public, max-age=#{2.days.to_i}",
|
||||
"Cache-Control" => "public, max-age=#{2.days.to_i}"
|
||||
}
|
||||
else
|
||||
config.action_controller.perform_caching = false
|
||||
|
||||
@@ -91,11 +91,11 @@ Rails.application.configure do
|
||||
end
|
||||
|
||||
config.log_tags = {
|
||||
ip: ->request {
|
||||
ip: ->(request) do
|
||||
request.headers["HTTP_CF_CONNECTING_IP"] || request.remote_ip
|
||||
},
|
||||
api_token: ->request { request.params[:api_token] || "(nil api token)" },
|
||||
user_name: ->request {
|
||||
end,
|
||||
api_token: ->(request) { request.params[:api_token] || "(nil api token)" },
|
||||
user_name: ->(request) do
|
||||
api_token = request.params[:api_token]
|
||||
if api_token
|
||||
user = ApplicationController::API_TOKENS[api_token]
|
||||
@@ -103,7 +103,7 @@ Rails.application.configure do
|
||||
else
|
||||
"(nil api_token)"
|
||||
end
|
||||
},
|
||||
end
|
||||
}
|
||||
|
||||
# Do not dump schema after migrations.
|
||||
|
||||
@@ -28,7 +28,7 @@ Rails.application.configure do
|
||||
|
||||
config.cache_store = :memory_store
|
||||
config.public_file_server.headers = {
|
||||
"Cache-Control" => "public, max-age=#{2.days.to_i}",
|
||||
"Cache-Control" => "public, max-age=#{2.days.to_i}"
|
||||
}
|
||||
else
|
||||
config.action_controller.perform_caching = false
|
||||
|
||||
@@ -19,7 +19,7 @@ Rails.application.configure do
|
||||
# Configure public file server for tests with Cache-Control for performance.
|
||||
config.public_file_server.enabled = true
|
||||
config.public_file_server.headers = {
|
||||
"Cache-Control" => "public, max-age=#{1.hour.to_i}",
|
||||
"Cache-Control" => "public, max-age=#{1.hour.to_i}"
|
||||
}
|
||||
|
||||
# Show full error reports and disable caching.
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
if Rails.env.staging?
|
||||
Rails.application.config.middleware.insert_before 0, Rack::Cors, debug: true do
|
||||
Rails.application.config.middleware.insert_before 0,
|
||||
Rack::Cors,
|
||||
debug: true do
|
||||
allow do
|
||||
origins "localhost:3000"
|
||||
resource "/api/fa/search_users", headers: :any, methods: [:get, :options]
|
||||
resource "/api/fa/load_users", headers: :any, methods: [:get, :options]
|
||||
resource "/api/fa/search_users", headers: :any, methods: %i[get options]
|
||||
resource "/api/fa/load_users", headers: :any, methods: %i[get options]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -3,6 +3,14 @@
|
||||
# Configure parameters to be filtered from the log file. Use this to limit dissemination of
|
||||
# sensitive information. See the ActiveSupport::ParameterFilter documentation for supported
|
||||
# notations and behaviors.
|
||||
Rails.application.config.filter_parameters += [
|
||||
:passw, :secret, :token, :_key, :crypt, :salt, :certificate, :otp, :ssn,
|
||||
Rails.application.config.filter_parameters += %i[
|
||||
passw
|
||||
secret
|
||||
token
|
||||
_key
|
||||
crypt
|
||||
salt
|
||||
certificate
|
||||
otp
|
||||
ssn
|
||||
]
|
||||
|
||||
@@ -3,5 +3,7 @@ require_relative Rails.root.join("app/lib/lite_trail/migration_extensions")
|
||||
ActiveRecord::Migration.send(:include, ::LiteTrail::MigrationExtensions)
|
||||
|
||||
require_relative Rails.root.join("app/models/redux_application_record")
|
||||
require_relative Rails.root.join("app/lib/lite_trail/active_record_class_methods")
|
||||
require_relative Rails.root.join(
|
||||
"app/lib/lite_trail/active_record_class_methods"
|
||||
)
|
||||
ReduxApplicationRecord.send(:extend, ::LiteTrail::ActiveRecordClassMethods)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
if Rails.env.development? || Rails.env.staging?
|
||||
Rack::MiniProfiler.config.position = "top-right"
|
||||
Rack::MiniProfiler.config.disable_caching = false
|
||||
Rack::MiniProfiler.config.skip_paths = [/\/blobs\/.+\/contents.jpg$/]
|
||||
Rack::MiniProfiler.config.skip_paths = [%r{/blobs/.+/contents.jpg$}]
|
||||
end
|
||||
|
||||
@@ -97,7 +97,6 @@
|
||||
# "Referrer-Policy" => "strict-origin-when-cross-origin"
|
||||
# }
|
||||
|
||||
|
||||
# ** Please read carefully, this must be configured in config/application.rb **
|
||||
# Change the format of the cache entry.
|
||||
# Changing this default means that all new cache entries added to the cache
|
||||
@@ -107,9 +106,8 @@
|
||||
# When you're ready to change format, add this to `config/application.rb` (NOT this file):
|
||||
# config.active_support.cache_format_version = 7.0
|
||||
|
||||
|
||||
# Cookie serializer: 2 options
|
||||
#
|
||||
#
|
||||
# If you're upgrading and haven't set `cookies_serializer` previously, your cookie serializer
|
||||
# is `:marshal`. The default for new apps is `:json`.
|
||||
#
|
||||
@@ -117,10 +115,10 @@
|
||||
#
|
||||
#
|
||||
# To migrate an existing application to the `:json` serializer, use the `:hybrid` option.
|
||||
#
|
||||
#
|
||||
# Rails transparently deserializes existing (Marshal-serialized) cookies on read and
|
||||
# re-writes them in the JSON format.
|
||||
#
|
||||
#
|
||||
# It is fine to use `:hybrid` long term; you should do that until you're confident *all* your cookies
|
||||
# have been converted to JSON. To keep using `:hybrid` long term, move this config to its own
|
||||
# initializer or to `config/application.rb`.
|
||||
@@ -131,5 +129,5 @@
|
||||
# If your cookies can't yet be serialized to JSON, keep using `:marshal` for backward-compatibility.
|
||||
#
|
||||
# If you have configured the serializer elsewhere, you can remove this section of the file.
|
||||
#
|
||||
#
|
||||
# See https://guides.rubyonrails.org/action_controller_overview.html#cookies for more information.
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
Rails.application.routes.draw do
|
||||
root to: 'pages#root'
|
||||
root to: "pages#root"
|
||||
|
||||
# Define your application routes per the DSL in https://guides.rubyonrails.org/routing.html
|
||||
namespace :api do
|
||||
namespace :fa do
|
||||
get :similar_users, to: '/domain/fa/api#similar_users'
|
||||
get :search_user_names, to: '/domain/fa/api#search_user_names'
|
||||
get :similar_users, to: "/domain/fa/api#similar_users"
|
||||
get :search_user_names, to: "/domain/fa/api#search_user_names"
|
||||
end
|
||||
end
|
||||
|
||||
@@ -20,30 +20,30 @@ Rails.application.routes.draw do
|
||||
get :contents, on: :member
|
||||
end
|
||||
|
||||
get 'us/:script', to: 'user_scripts#get', constraints: { script: /.*/ }
|
||||
get "us/:script", to: "user_scripts#get", constraints: { script: /.*/ }
|
||||
|
||||
scope constraints: VpnOnlyRouteConstraint.new do
|
||||
mount PgHero::Engine => 'pghero'
|
||||
mount GoodJob::Engine => 'jobs'
|
||||
mount PgHero::Engine => "pghero"
|
||||
mount GoodJob::Engine => "jobs"
|
||||
namespace :api do
|
||||
get 'search/user/:prefix', to: 'search#user'
|
||||
get "search/user/:prefix", to: "search#user"
|
||||
|
||||
namespace :fa do
|
||||
post :enqueue_objects, to: '/domain/fa/api#enqueue_objects'
|
||||
post :object_statuses, to: '/domain/fa/api#object_statuses'
|
||||
post :enqueue_objects, to: "/domain/fa/api#enqueue_objects"
|
||||
post :object_statuses, to: "/domain/fa/api#object_statuses"
|
||||
|
||||
get :export_user, to: '/domain/fa/export#user'
|
||||
get :export_user, to: "/domain/fa/export#user"
|
||||
end
|
||||
namespace :twitter do
|
||||
post :enqueue_objects, to: '/domain/twitter/api#enqueue_objects'
|
||||
post :object_statuses, to: '/domain/twitter/api#object_statuses'
|
||||
post :enqueue_objects, to: "/domain/twitter/api#enqueue_objects"
|
||||
post :object_statuses, to: "/domain/twitter/api#object_statuses"
|
||||
end
|
||||
end
|
||||
|
||||
namespace :domain do
|
||||
namespace :fa do
|
||||
resources :users, param: :url_name, only: [] do
|
||||
resources :posts, controller: '/domain/fa/posts'
|
||||
resources :posts, controller: "/domain/fa/posts"
|
||||
end
|
||||
resources :posts, param: :fa_id, only: [:inde] do
|
||||
post :scan_post, on: :member
|
||||
@@ -56,7 +56,12 @@ Rails.application.routes.draw do
|
||||
resources :log_entries, only: %i[index show] do
|
||||
get :stats, on: :collection
|
||||
|
||||
get 'filter/*filter', on: :collection, action: :index, constraints: { filter: /.*/ }
|
||||
get "filter/*filter",
|
||||
on: :collection,
|
||||
action: :index,
|
||||
constraints: {
|
||||
filter: /.*/
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -50,19 +50,28 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "res_headers_id"
|
||||
t.integer "diff_type", default: 0
|
||||
t.binary "native_blob_entry_sha256"
|
||||
t.index ["imported_from_file"], name: "index_cache_http_log_entries_on_imported_from_file"
|
||||
t.index ["path", "host"], name: "index_cache_http_log_entries_on_path_and_host"
|
||||
t.index ["path"], name: "cache_http_log_entries_path_idx", opclass: :gist_trgm_ops, using: :gist
|
||||
t.index ["imported_from_file"],
|
||||
name: "index_cache_http_log_entries_on_imported_from_file"
|
||||
t.index %w[path host], name: "index_cache_http_log_entries_on_path_and_host"
|
||||
t.index ["path"],
|
||||
name: "cache_http_log_entries_path_idx",
|
||||
opclass: :gist_trgm_ops,
|
||||
using: :gist
|
||||
t.index ["path"], name: "index_pattern_ops_on_hle_entry_path"
|
||||
t.index ["requested_at"], name: "index_cache_http_log_entries_on_requested_at"
|
||||
t.index ["requested_at"],
|
||||
name: "index_cache_http_log_entries_on_requested_at"
|
||||
end
|
||||
|
||||
create_table "cache_http_log_entry_headers", id: :serial, force: :cascade do |t|
|
||||
create_table "cache_http_log_entry_headers",
|
||||
id: :serial,
|
||||
force: :cascade do |t|
|
||||
t.hstore "headers", null: false
|
||||
t.binary "sha256", null: false
|
||||
t.datetime "created_at"
|
||||
t.datetime "updated_at"
|
||||
t.index ["sha256"], name: "index_cache_http_log_entry_headers_on_sha256", unique: true
|
||||
t.index ["sha256"],
|
||||
name: "index_cache_http_log_entry_headers_on_sha256",
|
||||
unique: true
|
||||
end
|
||||
|
||||
create_table "e621_posts", id: :serial, force: :cascade do |t|
|
||||
@@ -89,7 +98,8 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "imgsearch_entry_id"
|
||||
t.index ["blob_entry_id"], name: "index_e621_posts_on_blob_entry_id"
|
||||
t.index ["e621_id"], name: "index_e621_posts_on_e621_id", unique: true
|
||||
t.index ["imgsearch_entry_id"], name: "index_e621_posts_on_imgsearch_entry_id"
|
||||
t.index ["imgsearch_entry_id"],
|
||||
name: "index_e621_posts_on_imgsearch_entry_id"
|
||||
t.index ["md5"], name: "index_e621_posts_on_md5"
|
||||
end
|
||||
|
||||
@@ -238,7 +248,10 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "object_type", limit: 2, null: false
|
||||
t.integer "object_id", null: false
|
||||
t.bigint "fingerprints", array: true
|
||||
t.index ["object_type", "object_id"], name: "index_lite_media_file_fingerprints_on_object_type_and_object_id", unique: true
|
||||
t.index %w[object_type object_id],
|
||||
name:
|
||||
"index_lite_media_file_fingerprints_on_object_type_and_object_id",
|
||||
unique: true
|
||||
end
|
||||
|
||||
create_table "lite_s3_objects", force: :cascade do |t|
|
||||
@@ -258,7 +271,8 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.float "total_time"
|
||||
t.bigint "calls"
|
||||
t.datetime "captured_at"
|
||||
t.index ["database", "captured_at"], name: "index_pghero_query_stats_on_database_and_captured_at"
|
||||
t.index %w[database captured_at],
|
||||
name: "index_pghero_query_stats_on_database_and_captured_at"
|
||||
end
|
||||
|
||||
create_table "pghero_space_stats", id: :serial, force: :cascade do |t|
|
||||
@@ -267,7 +281,8 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.text "relation"
|
||||
t.bigint "size"
|
||||
t.datetime "captured_at"
|
||||
t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at"
|
||||
t.index %w[database captured_at],
|
||||
name: "index_pghero_space_stats_on_database_and_captured_at"
|
||||
end
|
||||
|
||||
create_table "watch_jobs", id: :serial, force: :cascade do |t|
|
||||
@@ -304,7 +319,9 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "status"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id", unique: true
|
||||
t.index %w[enum_type weasyl_id],
|
||||
name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id",
|
||||
unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_descriptions", id: :serial, force: :cascade do |t|
|
||||
@@ -316,13 +333,18 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
create_table "weasyl_joins_user_follows", id: :serial, force: :cascade do |t|
|
||||
t.integer "follower_id", null: false
|
||||
t.integer "followed_id", null: false
|
||||
t.index ["follower_id", "followed_id"], name: "index_weasyl_joins_user_follows_on_follower_id_and_followed_id", unique: true
|
||||
t.index %w[follower_id followed_id],
|
||||
name:
|
||||
"index_weasyl_joins_user_follows_on_follower_id_and_followed_id",
|
||||
unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_joins_user_friends", id: :serial, force: :cascade do |t|
|
||||
t.integer "a_id", null: false
|
||||
t.integer "b_id", null: false
|
||||
t.index ["a_id", "b_id"], name: "index_weasyl_joins_user_friends_on_a_id_and_b_id", unique: true
|
||||
t.index %w[a_id b_id],
|
||||
name: "index_weasyl_joins_user_friends_on_a_id_and_b_id",
|
||||
unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_medias", id: :serial, force: :cascade do |t|
|
||||
@@ -355,7 +377,9 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.datetime "full_scanned_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_posts_on_enum_type_and_weasyl_id", unique: true
|
||||
t.index %w[enum_type weasyl_id],
|
||||
name: "index_weasyl_posts_on_enum_type_and_weasyl_id",
|
||||
unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_users", id: :serial, force: :cascade do |t|
|
||||
@@ -369,7 +393,9 @@ ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
t.integer "userid"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["login_name"], name: "index_weasyl_users_on_login_name", unique: true
|
||||
t.index ["login_name"],
|
||||
name: "index_weasyl_users_on_login_name",
|
||||
unique: true
|
||||
end
|
||||
|
||||
create_table "xtwitter_tweets", id: :serial, force: :cascade do |t|
|
||||
|
||||
@@ -7,7 +7,9 @@ class AddServiceNameToActiveStorageBlobs < ActiveRecord::Migration[6.0]
|
||||
add_column :active_storage_blobs, :service_name, :string
|
||||
|
||||
if configured_service = ActiveStorage::Blob.service.name
|
||||
ActiveStorage::Blob.unscoped.update_all(service_name: configured_service)
|
||||
ActiveStorage::Blob.unscoped.update_all(
|
||||
service_name: configured_service
|
||||
)
|
||||
end
|
||||
|
||||
change_column :active_storage_blobs, :service_name, :string, null: false
|
||||
|
||||
@@ -4,11 +4,18 @@ class CreateActiveStorageVariantRecords < ActiveRecord::Migration[6.0]
|
||||
return unless table_exists?(:active_storage_blobs)
|
||||
|
||||
# Use Active Record's configured type for primary key
|
||||
create_table :active_storage_variant_records, id: primary_key_type, if_not_exists: true do |t|
|
||||
t.belongs_to :blob, null: false, index: false, type: blobs_primary_key_type
|
||||
create_table :active_storage_variant_records,
|
||||
id: primary_key_type,
|
||||
if_not_exists: true do |t|
|
||||
t.belongs_to :blob,
|
||||
null: false,
|
||||
index: false,
|
||||
type: blobs_primary_key_type
|
||||
t.string :variation_digest, null: false
|
||||
|
||||
t.index %i[blob_id variation_digest], name: 'index_active_storage_variant_records_uniqueness', unique: true
|
||||
t.index %i[blob_id variation_digest],
|
||||
name: "index_active_storage_variant_records_uniqueness",
|
||||
unique: true
|
||||
t.foreign_key :active_storage_blobs, column: :blob_id
|
||||
end
|
||||
end
|
||||
@@ -22,7 +29,8 @@ class CreateActiveStorageVariantRecords < ActiveRecord::Migration[6.0]
|
||||
|
||||
def blobs_primary_key_type
|
||||
pkey_name = connection.primary_key(:active_storage_blobs)
|
||||
pkey_column = connection.columns(:active_storage_blobs).find { |c| c.name == pkey_name }
|
||||
pkey_column =
|
||||
connection.columns(:active_storage_blobs).find { |c| c.name == pkey_name }
|
||||
pkey_column.bigint? ? :bigint : pkey_column.type
|
||||
end
|
||||
end
|
||||
|
||||
@@ -24,7 +24,8 @@ class CreateHttpLogEntries < ActiveRecord::Migration[7.0]
|
||||
t.datetime :requested_at, null: false
|
||||
t.timestamps null: false
|
||||
|
||||
t.index ["uri_host", "uri_path", "uri_query"], name: "index_http_log_entries_on_uri_host_path_query"
|
||||
t.index %w[uri_host uri_path uri_query],
|
||||
name: "index_http_log_entries_on_uri_host_path_query"
|
||||
end
|
||||
|
||||
create_table :http_log_entry_headers do |t|
|
||||
|
||||
@@ -1,8 +1,24 @@
|
||||
class AddForeignKeyConstraints < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
add_foreign_key :http_log_entries, :blob_entries, column: :response_sha256, primary_key: :sha256, validate: true
|
||||
add_foreign_key :http_log_entries, :http_log_entry_headers, column: :request_headers_id, primary_key: :id, validate: true
|
||||
add_foreign_key :http_log_entries, :http_log_entry_headers, column: :response_headers_id, primary_key: :id, validate: true
|
||||
add_foreign_key :blob_entries, :blob_entries, column: :base_sha256, primary_key: :sha256, validate: true
|
||||
add_foreign_key :http_log_entries,
|
||||
:blob_entries,
|
||||
column: :response_sha256,
|
||||
primary_key: :sha256,
|
||||
validate: true
|
||||
add_foreign_key :http_log_entries,
|
||||
:http_log_entry_headers,
|
||||
column: :request_headers_id,
|
||||
primary_key: :id,
|
||||
validate: true
|
||||
add_foreign_key :http_log_entries,
|
||||
:http_log_entry_headers,
|
||||
column: :response_headers_id,
|
||||
primary_key: :id,
|
||||
validate: true
|
||||
add_foreign_key :blob_entries,
|
||||
:blob_entries,
|
||||
column: :base_sha256,
|
||||
primary_key: :sha256,
|
||||
validate: true
|
||||
end
|
||||
end
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user