refactor user script, add deleted_at to e621 posts
This commit is contained in:
1
Gemfile
1
Gemfile
@@ -105,6 +105,7 @@ gem "daemons"
|
||||
gem "delayed_job_worker_pool"
|
||||
gem "ripcord"
|
||||
gem "influxdb-client"
|
||||
gem "discard"
|
||||
# gem 'cli-ui'
|
||||
# gem "paper_trail"
|
||||
# gem "paper_trail-hashdiff"
|
||||
|
||||
@@ -123,6 +123,8 @@ GEM
|
||||
delayed_job (>= 3.0, < 4.2)
|
||||
diff-lcs (1.5.0)
|
||||
diffy (3.4.2)
|
||||
discard (1.2.1)
|
||||
activerecord (>= 4.2, < 8)
|
||||
domain_name (0.5.20190701)
|
||||
unf (>= 0.0.5, < 1.0.0)
|
||||
erubi (1.12.0)
|
||||
@@ -331,6 +333,7 @@ DEPENDENCIES
|
||||
delayed_job_web
|
||||
delayed_job_worker_pool
|
||||
diffy
|
||||
discard
|
||||
http-cookie
|
||||
importmap-rails
|
||||
influxdb-client
|
||||
|
||||
@@ -1,32 +1,32 @@
|
||||
class Domain::Fa::ApiController < ApplicationController
|
||||
include ActionController::Live
|
||||
|
||||
skip_before_action :verify_authenticity_token,
|
||||
only: %i[ enqueue_objects ]
|
||||
only: %i[ enqueue_objects object_statuses ]
|
||||
|
||||
def enqueue_objects
|
||||
def object_statuses
|
||||
fa_ids = (params[:fa_ids] || []).map(&:to_i)
|
||||
url_names = (params[:url_names] || [])
|
||||
url_names_to_enqueue = Set.new(params[:url_names_to_enqueue] || [])
|
||||
|
||||
Rails.logger.info "fa_ids: #{fa_ids.inspect}"
|
||||
Rails.logger.info "url_names: #{url_names.inspect}"
|
||||
Rails.logger.info "highpri: #{url_names_to_enqueue.inspect}"
|
||||
jobs_async = Delayed::Backend::ActiveRecord::Job.
|
||||
select(:id, :queue, :handler).
|
||||
where(queue: "manual").
|
||||
load_async
|
||||
|
||||
fa_id_to_post = Domain::Fa::Post.includes(:file).where(fa_id: fa_ids).map do |post|
|
||||
users_async = Domain::Fa::User.
|
||||
where(url_name: url_names).
|
||||
load_async
|
||||
|
||||
fa_id_to_post = Domain::Fa::Post.
|
||||
includes(:file).
|
||||
where(fa_id: fa_ids).
|
||||
map do |post|
|
||||
[post.fa_id, post]
|
||||
end.to_h
|
||||
|
||||
url_name_to_user = Domain::Fa::User.where(url_name: url_names).map do |user|
|
||||
[user.url_name, user]
|
||||
end.to_h
|
||||
|
||||
posts_response = {}
|
||||
users_response = {}
|
||||
|
||||
fa_ids.each do |fa_id|
|
||||
post = fa_id_to_post[fa_id]
|
||||
try_enqueue_post_scan(post, fa_id)
|
||||
|
||||
post_response = {
|
||||
terminal_state: false,
|
||||
@@ -55,9 +55,12 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
posts_response[fa_id] = post_response
|
||||
end
|
||||
|
||||
url_name_to_user = users_async.map do |user|
|
||||
[user.url_name, user]
|
||||
end.to_h
|
||||
|
||||
url_names.each do |url_name|
|
||||
user = url_name_to_user[url_name]
|
||||
try_enqueue_user_scan(user, url_name, url_names_to_enqueue.include?(url_name))
|
||||
|
||||
if user
|
||||
user_response = {
|
||||
@@ -88,7 +91,7 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
hash[key] = 0
|
||||
end
|
||||
|
||||
Delayed::Backend::ActiveRecord::Job.select(:id, :queue, :handler).where(queue: "manual").find_each do |job|
|
||||
jobs_async.each do |job|
|
||||
queue_depths[job.payload_object.job_data["job_class"]] += 1
|
||||
end
|
||||
|
||||
@@ -111,40 +114,70 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
depths: queue_depths,
|
||||
},
|
||||
}
|
||||
response.stream.close
|
||||
perform_enqueues!
|
||||
ensure
|
||||
response.stream.close
|
||||
end
|
||||
|
||||
# POST action: works out which of the requested FA posts and users need a
# scan, enqueues the corresponding jobs, and renders per-job-class counts of
# what was deferred.
#
# Request params (each may be absent):
#   fa_ids               - post ids; every entry is coerced with #to_i
#   url_names            - user url names to consider
#   url_names_to_enqueue - url names whose user scans are flagged high priority
def enqueue_objects
  @enqueue_counts ||= Hash.new { |counts, job_class| counts[job_class] = 0 }

  requested_fa_ids = (params[:fa_ids] || []).map(&:to_i)
  requested_url_names = params[:url_names] || []
  highpri_url_names = Set.new(params[:url_names_to_enqueue] || [])

  posts_by_fa_id = {}
  Domain::Fa::Post.includes(:file).where(fa_id: requested_fa_ids).each do |post|
    posts_by_fa_id[post.fa_id] = post
  end

  users_by_url_name = {}
  Domain::Fa::User.where(url_name: requested_url_names).each do |user|
    users_by_url_name[user.url_name] = user
  end

  # Ids / names without a matching record are passed along with a nil model.
  requested_fa_ids.each do |fa_id|
    defer_post_scan(posts_by_fa_id[fa_id], fa_id)
  end

  requested_url_names.each do |url_name|
    is_highpri = highpri_url_names.include?(url_name)
    defer_user_scan(users_by_url_name[url_name], url_name, is_highpri)
  end

  enqueue_deferred!

  counts = @enqueue_counts
  render json: {
    post_scans: counts[Domain::Fa::Job::ScanPostJob],
    post_files: counts[Domain::Fa::Job::ScanFileJob],
    user_pages: counts[Domain::Fa::Job::UserPageJob],
    user_galleries: counts[Domain::Fa::Job::UserGalleryJob],
  }
end
|
||||
|
||||
private
|
||||
|
||||
# Defers at most one follow-up job for a single FA post.
#
# post  - the record for fa_id, or nil when none was found
# fa_id - the post id the caller asked about
#
# A missing or not-yet-scanned post gets a ScanPostJob at priority -17.
# Otherwise, a post that has a file_uri but no file present gets a
# ScanFileJob at priority -15 on the "static_file" queue.
#
# Returns whatever defer_manual returns for the file job, nil in every other
# case.
def defer_post_scan(post, fa_id)
  unless post&.scanned?
    defer_manual(Domain::Fa::Job::ScanPostJob, { fa_id: fa_id }, -17)
    return
  end

  # post is known to be non-nil past the guard above.
  return unless post.file_uri && !post.file.present?

  defer_manual(Domain::Fa::Job::ScanFileJob, { post: post }, -15, "static_file")
end
|
||||
|
||||
def try_enqueue_user_scan(user, url_name, highpri)
|
||||
def defer_user_scan(user, url_name, highpri)
|
||||
if !user || user.due_for_page_scan?
|
||||
enqueue_manual(Domain::Fa::Job::UserPageJob, {
|
||||
defer_manual(Domain::Fa::Job::UserPageJob, {
|
||||
url_name: url_name,
|
||||
}, highpri ? -16 : -6)
|
||||
return
|
||||
end
|
||||
|
||||
if !user || user.due_for_gallery_scan?
|
||||
enqueue_manual(Domain::Fa::Job::UserGalleryJob, {
|
||||
defer_manual(Domain::Fa::Job::UserGalleryJob, {
|
||||
url_name: url_name,
|
||||
}, highpri ? -14 : -4)
|
||||
return
|
||||
@@ -153,16 +186,17 @@ class Domain::Fa::ApiController < ApplicationController
|
||||
false
|
||||
end
|
||||
|
||||
# Remembers a job so it can be enqueued later, and bumps the per-class count
# of deferred jobs.
#
# klass    - job class; also the key used in @enqueue_counts
# args     - argument hash recorded for the job
# priority - priority recorded for the job
# queue    - queue name recorded for the job (defaults to "manual")
#
# Returns nil without recording anything when the same
# [klass, args, priority] triple was already deferred; otherwise returns the
# updated count for klass.
def defer_manual(klass, args, priority, queue = "manual")
  # NOTE(review): this is a class variable, so the dedupe set is shared by
  # every instance of the class and nothing visible here ever clears it.
  # Confirm that deduping across requests is intended.
  @@enqueue_deduper ||= Set.new
  return unless @@enqueue_deduper.add?([klass, args, priority])

  @deferred_jobs ||= []
  @deferred_jobs << [klass, args, priority, queue]

  # Initialise lazily so a caller that has not prepared the counter hash does
  # not hit NoMethodError on nil.
  @enqueue_counts ||= Hash.new(0)
  @enqueue_counts[klass] += 1
end
|
||||
|
||||
def perform_enqueues!
|
||||
while job = (@enqueue_manual || []).shift
|
||||
def enqueue_deferred!
|
||||
while job = (@deferred_jobs || []).shift
|
||||
klass, args, priority, queue = job
|
||||
klass.set(priority: priority, queue: queue).perform_later(args)
|
||||
end
|
||||
|
||||
@@ -8,8 +8,8 @@ class LegacyImport::E621CsvPostImporter < LegacyImport::BulkImportJob
|
||||
start_at:
|
||||
)
|
||||
@csv_file = CSV.new(File.open(csv_path, "r+"), headers: true)
|
||||
@forks = forks || 16
|
||||
@batch_size = batch_size || @forks * 32
|
||||
@forks = forks || 2
|
||||
@batch_size = batch_size || @forks * 64
|
||||
@start_at = start_at || 0
|
||||
@start_time = Time.now
|
||||
|
||||
@@ -26,22 +26,45 @@ class LegacyImport::E621CsvPostImporter < LegacyImport::BulkImportJob
|
||||
|
||||
# Reads the CSV in batches of up to @batch_size rows, imports each batch, and
# returns the running total of the values reported by import_e621_rows.
#
# Rows whose "id" is below @start_at are skipped. A batch is imported
# in-process when @forks == 1 and through ForkFuture.parallel_map_slice
# otherwise. After every batch the id of its last row is logged and passed to
# write_last_id.
def run_impl
  progress = 0

  while true
    batch = []
    while (row = @csv_file.shift&.to_h)
      if @start_at && row["id"].to_i < @start_at
        # Presumably resets the clock so the reported rate excludes the
        # skip-ahead phase -- TODO confirm.
        @start_time = Time.now
        next
      end

      batch << row
      break if batch.size >= @batch_size
    end

    break if batch.empty?

    last_e621_id = batch.last["id"].to_i
    progress +=
      if @forks == 1
        import_e621_rows(batch)
      else
        ForkFuture.parallel_map_slice(@forks, batch) do |fork_batch|
          import_e621_rows(fork_batch)
        end.sum
      end

    rate = progress.to_f / (Time.now - @start_time)
    # Log the id that is checkpointed below, not whichever row the reader
    # happened to touch last.
    puts "finish batch, last id #{last_e621_id} - #{progress} - #{rate.round(1)} / second"
    write_last_id last_e621_id
  end

  progress
end
|
||||
|
||||
private
|
||||
|
||||
def import_e621_post(row)
|
||||
# Imports every row in +rows+ via import_e621_row and returns the sum of the
# per-row results (0 for an empty list).
def import_e621_rows(rows)
  rows.sum { |row| import_e621_row(row) }
end
|
||||
|
||||
def import_e621_row(row)
|
||||
e621_id = row["id"].to_i
|
||||
post = Domain::E621::Post.find_by(e621_id: e621_id)
|
||||
md5 = row["md5"]
|
||||
@@ -56,10 +79,22 @@ class LegacyImport::E621CsvPostImporter < LegacyImport::BulkImportJob
|
||||
e621_id: e621_id,
|
||||
md5: md5,
|
||||
})
|
||||
|
||||
existing = Domain::E621::Post.find_by(md5: md5)
|
||||
if existing
  # Record why the older row is being discarded before discarding it.
  # There must be no trailing comma on this assignment: with one, Ruby
  # continues the right-hand side onto the next line and stores
  # [message, existing.discard] instead of the message string.
  existing.state_detail["discarded_reason"] = "duplicate found during csv import: #{e621_id}"
  existing.discard
  puts "discard #{existing.id} / #{existing.e621_id} / #{existing.md5} in favor of #{e621_id}"
end
|
||||
end
|
||||
|
||||
file_ext = row["file_ext"]
|
||||
post.file_url_str = "https://static1.e621.net/data/#{md5[0...2]}/#{md5[2...4]}/#{md5}.#{file_ext}"
|
||||
post.file ||= begin
|
||||
le = HttpLogEntry.find_by_uri_host_path(post.file_url_str)
|
||||
# puts "look up log entry for #{post.e621_id} (found: #{!le.nil?})"
|
||||
le
|
||||
end
|
||||
post.sources_array = row["source"].split("\n")
|
||||
post.rating = row["rating"]
|
||||
post.tags_array = row["tag_string"].split(" ").sort
|
||||
@@ -83,5 +118,8 @@ class LegacyImport::E621CsvPostImporter < LegacyImport::BulkImportJob
|
||||
else
|
||||
return 0
|
||||
end
|
||||
rescue
|
||||
binding.pry
|
||||
raise
|
||||
end
|
||||
end
|
||||
|
||||
@@ -2,6 +2,10 @@ class Domain::E621::Post < ReduxApplicationRecord
|
||||
self.table_name = "domain_e621_posts"
|
||||
has_lite_trail(schema_version: 1, separate_versions_table: true)
|
||||
|
||||
include Discard::Model
|
||||
self.discard_column = :deleted_at
|
||||
default_scope -> { kept }
|
||||
|
||||
# see state_detail for scan_error/file_error
|
||||
# %i[] elements are separated by whitespace only; a comma inside the literal
# becomes part of the symbol name (:"scan_error,").
enum state: %i[ok scan_error file_error]
|
||||
enum rating: %i[s q e]
|
||||
|
||||
@@ -38,6 +38,14 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
:requested_at
|
||||
)
|
||||
|
||||
# Looks up a log entry by the host and path of +uri+.
#
# uri - an Addressable::URI, or anything Addressable::URI.parse accepts
#
# Returns the result of find_by for that host/path pair, or nil when +uri+
# parses to nil (e.g. a nil argument) instead of raising on nil.host.
def self.find_by_uri_host_path(uri)
  uri = Addressable::URI.parse(uri) unless uri.is_a?(Addressable::URI)
  return nil if uri.nil?

  find_by(uri_host: uri.host, uri_path: uri.path)
end
|
||||
|
||||
def self.build_from_legacy(legacy_model)
|
||||
response_body = legacy_model.response_body
|
||||
can_reconstruct_be =
|
||||
|
||||
@@ -17,7 +17,7 @@ redux_prod: &redux_prod
|
||||
username: scraper_redux
|
||||
password: pdkFLqRmQwPUPaDDC4pX
|
||||
migrations_paths: db/redux_migrate
|
||||
pool: 4
|
||||
pool: 2
|
||||
|
||||
redux_test: &redux_test
|
||||
adapter: postgresql
|
||||
@@ -27,7 +27,7 @@ redux_test: &redux_test
|
||||
username: scraper_redux
|
||||
password: pdkFLqRmQwPUPaDDC4pX
|
||||
migrations_paths: db/redux_migrate
|
||||
pool: 4
|
||||
pool: 2
|
||||
|
||||
legacy_prod: &legacy_prod
|
||||
adapter: postgresql
|
||||
@@ -40,7 +40,7 @@ legacy_prod: &legacy_prod
|
||||
password: pdkFLqRmQwPUPaDDC4pX
|
||||
migrations_paths: db/legacy_migrate
|
||||
database_tasks: false
|
||||
pool: 4
|
||||
pool: 2
|
||||
|
||||
legacy_prod_readonly: &legacy_prod_readonly
|
||||
adapter: postgresql
|
||||
@@ -51,7 +51,7 @@ legacy_prod_readonly: &legacy_prod_readonly
|
||||
password: zL7zDRXycLhLFJLQj5Zh
|
||||
migrations_paths: db/legacy_migrate
|
||||
database_tasks: false
|
||||
pool: 4
|
||||
pool: 2
|
||||
|
||||
development:
|
||||
redux:
|
||||
|
||||
@@ -7,6 +7,7 @@ Rails.application.routes.draw do
|
||||
end
|
||||
resources :api, only: [] do
|
||||
post :enqueue_objects, on: :collection
|
||||
post :object_statuses, on: :collection
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
# Adds a nullable deleted_at column to domain_e621_posts and replaces the
# unique md5 index with a partial unique index covering only rows where
# deleted_at is null.
class AddDeletedAtToE6Posts < ActiveRecord::Migration[7.0]
  def change
    change_table :domain_e621_posts do |t|
      t.datetime :deleted_at
    end

    # The options given to remove_index are what a rollback passes to
    # add_index, so carry unique: true to get the original unique index back.
    remove_index :domain_e621_posts, :md5, unique: true
    add_index :domain_e621_posts, :md5, where: "deleted_at is null", unique: true
  end
end
|
||||
5
db/schema.rb
generated
5
db/schema.rb
generated
@@ -10,7 +10,7 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_03_01_013456) do
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_03_06_181250) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_stat_statements"
|
||||
enable_extension "pg_trgm"
|
||||
@@ -78,9 +78,10 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_01_013456) do
|
||||
t.bigint "parent_e621_id"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.datetime "deleted_at"
|
||||
t.index ["e621_id"], name: "index_domain_e621_posts_on_e621_id", unique: true
|
||||
t.index ["file_id"], name: "index_domain_e621_posts_on_file_id"
|
||||
t.index ["md5"], name: "index_domain_e621_posts_on_md5", unique: true
|
||||
t.index ["md5"], name: "index_domain_e621_posts_on_md5", unique: true, where: "(deleted_at IS NULL)"
|
||||
t.index ["parent_e621_id"], name: "index_domain_e621_posts_on_parent_e621_id"
|
||||
end
|
||||
|
||||
|
||||
365
enqueuer.user.js
365
enqueuer.user.js
@@ -12,14 +12,21 @@
|
||||
// ==/UserScript==
|
||||
|
||||
/**
 * Creates the <li> that hosts the script's status output and attaches it to
 * the page.
 *
 * When the page has a <nav> containing a "ul > div" element, the node is
 * inserted right after that element. Otherwise it is prepended to the first
 * div[align=center] in the body, or to the body itself if there is none.
 *
 * @returns {HTMLLIElement} the attached status node
 */
function setupNavbar() {
    const navbarStatusNode = document.createElement("li");
    navbarStatusNode.classList = "lileft";
    navbarStatusNode.style.height = "100%";
    navbarStatusNode.style.display = "flex";
    navbarStatusNode.innerHTML = "loading...";

    const navbar = document.querySelector("nav");
    const anchor = navbar != null ? navbar.querySelector("ul > div") : null;
    if (anchor != null) {
        anchor.after(navbarStatusNode);
        return navbarStatusNode;
    }

    // watch pages, etc don't have a navbar - put the node at the top of the page
    const center = document.body.querySelector("div[align=center]");
    (center != null ? center : document.body).prepend(navbarStatusNode);
    return navbarStatusNode;
}
|
||||
|
||||
function makeLargeStatusNode(opts = {}) {
|
||||
@@ -29,6 +36,9 @@ function makeLargeStatusNode(opts = {}) {
|
||||
if (opts.smaller == null) {
|
||||
opts.smaller = true;
|
||||
}
|
||||
if (opts.style == null) {
|
||||
opts.style = {};
|
||||
}
|
||||
|
||||
const statusNode = document.createElement(opts.type);
|
||||
statusNode.style.cssText = "margin-left: 5px; color: #b7b7b7!important; display: inline";
|
||||
@@ -36,6 +46,10 @@ function makeLargeStatusNode(opts = {}) {
|
||||
statusNode.style.fontSize = "80%";
|
||||
}
|
||||
|
||||
for (const [property, value] of Object.entries(opts.style)) {
|
||||
statusNode.style.setProperty(property, value);
|
||||
}
|
||||
|
||||
statusNode.innerHTML = "(...)";
|
||||
return statusNode;
|
||||
}
|
||||
@@ -278,6 +292,24 @@ function gatherUserElements() {
|
||||
};
|
||||
});
|
||||
|
||||
// on a /watchlist/by/ or /watchlist/to page
|
||||
const watchListUserLinks = [...document.querySelectorAll(".watch-list-items.watch-row a")]
|
||||
.map(elem => {
|
||||
const statusNode = makeLargeStatusNode({
|
||||
smaller: false, style: {
|
||||
display: "block",
|
||||
'margin-bottom': "5px",
|
||||
'font-size': "50%",
|
||||
}
|
||||
});
|
||||
elem.parentNode.appendChild(statusNode);
|
||||
return {
|
||||
urlName: urlNameFromUserHref(elem.href),
|
||||
shouldEnqueue: true,
|
||||
statusNode
|
||||
};
|
||||
});
|
||||
|
||||
return [
|
||||
...userSubmissionLinks,
|
||||
...userPageMain,
|
||||
@@ -285,7 +317,8 @@ function gatherUserElements() {
|
||||
...iconUsernames,
|
||||
...watchersAndWatchList,
|
||||
...submissionDataElems,
|
||||
...submissionContainerUserLinks
|
||||
...submissionContainerUserLinks,
|
||||
...watchListUserLinks,
|
||||
];
|
||||
}
|
||||
|
||||
@@ -306,18 +339,106 @@ function urlNameFromUserHref(href) {
|
||||
}
|
||||
|
||||
/**
 * Extracts the numeric id from a "/view/<digits>" or "/full/<digits>" href.
 *
 * @param {string} href
 * @returns {number|null} the id as an integer, or null when href has no such segment
 */
function faIdFromViewHref(href) {
    const match = href.match(/\/(?:view|full)\/(\d+)/);
    if (match == null) {
        return null;
    }
    // Always parse as base 10.
    return parseInt(match[1], 10);
}
|
||||
|
||||
/**
 * Builds a two-column <table> from a list of stat rows.
 *
 * Each row is { name, value, sep, nameAlign, valueAlign }. sep defaults to
 * ":" and both alignments default to "right". A row whose name and value
 * both loosely equal "" is rendered as a "---" / "---" placeholder row.
 *
 * name, sep and value are interpolated as raw HTML, not escaped; the file
 * relies on this to pass an <a> element as a value.
 *
 * @param {Array<Object>} stats rows to render
 * @param {Object} styleOpts CSS property -> value pairs applied to the table;
 *   "border-collapse" falls back to "collapse" when absent or null. The
 *   caller's object is not modified.
 * @returns {HTMLTableElement|null} the table, or null when stats is empty
 */
function renderTable(stats, styleOpts = {}) {
    if (stats.length == 0) {
        return null;
    }

    const tableStyle = { ...styleOpts };
    if (tableStyle['border-collapse'] == null) {
        tableStyle['border-collapse'] = "collapse";
    }

    const table = document.createElement("table");
    for (const [property, value] of Object.entries(tableStyle)) {
        table.style.setProperty(property, value);
    }

    const tbody = document.createElement("tbody");
    table.appendChild(tbody);

    // Collect the row markup and assign it once, rather than re-parsing the
    // whole tbody on every `innerHTML +=`.
    const rowsHtml = [];
    stats.forEach(({ name, value, sep, nameAlign, valueAlign }) => {
        if (name == "" && value == "") {
            rowsHtml.push(`<tr><td>---</td><td>---</td></tr>`);
            return;
        }

        const separator = sep ?? ":";
        const nameAlignment = nameAlign ?? "right";
        const valueAlignment = valueAlign ?? "right";

        rowsHtml.push(`<tr>
<td style="text-align:${nameAlignment};padding-right:5px">${name}${separator}</td>
<td style="text-align:${valueAlignment}">${value}</td>
</tr>`);
    });
    tbody.innerHTML = rowsHtml.join("");

    return table;
}
|
||||
|
||||
/**
 * Picks the line-height / font-size pair for a stats table by its row count:
 * full size for exactly 0-3 rows, a smaller size for exactly 4 rows, and the
 * smallest size for anything else.
 *
 * @param {number} numRows
 * @returns {Object} CSS property -> value pairs
 */
function optsForNumRows(numRows) {
    const isUpToThree = numRows === 0 || numRows === 1 || numRows === 2 || numRows === 3;
    if (isUpToThree) {
        return { 'line-height': '1.0em', 'font-size': "1.0em" };
    }
    if (numRows === 4) {
        return { 'line-height': '0.9em', 'font-size': "0.8em" };
    }
    return { 'line-height': "0.9em", 'font-size': "0.6em" };
}
|
||||
|
||||
(function () {
|
||||
'use strict';
|
||||
const navbarStatusNode = setupNavbar();
|
||||
const navbarNode = setupNavbar();
|
||||
const navbarPageStatsNode = document.createElement("div");
|
||||
const navbarEnqueueNode = document.createElement("div");
|
||||
const navbarLiveQueueNode = document.createElement("div");
|
||||
const navbarLiveEntityNode = document.createElement("div");
|
||||
|
||||
navbarPageStatsNode.innerHTML = "querying...";
|
||||
navbarEnqueueNode.innerHTML = "enqueueing...";
|
||||
navbarLiveQueueNode.innerHTML = "queue loading...";
|
||||
navbarLiveEntityNode.innerHTML = "entity loading...";
|
||||
|
||||
[
|
||||
navbarPageStatsNode,
|
||||
navbarEnqueueNode,
|
||||
navbarLiveQueueNode,
|
||||
navbarLiveEntityNode
|
||||
].forEach(node => {
|
||||
node.style.display = "flex";
|
||||
node.style.marginRight = "5px";
|
||||
});
|
||||
|
||||
[
|
||||
navbarPageStatsNode,
|
||||
navbarEnqueueNode,
|
||||
navbarLiveQueueNode
|
||||
].forEach(node => {
|
||||
node.style.paddingRight = "5px";
|
||||
node.style.borderRight = "1px solid #d7d7d7";
|
||||
});
|
||||
|
||||
navbarNode.append(navbarPageStatsNode);
|
||||
navbarNode.append(navbarEnqueueNode);
|
||||
navbarNode.append(navbarLiveQueueNode);
|
||||
navbarNode.append(navbarLiveEntityNode);
|
||||
|
||||
const userElements = gatherUserElements();
|
||||
const urlNames = [...new Set(userElements.map(({ urlName }) => urlName))];
|
||||
@@ -329,116 +450,81 @@ function faIdFromViewHref(href) {
|
||||
const postElements = gatherPostElements();
|
||||
const faIds = [...new Set(postElements.map(({ faId }) => faId))];
|
||||
|
||||
let elemsCountsNode = document.createElement("div");
|
||||
elemsCountsNode.style.width = "100%";
|
||||
elemsCountsNode.style.height = "100%";
|
||||
elemsCountsNode.style.display = "flex";
|
||||
elemsCountsNode.style.flexDirection = "row";
|
||||
elemsCountsNode.style.gap = "1em";
|
||||
function renderLiveQueueStats({
|
||||
livePostsStats,
|
||||
liveQueueStats,
|
||||
}) {
|
||||
let elemsCountsNode = document.createElement("div");
|
||||
elemsCountsNode.style.width = "100%";
|
||||
elemsCountsNode.style.height = "100%";
|
||||
elemsCountsNode.style.display = "flex";
|
||||
elemsCountsNode.style.flexDirection = "row";
|
||||
elemsCountsNode.style.gap = "1em";
|
||||
|
||||
let postsStats = [
|
||||
{ name: "not seen", value: "---" },
|
||||
{ name: "ok", value: "---" },
|
||||
{ name: "scanned", value: "---" },
|
||||
{ name: "have file", value: "---" },
|
||||
];
|
||||
navbarLiveQueueNode.innerHTML = "";
|
||||
navbarLiveQueueNode.appendChild(elemsCountsNode);
|
||||
|
||||
let queueStats = [
|
||||
{ name: "queue depths", value: "---" },
|
||||
{ name: "", value: "" },
|
||||
{ name: "", value: "" },
|
||||
{ name: "", value: "" },
|
||||
];
|
||||
|
||||
let pageSpecificStats = [];
|
||||
|
||||
function renderStats() {
|
||||
navbarStatusNode.innerHTML = "";
|
||||
navbarStatusNode.appendChild(elemsCountsNode);
|
||||
|
||||
function renderTable(stats, styleOpts = {}) {
|
||||
if (stats.length == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (styleOpts['border-collapse'] == null) {
|
||||
styleOpts['border-collapse'] = "collapse";
|
||||
}
|
||||
|
||||
let table = document.createElement("table");
|
||||
for (const [property, value] of Object.entries(styleOpts)) {
|
||||
table.style.setProperty(property, value);
|
||||
}
|
||||
|
||||
let tbody = document.createElement("tbody");
|
||||
table.appendChild(tbody);
|
||||
|
||||
stats.each(({ name, value, sep, nameAlign, valueAlign }) => {
|
||||
if (name == "" && value == "") {
|
||||
tbody.innerHTML += `<tr><td>---</td><td>---</td></tr>`;
|
||||
}
|
||||
else {
|
||||
if (sep == null) {
|
||||
sep = ":";
|
||||
}
|
||||
if (nameAlign == null) {
|
||||
nameAlign = "right";
|
||||
}
|
||||
if (valueAlign == null) {
|
||||
valueAlign = "right";
|
||||
}
|
||||
|
||||
tbody.innerHTML += `<tr>
|
||||
<td style="text-align:${nameAlign};padding-right:5px">${name}${sep}</td>
|
||||
<td style="text-align:${valueAlign}">${value}</td>
|
||||
</tr>`;
|
||||
}
|
||||
});
|
||||
return table;
|
||||
}
|
||||
|
||||
function optsForNumRows(numRows) {
|
||||
switch (numRows) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
return { 'line-height': '1.0em', 'font-size': "1.0em" };
|
||||
case 4:
|
||||
return { 'line-height': '0.9em', 'font-size': "0.8em" };
|
||||
default:
|
||||
return { 'line-height': "0.9em", 'font-size': "0.6em" }
|
||||
}
|
||||
}
|
||||
|
||||
const baseStatsTable = renderTable([
|
||||
{ name: "num users", value: urlNames.length },
|
||||
{ name: "num posts", value: faIds.length }
|
||||
], { ...optsForNumRows(2), width: "auto" });
|
||||
|
||||
const postsStatsTable = renderTable(postsStats, {
|
||||
...optsForNumRows(postsStats.length), width: "auto"
|
||||
const postsStatsTable = renderTable(livePostsStats, {
|
||||
...optsForNumRows(livePostsStats.length), width: "auto"
|
||||
});
|
||||
|
||||
const queueStatsTable = renderTable(queueStats, {
|
||||
...optsForNumRows(queueStats.length), width: "auto"
|
||||
const queueStatsTable = renderTable(liveQueueStats, {
|
||||
...optsForNumRows(liveQueueStats.length), width: "auto"
|
||||
});
|
||||
|
||||
const pageStatsTable = renderTable(pageSpecificStats, {
|
||||
...optsForNumRows(pageSpecificStats.length), width: "auto"
|
||||
});
|
||||
|
||||
elemsCountsNode.innerHTML = "";
|
||||
baseStatsTable && elemsCountsNode.appendChild(baseStatsTable);
|
||||
postsStatsTable && elemsCountsNode.appendChild(postsStatsTable);
|
||||
queueStatsTable && elemsCountsNode.appendChild(queueStatsTable);
|
||||
pageStatsTable && elemsCountsNode.appendChild(pageStatsTable);
|
||||
}
|
||||
renderStats();
|
||||
|
||||
function poll() {
|
||||
function renderLiveEntityStats(liveEntityStats) {
|
||||
const liveEntityStatsTable = renderTable(liveEntityStats, {
|
||||
...optsForNumRows(liveEntityStats.length), width: "auto"
|
||||
});
|
||||
navbarLiveEntityNode.innerHTML = "";
|
||||
liveEntityStatsTable && navbarLiveEntityNode.appendChild(liveEntityStatsTable);
|
||||
}
|
||||
|
||||
let completedEnqueue = false;
|
||||
|
||||
GM_xmlhttpRequest({
|
||||
url: 'http://scraper.local:3000/domain/fa/api/enqueue_objects',
|
||||
method: "POST",
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
data: JSON.stringify({ fa_ids: faIds, url_names: urlNames, url_names_to_enqueue: urlNamesToEnqueue }),
|
||||
onload: response => {
|
||||
console.log("response: ", response);
|
||||
completedEnqueue = true;
|
||||
|
||||
if (response.status === 200) {
|
||||
const jsonResponse = JSON.parse(response.response);
|
||||
console.log("json: ", jsonResponse);
|
||||
handleEnqueueResponse(jsonResponse);
|
||||
} else {
|
||||
navbarLiveQueueNode.innerHTML = `<b>${response.status} enqueing</b>`;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
function handleEnqueueResponse(jsonResponse) {
|
||||
navbarEnqueueNode.innerHTML = "";
|
||||
|
||||
const enqueueStats = Object
|
||||
.entries(jsonResponse)
|
||||
.map(([name, value]) => ({ name: name.split("_").join(" "), value }));
|
||||
const enqueueStatsTable = renderTable(enqueueStats, {
|
||||
...optsForNumRows(enqueueStats.length), width: "auto"
|
||||
});
|
||||
|
||||
enqueueStatsTable && navbarEnqueueNode.append(enqueueStatsTable);
|
||||
}
|
||||
|
||||
function pollLiveStats() {
|
||||
GM_xmlhttpRequest({
|
||||
url: 'http://scraper.local:3000/domain/fa/api/enqueue_objects',
|
||||
url: 'http://scraper.local:3000/domain/fa/api/object_statuses',
|
||||
method: "POST",
|
||||
headers: {
|
||||
'Accept': 'application/json',
|
||||
@@ -450,12 +536,12 @@ function faIdFromViewHref(href) {
|
||||
if (response.status === 200) {
|
||||
const jsonResponse = JSON.parse(response.response);
|
||||
console.log("json: ", jsonResponse);
|
||||
const keepPolling = handleResponse(jsonResponse);
|
||||
if (keepPolling) {
|
||||
setTimeout(() => poll(), 2500);
|
||||
const keepPolling = handleLiveStatsResponse(jsonResponse);
|
||||
if (!completedEnqueue || keepPolling) {
|
||||
setTimeout(() => pollLiveStats(), 2500);
|
||||
}
|
||||
else {
|
||||
console.log("reached terminal state on all objects");
|
||||
console.log("reached terminal state");
|
||||
}
|
||||
} else {
|
||||
// Report the failure in the live-queue slot. `navbarStatusNode` no longer
// exists in this scope (setupNavbar()'s result is now held in `navbarNode`),
// so referencing it here would throw a ReferenceError.
navbarLiveQueueNode.innerHTML = `<b>${response.status} from scraper</b>`;
|
||||
@@ -464,7 +550,7 @@ function faIdFromViewHref(href) {
|
||||
});
|
||||
}
|
||||
|
||||
function handleResponse(jsonResponse) {
|
||||
function handleLiveStatsResponse(jsonResponse) {
|
||||
let allTerminalState = true;
|
||||
|
||||
let numNotSeenPosts = 0;
|
||||
@@ -493,32 +579,38 @@ function faIdFromViewHref(href) {
|
||||
});
|
||||
}
|
||||
|
||||
postsStats = [
|
||||
const livePostsStats = [
|
||||
{ name: "not seen", value: numNotSeenPosts },
|
||||
{ name: "ok", value: numOkPosts },
|
||||
{ name: "scanned", value: numScannedPosts },
|
||||
{ name: "have file", value: numHaveFile },
|
||||
];
|
||||
|
||||
queueStats = Object
|
||||
let liveQueueStats = Object
|
||||
.entries(jsonResponse.queues.depths)
|
||||
.map(([queue, depth]) => ({ name: queue, value: depth }));
|
||||
|
||||
queueStats = [
|
||||
{ name: "queue depths", value: `${jsonResponse.queues.total_depth} total` },
|
||||
...queueStats
|
||||
liveQueueStats = [
|
||||
{ name: "total depth", value: `${jsonResponse.queues.total_depth}` },
|
||||
...liveQueueStats
|
||||
];
|
||||
while (queueStats.length < 4) {
|
||||
queueStats.push({ name: "", value: "" });
|
||||
|
||||
while (liveQueueStats.length < 4) {
|
||||
liveQueueStats.push({ name: "", value: "" });
|
||||
}
|
||||
|
||||
allTerminalState &&= jsonResponse.queues.total_depth == 0;
|
||||
|
||||
let liveEntityStats = [
|
||||
{ name: "no entity", value: "", sep: '' }
|
||||
];
|
||||
|
||||
const thisPageFaId = faIdFromViewHref(window.location.href);
|
||||
const pssCommon = { sep: '', valueAlign: 'left' };
|
||||
if (thisPageFaId != null) {
|
||||
const postData = jsonResponse.posts[thisPageFaId];
|
||||
pageSpecificStats = [
|
||||
{ name: 'link', value: `<a target="_blank" href="${postData.info_url}">${thisPageFaId}</a>`, ...pssCommon },
|
||||
liveEntityStats = [
|
||||
{ name: 'link', value: `<a target="_blank" href="${postData.info_url}" style="text-decoration: underline dotted">${thisPageFaId}</a>`, ...pssCommon },
|
||||
{ name: `seen`, value: postData.seen_at, ...pssCommon },
|
||||
{ name: `scanned`, value: postData.scanned_at, ...pssCommon },
|
||||
{ name: `downloaded`, value: postData.downloaded_at, ...pssCommon }
|
||||
@@ -528,7 +620,7 @@ function faIdFromViewHref(href) {
|
||||
const thisPageUrlName = urlNameFromUserHref(window.location.href);
|
||||
if (thisPageUrlName != null) {
|
||||
const userData = jsonResponse.users[thisPageUrlName];
|
||||
pageSpecificStats = [
|
||||
liveEntityStats = [
|
||||
{ name: '', value: thisPageUrlName, ...pssCommon },
|
||||
{ name: 'first seen', value: userData.created_at, ...pssCommon },
|
||||
{ name: 'page scan', value: userData.scanned_page_at, ...pssCommon },
|
||||
@@ -536,10 +628,37 @@ function faIdFromViewHref(href) {
|
||||
];
|
||||
}
|
||||
|
||||
renderStats();
|
||||
renderLiveQueueStats({
|
||||
livePostsStats,
|
||||
liveQueueStats,
|
||||
});
|
||||
renderLiveEntityStats(liveEntityStats);
|
||||
|
||||
return !allTerminalState;
|
||||
}
|
||||
|
||||
poll();
|
||||
// right off, can render the page stats table
|
||||
const pageStatsTable = renderTable([
|
||||
{ name: "page users", value: urlNames.length },
|
||||
{ name: "page posts", value: faIds.length }
|
||||
], { ...optsForNumRows(2), width: "auto" });
|
||||
navbarPageStatsNode.innerHTML = "";
|
||||
navbarPageStatsNode.append(pageStatsTable);
|
||||
|
||||
renderLiveQueueStats({
|
||||
livePostsStats: [
|
||||
{ name: "not seen", value: "---" },
|
||||
{ name: "ok", value: "---" },
|
||||
{ name: "scanned", value: "---" },
|
||||
{ name: "have file", value: "---" }
|
||||
],
|
||||
liveQueueStats: [
|
||||
{ name: "queue depths", value: "---" },
|
||||
{ name: "", value: "" },
|
||||
{ name: "", value: "" },
|
||||
{ name: "", value: "" },
|
||||
]
|
||||
});
|
||||
|
||||
pollLiveStats();
|
||||
})();
|
||||
Reference in New Issue
Block a user