basic e621

This commit is contained in:
2023-03-01 08:13:49 -08:00
parent a9ef897c42
commit d3a864b92f
17 changed files with 345 additions and 5 deletions

View File

@@ -1,4 +1,6 @@
class Domain::Fa::Job::FaJobBase < Scraper::JobBase
discard_on ActiveJob::DeserializationError
DATE_HELPER = Class.new.extend(ActionView::Helpers::DateHelper)
def self.build_http_client

View File

@@ -0,0 +1,11 @@
# ActiveRecord model backed by the `domain_e621_posts` table.
class Domain::E621::Post < ReduxApplicationRecord
  self.table_name = "domain_e621_posts"

  # Project-provided macro, invoked with schema version 1.
  has_lite_trail schema_version: 1

  # Join rows connecting this post to its tags.
  has_many :taggings, class_name: "Domain::E621::Tagging"

  # Tags reached through the join rows above.
  has_many :tags, through: :taggings, class_name: "Domain::E621::Tag"
end

View File

@@ -0,0 +1,3 @@
# ActiveRecord model backed by the `domain_e621_tags` table.
class Domain::E621::Tag < ReduxApplicationRecord
  self.table_name = "domain_e621_tags"

  # The table has an integer `type` column. ActiveRecord treats a column named
  # `type` as the single-table-inheritance discriminator by default, so that
  # behaviour is switched off here to let `type` be read as plain data.
  self.inheritance_column = nil
end

View File

@@ -0,0 +1,19 @@
# Join model between Domain::E621::Post and Domain::E621::Tag, backed by the
# `domain_e621_taggings` table.
class Domain::E621::Tagging < ReduxApplicationRecord
  self.table_name = "domain_e621_taggings"

  # `type` is an integer enum column here, not a single-table-inheritance
  # discriminator. Without this, ActiveRecord would try to resolve the enum
  # label as a class name when instantiating rows.
  self.inheritance_column = nil

  belongs_to :post, class_name: "Domain::E621::Post"
  belongs_to :tag, class_name: "Domain::E621::Tag"

  # Stored as integers assigned by position in this array (type_general = 0,
  # type_artist = 1, ...), so the order must not be changed.
  enum type: [
    :type_general,
    :type_artist,
    :type_copyright,
    :type_character,
    :type_species,
    :type_invalid,
    :type_meta,
    :type_lore,
  ]

  validates_inclusion_of(:type, in: types.keys)
end

View File

@@ -0,0 +1,129 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: e621_posts
#
# id :integer not null, primary key
# e621_id :integer not null
# md5 :string not null
# sources :string
# file_url :string not null
# file_ext :string not null
# description :string
# rating :integer
# width :integer
# height :integer not null
# tags_string :string not null
# status :integer
# score :integer
# removed :boolean
# created_at :datetime not null
# updated_at :datetime not null
# artists :string
# e621_count :integer
# author :string
# e621_status :string
# blob_entry_id :integer
# imgsearch_entry_id :integer
#
# Legacy ActiveRecord model for rows of the `e621_posts` table.
class Legacy::E621::Post < LegacyApplicationRecord
  self.table_name = "e621_posts"

  validates_presence_of :e621_id, :md5, :author, :file_url, :file_ext, :rating, :tags_string, :status, :score
  validates_uniqueness_of :md5, :e621_id

  serialize :sources, Array
  serialize :artists, Array

  belongs_to :blob_entry,
             class_name: "Legacy::BlobEntry"

  # just inserted into db: :not_processed
  # we've checked for the existence of its file on the
  # disk and it isn't there: :should_download
  # we've made an attempt to download its file: :processed
  enum status: %i[not_processed should_download processed processed_404 processed_err]
  validates_inclusion_of :status,
                         in: statuses.keys

  has_many :taggings, class_name: "Legacy::E621::Tagging"
  has_many :tags, through: :taggings

  enum rating: %i[s q e]
  validates_inclusion_of :rating,
                         in: ratings.keys

  # Path of the post's file relative to the static directory: the basename of
  # `file_url`, nested under directories named after its first two characters.
  def file_relative_path
    base = File.basename(file_url)
    "#{base[0]}/#{base[1]}/#{base}"
  end

  # Fill in a missing file extension from the file path (without the leading dot).
  before_validation do
    self.file_ext ||= File.extname(file_path)[1..-1]
  end

  # `blob_entry_id` is a nullable column, so the post may have no blob entry;
  # only decrement the refcount when one is attached.
  before_destroy do
    blob_entry&.dec_refcount
  end

  # Location of the post's file under `SConfig.e621_static_dir`.
  def file_path
    File.join SConfig.e621_static_dir, file_relative_path
  end

  # Location of a resized rendition of the post's file for the given style.
  #
  # @param style [#to_s] name of the rendition; used as a directory name
  # @return [String] path under `SConfig.e621_data_dir`/resized/<style>/
  # @raise [RuntimeError] "no md5" when the post has no md5
  def resized_file_path(style)
    raise("no md5") unless md5

    # The md5 is passed under the `sha256:` keyword, with hash_length: 32.
    # NOTE(review): `BlobEntry` is referenced without a namespace here, while the
    # association above uses "Legacy::BlobEntry" -- confirm which constant this
    # resolves to.
    hashed_path = BlobEntry.file_path_at_depth(
      sha256: md5,
      depth: 4, stride: 2, hash_length: 32,
    )
    File.join SConfig.e621_data_dir, "resized", style.to_s, (hashed_path + "." + file_ext)
  end

  # A source URL that points at FurAffinity: `type` is :post or :user, `id` is
  # the numeric post id or the user's url name, `url` is the source string.
  FASource = Struct.new(:type, :id, :url)

  # Extracts the FurAffinity references found in `sources`.
  #
  # @return [Array<FASource>] one entry per matching source; sources that match
  #   neither pattern are dropped
  def fa_sources
    sources.flatten.filter_map do |source|
      if (matches = %r{furaffinity\.net/view/(\d+)}.match(source))
        FASource.new(:post, matches[1].to_i, source)
      elsif (matches = %r{furaffinity\.net/(gallery|user)/([^/]+)}.match(source))
        # NOTE(review): `FA::User` is not defined in this file -- confirm it is
        # resolvable from this namespace.
        url_name = FA::User.name_to_url_name(matches[2])
        FASource.new(:user, url_name, source)
      end
    end
  end

  # Brings the `tags` association in line with the whitespace-separated
  # `tags_string`, creating Tag rows for values that do not exist yet.
  #
  # @param assume_total_overwrite [Boolean] when true, assign the full tag list
  #   in one go; otherwise delete and append only the tags that differ
  def update_taggings(assume_total_overwrite: false)
    tags_string_split = tags_string.split(/\s+/).map(&:strip).reject(&:blank?)
    tags_arr = Legacy::E621::Tag.where(value: tags_string_split).select(:id, :value).to_a

    missing = Set.new(tags_string_split) - Set.new(tags_arr.map(&:value))
    missing.each do |missing_val|
      tags_arr << Legacy::E621::Tag.find_or_create_by(value: missing_val)
    end
    # SConfig.logger.info "had to create tags: #{missing.to_a.join(", ")}" if missing.any?

    if assume_total_overwrite
      self.tags = tags_arr
    else
      should_be = Set.new(tags_arr)
      but_is = Set.new(tags)
      removed = but_is - should_be
      added = should_be - but_is
      tags.delete(removed.to_a)
      tags << added.to_a
    end

    # Sanity check: report when the association still disagrees with tags_string.
    if Set.new(tags.map(&:value)) != Set.new(tags_string_split)
      puts "tagging mismatch on #{id} (#{e621_id})"
    end
  end
end

View File

@@ -0,0 +1,20 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: e621_tags
#
# id :integer not null, primary key
# value :string not null
# e621_id :integer
# type :integer
# e621_count :integer
# created_at :datetime not null
# updated_at :datetime not null
#
# Legacy ActiveRecord model for rows of the `e621_tags` table.
class Legacy::E621::Tag < LegacyApplicationRecord
  self.table_name = "e621_tags"

  # The table carries an integer `type` column; clearing the inheritance
  # column keeps ActiveRecord from using it for single-table inheritance.
  self.inheritance_column = nil

  validates :value, presence: true
end

View File

@@ -0,0 +1,20 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: e621_taggings
#
# id :integer not null, primary key
# tag_id :integer
# post_id :integer
# created_at :datetime not null
# updated_at :datetime not null
#
# Legacy join model linking a Legacy::E621::Post to a Legacy::E621::Tag.
class Legacy::E621::Tagging < LegacyApplicationRecord
  # Set explicitly, like the sibling legacy Post and Tag models, instead of
  # relying on the name ActiveRecord would infer for a namespaced class.
  self.table_name = "e621_taggings"

  belongs_to :post, class_name: "Legacy::E621::Post"
  belongs_to :tag, class_name: "Legacy::E621::Tag"

  validates_presence_of :post, :tag
  # A given tag may be attached to a given post only once.
  validates_uniqueness_of :tag_id, scope: :post_id
end

View File

@@ -59,7 +59,7 @@
<tr>
<td class="leftb">
<%= HexUtil.humansize(hle.response.size) %> <br>
(<%= (hle.response.bytes_stored.to_f / hle.response.size).round(2) %>)
(<%= (hle.response.bytes_stored.to_f / hle.response.size).round(2) %>, <%= hle.performed_by %>)
</td>
<td class="time leftb">
<%= time_ago_in_words(hle.created_at) %>

View File

@@ -24,6 +24,9 @@
table td:last-child {
border-right: none;
}
table td:first-child {
text-align: right;
}
iframe {
width: 100%;
@@ -61,6 +64,10 @@
<td>response time</td>
<td><%= rtms == -1 ? "(not recorded)" : "#{rtms}ms" %></td>
</tr>
<tr>
<td>performed by</td>
<td><%= @log_entry.performed_by %></td>
</tr>
<tr>
<td>content type</td>
<td><%= @log_entry.content_type %></td>

View File

@@ -24,7 +24,7 @@ module ReduxScraper
# config.eager_load_paths << Rails.root.join("extras")
config.x.cookies.fa = ReduxScraper::Application.config_for("cookies/fa")
config.x.proxies = {
"proxy-1" => "http://proxy-1.local:8999",
"proxy-1" => "http://proxy-1.local:9292",
}
end
end

View File

@@ -0,0 +1,53 @@
# Creates the `domain_e621_posts` table.
class CreateDomainE621Posts < ActiveRecord::Migration[7.0]
  def change
    create_table :domain_e621_posts do |t|
      t.integer :e621_id, null: false
      t.integer :state, null: false
      t.jsonb :state_detail
      t.string :file_url_str
      t.string :description
      t.integer :rating
      t.integer :score
      t.integer :up_score
      t.integer :down_score
      t.integer :status
      t.integer :favorites
      t.integer :file_width
      t.integer :file_height
      t.jsonb :sources_array
      t.jsonb :tags_array
      t.references :file
      t.references :parent_e621
      t.timestamps

      # Boolean `true`, not the Symbol `:true` (which only worked by being truthy).
      t.index :e621_id, unique: true
    end
  end
end
# id :integer not null, primary key
# e621_id :integer not null
# md5 :string not null
# sources :string
# file_url :string not null
# file_ext :string not null
# description :string
# rating :integer
# width :integer
# height :integer not null
# tags_string :string not null
# status :integer
# score :integer
# removed :boolean
# created_at :datetime not null
# updated_at :datetime not null
# artists :string
# e621_count :integer
# author :string
# e621_status :string
# blob_entry_id :integer
# imgsearch_entry_id :integer

View File

@@ -0,0 +1,14 @@
# Creates the `domain_e621_tags` table.
class CreateDomainE621Tags < ActiveRecord::Migration[7.0]
  def change
    create_table(:domain_e621_tags) do |table|
      table.string(:name, null: false)
      table.integer(:e621_id)
      table.integer(:type)
      table.string(:description)
      table.timestamps

      # Unique single-column indexes on name and on e621_id, in that order.
      %i[name e621_id].each do |column|
        table.index(column, unique: true)
      end
    end
  end
end

View File

@@ -0,0 +1,9 @@
# Creates the `domain_e621_taggings` join table.
class CreateDomainE621Taggings < ActiveRecord::Migration[7.0]
  def change
    create_table(:domain_e621_taggings) do |table|
      table.integer(:type)
      # One reference column per association, declared in a single call.
      table.references(:post, :tag)
    end
  end
end

View File

@@ -0,0 +1,11 @@
# Adds a NOT NULL integer `performed_by` column to `http_log_entries`,
# backfilling existing rows with 0.
class AddPerformedByToHttpLogEntries < ActiveRecord::Migration[7.0]
  def up
    # Added as nullable first so existing rows can be backfilled.
    add_column :http_log_entries, :performed_by, :integer
    HttpLogEntry.update_all(performed_by: 0) # direct

    # `change_column` requires the column type as its third argument;
    # `change_column_null` is the call for toggling only the NOT NULL constraint.
    change_column_null :http_log_entries, :performed_by, false
  end

  def down
    remove_column :http_log_entries, :performed_by
  end
end

44
db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2023_02_26_062154) do
ActiveRecord::Schema[7.0].define(version: 2023_03_01_013456) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_stat_statements"
enable_extension "plpgsql"
@@ -42,6 +42,47 @@ ActiveRecord::Schema[7.0].define(version: 2023_02_26_062154) do
t.jsonb "args"
t.index ["priority", "run_at"], name: "delayed_jobs_priority_run_at_idx"
t.index ["queue"], name: "delayed_jobs_queue_idx"
t.index ["signature"], name: "delayed_jobs_signature_idx", unique: true
end
create_table "domain_e621_posts", force: :cascade do |t|
t.integer "e621_id", null: false
t.integer "state", null: false
t.jsonb "state_detail"
t.string "file_url_str"
t.string "description"
t.integer "rating"
t.integer "score"
t.integer "status"
t.integer "favorites"
t.integer "file_width"
t.integer "file_height"
t.jsonb "sources_array"
t.jsonb "tags_array"
t.bigint "file_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["e621_id"], name: "index_domain_e621_posts_on_e621_id", unique: true
t.index ["file_id"], name: "index_domain_e621_posts_on_file_id"
end
create_table "domain_e621_taggings", force: :cascade do |t|
t.integer "type"
t.bigint "post_id"
t.bigint "tag_id"
t.index ["post_id"], name: "index_domain_e621_taggings_on_post_id"
t.index ["tag_id"], name: "index_domain_e621_taggings_on_tag_id"
end
create_table "domain_e621_tags", force: :cascade do |t|
t.string "name", null: false
t.integer "e621_id"
t.integer "type"
t.string "description"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["e621_id"], name: "index_domain_e621_tags_on_e621_id", unique: true
t.index ["name"], name: "index_domain_e621_tags_on_name", unique: true
end
create_table "domain_fa_posts", force: :cascade do |t|
@@ -118,6 +159,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_02_26_062154) do
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.bigint "caused_by_id"
t.integer "performed_by", null: false
t.index ["caused_by_id"], name: "index_http_log_entries_on_caused_by_id"
t.index ["request_headers_id"], name: "index_http_log_entries_on_request_headers_id"
t.index ["response_headers_id"], name: "index_http_log_entries_on_response_headers_id"

View File

@@ -37,7 +37,7 @@ end
[:default, 1],
[:fa_user_page, 2],
[:fa_user_gallery, 1],
[:fa_post, 4],
[:fa_post, 3],
].each do |queue, workers|
worker_group(queue) do |g|
g.read_ahead = 8

View File

@@ -34,7 +34,7 @@ end
worker_group(:static_file) do |g|
g.read_ahead = 1
g.sleep_delay = 5
g.workers = 2
g.workers = 4
g.queues = ["static_file"]
end