basic e621
This commit is contained in:
@@ -1,4 +1,6 @@
|
||||
class Domain::Fa::Job::FaJobBase < Scraper::JobBase
|
||||
discard_on ActiveJob::DeserializationError
|
||||
|
||||
DATE_HELPER = Class.new.extend(ActionView::Helpers::DateHelper)
|
||||
|
||||
def self.build_http_client
|
||||
|
||||
11
app/models/domain/e621/post.rb
Normal file
11
app/models/domain/e621/post.rb
Normal file
@@ -0,0 +1,11 @@
|
||||
# Record for an e621 post, backed by the `domain_e621_posts` table.
class Domain::E621::Post < ReduxApplicationRecord
  self.table_name = "domain_e621_posts"

  # NOTE(review): `has_lite_trail` is defined outside this file; presumably it
  # enables versioned change tracking for this model -- confirm.
  has_lite_trail(schema_version: 1)

  # Join rows connecting this post to its tags.
  has_many(:taggings, class_name: "Domain::E621::Tagging")

  # Tags attached to this post, reached through the join rows.
  has_many(:tags, through: :taggings, class_name: "Domain::E621::Tag")
end
|
||||
3
app/models/domain/e621/tag.rb
Normal file
3
app/models/domain/e621/tag.rb
Normal file
@@ -0,0 +1,3 @@
|
||||
# Record for an e621 tag, backed by the `domain_e621_tags` table.
class Domain::E621::Tag < ReduxApplicationRecord
  self.table_name = "domain_e621_tags"

  # The table has an integer `type` column that holds data, not a class name.
  # Active Record uses `type` as the single-table-inheritance discriminator by
  # default, so STI has to be switched off or loading a row with a non-null
  # `type` fails while trying to resolve a subclass.
  self.inheritance_column = nil
end
|
||||
19
app/models/domain/e621/tagging.rb
Normal file
19
app/models/domain/e621/tagging.rb
Normal file
@@ -0,0 +1,19 @@
|
||||
# Join record between an e621 post and an e621 tag, backed by the
# `domain_e621_taggings` table. Each row also records the tag category in
# the integer `type` column.
class Domain::E621::Tagging < ReduxApplicationRecord
  self.table_name = "domain_e621_taggings"

  # `type` is an enum-backed data column here, not an STI class name. Without
  # this, Active Record treats it as the inheritance discriminator and fails
  # to instantiate rows whose `type` is set.
  self.inheritance_column = nil

  belongs_to :post,
             class_name: "Domain::E621::Post"
  belongs_to :tag,
             class_name: "Domain::E621::Tag"

  # Order matters: the array position is the integer stored in the database.
  enum type: %i[
    type_general
    type_artist
    type_copyright
    type_character
    type_species
    type_invalid
    type_meta
    type_lore
  ]
  validates_inclusion_of(:type, in: types.keys)
end
|
||||
129
app/models/legacy/e621/post.rb
Normal file
129
app/models/legacy/e621/post.rb
Normal file
@@ -0,0 +1,129 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: e621_posts
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# e621_id :integer not null
|
||||
# md5 :string not null
|
||||
# sources :string
|
||||
# file_url :string not null
|
||||
# file_ext :string not null
|
||||
# description :string
|
||||
# rating :integer
|
||||
# width :integer
|
||||
# height :integer not null
|
||||
# tags_string :string not null
|
||||
# status :integer
|
||||
# score :integer
|
||||
# removed :boolean
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# artists :string
|
||||
# e621_count :integer
|
||||
# author :string
|
||||
# e621_status :string
|
||||
# blob_entry_id :integer
|
||||
# imgsearch_entry_id :integer
|
||||
#
|
||||
|
||||
# Model over the legacy `e621_posts` table: post metadata, the location of
# its file on disk, and the tag associations derived from `tags_string`.
class Legacy::E621::Post < LegacyApplicationRecord
  self.table_name = "e621_posts"

  validates_presence_of :e621_id, :md5, :author, :file_url, :file_ext, :rating, :tags_string, :status, :score
  validates_uniqueness_of :md5, :e621_id

  serialize :sources, Array
  serialize :artists, Array

  belongs_to :blob_entry,
             class_name: "Legacy::BlobEntry"

  # just inserted into db: :not_processed
  # we've checked for the existence of its file on the
  # disk and it isn't there: :should_download
  # we've made an attempt to download its file: :processed
  enum status: %i[not_processed should_download processed processed_404 processed_err]
  validates_inclusion_of :status,
                         in: statuses.keys

  has_many :taggings, class_name: "Legacy::E621::Tagging"
  has_many :tags, through: :taggings

  # NOTE(review): presumably safe / questionable / explicit -- confirm.
  enum rating: %i[s q e]
  validates_inclusion_of :rating,
                         in: ratings.keys

  # Path of the post's file relative to the static dir, sharded by the first
  # two characters of the file name: "a/b/ab....ext".
  #
  # @return [String]
  def file_relative_path
    base = File.basename(file_url)
    "#{base[0]}/#{base[1]}/#{base}"
  end

  # Derive the extension from the file path when it was not supplied. Skipped
  # when file_url is missing so the presence validation can report the
  # problem instead of File.basename raising a TypeError.
  before_validation do
    self.file_ext ||= File.extname(file_path)[1..-1] if file_url.present?
  end

  # blob_entry_id is nullable, so there may be no blob to release.
  before_destroy do
    blob_entry&.dec_refcount
  end

  # Absolute path of the post's file under the configured static directory.
  # NOTE(review): SConfig is not defined in this file -- confirm it is
  # available in this application.
  #
  # @return [String]
  def file_path
    File.join SConfig.e621_static_dir, file_relative_path
  end

  # Path of the resized rendition of this post for the given style.
  #
  # @param style [#to_s] name of the resize style (used as a directory name)
  # @return [String]
  # @raise [RuntimeError] when the post has no md5
  def resized_file_path(style)
    raise("no md5") unless md5

    # NOTE(review): bare `BlobEntry` resolves to a top-level constant from
    # this compact-style class definition, while the association above uses
    # Legacy::BlobEntry -- confirm which class provides file_path_at_depth.
    hashed_path = BlobEntry.file_path_at_depth(
      sha256: md5,
      depth: 4, stride: 2, hash_length: 32,
    )
    File.join SConfig.e621_data_dir, "resized", style.to_s, (hashed_path + "." + file_ext)
  end

  # A FurAffinity reference found in a post's sources. `type` is :post or
  # :user; `id` is the numeric submission id or the user's url name.
  FASource = Struct.new(:type, :id, :url)

  # Extracts FurAffinity submission and user links from `sources`.
  #
  # @return [Array<FASource>] one entry per matching source, in source order
  def fa_sources
    sources.flatten.filter_map do |source|
      if (matches = %r{furaffinity\.net/view/(\d+)}.match(source))
        FASource.new(:post, matches[1].to_i, source)
      elsif (matches = %r{furaffinity\.net/(gallery|user)/([^/]+)}.match(source))
        # NOTE(review): FA::User is not defined in this file -- confirm the
        # constant exists under this name in this application.
        url_name = FA::User.name_to_url_name(matches[2])
        FASource.new(:user, url_name, source)
      end
    end
  end

  # Synchronises the `tags` association with the whitespace-separated names
  # in `tags_string`, creating any Tag rows that do not exist yet.
  #
  # @param assume_total_overwrite [Boolean] when true, assign the full tag
  #   list in one step instead of computing which tags to add and remove
  def update_taggings(assume_total_overwrite: false)
    tags_string_split = tags_string.split(/\s+/).map(&:strip).reject(&:blank?)
    tags_arr = Legacy::E621::Tag.where(value: tags_string_split).select(:id, :value).to_a

    missing = Set.new(tags_string_split) - Set.new(tags_arr.map(&:value))

    missing.each do |missing_val|
      tags_arr << Legacy::E621::Tag.find_or_create_by(value: missing_val)
    end

    # SConfig.logger.info "had to create tags: #{missing.to_a.join(", ")}" if missing.any?
    if assume_total_overwrite
      self.tags = tags_arr
    else
      should_be = Set.new(tags_arr)
      but_is = Set.new(tags)

      removed = but_is - should_be
      added = should_be - but_is

      tags.delete(removed.to_a)
      tags << added.to_a
    end

    # Sanity check: the association should now mirror tags_string exactly.
    if Set.new(tags.map(&:value)) != Set.new(tags_string_split)
      puts "tagging mismatch on #{id} (#{e621_id})"
    end
  end
end
|
||||
20
app/models/legacy/e621/tag.rb
Normal file
20
app/models/legacy/e621/tag.rb
Normal file
@@ -0,0 +1,20 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: e621_tags
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# value :string not null
|
||||
# e621_id :integer
|
||||
# type :integer
|
||||
# e621_count :integer
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
|
||||
# Model over the legacy `e621_tags` table.
class Legacy::E621::Tag < LegacyApplicationRecord
  self.table_name = "e621_tags"

  # The table's `type` column is an integer data column, so the default
  # single-table-inheritance lookup on it is disabled.
  self.inheritance_column = nil

  validates :value, presence: true
end
|
||||
20
app/models/legacy/e621/tagging.rb
Normal file
20
app/models/legacy/e621/tagging.rb
Normal file
@@ -0,0 +1,20 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: e621_taggings
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# tag_id :integer
|
||||
# post_id :integer
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
|
||||
# Join record between a legacy e621 post and a legacy e621 tag.
class Legacy::E621::Tagging < LegacyApplicationRecord
  # Set explicitly, like the sibling Post and Tag models: the schema
  # annotation names the table `e621_taggings`, which is not what Active
  # Record infers from the class name alone.
  self.table_name = "e621_taggings"

  belongs_to :post, class_name: "Legacy::E621::Post"
  belongs_to :tag, class_name: "Legacy::E621::Tag"

  validates_presence_of :post, :tag
  # A given tag may be attached to a given post only once.
  validates_uniqueness_of :tag_id, scope: :post_id
end
|
||||
@@ -59,7 +59,7 @@
|
||||
<tr>
|
||||
<td class="leftb">
|
||||
<%= HexUtil.humansize(hle.response.size) %> <br>
|
||||
(<%= (hle.response.bytes_stored.to_f / hle.response.size).round(2) %>)
|
||||
(<%= (hle.response.bytes_stored.to_f / hle.response.size).round(2) %>, <%= hle.performed_by %>)
|
||||
</td>
|
||||
<td class="time leftb">
|
||||
<%= time_ago_in_words(hle.created_at) %>
|
||||
|
||||
@@ -24,6 +24,9 @@
|
||||
table td:last-child {
|
||||
border-right: none;
|
||||
}
|
||||
table td:first-child {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
iframe {
|
||||
width: 100%;
|
||||
@@ -61,6 +64,10 @@
|
||||
<td>response time</td>
|
||||
<td><%= rtms == -1 ? "(not recorded)" : "#{rtms}ms" %></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>performed by</td>
|
||||
<td><%= @log_entry.performed_by %></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>content type</td>
|
||||
<td><%= @log_entry.content_type %></td>
|
||||
|
||||
@@ -24,7 +24,7 @@ module ReduxScraper
|
||||
# config.eager_load_paths << Rails.root.join("extras")
|
||||
config.x.cookies.fa = ReduxScraper::Application.config_for("cookies/fa")
|
||||
config.x.proxies = {
|
||||
"proxy-1" => "http://proxy-1.local:8999",
|
||||
"proxy-1" => "http://proxy-1.local:9292",
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
53
db/redux_migrate/20230226223215_create_domain_e621_posts.rb
Normal file
53
db/redux_migrate/20230226223215_create_domain_e621_posts.rb
Normal file
@@ -0,0 +1,53 @@
|
||||
# Creates the `domain_e621_posts` table holding scraped e621 post metadata.
class CreateDomainE621Posts < ActiveRecord::Migration[7.0]
  def change
    create_table :domain_e621_posts do |t|
      t.integer :e621_id, null: false
      t.integer :state, null: false
      t.jsonb :state_detail

      t.string :file_url_str
      t.string :description
      t.integer :rating
      t.integer :score
      t.integer :up_score
      t.integer :down_score
      t.integer :status
      t.integer :favorites

      t.integer :file_width
      t.integer :file_height

      t.jsonb :sources_array
      t.jsonb :tags_array
      t.references :file
      t.references :parent_e621

      t.timestamps

      # One row per e621 post id (boolean `true`, not the Symbol `:true`).
      t.index :e621_id, unique: true
    end
  end
end
|
||||
|
||||
# id :integer not null, primary key
|
||||
# e621_id :integer not null
|
||||
# md5 :string not null
|
||||
# sources :string
|
||||
# file_url :string not null
|
||||
# file_ext :string not null
|
||||
# description :string
|
||||
# rating :integer
|
||||
# width :integer
|
||||
# height :integer not null
|
||||
# tags_string :string not null
|
||||
# status :integer
|
||||
# score :integer
|
||||
# removed :boolean
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# artists :string
|
||||
# e621_count :integer
|
||||
# author :string
|
||||
# e621_status :string
|
||||
# blob_entry_id :integer
|
||||
# imgsearch_entry_id :integer
|
||||
14
db/redux_migrate/20230226224328_create_domain_e621_tags.rb
Normal file
14
db/redux_migrate/20230226224328_create_domain_e621_tags.rb
Normal file
@@ -0,0 +1,14 @@
|
||||
# Creates the `domain_e621_tags` table: one row per tag, unique by name and
# by e621 id.
class CreateDomainE621Tags < ActiveRecord::Migration[7.0]
  def change
    create_table(:domain_e621_tags) do |table|
      table.string(:name, null: false)
      table.integer(:e621_id)

      table.integer(:type)
      table.string(:description)
      table.timestamps

      # Uniqueness constraints on both identifiers of a tag.
      table.index(:name, unique: true)
      table.index(:e621_id, unique: true)
    end
  end
end
|
||||
@@ -0,0 +1,9 @@
|
||||
# Creates the `domain_e621_taggings` join table between posts and tags, with
# an integer `type` column on each row.
class CreateDomainE621Taggings < ActiveRecord::Migration[7.0]
  def change
    create_table(:domain_e621_taggings) do |table|
      table.integer(:type)
      table.references(:post)
      table.references(:tag)
    end
  end
end
|
||||
@@ -0,0 +1,11 @@
|
||||
# Adds a required integer `performed_by` column to `http_log_entries`,
# backfilling existing rows with 0 ("direct").
class AddPerformedByToHttpLogEntries < ActiveRecord::Migration[7.0]
  def up
    add_column :http_log_entries, :performed_by, :integer
    # change_column_null replaces existing NULLs with the fourth argument
    # before adding the NOT NULL constraint. This avoids calling
    # change_column without its required type argument, and keeps the
    # migration independent of the HttpLogEntry model class.
    change_column_null :http_log_entries, :performed_by, false, 0 # 0 = direct
  end

  def down
    remove_column :http_log_entries, :performed_by
  end
end
|
||||
44
db/schema.rb
generated
44
db/schema.rb
generated
@@ -10,7 +10,7 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_02_26_062154) do
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_03_01_013456) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_stat_statements"
|
||||
enable_extension "plpgsql"
|
||||
@@ -42,6 +42,47 @@ ActiveRecord::Schema[7.0].define(version: 2023_02_26_062154) do
|
||||
t.jsonb "args"
|
||||
t.index ["priority", "run_at"], name: "delayed_jobs_priority_run_at_idx"
|
||||
t.index ["queue"], name: "delayed_jobs_queue_idx"
|
||||
t.index ["signature"], name: "delayed_jobs_signature_idx", unique: true
|
||||
end
|
||||
|
||||
create_table "domain_e621_posts", force: :cascade do |t|
|
||||
t.integer "e621_id", null: false
|
||||
t.integer "state", null: false
|
||||
t.jsonb "state_detail"
|
||||
t.string "file_url_str"
|
||||
t.string "description"
|
||||
t.integer "rating"
|
||||
t.integer "score"
|
||||
t.integer "status"
|
||||
t.integer "favorites"
|
||||
t.integer "file_width"
|
||||
t.integer "file_height"
|
||||
t.jsonb "sources_array"
|
||||
t.jsonb "tags_array"
|
||||
t.bigint "file_id"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["e621_id"], name: "index_domain_e621_posts_on_e621_id", unique: true
|
||||
t.index ["file_id"], name: "index_domain_e621_posts_on_file_id"
|
||||
end
|
||||
|
||||
create_table "domain_e621_taggings", force: :cascade do |t|
|
||||
t.integer "type"
|
||||
t.bigint "post_id"
|
||||
t.bigint "tag_id"
|
||||
t.index ["post_id"], name: "index_domain_e621_taggings_on_post_id"
|
||||
t.index ["tag_id"], name: "index_domain_e621_taggings_on_tag_id"
|
||||
end
|
||||
|
||||
create_table "domain_e621_tags", force: :cascade do |t|
|
||||
t.string "name", null: false
|
||||
t.integer "e621_id"
|
||||
t.integer "type"
|
||||
t.string "description"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["e621_id"], name: "index_domain_e621_tags_on_e621_id", unique: true
|
||||
t.index ["name"], name: "index_domain_e621_tags_on_name", unique: true
|
||||
end
|
||||
|
||||
create_table "domain_fa_posts", force: :cascade do |t|
|
||||
@@ -118,6 +159,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_02_26_062154) do
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.bigint "caused_by_id"
|
||||
t.integer "performed_by", null: false
|
||||
t.index ["caused_by_id"], name: "index_http_log_entries_on_caused_by_id"
|
||||
t.index ["request_headers_id"], name: "index_http_log_entries_on_request_headers_id"
|
||||
t.index ["response_headers_id"], name: "index_http_log_entries_on_response_headers_id"
|
||||
|
||||
@@ -37,7 +37,7 @@ end
|
||||
[:default, 1],
|
||||
[:fa_user_page, 2],
|
||||
[:fa_user_gallery, 1],
|
||||
[:fa_post, 4],
|
||||
[:fa_post, 3],
|
||||
].each do |queue, workers|
|
||||
worker_group(queue) do |g|
|
||||
g.read_ahead = 8
|
||||
|
||||
@@ -34,7 +34,7 @@ end
|
||||
worker_group(:static_file) do |g|
|
||||
g.read_ahead = 1
|
||||
g.sleep_delay = 5
|
||||
g.workers = 2
|
||||
g.workers = 4
|
||||
g.queues = ["static_file"]
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user