Remove legacy-related code

This commit is contained in:
Dylan Knutson
2024-12-25 23:53:29 +00:00
parent 3c41cd5b7d
commit 6d5f494c64
92 changed files with 35 additions and 3421 deletions

View File

@@ -1,73 +0,0 @@
class Domain::Fa::UserAvatarFixer < LegacyImport::BulkImportJob
def initialize(start_at:, limit: nil, url_name: nil)
@start_at = start_at
@limit = limit
@url_name = url_name
end
def name
"user_avatar_fixer"
end
def run_impl
@processed = 0
if @url_name
user =
Domain::Fa::User.find_by(url_name: @url_name) || raise("user not found")
process_avatar(user.avatar)
else
Domain::Fa::UserAvatar
.where(state: "no_file_on_guessed_user_page_error")
.find_each(start: @start_at, batch_size: 5) do |avatar|
@processed += 1
break if @limit && @processed > @limit
process_avatar(avatar)
end
end
@processed
end
private
def process_avatar(avatar)
user = avatar.user
logger.prefix =
proc do
"[avatar #{avatar.id.to_s.bold}, user #{user.url_name.to_s.bold}]"
end
logger.info("guessing...")
if avatar.file
logger.warn("have file, skipping")
return
end
if !avatar.file_url_str.blank?
if avatar.file
logger.warn("have file_url_str, skipping")
else
avatar.state = :ok
avatar.save!
Domain::Fa::Job::UserAvatarJob.perform_later({ user: user })
logger.info("existing file_url_str: #{avatar.file_url_str}")
logger.warn("have url, but no file, enqueue job")
end
return
end
guessed_avatar_uri = avatar.guess_file_uri_from_hles_with_info
if guessed_avatar_uri.first == :not_found
logger.error("did not find avatar url: #{guessed_avatar_uri.to_s.bold}")
else
logger.info("found uri, enqueue job: #{guessed_avatar_uri.to_s.bold}")
avatar.state = :ok
avatar.file_uri = guessed_avatar_uri[1]
avatar.state_detail["user_avatar_fixer_job"] = guessed_avatar_uri
avatar.save!
Domain::Fa::Job::UserAvatarJob.perform_later({ user: user })
end
end
end

View File

@@ -1,64 +0,0 @@
class LegacyImport::AdaptiveCache
Entry = Struct.new(:score, :id, :obj, :extra)
def initialize(max_size = 32, reward = 1.0, punish = 0.1)
@max_size = max_size
@candidates = []
@reward = reward
@punish = punish
end
def at_capacity?
@candidates.count == @max_size
end
def candidates
@candidates.map { |c| c.obj }
end
def scores
@candidates.map { |c| c.score }
end
def reward(candidate_id)
@candidates.each do |entry|
if entry.id == candidate_id
entry.score += @reward
else
entry.score -= @punish
end
end
sort!
end
def contains?(candidate_id)
!!@candidates.find { |entry| entry.id == candidate_id }
end
def insert(id, candidate, extra = nil)
new_entry = Entry.new(0.0, id, candidate, extra)
idx = @candidates.bsearch_index { |entry| entry.score <= 0 }
if idx == nil
@candidates.push(new_entry)
else
@candidates.insert(idx, new_entry)
end
@candidates.pop while @candidates.size > @max_size
end
def to_s
@candidates
.map do |entry|
" - #{entry.score.round(1)} score, id #{entry.id} - #{entry.extra}"
end
.join("\n")
end
private
def sort!
@candidates.sort_by! { |entry| -entry.score }
end
end
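
For reference, a minimal usage sketch of the cache above, assuming the class is loaded (the ids, payloads, and cache-key string are made up for illustration):

# illustrative sketch only - ids, payloads, and the key format are examples
cache = LegacyImport::AdaptiveCache.new(4, 1.0, 0.1)
cache.insert("abc12345", { sha256: "abc12345" }, "example.com|text/html")
cache.insert("def67890", { sha256: "def67890" }, "example.com|text/html")
cache.reward("abc12345")  # bump the hit, punish every other entry
cache.candidates          # => payloads ordered by descending score
puts cache.to_s           # one " - <score> score, id <id> - <extra>" line per entry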

View File

@@ -1,51 +0,0 @@
class LegacyImport::BulkImportJob
attr_reader :logger_prefix
include HasColorLogger
def name
raise NotImplementedError.new("implement #name")
end
def run_impl
raise NotImplementedError.new("implement #run_impl")
end
def profile?
false
end
def run
start_profiling!
start_at = Time.now
total_work = run_impl
duration = (Time.now - start_at)
logger.info "finish, total #{duration.round(1)}s, #{total_work} items, #{(total_work / duration).round(1)} items/s"
end_profiling!
end
def write_last_id(last_id)
logger.prefix = proc { "[last_id #{last_id.to_s.bold}]" }
File.write("tmp/#{name}_progress", last_id.to_s)
end
def start_profiling!
RubyProf.start if profile?
end
def end_profiling!
if profile?
base = "profiler/#{name}"
FileUtils.mkdir_p(base) unless File.exist?(base)
result = RubyProf.stop
File.open("#{base}/profile.txt", "w") do |f|
RubyProf::GraphPrinter.new(result).print(f, { min_percent: 1 })
end
File.open("#{base}/profile.html", "w") do |f|
RubyProf::CallStackPrinter.new(result).print(f, { min_percent: 1 })
end
File.open("#{base}/profile.rubyprof", "w") do |f|
RubyProf::SpeedscopePrinter.new(result).print(f, { min_percent: 1 })
end
end
end
end
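
Concrete importers only have to supply #name and a #run_impl that returns the number of items processed; #run then reports total duration and throughput. A minimal sketch, assuming the base class above (NoopImporter is a hypothetical name):

# hypothetical no-op importer, for illustration only
class LegacyImport::NoopImporter < LegacyImport::BulkImportJob
  def name
    "noop_importer"
  end

  def run_impl
    # would normally iterate records and return a count; #run uses it for the items/s log line
    0
  end
end

LegacyImport::NoopImporter.new.run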

View File

@@ -1,90 +0,0 @@
class LegacyImport::E621LegacyPostImporter < LegacyImport::BulkImportJob
def initialize(batch_size:, forks:, start_at:)
@forks = forks || 16
@batch_size = batch_size || @forks * 32
@start_at = start_at || 0
@start_time = Time.now
logger.info "forks=#{@forks} batch_size=#{@batch_size} start_at=#{@start_at}"
end
def name
"e621_legacy_post_importer"
end
def profile?
false
end
def run_impl
progress = 0
query = ::Legacy::E621::Post.includes(:blob_entry, { taggings: :tag })
# finish = @start_at + (2 * 32 * 32)
finish = nil
query.find_in_batches(
start: @start_at,
finish: finish,
batch_size: @batch_size * @forks
) do |batch|
last_id = batch.last&.id
if @forks <= 1
progress += import_e621_posts(batch)
else
progress +=
ForkFuture
.parallel_map_slice(@forks, batch) do |fork_batch|
import_e621_posts(fork_batch)
end
.sum
end
rate = progress.to_f / (Time.now - @start_time)
logger.info "finish batch, last id #{last_id} - #{progress} - #{rate.round(1)} / second"
write_last_id last_id
end
progress
end
private
def import_e621_posts(legacy_posts)
progress = 0
skip_posts_ids =
Set.new(
::Domain::E621::Post
.select(:e621_id)
.where(e621_id: legacy_posts.map(&:e621_id))
.pluck(:e621_id)
)
legacy_posts.reject! do |legacy_post|
skip_posts_ids.include?(legacy_post.e621_id)
end
legacy_posts.each do |legacy_post|
retries = 0
begin
ReduxApplicationRecord.transaction do
post = ::Domain::E621::Post.find_or_build_from_legacy(legacy_post)
unless post.valid?
raise(
"error building post #{post.id} / #{post.e621_id}: #{post.errors.full_messages}"
)
end
post.save!
progress += 1
end
rescue StandardError
retries += 1
sleep 0.1 and retry if retries < 3
raise
end
end
ReduxApplicationRecord.clear_active_connections!
LegacyApplicationRecord.clear_active_connections!
progress
end
end

View File

@@ -1,85 +0,0 @@
class LegacyImport::FaPostImporter < LegacyImport::BulkImportJob
def initialize(batch_size:, forks:, start_at:)
@forks = forks || 16
@batch_size = batch_size || @forks * 32
@start_at = start_at || 0
@start_time = Time.now
puts "FaPostImporter forks=#{@forks} batch_size=#{@batch_size} start_at=#{@start_at}"
end
def name
"fa_post_importer"
end
def profile?
false
end
def run
start_profiling!
progress = 0
query = ::Legacy::Fa::Post.includes(:blob_entry, :description_ref)
query.find_in_batches(start: @start_at, batch_size: @batch_size) do |batch|
last_id = batch.last&.id
if @forks <= 1
progress += import_fa_posts(batch)
else
progress +=
ForkFuture
.parallel_map_slice(@forks, batch) do |fork_batch|
import_fa_posts(fork_batch)
end
.sum
end
rate = progress.to_f / (Time.now - @start_time)
puts "finish batch, last id #{last_id} - #{progress} - #{rate.round(1)} / second"
write_last_id last_id
end
end_profiling!
end
def import_fa_posts(legacy_posts)
progress = 0
skip_posts_ids =
Set.new(
::Domain::Fa::Post
.select(:fa_id, :creator_id)
.where(fa_id: legacy_posts.map(&:fa_id))
.where("creator_id is not null")
.pluck(:fa_id)
)
legacy_posts.reject! do |legacy_post|
skip_posts_ids.include?(legacy_post.fa_id)
end
legacy_posts.each do |legacy_post|
retries = 0
begin
ReduxApplicationRecord.transaction do
post = ::Domain::Fa::Post.find_or_build_from_legacy(legacy_post)
unless post.valid?
raise(
" !! error building post #{post.id} / #{post.fa_id}: #{post.errors.full_messages}"
)
end
post.save!
progress += 1
end
rescue StandardError
retries += 1
sleep 0.1 and retry if retries < 3
raise
end
end
ReduxApplicationRecord.clear_active_connections!
LegacyApplicationRecord.clear_active_connections!
progress
end
end

View File

@@ -1,508 +0,0 @@
require "set"
class LegacyImport::HttpLogEntryBulkImporter
PROFILE = false
def initialize(batch_size, cache_size, start_id, end_id)
@batch_size = batch_size
@cache_size = cache_size
@start_id = start_id
@end_id = end_id
@fork_amount = 10
@insert_stats = InsertStats.new
@timings = Timings.new
# key is "host|content_type"
# value is the adaptive cache for that key
@blob_entry_cache =
Hash.new do |hash, key|
hash[key] = LegacyImport::AdaptiveCache.new(cache_size, 1.0, 0.1)
end
end
def run
RubyProf.start if PROFILE
puts "HttpLogEntryBulkImporter: " +
"#{@start_id || "(nil)"} -> #{@end_id || "(nil)"}, batch size #{@batch_size}, cache size #{@cache_size}, forking #{@fork_amount}"
start_at = Time.now
last_model_id = nil
stats_printer =
Thread.new do
Thread.current.name = "stats-printer"
i = 0
loop do
sleep 3
duration = Time.now - start_at
rate = @insert_stats.http_entries_inserted / duration
hr
puts "insert stats: #{@insert_stats} - " +
"#{rate.round(2)}/sec (last id: #{last_model_id})"
i += 1
if i % 5 == 0
hr
dump_timings
end
end
end
query = Legacy::HttpLogEntry.order(id: :asc)
@timings.start :bulk_load
query.find_in_batches(
batch_size: @batch_size,
start: @start_id,
finish: @end_id
) do |legacy_models|
@timings.finish :bulk_load
import_legacy_models(legacy_models)
last_model_id = legacy_models.last&.id
@timings.start :bulk_load
end
@timings.finish :bulk_load
stats_printer.kill if stats_printer
duration = Time.now - start_at
bytes_stored = @insert_stats.bytes_stored
bytes_length = @insert_stats.bytes_length
ratio = bytes_stored.to_f / bytes_length
rate = @insert_stats.http_entries_inserted / duration
hr
dump_timings
hr
puts "Last id: #{last_model_id}"
puts "Cache size: #{@cache_size}"
puts "Batch size: #{@batch_size}"
puts "Total content stored: #{InsertStats.humansize(bytes_stored)}"
puts "Total content length: #{InsertStats.humansize(bytes_length)}"
puts "Size ratio: #{ratio.round(2)}"
puts "Total http inserted: #{@insert_stats.http_entries_inserted}"
puts "Total blobs inserted: #{@insert_stats.blob_entries_inserted}"
puts "Total duration: #{duration.round(0)} seconds (#{rate.round(2)}/second)"
hr
if PROFILE
Dir.mkdir("profiler") unless File.exist?("profiler")
result = RubyProf.stop
File.open("profiler/migrate_legacy_http_entries.txt", "w") do |f|
RubyProf::GraphPrinter.new(result).print(f, { min_percent: 1 })
end
File.open("profiler/migrate_legacy_http_entries.html", "w") do |f|
RubyProf::CallStackPrinter.new(result).print(f, { min_percent: 1 })
end
end
end
private
def dump_timings
tp(
@timings.entries.map do |entry|
{
key: entry[:key],
duration: "#{entry[:key_secs].round(1)} sec",
percent: "#{(100 * entry[:proportion]).round(1)}%".rjust(5)
}
end
)
end
def import_legacy_models(legacy_models)
@timings.start :lookup_existing_http
already_exist_ids =
::HttpLogEntry.where(id: legacy_models.map(&:id)).pluck(:id).to_set
@timings.finish :lookup_existing_http
# ignore the models which have no stored content (for now)
@timings.start :reject_empty_legacy
legacy_models =
ForkFuture.parallel_map(@fork_amount, legacy_models) do |legacy_model|
next nil if already_exist_ids.include?(legacy_model.id)
begin
next nil if legacy_model.response_body.nil?
rescue StandardError
puts "legacy model #{legacy_model.id} (#{legacy_model.full_path}): error reading response body"
next nil
end
# legacy model now has response body loaded
legacy_model
end
legacy_models.reject!(&:nil?)
@timings.finish :reject_empty_legacy
blob_creation_future =
ForkFuture.new { bulk_import_blob_entries(legacy_models) }
header_creation_future =
ForkFuture.new { bulk_import_headers(legacy_models) }
insert_stats, timings, cache_ops, legacy_model_id_to_response_sha256 =
blob_creation_future.join
@insert_stats.merge!(insert_stats)
@timings.merge!(timings)
cache_ops.each { |op| @blob_entry_cache[op[0]].send(op[1], *op[2..]) }
insert_stats,
timings,
legacy_model_id_to_header_sha256s,
header_sha256_to_header_id =
header_creation_future.join
@insert_stats.merge!(insert_stats)
@timings.merge!(timings)
@timings.start :build_new_https
http_models =
legacy_models.map do |legacy_model|
request_headers_id =
header_sha256_to_header_id[
legacy_model_id_to_header_sha256s[legacy_model.id][:req_sha256]
]
response_headers_id =
header_sha256_to_header_id[
legacy_model_id_to_header_sha256s[legacy_model.id][:res_sha256]
]
response_sha256 = legacy_model_id_to_response_sha256[legacy_model.id]
request_headers_id || raise("no request header id")
response_headers_id || raise("no response header id")
response_sha256 || raise("no response sha256")
build_http_log_entry(
legacy_model,
request_headers_id,
response_headers_id,
response_sha256
)
end
@timings.finish :build_new_https
@timings.start :insert_new_https
if http_models.any?
::HttpLogEntry.insert_all!(http_models.map(&:to_bulk_insert_hash))
end
@insert_stats.http_entries_inserted += http_models.size
@timings.finish :insert_new_https
end
def bulk_import_blob_entries(legacy_models)
insert_stats = InsertStats.new
timings = Timings.new
merged_cache_ops = []
# compute all blob entries for the legacy models, removing duplicates
timings.start :lookup_existing_bes
legacy_model_id_to_response_sha256 =
legacy_models
.map { |m| [m.id, Digest::SHA256.digest(m.response_body)] }
.to_h
sha256_to_existing_blob_entry =
::BlobEntryP
.where(sha256: legacy_model_id_to_response_sha256.values)
.map { |be| [be.sha256, be] }
.to_h
timings.finish :lookup_existing_bes
timings.start :build_new_bes
blob_entries_to_insert =
ForkFuture
.parallel_map(@fork_amount, legacy_models) do |legacy_model|
sha256 = legacy_model_id_to_response_sha256[legacy_model.id] || raise
next nil if sha256_to_existing_blob_entry[sha256]
content_type = legacy_model.content_type
cache_key = "#{legacy_model.host}|#{content_type}"
cache = @blob_entry_cache[cache_key]
# N% chance (if we're not at cache capacity) to not supply any candidates,
# to give new entries in the cache a chance to replace poor performing ones
candidates =
if cache.at_capacity? # && rand(0..100) >= 5
cache.candidates
else
[]
end
blob_entry =
::BlobEntryP.build_record(
content_type: content_type,
sha256: sha256,
contents: legacy_model.response_body,
candidates: candidates
)
# reward the base if it was used, if not, insert this blob into the
# cache so it'll be a future candidate (only when it's a new record)
# cache keys are hex encoded for easier viewing / debugging
cache_op = nil
if !blob_entry.persisted? && @cache_size > 0
if blob_entry.base_sha256
cache_op = [
cache_key,
:reward,
HexUtil.bin2hex(blob_entry.base_sha256)[0..8]
]
else
cache_op = [
cache_key,
:insert,
HexUtil.bin2hex(blob_entry.sha256)[0..8],
blob_entry,
legacy_model.full_path
]
end
end
blob_entry.valid? ||
raise(
"invalid blob entry (legacy model id #{legacy_model.id}): #{blob_entry.errors.full_messages}"
)
cache.send(cache_op[1], *cache_op[2..]) if cache_op
[blob_entry, cache_op]
end
.reject(&:nil?)
.map do |pair|
blob_entry = pair[0]
cache_op = pair[1]
merged_cache_ops << cache_op if cache_op
blob_entry
end
.uniq { |blob_entry| blob_entry.sha256 }
timings.finish :build_new_bes
# bulk-insert all the new blob entries
timings.start :insert_new_bes
slice_size = [(blob_entries_to_insert.size.to_f / @fork_amount).ceil, 1].max
if blob_entries_to_insert.any?
blob_entries_to_insert
.each_slice(slice_size)
.map do |slice|
ForkFuture.new do
if slice.any?
BlobEntryP.insert_all!(slice.map(&:to_bulk_insert_hash))
end
end
end
.to_a
.map(&:join)
end
insert_stats.blob_entries_inserted += blob_entries_to_insert.size
insert_stats.bytes_length +=
blob_entries_to_insert.map(&:contents).map(&:size).sum
insert_stats.bytes_stored += blob_entries_to_insert.map(&:bytes_stored).sum
timings.finish :insert_new_bes
[
insert_stats,
timings,
merged_cache_ops,
legacy_model_id_to_response_sha256
]
end
def bulk_import_headers(legacy_models)
insert_stats = InsertStats.new
timings = Timings.new
timings.start :build_new_headers
header_sha256_to_header_model = {}
legacy_model_id_to_header_sha256s =
ForkFuture
.parallel_map(@fork_amount / 2, legacy_models) do |legacy_model|
req_headers =
::HttpLogEntryHeader.build_record(headers: legacy_model.req_headers)
res_headers =
::HttpLogEntryHeader.build_record(headers: legacy_model.res_headers)
[legacy_model.id, { req: req_headers, res: res_headers }]
end
.map do |pair|
legacy_model_id = pair[0]
req_headers = pair[1][:req]
res_headers = pair[1][:res]
header_sha256_to_header_model[req_headers.sha256] = req_headers
header_sha256_to_header_model[res_headers.sha256] = res_headers
[
legacy_model_id,
{ req_sha256: req_headers.sha256, res_sha256: res_headers.sha256 }
]
end
.to_h
timings.finish :build_new_headers
# excluding existing headers, and bulk-insert the new headers
timings.start :insert_new_headers
header_sha256_to_header_id =
::HttpLogEntryHeader
.where(sha256: header_sha256_to_header_model.keys)
.map { |model| [model.sha256, model.id] }
.to_h
headers_to_insert =
header_sha256_to_header_model
.map do |sha256, header_model|
next nil if header_sha256_to_header_id[sha256]
header_model.valid? || raise("invalid header models")
header_model
end
.reject(&:nil?)
.uniq { |header_model| header_model.sha256 }
::HttpLogEntryHeader
.insert_all!(
headers_to_insert.map(&:to_bulk_insert_hash),
returning: %i[id sha256]
)
.rows
.each do |row|
id, sha256 = row
# rails does not deserialize the returned sha256 - we have to do that ourselves
# postgres prefixes hex-encoded binaries with "\x", must strip that first
raise("invariant") unless sha256[0..1] == "\\x"
sha256 = ::HexUtil.hex2bin(sha256[2..])
header_sha256_to_header_id[sha256] = id
end if headers_to_insert.any?
insert_stats.header_entries_inserted += headers_to_insert.size
timings.finish :insert_new_headers
[
insert_stats,
timings,
legacy_model_id_to_header_sha256s,
header_sha256_to_header_id
]
end
def build_http_log_entry(
legacy_model,
request_headers_id,
response_headers_id,
response_sha256
)
model =
::HttpLogEntry.new(
id: legacy_model.id,
uri_scheme: legacy_model.scheme,
uri_host: legacy_model.host,
uri_path: legacy_model.path,
uri_query: legacy_model.query,
verb: legacy_model.verb,
content_type: legacy_model.content_type,
status_code: legacy_model.status,
response_time_ms: legacy_model.response_time,
request_headers_id: request_headers_id,
response_headers_id: response_headers_id,
response_sha256: response_sha256,
requested_at: legacy_model.requested_at,
created_at: legacy_model.created_at,
updated_at: legacy_model.updated_at
)
model
end
def hr
puts "-" * 40
end
Timings =
Struct.new(:keys, :totals) do
def initialize
@start_at = Time.now
self.keys = []
self.totals = {}
end
def merge!(other)
raise if other.nil?
other.keys.each do |key|
self.keys << key unless self.keys.include?(key)
self.entry_for(key)[:secs] += other.totals[key][:secs]
end
end
def start(key)
self.keys << key unless self.keys.include?(key)
entry = self.entry_for(key)
raise("#{key} already started") if entry[:started]
entry[:started] = Time.now
end
def finish(key)
entry = self.totals[key]
raise("#{key} does not exist") unless entry
started = entry[:started]
entry[:started] = nil
raise("#{key} not started") unless started
entry[:secs] += Time.now - started
end
def entries
total_secs = Time.now - @start_at
total_measured_secs = self.totals.values.map { |e| e[:secs] }.sum
self
.keys
.map do |key|
key_secs = self.totals[key][:secs]
{
key: key,
key_secs: key_secs,
proportion: key_secs / total_measured_secs
}
end
.chain(
[
{
key: :measured_total,
key_secs: total_measured_secs,
proportion: total_measured_secs / total_secs
},
{ key: :actual_total, key_secs: total_secs, proportion: 1.0 }
]
)
end
def entry_for(key)
self.totals[key] ||= { started: nil, secs: 0.0 }
end
end
InsertStats =
Struct.new(
:http_entries_inserted,
:blob_entries_inserted,
:header_entries_inserted,
:bytes_stored,
:bytes_length
) do
def initialize
self.http_entries_inserted = 0
self.blob_entries_inserted = 0
self.header_entries_inserted = 0
self.bytes_stored = 0
self.bytes_length = 0
end
def merge!(other)
self.http_entries_inserted += other.http_entries_inserted
self.blob_entries_inserted += other.blob_entries_inserted
self.header_entries_inserted += other.header_entries_inserted
self.bytes_stored += other.bytes_stored
self.bytes_length += other.bytes_length
end
def to_s
ratio = self.bytes_stored.to_f / self.bytes_length
[
"+#{self.http_entries_inserted} requests, +#{self.blob_entries_inserted} blobs, +#{self.header_entries_inserted} headers",
"size ratio: #{ratio.round(2)} - #{self.class.humansize(self.bytes_stored)}/#{self.class.humansize(self.bytes_length)}"
].join("\n")
end
def self.humansize(size)
HexUtil.humansize(size)
end
end
end
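
A short sketch of how the nested Timings helper accumulates per-phase durations (the :load key is arbitrary):

# illustrative sketch; assumes the importer class above is loaded
timings = LegacyImport::HttpLogEntryBulkImporter::Timings.new
timings.start :load
sleep 0.2 # stand-in for real work
timings.finish :load
timings.entries.each do |entry|
  puts "#{entry[:key]}: #{entry[:key_secs].round(2)}s (#{(100 * entry[:proportion]).round(1)}%)"
end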

View File

@@ -15,7 +15,7 @@ class Scraper::GalleryDlClient
:response_time_ms,
:body,
:log_entry,
keyword_init: true
keyword_init: true,
)
TweetEvent = Struct.new(:tweet, :author)
TweetMediaEvent =
@@ -26,7 +26,7 @@ class Scraper::GalleryDlClient
:media_num,
:extension,
:height,
:width
:width,
)
def initialize(name, host)
@@ -35,11 +35,6 @@ class Scraper::GalleryDlClient
logger.info("build #{name.to_s.green.bold} - #{host.green}")
@performed_by = name
@client = Ripcord::Client.new(host)
@max_cache_size = 8
@blob_entry_cache =
Hash.new do |hash, key|
hash[key] = LegacyImport::AdaptiveCache.new(@max_cache_size, 1.0, 0.1)
end
end
def start_twitter_user(username, caused_by_entry: nil)
@@ -47,7 +42,7 @@ class Scraper::GalleryDlClient
rpc =
@client.call(
"start_user",
[@token, "https://twitter.com/#{username}/tweets"]
[@token, "https://twitter.com/#{username}/tweets"],
)
raise rpc_error_str(rpc) unless rpc.successful?
decode_message(rpc.result, caused_by_entry)
@@ -87,7 +82,7 @@ class Scraper::GalleryDlClient
response_code: http_request[:status_code],
response_time_ms: (http_request[:duration] * 1000).to_i,
body: Base64.decode64(http_request[:content_base64]),
log_entry: nil
log_entry: nil,
)
log_and_set_http_request_event(event, caused_by_entry)
event
@@ -102,7 +97,7 @@ class Scraper::GalleryDlClient
media[:media_num],
media[:extension],
media[:height],
media[:width]
media[:width],
)
end
end
@@ -124,26 +119,12 @@ class Scraper::GalleryDlClient
url = uri.to_s
cache_key = "#{uri.host}|#{content_type}"
blob_entry_cache = @blob_entry_cache[cache_key]
candidates =
if blob_entry_cache.at_capacity? && rand(0..100) >= 5
blob_entry_cache.candidates
else
[]
end
candidates << caused_by_entry.response if caused_by_entry&.response
if caused_by_entry&.response&.base
candidates << caused_by_entry.response.base
end
retries = 0
begin
response_blob_entry =
BlobEntryP.find_or_build(
content_type: content_type,
contents: http_event.body,
candidates: candidates
)
log_entry =
@@ -161,8 +142,8 @@ class Scraper::GalleryDlClient
response_time_ms: http_event.response_time_ms,
requested_at: http_event.requested_at,
caused_by_entry: caused_by_entry,
performed_by: @performed_by
}
performed_by: @performed_by,
},
)
log_entry.save!
@@ -174,17 +155,5 @@ class Scraper::GalleryDlClient
logger.debug "insert http log entry #{log_entry.id.to_s.bold}"
http_event.log_entry = log_entry
if response_blob_entry.base_sha256
blob_entry_cache.reward(
HexUtil.bin2hex(response_blob_entry.base_sha256)[0..8]
)
else
blob_entry_cache.insert(
HexUtil.bin2hex(response_blob_entry.sha256)[0..8],
response_blob_entry,
url
)
end
end
end

View File

@@ -36,97 +36,6 @@ class Domain::E621::Post < ReduxApplicationRecord
foreign_key: :e621_id,
optional: true
SKIP_MISMATCH_LEGACY_IDS = Set.new([836_414, 1_070_178])
def self.find_or_build_from_legacy(legacy_model)
model = self.find_by(e621_id: legacy_model.e621_id)
return model if model
model =
self.new(
{
state: :ok,
file_url_str: legacy_model.file_url,
rating: legacy_model.rating,
sources_array: legacy_model.sources,
tags_array: legacy_model.tags.map(&:value),
artists_array: legacy_model.artists || [],
},
)
if legacy_model.e621_status != "active"
model.flags_array << legacy_model.e621_status
model.flags_array.uniq!
end
%i[e621_id md5 description score created_at].each do |attr|
model.send(:"#{attr}=", legacy_model.send(attr))
end
http_log_entries =
::HttpLogEntry.where(
uri_host: model.file_uri.host,
uri_path: model.file_uri.path,
)
http_log_entry = http_log_entries.first
if !http_log_entry && legacy_model.blob_entry
legacy_hles =
::Legacy::HttpLogEntry.where(
host: model.file_uri.host,
path: model.file_uri.path,
)
legacy_hle = legacy_hles.first
if legacy_hle
http_log_entry = ::HttpLogEntry.build_from_legacy(legacy_hle)
else
http_log_entry =
::HttpLogEntry.new(
{
uri: model.file_uri || raise,
status_code: 200,
verb: "get",
response_time_ms: -1,
requested_at: Time.now,
request_headers: ::HttpLogEntryHeader.empty,
response_headers: ::HttpLogEntryHeader.empty,
performed_by: "legacy",
},
)
end
http_log_entry.response ||=
::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
blob_entry = http_log_entry.response
if blob_entry && http_log_entry
http_log_entry.content_type ||= blob_entry.content_type
else
# unable to construct http & blob entries, skip
File.write(
Rails.root.join("tmp/e621_legacy_post_importer_failures"),
"#{model.e621_id} - (no hle) - unable to reconstruct http / blob entry\n",
)
http_log_entry = nil
end
end
if http_log_entry
blob_entry = http_log_entry.response
if model.md5 != Digest::MD5.hexdigest(blob_entry.contents)
File.write(
Rails.root.join("tmp/e621_legacy_post_importer_failures"),
"#{model.e621_id} - #{http_log_entry.status_code} - expected #{model.md5} != actual #{Digest::MD5.hexdigest(blob_entry.contents)}\n",
)
http_log_entry = nil
end
end
model.file = http_log_entry
model
end
def file_uri
Addressable::URI.parse(self.file_url_str) if self.file_url_str.present?
end

View File

@@ -36,7 +36,7 @@ class HttpLogEntry < ReduxApplicationRecord
:status_code,
:response_time_ms,
:content_type,
:requested_at
:requested_at,
)
def self.find_by_uri_host_path(uri)
@@ -44,49 +44,6 @@ class HttpLogEntry < ReduxApplicationRecord
find_by(uri_host: uri.host, uri_path: uri.path)
end
def self.build_from_legacy(legacy_model)
response_body = legacy_model.response_body
can_reconstruct_be =
response_body.nil? && legacy_model.parent_log_entry_id.nil? &&
legacy_model.resp_body.present? && legacy_model.blob_entry.present?
if can_reconstruct_be
blob_entry =
::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
blob_sha256 = HexUtil.hex2bin(legacy_model.resp_body)
unless blob_entry.sha256 == blob_sha256
raise(
"mismatch for legacy http entry #{legacy_model.id} / legacy blob entry #{legacy_model.blob_entry.id}"
)
end
else
blob_entry = nil
end
uri = Addressable::URI.parse(legacy_model.full_path)
uri.scheme ||= "https"
uri.path ||= "/"
::HttpLogEntry.new(
{
verb: legacy_model.verb,
uri: uri,
content_type: legacy_model.content_type,
status_code: legacy_model.status,
response_time_ms: legacy_model.response_time,
request_headers:
::HttpLogEntryHeader.find_or_build(headers: legacy_model.req_headers),
response_headers:
::HttpLogEntryHeader.find_or_build(headers: legacy_model.res_headers),
response: blob_entry,
requested_at: legacy_model.requested_at,
created_at: legacy_model.created_at,
updated_at: legacy_model.updated_at,
performed_by: "legacy"
}
)
end
def uri=(uri)
uri = Addressable::URI.parse(uri)
self.uri_scheme = uri.scheme
@@ -128,7 +85,7 @@ class HttpLogEntry < ReduxApplicationRecord
response_sha256: self.response_sha256,
requested_at: self.requested_at,
created_at: self.created_at,
updated_at: self.updated_at
updated_at: self.updated_at,
}
end
end

View File

@@ -1,3 +0,0 @@
module Legacy
autoload(:Fa, "legacy/fa.rb")
end

View File

@@ -1,131 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: blob_entries
#
# id :integer not null, primary key
# created_at :datetime
# updated_at :datetime
# file_size :integer
# refcount :integer
# sha256 :string(64)
# dir_depth :integer default(2), not null
#
require "digest"
class Legacy::BlobEntry < LegacyApplicationRecord
self.table_name = "blob_entries"
validates_presence_of :sha256
validates_presence_of :refcount
validates_presence_of :file_size
validates_uniqueness_of :sha256
validates_length_of :sha256, is: 64
validates_presence_of :dir_depth
before_validation do
self.dir_depth ||= 2
self.file_size = File.size(file_path)
self.refcount ||= 0
end
def file_relative_path
sha256 || raise
dir_depth || raise
self.class.file_path_at_depth(sha256: sha256, depth: dir_depth)
end
def ensure_file_path
sha256 || raise
dir_depth || raise
unless File.exist?(file_path)
found = false
(2..5).each do |depth|
path =
File.join(
Legacy::SConfig.blob_static_dir,
self.class.file_path_at_depth(sha256: sha256, depth: depth)
)
next unless File.exist?(path)
self.dir_depth = depth
save!
found = true
Legacy::SConfig.logger.warn(
"found fixed path at #{depth} for BE id #{id}"
)
break
end
return nil unless found
end
file_path
end
def self.file_path_at_depth(sha256:, depth:, stride: 1, hash_length: 64)
# generate something like sha256[0]/sha256[1]/sha256
raise("invalid sha256: #{sha256}") unless sha256.length == hash_length
parts =
(0...depth).map { |idx| sha256[(idx * stride)...((idx + 1) * stride)] } +
[sha256]
File.join(*parts)
end
def file_path
File.join Legacy::SConfig.blob_static_dir, file_relative_path
end
def inc_refcount
::Legacy::BlobEntry.increment_counter(:refcount, id)
end
def dec_refcount
::Legacy::BlobEntry.decrement_counter(:refcount, id)
end
def self.create_from_blob(blob:, opts: {})
sha256 = Digest::SHA256.hexdigest blob
write_out =
lambda do |be, _contents|
dir = File.dirname be.file_path
FileUtils.mkdir_p dir
f = File.open(be.file_path, "wb")
begin
f.write(blob)
f.fsync
unless File.exist?(be.file_path)
raise("error ensuring blob exists for #{be.id}")
end
ensure
f.close
end
end
be = nil
::Legacy::BlobEntry.transaction do
be = ::Legacy::BlobEntry.find_by(sha256: sha256)
if be && !be.ensure_file_path
# correct directory depth as well
Legacy::SConfig.logger.warn(
"file doesn't exist for #{be.id}, writing again..."
)
write_out.call(be, blob)
elsif !be
new_be = ::Legacy::BlobEntry.new(opts.merge(sha256: sha256))
write_out.call(new_be, blob)
new_be.save!
be = new_be
end
end
be
end
end
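
The sharded path layout produced by file_path_at_depth, shown with a stand-in 64-character digest:

# illustrative values only; the digest below is a placeholder, not a real hash
sha = "ab" * 32 # 64 characters, as the length check requires
Legacy::BlobEntry.file_path_at_depth(sha256: sha, depth: 2)
# => "a/b/abab..." - default layout, one leading character per directory level
Legacy::BlobEntry.file_path_at_depth(sha256: sha, depth: 4, stride: 2)
# => "ab/ab/ab/ab/abab..." - the deeper layout used for resized assets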

View File

@@ -1,142 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: e621_posts
#
# id :integer not null, primary key
# e621_id :integer not null
# md5 :string not null
# sources :string
# file_url :string not null
# file_ext :string not null
# description :string
# rating :integer
# width :integer
# height :integer not null
# tags_string :string not null
# status :integer
# score :integer
# removed :boolean
# created_at :datetime not null
# updated_at :datetime not null
# artists :string
# e621_count :integer
# author :string
# e621_status :string
# blob_entry_id :integer
# imgsearch_entry_id :integer
#
class Legacy::E621::Post < LegacyApplicationRecord
self.table_name = "e621_posts"
validates_presence_of :e621_id,
:md5,
:author,
:file_url,
:file_ext,
:rating,
:tags_string,
:status,
:score
validates_uniqueness_of :md5, :e621_id
serialize :sources, coder: JSON
serialize :artists, coder: JSON
belongs_to :blob_entry, class_name: "Legacy::BlobEntry"
# just inserted into db: :not_processed
# we've checked for the existence of its file on the
# disk and it isn't there: :should_download
# we've made an attempt to download its file: :processed
enum :status,
%i[not_processed should_download processed processed_404 processed_err]
validates_inclusion_of :status, in: statuses.keys
has_many :taggings, class_name: "Legacy::E621::Tagging"
has_many :tags, through: :taggings
enum :rating, %i[s q e]
validates_inclusion_of :rating, in: ratings.keys
def file_relative_path
base = File.basename(file_url)
"#{base[0]}/#{base[1]}/#{base}"
end
before_validation { self.file_ext ||= File.extname(file_path)[1..-1] }
before_destroy { blob_entry.dec_refcount }
def file_path
File.join SConfig.e621_static_dir, file_relative_path
end
def resized_file_path(style)
raise("no md5") unless md5
hashed_path =
Legacy::BlobEntry.file_path_at_depth(
sha256: md5,
depth: 4,
stride: 2,
hash_length: 32
)
File.join SConfig.e621_data_dir,
"resized",
style.to_s,
(hashed_path + "." + file_ext)
end
FASource = Struct.new(:type, :id, :url)
def fa_sources
self
.sources
.flatten
.map do |source|
if matches = %r{furaffinity.net/view/(\d+)}.match(source)
fa_id = matches[1]
FASource.new(:post, fa_id.to_i, source)
elsif matches = %r{furaffinity.net/(gallery|user)/([^/]+)}.match(source)
url_name = FA::User.name_to_url_name(matches[2])
FASource.new(:user, url_name, source)
else
nil
end
end
.reject(&:nil?)
end
def update_taggings(assume_total_overwrite: false)
tags_string_split = tags_string.split(/\s+/).map(&:strip).reject(&:blank?)
tags_arr =
Legacy::E621::Tag.where(value: tags_string_split).select(:id, :value).to_a
missing = Set.new(tags_string_split) - Set.new(tags_arr.map(&:value))
missing.each do |missing_val|
tags_arr << Legacy::E621::Tag.find_or_create_by(value: missing_val)
end
# SConfig.logger.info "had to create tags: #{missing.to_a.join(", ")}" if missing.any?
if assume_total_overwrite
self.tags = tags_arr
else
should_be = Set.new(tags_arr)
but_is = Set.new(tags)
removed = but_is - should_be
added = should_be - but_is
tags.delete(removed.to_a)
tags << added.to_a
end
if Set.new(tags.map(&:value)) != Set.new(tags_string_split)
puts "tagging mismatch on #{id} (#{e621_id})"
end
end
end

View File

@@ -1,20 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: e621_tags
#
# id :integer not null, primary key
# value :string not null
# e621_id :integer
# type :integer
# e621_count :integer
# created_at :datetime not null
# updated_at :datetime not null
#
class Legacy::E621::Tag < LegacyApplicationRecord
self.table_name = "e621_tags"
self.inheritance_column = nil
validates_presence_of :value
end

View File

@@ -1,22 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: e621_taggings
#
# id :integer not null, primary key
# tag_id :integer
# post_id :integer
# created_at :datetime not null
# updated_at :datetime not null
#
class Legacy::E621::Tagging < LegacyApplicationRecord
self.table_name = "e621_taggings"
belongs_to :post, class_name: "Legacy::E621::Post"
belongs_to :tag, class_name: "Legacy::E621::Tag"
validates_presence_of :post, :tag
validates_uniqueness_of :tag_id, scope: :post_id
end

View File

@@ -1,7 +0,0 @@
# frozen_string_literal: true
module Legacy::Fa
def self.table_name_prefix
"fa_"
end
end

View File

@@ -1,131 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: fa_posts
#
# id :integer not null, primary key
# fa_id :integer not null
# creator_name :string
# creator_id :integer
# title :string
# category :string
# theme :string
# species :string
# gender :string
# keywords :string
# file_url :string
# blob_entry_id :integer
# old_files :string
# num_favorites :integer
# num_comments :integer
# num_views :integer
# posted_at :datetime
# created_at :datetime not null
# updated_at :datetime not null
# in_gallery :boolean
# in_folders :string
# state :integer default(0)
# state_error :string
# folder_name :string
# gallery_page_logfile :string
# submission_page_logfile :string
# submission_page_log_entry_id :integer
# description_id :integer
# imgsearch_entry_id :integer
#
class Legacy::Fa::Post < LegacyApplicationRecord
self.table_name = "fa_posts"
validates_presence_of :fa_id
# array of [{be: blob_entry_id, file_url: old_file_url}]
serialize :old_files, coder: JSON
serialize :keywords, coder: JSON
serialize :in_folders, coder: JSON
enum :state,
[
:seen_listing, # have seen a reference to this post on a listing page
:scanned_submission, # have scanned the actual submission page
:scan_error, # error scanning the submission page
:have_static, # have the static asset associated with the page
:static_error
] # error getting the static asset
validates_inclusion_of :state, in: Legacy::Fa::Post.states.keys
# serialize :state_error
validates_presence_of :state_error, if: -> { scan_error? || static_error? }
belongs_to :creator, class_name: "::Legacy::Fa::User"
belongs_to :blob_entry, class_name: "::Legacy::BlobEntry"
belongs_to :submission_page_log_entry, class_name: "::Legacy::HttpLogEntry"
belongs_to :description_ref,
nil,
class_name: "::Legacy::Fa::PostDescription",
foreign_key: :description_id,
inverse_of: :fa_post
before_destroy do
blob_entry.dec_refcount
true
end
def description
description_ref.try(:value)
end
def self.file_name_filter(part)
part.gsub(/[^\w\.\_\-\#\@\(\)\^\[\]\$\{\}\<\>\!\ ]/, "_")
end
def relative_file_path
raise("no file_url") unless file_url
name = ensure_creator_name_filename
file = self.class.file_name_filter(File.basename(file_url))
File.join(name, file).encode(
Encoding.find("UTF-8"),
invalid: :replace,
undef: :replace,
replace: ""
)
end
def file_path
File.join SConfig.fa_post_static_dir, relative_file_path
end
def file_ext
raise("no file_url") unless file_url
File.extname(self.class.file_name_filter(file_url))
end
def resized_file_path(style)
raise("no fa_id") unless fa_id
hashed_path =
Legacy::BlobEntry.file_path_at_depth(
sha256: Digest::SHA256.hexdigest(fa_id.to_s),
depth: 4,
stride: 2
)
File.join Legacy::SConfig.fa_data_dir,
"static",
"resized",
style.to_s,
(hashed_path + file_ext)
end
private
def ensure_creator_name_filename
raise("no creator name") unless creator_name
self.class.file_name_filter(Legacy::Fa::User.name_to_url_name(creator_name))
end
end

View File

@@ -1,17 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: fa_post_descriptions
#
# id :integer not null, primary key
# value :text not null
# created_at :datetime not null
# updated_at :datetime not null
#
class Legacy::Fa::PostDescription < LegacyApplicationRecord
self.table_name = "fa_post_descriptions"
has_one :fa_post, class_name: "Legacy::Fa::Post", foreign_key: :description_id
end

View File

@@ -1,86 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: fa_users
#
# id :integer not null, primary key
# name :string not null
# full_name :string
# artist_type :string
# mood :string
# profile_html :text
# num_pageviews :integer
# num_submissions :integer
# num_comments_recieved :integer
# num_comments_given :integer
# num_journals :integer
# num_favorites :integer
# registered_at :datetime
# created_at :datetime not null
# updated_at :datetime not null
# url_name :string not null
# scanned_gallery :datetime
# scanned_page :datetime
# user_page_logfile :string
# user_page_log_entry_id :integer
#
class Legacy::Fa::User < LegacyApplicationRecord
self.table_name = "fa_users"
validates :name, uniqueness: true, presence: true
validates :url_name, uniqueness: true, presence: true
has_many :posts,
class_name: "Legacy::Fa::Post",
foreign_key: :creator_name,
primary_key: :name
# if present, this user is being watched
has_one :watched_user, foreign_key: :user_id, inverse_of: :user
belongs_to :user_page_log_entry, class_name: "Legacy::Cache::HttpLogEntry"
before_validation do
self.name ||= url_name
self.url_name ||= self.class.name_to_url_name(name)
end
def up_to_date_gallery!
now = Time.now
self.scanned_gallery = now
watched_user.scanned_user_gallery = now if watched_user?
Legacy::Fa::User.transaction do
watched_user.save! if watched_user?
save!
end
end
def up_to_date_gallery?
if watched_user? && !!scanned_gallery
watched_user.scanned_user_gallery == scanned_gallery
else
!!scanned_gallery
end
end
def up_to_date_page?
if watched_user? && !!scanned_page
watched_user.scanned_user_page == scanned_page
else
!!scanned_page
end
end
def watched_user?
!!watched_user
end
def self.name_to_url_name(name)
name.delete("_").downcase
end
end

View File

@@ -1,472 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: cache_http_log_entries
#
# id :integer not null, primary key
# scheme :string
# host :string
# path :string
# query :string
# verb :integer
# status :integer
# response_time :integer
# content_type :string
# response_size :integer
# parent_log_entry_id :integer
# blob_entry_id :integer
# gzipped :boolean
# requested_at :datetime
# created_at :datetime not null
# updated_at :datetime not null
# resp_body :binary
# imported_from_file :string
# req_headers_id :integer
# res_headers_id :integer
# diff_type :integer default(0)
#
require "zlib"
require "stringio"
module Diffy
class Diff
def tempfile(string)
t = Tempfile.new("diffy")
# ensure tempfiles aren't unlinked when GC runs by maintaining a
# reference to them.
@tempfiles ||= []
@tempfiles.push(t)
t.binmode
t.print(string)
t.flush
t.close
t.path
end
end
end
class Legacy::HttpLogEntry < LegacyApplicationRecord
self.table_name = "cache_http_log_entries"
# threshold or less: store in database directly
KEEP_INTERNALLY_THRESHOLD = 1024 * 64
belongs_to :parent_log_entry, class_name: "Legacy::HttpLogEntry"
belongs_to :blob_entry, class_name: "Legacy::BlobEntry"
validates_presence_of(
:scheme,
:host,
:path,
:status,
:response_time,
:content_type,
:response_size
)
enum :verb, %i[get post]
validates_inclusion_of :verb, in: Legacy::HttpLogEntry.verbs.keys
# text: use Diffy diffing
# binary: use BSDiff
# native: use the native LogStore server to store the entry
enum :diff_type, %i[text binary native]
validates_inclusion_of :diff_type, in: Legacy::HttpLogEntry.diff_types.keys
after_initialize { self.diff_type = "native" if new_record? }
# out of line req/response headers
belongs_to :req_headers_ref,
foreign_key: :req_headers_id,
class_name: "Legacy::HttpLogEntryHeader"
belongs_to :res_headers_ref,
foreign_key: :res_headers_id,
class_name: "Legacy::HttpLogEntryHeader"
belongs_to :native_blob_entry,
foreign_key: :native_blob_entry_sha256,
primary_key: :key,
class_name: "::LogStoreSstEntry"
attr_accessor :can_force_update
before_update do
if can_force_update
true
else
raise("HttpLogEntry is immutable!")
false
end
end
private
def set_header_impl(method, headers)
headers.delete("date")
headers.delete("expires")
headers.delete("cf-ray")
send(
"#{method}=",
Legacy::HttpLogEntryHeader.find_or_create(headers: headers)
)
end
public
def req_headers=(headers)
set_header_impl(:req_headers_ref, headers)
end
def res_headers=(headers)
set_header_impl(:res_headers_ref, headers)
end
private
def get_header_impl(method)
ref = send("#{method}_ref")
if ref
ref.headers
else
self.class.superclass.instance_method(method).bind(self).call
end
end
public
def req_headers
ref = req_headers_ref
ref ? ref.headers : {}
end
def res_headers
ref = res_headers_ref
ref ? ref.headers : {}
end
before_create { self.requested_at ||= DateTime.now }
def body_stored?
!!(blob_entry_id || parent_log_entry_id || resp_body)
end
def response_body=(body_string)
if diff_type == "native"
set_response_body_native(body_string, {})
else
set_response_body(body_string)
end
end
def full_path
"#{scheme}://#{host}#{path}#{query ? "?#{query}" : ""}"
end
def set_response_body_native(body_string, opts = {})
raise("legacy can't write")
# try and find a good HttpLogEntry to diff this against
candidate_keys =
if !opts[:skip_find_candidates]
Legacy::HttpLogEntry
.where(
host: host,
path: path,
diff_type: self.class.diff_types["native"]
)
.limit(5)
.to_a
.map(&:resp_body)
.reject(&:nil?)
.reject(&:empty?)
else
[]
end
SConfig.with_log_store_client do |lsc|
ret =
lsc.put_binary(
LogStore::PutBinaryArgs.new(
hint_hashes: candidate_keys,
contents: body_string
)
)
self.resp_body = ret.key
end
body_string
end
# string ->
def set_response_body(body_string, opts = {})
return set_response_body_native(body_string, opts) if diff_type == "native"
# try and find a good HttpLogEntry to diff this against
candidate_entries =
Legacy::HttpLogEntry
.where(host: host, path: path, parent_log_entry_id: nil)
.limit(3)
.to_a
# add or remove trailing slash to each of the paths
hint_paths = opts[:similar_content_path_hints] || []
hint_paths +=
hint_paths.map do |p|
if p == "/"
p
elsif p[-1] == "/"
p[0..-2]
else
p + "/"
end
end
body_string = body_string.force_encoding("UTF-8")
good_ce = nil
use_string = body_string
gzipped = false
if body_string.valid_encoding?
if hint_paths.any?
candidate_entries +=
Legacy::HttpLogEntry
.where(host: host, path: hint_paths, parent_log_entry_id: nil)
.limit(50)
.to_a
end
SConfig.logger.info(
"Comparing against #{candidate_entries.length} " \
"candidates: #{candidate_entries.map(&:path).join(", ")}"
)
candidate_entries.each do |ce|
SConfig.logger.info "Comparing diff against HLE (#{ce.id}: #{ce.path})"
ce_body = ce.response_body
if !ce_body || (!ce_body.valid_encoding? && diff_type == "text")
SConfig.logger.info "HLE #{ce.id} has invalid encoded response body"
next
end
ce_diff = self.class.get_diff(ce_body, body_string, diff_type)
if (diff_type == "text") &&
(/^Binary files .+ and .+ differ/ =~ ce_diff)
SConfig.logger.warn(
"diff detected HLE #{ce.id} was a binary, skipping..."
)
next
end
# verify we can reconstruct the original body string
if self.class.apply_patch(ce_body, ce_diff, diff_type) != body_string
SConfig.logger.error(
"couldn't succesfully apply patch to get orig..."
)
next
end
gzipped_diff = self.class.gzip(ce_diff)
ce_use_string = nil
ce_gzipped = nil
if gzipped_diff.length < ce_diff.length
ce_gzipped = true
ce_use_string = gzipped_diff
else
ce_gzipped = false
ce_use_string = ce_diff
end
# haven't found a smaller use_string
if use_string.length < ce_use_string.length
SConfig.logger.info(
"Previous config was still smaller (#{use_string.length} vs" \
" #{ce_use_string.length} bytes)"
)
next
else
SConfig.logger.info(
"HLE (#{ce.id}) is good candidate: #{ce_use_string.length} bytes " \
"(gz: #{ce_gzipped})"
)
end
good_ce = ce
gzipped = ce_gzipped
use_string = ce_use_string
end
else
SConfig.logger.error("Invalid encoding detected, not storing diff")
end
self.parent_log_entry = good_ce # or nil, if none found
self.gzipped = gzipped
if use_string.length < self.class::KEEP_INTERNALLY_THRESHOLD
self.resp_body = use_string
SConfig.logger.info "Storing data internally"
else
self.blob_entry =
Legacy::BlobEntry.create_from_blob(
blob: use_string,
opts: {
dir_depth: 4
}
)
blob_entry.inc_refcount
SConfig.logger.info "Storing data in blob entry #{blob_entry.id}..."
end
if response_body != body_string
raise("internal error, response_body != body_string")
end
stored_bytes = use_string.length
total_bytes = body_string.length
SConfig.logger.info(
"Stored #{stored_bytes}/#{total_bytes} bytes" \
" (#{(stored_bytes.to_f / total_bytes.to_f * 100.0).round(1)}\% of original)"
)
response_body
rescue StandardError
blob_entry && blob_entry.dec_refcount
raise
end
class NoBEPathException < RuntimeError
end
# -> string
def response_body
@response_body ||=
begin
return response_body_native if diff_type == "native"
our_string =
if blob_entry
path = blob_entry.ensure_file_path
unless path
raise NoBEPathException,
"no path for blob entry " \
"#{blob_entry_id} (HLE id: #{id}) (#{blob_entry.file_path})"
end
File.read(path)
else
resp_body
end
our_string = self.class.gunzip(our_string) if gzipped
return nil if our_string.nil?
# our_string = our_string.force_encoding("UTF-8")
if parent_log_entry
self.class.apply_patch(
parent_log_entry.response_body,
our_string,
diff_type
)
else
our_string
end
end
end
def response_body_native
raise unless diff_type == "native"
return "" unless resp_body
# new:
@response_body_native ||=
self.native_blob_entry&.patched_value ||
::FlatSstEntry.find_by_hex_key(resp_body)&.contents ||
::LogStoreSstEntry.find_by_hex_key(resp_body)&.patched_value
end
def self.encode_str(str)
str.encode(
Encoding.find("UTF-8"),
invalid: :replace,
undef: :replace,
replace: ""
)
end
def self.gunzip(data)
io = StringIO.new(data, "rb")
Zlib::GzipReader.new(io).read
end
def self.gzip(string)
wio = StringIO.new("w")
w_gz = Zlib::GzipWriter.new(wio)
w_gz.write(string)
w_gz.close
wio.string
end
def self.get_diff(old_bytes, new_bytes, diff_type)
if diff_type == "text"
return Diffy::Diff.new(old_bytes, new_bytes, diff: "-e").to_s
end
raise("unknown diff type '#{diff_type}'") if diff_type != "binary"
tf_old = Tempfile.new("old-file")
tf_new = Tempfile.new("new-file")
tf_out = Tempfile.new("patch")
files = [tf_old, tf_new, tf_out]
begin
files.each(&:binmode)
tf_old.write(old_bytes)
tf_new.write(new_bytes)
files.each(&:close)
if BSDiff.diff(tf_old.path, tf_new.path, tf_out.path)
tf_out.open
bytes = tf_out.read
tf_out.close
return bytes
else
return nil
end
ensure
files.each(&:unlink)
end
end
def self.apply_patch(old_text, patch, diff_type)
tf_orig = Tempfile.new("apply-patch", encoding: "ascii-8bit")
tf_patch = Tempfile.new("apply-patch", encoding: "ascii-8bit")
tf_out = Tempfile.new("applied-patch", encoding: "ascii-8bit")
files = [tf_orig, tf_patch, tf_out]
begin
tf_out.close
tf_orig.write(old_text)
tf_patch.write(patch)
tf_orig.close
tf_patch.close
if diff_type == "text"
`patch -e #{tf_orig.path} #{tf_patch.path} -o #{tf_out.path}`
tf_out.open
ret = tf_out.read
tf_out.close
ret
elsif diff_type == "binary"
if BSDiff.patch(tf_orig.path, tf_out.path, tf_patch.path)
tf_out.open
ret = tf_out.read
tf_out.close
ret
end
else
raise("invalid diff type #{diff_type}")
end
ensure
files.each(&:unlink)
end
end
end

View File

@@ -1,25 +0,0 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: cache_http_log_entry_headers
#
# id :integer not null, primary key
# headers :hstore not null
# sha256 :binary not null
# created_at :datetime
# updated_at :datetime
#
class Legacy::HttpLogEntryHeader < LegacyApplicationRecord
self.table_name = "cache_http_log_entry_headers"
def self.find_or_create(headers:)
temp = Legacy::HttpLogEntryHeader.new(headers: headers)
sha256 = Digest::SHA256.digest(temp.headers.to_s)
Legacy::HttpLogEntryHeader.find_or_create_by!(sha256: sha256) do |c|
c.headers = temp.headers
end
end
end
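
A small sketch of the dedup behaviour above: identical header hashes resolve to a single sha256-keyed row (this needs a legacy database connection; the header values are examples):

# illustrative only - requires the legacy database to be reachable
headers = { "content-type" => "text/html", "server" => "nginx" }
a = Legacy::HttpLogEntryHeader.find_or_create(headers: headers)
b = Legacy::HttpLogEntryHeader.find_or_create(headers: headers)
a.id == b.id # => true, the second call finds the existing record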

View File

@@ -1,105 +0,0 @@
class LegacyApplicationRecord < ActiveRecord::Base
# self.primary_abstract_class = true
self.abstract_class = true
connects_to database: { writing: :legacy, reading: :legacy }
end
module Legacy
end
class Legacy::SConfig
def self.data_dir
if Rails.env.test?
Rails.root.join "test/fixtures/legacy/s_config"
else
"/home/scraper/scraper_data_original/scraper_data"
end
end
def self.blob_data_dir
File.join data_dir, "blobs"
end
def self.e621_data_dir
File.join data_dir, "e621"
end
def self.fa_data_dir
File.join data_dir, "fa"
end
def self.ib_data_dir
File.join data_dir, "ib"
end
def self.blob_static_dir
File.join blob_data_dir, "static"
end
def self.e621_static_dir
File.join e621_data_dir, "static"
end
def self.fa_post_static_dir
File.join fa_data_dir, "static/posts"
end
def self.fa_icons_static_dir
File.join fa_data_dir, "static/icons"
end
def self.ib_post_static_dir
File.join ib_data_dir, "static/posts"
end
def self.e621_json_dir
File.join e621_data_dir, "json"
end
def self.fa_html_dir
File.join fa_data_dir, "html"
end
def self.fa_cookie_jar_dir
File.join fa_data_dir, "cookies"
end
def self.ib_logs_dir
File.join ib_data_dir, "logs"
end
def self.ib_cookie_jar_dir
File.join ib_data_dir, "cookies"
end
def self.http_logger_data_dir
File.join data_dir, "http_logger"
end
def self.logger
@@logger ||=
begin
l = Logger.new(STDOUT)
l.level = Logger::INFO
l.datetime_format = "%Y-%m-%d %H:%M:%S"
l.formatter =
proc do |sev, datetime, _prog, msg|
color =
case sev
when "INFO"
:blue
when "ERROR"
:red
when "DEBUG"
:yellow
else
:white
end
date_format = datetime.strftime("%Y-%m-%d %H:%M:%S")
"[#{date_format}] #{sev.ljust(5).send(color)}: #{msg}\n"
end
l
end
end
end

View File

@@ -1,5 +1,4 @@
class ReduxApplicationRecord < ActiveRecord::Base
self.abstract_class = true
connects_to database: { writing: :redux, reading: :redux }
logger.level = Logger::ERROR
end

View File

@@ -16,7 +16,6 @@ redux_prod: &redux_prod
database: redux_prod
username: scraper_redux
password: pdkFLqRmQwPUPaDDC4pX
migrations_paths: db/redux_migrate
pool: 4
redux_staging: &redux_staging
@@ -34,26 +33,8 @@ redux_dev: &redux_dev
database: postgres
username: postgres
password: postgres
migrations_paths: db/redux_migrate
pool: 4
legacy_prod: &legacy_prod
adapter: postgresql
host: 10.166.33.171
port: 5432
database: legacy_prod
username: scraper_redux
password: pdkFLqRmQwPUPaDDC4pX
migrations_paths: db/legacy_migrate
database_tasks: false
pool: 2
legacy_staging: &legacy_staging
<<: *legacy_prod
host: postgres
username: scraper_redux_staging
password: q6Jf8mXEUkAxdyHq1tUtCTPa1raX1QAT
local_redux_test: &local_redux_test
adapter: postgresql
host: db
@@ -63,53 +44,23 @@ local_redux_test: &local_redux_test
# password: pdkFLqRmQwPUPaDDC4pX
username: postgres
password: postgres
migrations_paths: db/redux_migrate
pool: 4
local_legacy_test: &local_legacy_test
adapter: postgresql
host: db
port: 5432
database: legacy_test
# username: scraper_redux
# password: pdkFLqRmQwPUPaDDC4pX
username: postgres
password: postgres
migrations_paths: db/legacy_migrate
pool: 4
development:
redux:
<<: *redux_dev
# at the moment, no "real" legacy database. just fixtures.
# legacy:
# <<: *legacy_prod
<<: *redux_dev
# Warning: The database defined as "test" will be erased and
# re-generated from your development database when you run "rake".
# Do not set this db to the same as development or production.
test:
redux:
<<: *local_redux_test
legacy:
<<: *local_legacy_test
<<: *local_redux_test
production:
redux:
<<: *redux_prod
legacy:
<<: *legacy_prod
<<: *redux_prod
staging:
redux:
<<: *redux_staging
legacy:
<<: *legacy_staging
<<: *redux_staging
worker:
redux:
<<: *redux_prod
pool: 16
legacy:
<<: *legacy_prod
pool: 16
<<: *redux_prod
pool: 16

View File

@@ -1,398 +0,0 @@
# This file is auto-generated from the current state of the database. Instead
# of editing this file, please use the migrations feature of Active Record to
# incrementally modify your database, and then regenerate this schema definition.
#
# This file is the source Rails uses to define your schema when running `bin/rails
# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to
# be faster and is potentially less error prone than running all of your
# migrations from scratch. Old migrations may fail to apply correctly if those
# migrations use external dependencies or application code.
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 0) do
# These are extensions that must be enabled in order to support this database
enable_extension "hstore"
enable_extension "intarray"
enable_extension "pg_stat_statements"
enable_extension "pg_trgm"
enable_extension "plpgsql"
create_table "blob_entries", id: :serial, force: :cascade do |t|
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.integer "file_size"
t.integer "refcount"
t.string "sha256", limit: 64
t.integer "dir_depth", default: 2, null: false
t.index ["sha256"], name: "index_blob_entries_on_sha256", unique: true
end
create_table "cache_http_log_entries", id: :serial, force: :cascade do |t|
t.string "scheme"
t.string "host"
t.string "path"
t.string "query"
t.integer "verb"
t.integer "status"
t.integer "response_time"
t.string "content_type"
t.integer "response_size"
t.integer "parent_log_entry_id"
t.integer "blob_entry_id"
t.boolean "gzipped"
t.datetime "requested_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.binary "resp_body"
t.string "imported_from_file"
t.integer "req_headers_id"
t.integer "res_headers_id"
t.integer "diff_type", default: 0
t.binary "native_blob_entry_sha256"
t.index ["imported_from_file"], name: "index_cache_http_log_entries_on_imported_from_file"
t.index ["path", "host"], name: "index_cache_http_log_entries_on_path_and_host"
t.index ["path"], name: "cache_http_log_entries_path_idx", opclass: :gist_trgm_ops, using: :gist
t.index ["path"], name: "index_pattern_ops_on_hle_entry_path"
t.index ["requested_at"], name: "index_cache_http_log_entries_on_requested_at"
end
create_table "cache_http_log_entry_headers", id: :serial, force: :cascade do |t|
t.hstore "headers", null: false
t.binary "sha256", null: false
t.datetime "created_at"
t.datetime "updated_at"
t.index ["sha256"], name: "index_cache_http_log_entry_headers_on_sha256", unique: true
end
create_table "e621_posts", id: :serial, force: :cascade do |t|
t.integer "e621_id", null: false
t.string "md5", null: false
t.string "sources"
t.string "file_url", null: false
t.string "file_ext", null: false
t.string "description"
t.integer "rating"
t.integer "width"
t.integer "height", null: false
t.string "tags_string", null: false
t.integer "status"
t.integer "score"
t.boolean "removed"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.string "artists"
t.integer "e621_count"
t.string "author"
t.string "e621_status"
t.integer "blob_entry_id"
t.integer "imgsearch_entry_id"
t.index ["blob_entry_id"], name: "index_e621_posts_on_blob_entry_id"
t.index ["e621_id"], name: "index_e621_posts_on_e621_id", unique: true
t.index ["imgsearch_entry_id"], name: "index_e621_posts_on_imgsearch_entry_id"
t.index ["md5"], name: "index_e621_posts_on_md5"
end
create_table "e621_taggings", id: :serial, force: :cascade do |t|
t.integer "tag_id"
t.integer "post_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["post_id"], name: "index_e621_taggings_on_post_id"
t.index ["tag_id"], name: "index_e621_taggings_on_tag_id"
end
create_table "e621_tags", id: :serial, force: :cascade do |t|
t.string "value", null: false
t.integer "e621_id"
t.integer "type"
t.integer "e621_count"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["value"], name: "index_e621_tags_on_value"
end
create_table "fa_failed_users", id: :serial, force: :cascade do |t|
t.string "url_name"
t.datetime "created_at"
t.datetime "updated_at"
end
create_table "fa_post_descriptions", id: :serial, force: :cascade do |t|
t.text "value", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "fa_posts", id: :serial, force: :cascade do |t|
t.integer "fa_id", null: false
t.string "creator_name"
t.integer "creator_id"
t.string "title"
t.string "category"
t.string "theme"
t.string "species"
t.string "gender"
t.string "keywords"
t.string "file_url"
t.integer "blob_entry_id"
t.string "old_files"
t.integer "num_favorites"
t.integer "num_comments"
t.integer "num_views"
t.datetime "posted_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.boolean "in_gallery"
t.string "in_folders"
t.integer "state", default: 0
t.string "state_error"
t.string "folder_name"
t.string "gallery_page_logfile"
t.string "submission_page_logfile"
t.integer "submission_page_log_entry_id"
t.integer "description_id"
t.integer "imgsearch_entry_id"
t.index ["blob_entry_id"], name: "index_fa_posts_on_blob_entry_id"
t.index ["creator_id"], name: "index_fa_posts_on_creator_id"
t.index ["creator_name"], name: "index_fa_posts_on_creator_name"
t.index ["fa_id"], name: "index_fa_posts_on_fa_id", unique: true
t.index ["imgsearch_entry_id"], name: "index_fa_posts_on_imgsearch_entry_id"
end
create_table "fa_users", id: :serial, force: :cascade do |t|
t.string "name", null: false
t.string "full_name"
t.string "artist_type"
t.string "mood"
t.text "profile_html"
t.integer "num_pageviews"
t.integer "num_submissions"
t.integer "num_comments_recieved"
t.integer "num_comments_given"
t.integer "num_journals"
t.integer "num_favorites"
t.datetime "registered_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.string "url_name", null: false
t.datetime "scanned_gallery"
t.datetime "scanned_page"
t.string "user_page_logfile"
t.integer "user_page_log_entry_id"
t.index ["name"], name: "index_fa_users_on_name", unique: true
t.index ["scanned_gallery"], name: "index_fa_users_on_scanned_gallery"
t.index ["url_name"], name: "index_fa_users_on_url_name", unique: true
end
create_table "ib_posts", id: :serial, force: :cascade do |t|
t.string "gallery_logfile"
t.datetime "created_at"
t.datetime "updated_at"
end
create_table "ib_user", id: :serial, force: :cascade do |t|
end
create_table "imgsearch_dbs", id: :serial, force: :cascade do |t|
t.string "path", null: false
t.string "name", null: false
t.integer "hash_bits", null: false
t.integer "max_error", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "lite_ad_hoc_posts", force: :cascade do |t|
t.string "file_ext"
t.string "file"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "lite_e621_posts", primary_key: "e621_id", force: :cascade do |t|
t.string "e621_file_url"
t.string "file_ext"
t.integer "main_asset_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "lite_fa_posts", primary_key: "fa_id", force: :cascade do |t|
t.string "file_ext"
t.integer "main_asset_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.string "file_url"
t.integer "creator_id"
t.string "title"
t.index ["creator_id"], name: "index_lite_fa_posts_on_creator_id"
end
create_table "lite_fa_users", force: :cascade do |t|
t.string "url_name", null: false
t.index ["url_name"], name: "index_lite_fa_users_on_url_name"
end
create_table "lite_media_file_fingerprints", force: :cascade do |t|
t.integer "object_type", limit: 2, null: false
t.integer "object_id", null: false
t.bigint "fingerprints", array: true
t.index ["object_type", "object_id"], name: "index_lite_media_file_fingerprints_on_object_type_and_object_id", unique: true
end
create_table "lite_s3_objects", force: :cascade do |t|
t.string "sha256"
t.string "key"
t.integer "size"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["key"], name: "index_lite_s3_objects_on_key", unique: true
end
create_table "pghero_query_stats", id: :serial, force: :cascade do |t|
t.text "database"
t.text "user"
t.text "query"
t.bigint "query_hash"
t.float "total_time"
t.bigint "calls"
t.datetime "captured_at"
t.index ["database", "captured_at"], name: "index_pghero_query_stats_on_database_and_captured_at"
end
create_table "pghero_space_stats", id: :serial, force: :cascade do |t|
t.text "database"
t.text "schema"
t.text "relation"
t.bigint "size"
t.datetime "captured_at"
t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at"
end
create_table "watch_jobs", id: :serial, force: :cascade do |t|
t.string "execute_class"
t.string "params"
t.string "return"
t.integer "state"
t.datetime "execute_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.integer "enqueue_strategy", default: 0
t.integer "redo_delay_seconds"
t.integer "priority", default: 0
t.string "http_class"
t.index ["execute_at"], name: "index_watch_jobs_on_execute_at"
t.index ["execute_class"], name: "index_watch_jobs_on_execute_class"
t.index ["priority"], name: "index_watch_jobs_on_priority"
t.index ["state"], name: "index_watch_jobs_on_state"
end
create_table "watched_users", id: :serial, force: :cascade do |t|
t.integer "user_id", null: false
t.integer "watch_job_id", null: false
t.datetime "scanned_user_gallery"
t.datetime "scanned_user_page"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.integer "scan_delay"
end
create_table "weasyl_attempted_posts", id: :serial, force: :cascade do |t|
t.integer "weasyl_id", null: false
t.integer "enum_type", null: false
t.integer "status"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id", unique: true
end
create_table "weasyl_descriptions", id: :serial, force: :cascade do |t|
t.text "value", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "weasyl_joins_user_follows", id: :serial, force: :cascade do |t|
t.integer "follower_id", null: false
t.integer "followed_id", null: false
t.index ["follower_id", "followed_id"], name: "index_weasyl_joins_user_follows_on_follower_id_and_followed_id", unique: true
end
create_table "weasyl_joins_user_friends", id: :serial, force: :cascade do |t|
t.integer "a_id", null: false
t.integer "b_id", null: false
t.index ["a_id", "b_id"], name: "index_weasyl_joins_user_friends_on_a_id_and_b_id", unique: true
end
create_table "weasyl_medias", id: :serial, force: :cascade do |t|
t.string "url", null: false
t.integer "mediaid"
t.integer "blob_entry_id"
t.integer "status"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["mediaid"], name: "index_weasyl_medias_on_mediaid", unique: true
t.index ["url"], name: "index_weasyl_medias_on_url", unique: true
end
create_table "weasyl_posts", id: :serial, force: :cascade do |t|
t.integer "weasyl_id", null: false
t.string "title", null: false
t.integer "description_id"
t.datetime "posted_at"
t.string "tags", array: true
t.integer "enum_type", null: false
t.integer "enum_subtype"
t.integer "enum_rating", null: false
t.integer "num_comments"
t.integer "num_views"
t.integer "num_favorites"
t.string "folder_name"
t.integer "weasyl_folder_id"
t.integer "owner_id", null: false
t.integer "submission_media_id"
t.datetime "full_scanned_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_posts_on_enum_type_and_weasyl_id", unique: true
end
create_table "weasyl_users", id: :serial, force: :cascade do |t|
t.string "full_name"
t.string "login_name", null: false
t.integer "description_id"
t.integer "avatar_media_id"
t.datetime "scanned_gallery_at"
t.datetime "scanned_userpage_at"
t.datetime "scanned_followers_following_at"
t.integer "userid"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["login_name"], name: "index_weasyl_users_on_login_name", unique: true
end
create_table "xtwitter_tweets", id: :serial, force: :cascade do |t|
t.string "creator_screen_name", null: false
t.integer "creator_user_id", null: false
t.string "twitter_id", null: false
t.json "attrs"
t.string "media_uri"
t.string "media_ext"
t.integer "http_log_entry_page_id"
t.integer "http_log_entry_media_id"
t.integer "blob_entry_id"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
create_table "xtwitter_users", id: :serial, force: :cascade do |t|
t.string "screen_name", null: false
t.integer "user_id", null: false
t.datetime "scanned_timeline_at"
t.datetime "scanned_profile_at"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
end
end

View File

@@ -1,24 +0,0 @@
# This migration comes from active_storage (originally 20190112182829)
class AddServiceNameToActiveStorageBlobs < ActiveRecord::Migration[6.0]
def up
return unless table_exists?(:active_storage_blobs)
unless column_exists?(:active_storage_blobs, :service_name)
add_column :active_storage_blobs, :service_name, :string
if configured_service = ActiveStorage::Blob.service.name
ActiveStorage::Blob.unscoped.update_all(
service_name: configured_service
)
end
change_column :active_storage_blobs, :service_name, :string, null: false
end
end
def down
return unless table_exists?(:active_storage_blobs)
remove_column :active_storage_blobs, :service_name
end
end

View File

@@ -1,36 +0,0 @@
# This migration comes from active_storage (originally 20191206030411)
class CreateActiveStorageVariantRecords < ActiveRecord::Migration[6.0]
def change
return unless table_exists?(:active_storage_blobs)
# Use Active Record's configured type for primary key
create_table :active_storage_variant_records,
id: primary_key_type,
if_not_exists: true do |t|
t.belongs_to :blob,
null: false,
index: false,
type: blobs_primary_key_type
t.string :variation_digest, null: false
t.index %i[blob_id variation_digest],
name: "index_active_storage_variant_records_uniqueness",
unique: true
t.foreign_key :active_storage_blobs, column: :blob_id
end
end
private
def primary_key_type
config = Rails.configuration.generators
config.options[config.orm][:primary_key_type] || :primary_key
end
def blobs_primary_key_type
pkey_name = connection.primary_key(:active_storage_blobs)
pkey_column =
connection.columns(:active_storage_blobs).find { |c| c.name == pkey_name }
pkey_column.bigint? ? :bigint : pkey_column.type
end
end

View File

@@ -1,8 +0,0 @@
# This migration comes from active_storage (originally 20211119233751)
class RemoveNotNullOnActiveStorageBlobsChecksum < ActiveRecord::Migration[6.0]
def change
return unless table_exists?(:active_storage_blobs)
change_column_null(:active_storage_blobs, :checksum, true)
end
end

View File

@@ -1,17 +1,6 @@
namespace :e621 do
desc "import legacy e621 posts"
task :import_legacy => :environment do |t, args|
batch_size = args[:batch_size]&.to_i || ENV["batch_size"]&.to_i
forks = args[:forks]&.to_i || ENV["forks"]&.to_i
start_at = args[:start_at]&.to_i || ENV["start_at"]&.to_i
LegacyImport::E621LegacyPostImporter.
new(batch_size: batch_size, forks: forks, start_at: start_at).
run
end
desc "import e621 data from csv"
task :import_csv => :environment do |t, args|
task import_csv: :environment do |t, args|
start_at = ENV["start_at"]&.to_i
limit = ENV["limit"]&.to_i
csv_path = ENV["csv"] || raise("must supply `csv`")
@@ -24,9 +13,7 @@ namespace :e621 do
end
desc "run a single e621 posts index job"
task :posts_index_job => :environment do
Domain::E621::Job::PostsIndexJob.
set(priority: -10).
perform_later({})
task posts_index_job: :environment do
Domain::E621::Job::PostsIndexJob.set(priority: -10).perform_later({})
end
end

View File

@@ -11,7 +11,7 @@ namespace :fa do
parser =
Domain::Fa::Parser::Page.new(
page_log_entry.response.contents,
require_logged_in: false
require_logged_in: false,
)
unless parser.probably_user_page?
# Rails.logger.error("user #{user.id} / #{user.url_name} page #{page_log_entry.id} is not a user page")
@@ -24,7 +24,7 @@ namespace :fa do
user.save!
puts ""
Rails.logger.info(
"updated #{user.id} / #{user.url_name} - #{old_name} -> #{new_name}"
"updated #{user.id} / #{user.url_name} - #{old_name} -> #{new_name}",
)
else
print "."
@@ -55,7 +55,7 @@ namespace :fa do
"number of posts the user has favorited, as per the user page",
num_pageviews:
"number of pageviews of the user's page, as per the user page",
registered_at: "when the account was registered, as per the user page"
registered_at: "when the account was registered, as per the user page",
}
model_ids = Domain::Fa::User.order(url_name: :asc).pluck(:id)
File.open(out_file, "wt") do |file|
@@ -89,7 +89,7 @@ namespace :fa do
reverse_scan_holes: false,
start_at: start_at,
low_water_mark: low_water_mark,
high_water_mark: high_water_mark
high_water_mark: high_water_mark,
)
loop { sleep poll_duration if enqueuer.run_once == :sleep }
@@ -108,7 +108,7 @@ namespace :fa do
reverse_scan_holes: true,
start_at: start_at,
low_water_mark: low_water_mark,
high_water_mark: high_water_mark
high_water_mark: high_water_mark,
)
loop { sleep poll_duration if enqueuer.run_once == :sleep }
@@ -125,7 +125,7 @@ namespace :fa do
Domain::Fa::UserEnqueuer.new(
start_at: start_at,
low_water_mark: low_water_mark,
high_water_mark: high_water_mark
high_water_mark: high_water_mark,
)
loop { sleep poll_duration if enqueuer.run_once == :sleep }
@@ -158,7 +158,7 @@ namespace :fa do
total: total,
format: "%t: %c/%C %B %p%% %a %e",
output: $stderr,
throttle_rate: 0.2
throttle_rate: 0.2,
)
# make stdout unbuffered
@@ -175,24 +175,11 @@ namespace :fa do
$stdout.flush
end
desc "Import existing FA posts"
task :import_existing, [:start_at] => [:environment] do |t, args|
batch_size = args[:batch_size]&.to_i || ENV["batch_size"]&.to_i
forks = args[:forks]&.to_i || ENV["forks"]&.to_i
start_at = args[:start_at]&.to_i || ENV["start_at"]&.to_i
LegacyImport::FaPostImporter.new(
batch_size: batch_size,
forks: forks,
start_at: start_at
).run
end
desc "run a single browse page job"
task browse_page_job: %i[set_logger_stdout environment] do
Domain::Fa::Job::BrowsePageJob.set(
priority: -20,
queue: "manual"
queue: "manual",
).perform_later({})
puts "#{Time.now} - browse_page_job - Domain::Fa::Job::BrowsePageJob"
end
@@ -201,7 +188,7 @@ namespace :fa do
task home_page_job: %i[set_logger_stdout environment] do
Domain::Fa::Job::HomePageJob.set(
priority: -20,
queue: "manual"
queue: "manual",
).perform_later({})
puts "#{Time.now} - home_page_job - Domain::Fa::Job::HomePageJob"
end
@@ -211,7 +198,7 @@ namespace :fa do
fa_id = ENV["fa_id"] || raise("must provide fa_id")
Domain::Fa::Job::ScanPostJob.set(
priority: -10,
queue: "manual"
queue: "manual",
).perform_later({ fa_id: fa_id, force_scan: true })
end
@@ -241,7 +228,7 @@ namespace :fa do
for fa_id in (fa_id_start..fa_id_end)
Domain::Fa::Job::ScanPostJob.set(
priority: -10,
queue: "manual"
queue: "manual",
).perform_later({ fa_id: fa_id })
end
end
@@ -251,7 +238,7 @@ namespace :fa do
url_name = ENV["url_name"] || raise("must provide url_name")
Domain::Fa::Job::UserPageJob.set(
priority: -10,
queue: "manual"
queue: "manual",
).perform_later({ url_name: url_name, force_scan: true })
end
@@ -260,7 +247,7 @@ namespace :fa do
url_name = ENV["url_name"] || raise("must provide url_name")
Domain::Fa::Job::UserGalleryJob.set(
priority: -10,
queue: "manual"
queue: "manual",
).perform_later({ url_name: url_name, force_scan: true })
end
@@ -283,13 +270,13 @@ namespace :fa do
post.save!
Domain::Fa::Job::UserPageJob.set(priority: -10).perform_later(
{ user: real_user }
{ user: real_user },
)
Domain::Fa::Job::UserGalleryJob.set(priority: -10).perform_later(
{ user: real_user }
{ user: real_user },
)
Domain::Fa::Job::ScanPostJob.set(priority: -10).perform_later(
{ post: post }
{ post: post },
)
end
end
@@ -302,7 +289,7 @@ namespace :fa do
tables =
ENV["tables"] ||
raise(
"'tables' required (all, #{Domain::Fa::SqliteExporter::TABLES.keys.join(", ")})"
"'tables' required (all, #{Domain::Fa::SqliteExporter::TABLES.keys.join(", ")})",
)
tables = tables.split(",").map(&:to_sym)
@@ -312,23 +299,4 @@ namespace :fa do
exporter.run
exporter.end_profiling! if profile
end
task fix_fa_user_avatars: %i[environment set_logger_stdout] do
url_name = ENV["url_name"]
if url_name
start_at = 0
limit = 1
else
start_at =
ENV["start_at"]&.to_i || raise("need start_at (user avatar id)")
limit = ENV["limit"]&.to_i
end
job =
Domain::Fa::UserAvatarFixer.new(
start_at: start_at,
limit: limit,
url_name: url_name
)
job.run
end
end

View File

@@ -1,75 +0,0 @@
namespace :log_entry do
desc "Fix up entries that have '' contents but shouldn't"
task :fix_up_empty_response_contents => [:environment] do
query = ::HttpLogEntry.where(response_sha256: Digest::SHA256.digest("")).limit(100)
query.find_each do |model|
puts "uri: #{model.uri_str}"
end
end
desc "Find an HttpLogEntry that is missing its response body, ignoring those with a missing blob entry file"
task :find_missing_response_body_ignore_file => [:environment] do
Legacy::HttpLogEntry.where(
"resp_body is not null or blob_entry_id is not null"
).find_in_batches do |batch|
id_to_legacy = batch.map { |l| [l.id, l] }.to_h
legacy_ids = batch.map(&:id)
log_ids = HttpLogEntry.select(:id).where(id: legacy_ids).map(&:id).to_a
missing_ids = legacy_ids - log_ids
# ignore the ones which have a missing blob entry
missing_ids.filter! do |id|
model = id_to_legacy[id]
model.response_body
true
rescue Legacy::HttpLogEntry::NoBEPathException
false
rescue
true
end
if missing_ids.any?
puts "Found missing: #{missing_ids}"
break
end
end
end
desc "Bulk import Legacy::HLE -> ::HLE"
task :http_log_entry_bulk_importer, [:batch_size, :cache_size, :start_at, :finish_at] => [:environment] do |t, args|
batch_size = args[:batch_size]&.to_i || ENV["batch_size"]&.to_i || 8192
cache_size = args[:cache_size]&.to_i || ENV["cache_size"]&.to_i || 6
start_at = args[:start_at]&.to_i || ENV["start_at"]&.to_i || 0
finish_at = args[:finish_at]&.to_i || ENV["finish_at"]&.to_i || nil
LegacyImport::HttpLogEntryBulkImporter.
new(batch_size, cache_size, start_at, finish_at).
run
end
task :legacy_http_log_entry_native_blob_entry => [:environment] do
batch_size = ENV["batch_size"]&.to_i || 4000
parallelism = ENV["parallelism"]&.to_i || 8
query = Legacy::HttpLogEntry.
where(diff_type: :native).
where(native_blob_entry_sha256: nil).
where("resp_body is not null")
start_time = Time.now
progress = 0
query.find_in_batches(batch_size: batch_size, start: 0, finish: 10) do |batch|
ForkFuture.parallel_each(parallelism, batch) do |http_log_entry|
http_log_entry.update_columns(
native_blob_entry_sha256: HexUtil.hex2bin(http_log_entry.resp_body),
)
end
progress += batch.size
rate = progress.to_f / (Time.now - start_time)
puts "finish batch, last id #{batch.last&.id} - #{progress} - #{rate.round(1)} / second"
end
end
end

View File

@@ -1,35 +0,0 @@
require "rails_helper"
describe LegacyImport::AdaptiveCache do
it "works" do
cache = LegacyImport::AdaptiveCache.new(4, 1.0, 0.0)
refute cache.at_capacity?
expect(cache.candidates).to eq([])
# works even when the candidate doesn't exist
cache.reward 1
expect(cache.candidates).to eq([])
cache.insert :a, "a"
cache.insert :b, "b"
cache.insert :c, "c"
cache.insert :d, "d"
assert cache.at_capacity?
expect(cache.candidates).to eq(%w[d c b a])
5.times { cache.reward :a }
3.times { cache.reward :b }
1.times { cache.reward :c }
expect(cache.candidates).to eq(%w[a b c d])
expect(cache.scores).to eq([5.0, 3.0, 1.0, 0.0])
3.times { cache.reward :c } # 1 => 4
expect(cache.candidates).to eq(%w[a c b d])
expect(cache.scores).to eq([5.0, 4.0, 3.0, 0.0])
# new 'e' should bump off 'd' which has a 0 score
cache.insert :e, "e"
expect(cache.candidates).to eq(%w[a c b e])
expect(cache.scores).to eq([5.0, 4.0, 3.0, 0.0])
end
end

View File

@@ -1,20 +0,0 @@
#!/usr/bin/env fish
set SESSION migration
set NUM_SHARDS $argv[1]
set START_AT $argv[2]
tmux new-session -d -s $SESSION
for i in (seq 2 $NUM_SHARDS)
tmux select-layout tiled
tmux split-window -t $SESSION:0.0 -h -d
end
tmux select-layout tiled
# one pane per shard; shard indices run 0..NUM_SHARDS-1
for i in (seq 0 (math $NUM_SHARDS - 1))
tmux send-keys -t $SESSION:0.$i "bin/rake migrate_legacy_http_entries[$START_AT,$NUM_SHARDS,$i]" C-m
end
tmux attach-session -t $SESSION

View File

@@ -1,32 +0,0 @@
---
id_1370198:
id: 1370198
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-02-27 12:00:51.488220000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-02-27 12:00:51.488220000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-02-27 12:00:51.488220000 Z
zone: *1
time: 2016-02-27 12:00:51.488220000 Z
file_size: 313065
refcount: 1
sha256: 5ed3a0400ac50f721123c7a8c638da8b19bf563f8e880f9abb36dcb38395bc82
dir_depth: 2
id_8663902:
id: 8663902
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-11-26 22:09:59.707602000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-11-26 22:09:59.707602000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-11-26 22:09:59.707602000 Z
zone: *1
time: 2016-11-26 22:09:59.707602000 Z
file_size: 6936
refcount: 2
sha256: 41f8daf7772d11f80afe56b742087a2d1ab372e08b69e1284be4fefec2ad0c7f
dir_depth: 4

View File

@@ -1,50 +0,0 @@
---
id_11551023:
id: 11551023
value: "<a href=\"/user/kenny-mccormick/\"><img class=\"avatar\" alt=\"kenny-mccormick\"
src=\"//a.facdn.net/1485168311/kenny-mccormick.gif\"></a>\n <br><br>\n
\ [copied from my DA page]<br>\r\n<br>\r\nLike last time,
mid week posts are just being dedicated to images that come from the south park
stories, starting with a particular fav of mine from chapter 2.<br>\r\n<br>\r\nIt
was very fun to design the outfit for Kenny, though it's very different from his
show attire. I think he looks really cute, tempted to make the design part of another
character at some point, it's just so cute &lt;3<br>\r\n<br>\r\n-<br>\r\n<br>\r\nImage
was made by Tato and colored by me.<br>\r\n<br>\r\nWant to support this content?<br>\r\n<a
href=\"https://www.patreon.com/KennyCrusader\" title=\"https://www.patreon.com/KennyCrusader\"
class=\"auto_link\">https://www.patreon.com/KennyCrusader</a>"
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-02-01 07:50:22.199757000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2018-02-01 07:50:22.199757000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-02-01 07:50:22.199757000 Z
zone: *1
time: 2018-02-01 07:50:22.199757000 Z
id_144:
id: 144
value: "\n<a href=\"/user/meesh/\"><img class=\"avatar\" alt=\"meesh\" src=\"//a.facdn.net/1424255659/meesh.gif\"></a>\n<br><br>\n<a
href=\"http://meesh.pandachan.org/massive-art-collections/\" title=\"http://meesh.pandachan.org/massive-art-collections/\"
class=\"auto_link auto_link_shortened\">http://meesh.pandachan.org/massive-.....t-collections/</a><br>\n<br>\nMy
latest Art Pack is here! And its a doozy. It includes:<br>\n<br>\n-95% explicit
adult content, naturally. Gay, straight and some in-between<br>\n<br>\n-143 unique
sketch commissions, with a handful that have special edits. 56 of which have never
been posted to my account<br>\n<br>\n-69 sketches that never saw the light of day
outside of my weekly streams, or not at all<br>\n<br>\n-27 unique Tier 1 and Tier
2 commissions. Including Dog Pound, Pills Chronicles and others, with preliminary
stages such as sketches, concepts, and inks<br>\n<br>\n-High resolution and resized
versions of every single sketch commission, Tier 1 and Tier 2. All personal sketches
are in hi-res or have a hi-res version included<br>\n<br>\n-590 image files in total<br>\n<br>\nIt
all can be yours for just $10! You can purchase it right here:<a href=\"http://meesh.pandachan.org/massive-art-collections/\"
title=\"http://meesh.pandachan.org/massive-art-collections/\" class=\"auto_link
auto_link_shortened\">http://meesh.pandachan.org/massive-.....t-collections/</a> "
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-12-22 17:06:09.549412000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-12-22 17:06:09.549412000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-12-22 17:06:09.549412000 Z
zone: *1
time: 2016-12-22 17:06:09.549412000 Z

View File

@@ -1,273 +0,0 @@
---
id_13950325:
id: 13950325
fa_id: 25793413
creator_name: Kenny-Mccormick
creator_id:
title: "[Commissioned] [Collab] Little Kenny - By Tato"
category: Artwork (Digital)
theme: Baby fur
species: Unspecified / Any
gender: Male
keywords:
- south
- park
- kenny
- mccormick
- tato
- age
- regression
- diaper
file_url: "//d.facdn.net/art/kenny-mccormick/1513691072/1513691072.kenny-mccormick_kenny_s_new_outfitf.png"
blob_entry_id:
old_files: []
num_favorites: 25
num_comments: 4
num_views: 263
posted_at:
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-02-01 07:50:22.177857000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2018-02-01 07:50:22.177857000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-02-01 07:50:22.201057000 Z
zone: *1
time: 2018-02-01 07:50:22.201057000 Z
in_gallery:
in_folders: []
state: scanned_submission
state_error:
folder_name:
gallery_page_logfile:
submission_page_logfile:
submission_page_log_entry_id: 19252126
description_id: 11551023
imgsearch_entry_id:
id_13950327:
id: 13950327
fa_id: 25793411
creator_name:
creator_id:
title:
category:
theme:
species:
gender:
keywords: []
file_url:
blob_entry_id:
old_files: []
num_favorites:
num_comments:
num_views:
posted_at:
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-02-01 07:50:23.214173000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2018-02-01 07:50:23.214173000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-02-01 07:50:23.214173000 Z
zone: *1
time: 2018-02-01 07:50:23.214173000 Z
in_gallery:
in_folders: []
state: scan_error
state_error: post removed
folder_name:
gallery_page_logfile:
submission_page_logfile:
submission_page_log_entry_id:
description_id:
imgsearch_entry_id:
id_4936259:
id: 4936259
fa_id: 7126769
creator_name: Rodrick-Dragon
creator_id: 27392
title: Profile ID
category: Artwork (Digital)
theme: Doodle
species: Dragon (Other)
gender: Male
keywords: []
file_url: "//d.facdn.net/art/rodrick-dragon/1325297838/1325297838.rodrick-dragon_bio.jpg"
blob_entry_id: 11923806
old_files: []
num_favorites: 5
num_comments: 3
num_views: 30
posted_at:
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-03-09 12:21:45.224977000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-03-09 12:21:45.224977000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-01-29 01:14:47.165636000 Z
zone: *1
time: 2018-01-29 01:14:47.165636000 Z
in_gallery:
in_folders: []
state: have_static
state_error:
folder_name: Scraps
gallery_page_logfile: listing_pages/rodrick-dragon/1457526104-Scraps_1.html
submission_page_logfile:
submission_page_log_entry_id: 6026946
description_id: 4858488
imgsearch_entry_id: 6310759
id_3234144:
id: 3234144
fa_id: 8489215
creator_name: LokiLover
creator_id: 14101
title: Scribbles with Kier
category: Artwork (Digital)
theme: Doodle
species: Unspecified / Any
gender: Any
keywords:
- doodles
- OC
- opencanvas
- goofy
- silly
- chubby
- things
- dinosaurs
- cows
- kitties
- spider
- gremlins
- chibi
- super
- sexy
- cheerleader
file_url: "/full/8489215/"
blob_entry_id: 6877172
old_files: []
num_favorites: 5
num_comments: 0
num_views: 152
posted_at:
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-03-04 20:41:26.393592000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-03-04 20:41:26.393592000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-12-23 03:56:27.425752000 Z
zone: *1
time: 2016-12-23 03:56:27.425752000 Z
in_gallery:
in_folders: []
state: have_static
state_error:
folder_name: Gallery
gallery_page_logfile: listing_pages/lokilover/1476614903-Gallery_2.html
submission_page_logfile: submissions/lokilover/1463959274-8489215.html
submission_page_log_entry_id:
description_id: 3173583
imgsearch_entry_id:
id_10117853:
id: 10117853
fa_id: 21826851
creator_name: Drake_Ergenthal
creator_id: 106983
title: |-
Font size adjustment: smallerlarger
Locust vs. Nature
category: Story
theme: All
species:
gender:
keywords:
- Locust
- Grenadier
- Hunter
- Elite
- Savage
- Kantus
- Miner
- quicksand
- Gears
- of
- War
file_url: "//d.facdn.net/art/drakeergenthal/stories/1480188610/1480188610.drakeergenthal_gow_fanfic.txt"
blob_entry_id: 8663902
old_files: []
num_favorites: 1
num_comments: 0
num_views: 11
posted_at:
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-11-26 19:44:43.419488000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-11-26 19:44:43.419488000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-12-23 20:32:59.798452000 Z
zone: *1
time: 2016-12-23 20:32:59.798452000 Z
in_gallery:
in_folders: []
state: have_static
state_error: guessing exists
folder_name:
gallery_page_logfile:
submission_page_logfile:
submission_page_log_entry_id: 1449
description_id: 8039358
imgsearch_entry_id:
fa_id_19177819:
id: 1
fa_id: 19177819
creator_name: Meesh
creator_id: 1
title: MASSIVE ART PACK 6 - Available now!
category: Artwork (Digital)
theme: All
species: Unspecified / Any
gender: Any
keywords:
- meesh
- nsfw
- art
- pack
- adult
- boner
- touching
file_url: "//d.facdn.net/art/meesh/1456189705/1456189705.meesh_production5.png"
blob_entry_id: 1370198
old_files: []
num_favorites: 8
num_comments: 2
num_views: 904
posted_at:
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-02-28 00:26:18.966788000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-02-28 00:26:18.966788000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-01-27 23:26:36.605202000 Z
zone: *1
time: 2018-01-27 23:26:36.605202000 Z
in_gallery: true
in_folders: []
state: have_static
state_error:
folder_name: Main Gallery
gallery_page_logfile: listing_pages/meesh/1480115720-MainGallery_1.html
submission_page_logfile:
submission_page_log_entry_id:
description_id: 144
imgsearch_entry_id: 154

View File

@@ -1,80 +0,0 @@
---
id_1:
id: 1
name: Meesh
full_name: Meesh
artist_type: PrOn Artist
mood: optimistic
profile_html: "\n<b>Full
Name:</b> Meesh<br>\n<b>Artist
Type:</b> PrOn Artist<br>\n<b>Registered
since:</b> Dec 11th, 2005 11:28<br>\n<b>Current
mood:</b> optimistic<br>\n<b>Artist
Profile:</b><br>\n<div
class=\"bbcode bbcode_center\">Male | 28 | Housecat | Single | Straight</div>\n<br>\r\n<br>\r\n<a
href=\"/user/patreon\" class=\"iconusername\"><img src=\"//a.facdn.net/20170212/patreon.gif\"
align=\"middle\" title=\"patreon\" alt=\"patreon\"></a> I have a Patreon - Early
access for comics starts at $6/month! <a href=\"http://www.patreon.com/meesh\" title=\"http://www.patreon.com/meesh\"
class=\"auto_link\">www.patreon.com/meesh</a><br>\r\n<br>\r\n<span class=\"bbcode\"
style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">BUSINESS INQUIRIES:
MEESHYMEESH@GMAIL.COM</strong></span><br>\r\n<a href=\"/user/oregonfurs\" class=\"iconusername\"><img
src=\"//a.facdn.net/20170212/oregonfurs.gif\" align=\"middle\" title=\"oregonfurs\"
alt=\"oregonfurs\">oregonfurs</a><br>\r\n<strong class=\"bbcode bbcode_b\">MY
STORE:</strong> <a href=\"http://www.meesh.pandachan.org\" title=\"http://www.meesh.pandachan.org\"
class=\"auto_link\">www.meesh.pandachan.org</a><br>\r\n<br>\r\n<span class=\"bbcode\"
style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">TUMBLR:</strong></span><br>\r\n<a
href=\"http://www.meeshmeat.tumblr.com\" title=\"http://www.meeshmeat.tumblr.com\"
class=\"auto_link\">www.meeshmeat.tumblr.com</a><br>\r\n<br>\r\n<span class=\"bbcode\"
style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">COMMISSIONS:</strong></span><strong
class=\"bbcode bbcode_b\">CLOSED.</strong> Email me at <a class=\"auto_link email\"
href=\"mailto:meeshymeesh@gmail.com\">meeshymeesh[at]gmail.com</a> to inquire about
commissions. Current turnaround time is approx one month.<br>\r\n<strong class=\"bbcode
bbcode_b\">Tier 1:</strong> <a href=\"https://www.furaffinity.net/view/17970865/\"
title=\"https://www.furaffinity.net/view/17970865/\" class=\"auto_link\">https://www.furaffinity.net/view/17970865/</a>
$600/character<br>\r\n<strong class=\"bbcode bbcode_b\">Tier 2:</strong> <a href=\"https://www.furaffinity.net/view/18356127/\"
title=\"https://www.furaffinity.net/view/18356127/\" class=\"auto_link\">https://www.furaffinity.net/view/18356127/</a>
$200/character<br>\r\n<strong class=\"bbcode bbcode_b\">Sketch:</strong> <a href=\"https://www.furaffinity.net/view/18329853/\"
title=\"https://www.furaffinity.net/view/18329853/\" class=\"auto_link\">https://www.furaffinity.net/view/18329853/</a>
$80/character<br>\r\n<br>\r\nMy gallery is a good example of what I will and will
not draw. <br>\r\n<br>\r\n<span class=\"bbcode\" style=\"color: #d92464;\"><strong
class=\"bbcode bbcode_b\">Terms of Service for commissions:</strong></span> I will
always have control over what I will and will not draw. I will not work towards
any illustration that is against the law, or is intended to be slanderous or hateful
towards any other individual or business. I have the right to change an estimated
date of completion depending on the situation. I have the right to refuse service
to anyone. Money must be paid in advance before work can begin on a project. You
have the right to ask for changes both in the finished product, and mid-process.
By hiring me, you understand and agree to these terms.<br>\r\n<br>\r\n<span class=\"bbcode\"
style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">Refund policy:</strong></span>
Refunds will be granted in full only if work on a project has not yet begun. If
necessary, proof can be provided of the work done."
num_pageviews: 1715730
num_submissions: 1225
num_comments_recieved: 40123
num_comments_given: 17386
num_journals: 13
num_favorites: 893478
registered_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2005-12-11 11:28:00.000000000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2005-12-11 11:28:00.000000000 Z
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-02-28 00:26:18.692514000 Z
zone: *1
time: 2016-02-28 00:26:18.692514000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-10-01 11:16:55.673308000 Z
zone: *1
time: 2018-10-01 11:16:55.673308000 Z
url_name: meesh
scanned_gallery: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-10-01 11:16:55.671846000 Z
zone: *1
time: 2018-10-01 11:16:55.671846000 Z
scanned_page: !ruby/object:ActiveSupport::TimeWithZone
utc: 2017-02-12 23:13:56.517709000 Z
zone: *1
time: 2017-02-12 23:13:56.517709000 Z
user_page_logfile: user/meesh/1480115630-meesh.html
user_page_log_entry_id: 12175910

View File

@@ -1,34 +0,0 @@
1766_id:
id: 1766
scheme: https
host: d.facdn.net
path: "/art/drakeergenthal/stories/1480188610/1480188610.drakeergenthal_gow_fanfic.txt"
query:
verb: get
status: 200
response_time: 192
content_type: text/plain
response_size: 6936
parent_log_entry_id:
blob_entry_id: 8663902
gzipped: false
requested_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-11-26 22:09:59.699304000 Z
zone: &1 !ruby/object:ActiveSupport::TimeZone
name: Etc/UTC
time: 2016-11-26 22:09:59.699304000 Z
created_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2016-11-26 22:09:59.753081000 Z
zone: *1
time: 2016-11-26 22:09:59.753081000 Z
updated_at: !ruby/object:ActiveSupport::TimeWithZone
utc: 2018-04-16 03:35:12.524953000 Z
zone: *1
time: 2018-04-16 03:35:12.524953000 Z
resp_body: 41F8DAF7772D11F80AFE56B742087A2D1AB372E08B69E1284BE4FEFEC2AD0C7F
imported_from_file:
req_headers_id: 3
res_headers_id: 575
diff_type: native
native_blob_entry_sha256: !binary |-
Qfja93ctEfgK/la3Qgh6LRqzcuCLaeEoS+T+/sKtDH8=

View File

@@ -1,19 +0,0 @@
Locust vs. Nature
Not much are known for what has happened to the rest of the Locust, during the war. Some even faced a worse fate than dying from any weapon or from the Imulsion Countermeasure weapon. The Lambent are the actual threat of all, if only the Locust would see it that way and join forces with the humans (aka ground walkers). Let's just say what would happen to four unlucky Locust characters, while around the timeline of Gears of War 3.
We have a Hunter Elite, on a mission to search for other members of the Locust that were apparently lost from the Jacinto incident. Let's call him Chuck for this one. The Locust don't have much for names around the low ranks. The only names that were mentioned are RAAM, Skorge, and Karn. As for Chuck, he has to venture above ground for his search. But he wasn't alone, as he brings around a miner for connection to the lost ones. We're gonna call the miner as Jules. Chuck and Jules are wearing their proper Locust clothes to notify their ranks. As much as they hate the humans living above ground, the two have to focus on searching for the lost Locust. They stayed hidden from the inhabitants from certain cities.
Chuck and Jules did come across some Lambent along the way, which are a nuisance to everyone. Chuck has his Hammerburst with him to take some out, but Jules doesn't have any weapon on him. But he did grab a Lancer from a dead COG soldier, and uses it against the Lambent. Chuck was even surprised about Jules' capabilities. As much as the Lambent are their enemy from the threat, they decide to hide from the humans and COG soldiers who arrived to provide reinforcements. Jules keeps the Lancer with him, as it was his only thing that protected him.
They soon arrive at a place called the Deadlands, and there are no signs of Lambent anywhere. Let alone any Locust survivors to find. Jules starts to have a feeling that they being watched. Chuck notices a trap door around the middle of the arena. For good measures, he shoots the door open and it only shows some ground underneath. Like a pitfall. While they avoided a sudden trap, they soon get jumped by their own Locust militia. Only their outfits are much different compared to their own, when the regular Locust are well organized too. These Locust are acting a bit feral of their way, and doesn't follow the ways of the Locust Queen. They are savages compared to Chuck and Jules, and the two are also surrounded. One of the savages is a grenadier, who soon falls into the trap while rushing towards Chuck and Jules. We'll call him Zeke for that. Despite putting up a good fight, the two were subdued by the Savage Locust.
Chuck and Jules were disarmed and the savages prevents them from moving, as their leader shows up, who happens to be a Savage Kantus. We'll call him Travis for no apparent reason. Anyway, Travis was not amused about the outsider Locust putting up resistance against his own community. As punishment, the two are thrown down where the trap door was to suffer endlessly. The savage drone threw the two in, Travis decides to join them to make sure the outsiders will suffer to no end. The savages waved goodbye to Travis, as he's gonna be joining them for eternity. As for Chuck and Jules, they've reached the bottom of the fall, just to see Zeke with them. They plan on getting out of this, only there's another reason why these characters are doomed here. The pitfall is slowly sucking them under. Zeke is already up to his knees in the sand, so he can't pull himself out. Jules then notices that he can't lift up his feet, as his ankles are already under. Chuck tries to climb out, as he quick to act from the situation. While climbing, Travis falls on top of Chuck and they plummet down to Jules and Zeke. Chuck opens his eyes, only to find himself up to his thighs within the sand. Travis explains to them as this is quicksand, and it's impossible to escape. He's willing to enjoy every minute of it, while the others struggle to get free. Travis decides to take off some of his armor while he's sinking with the others, to reveal his red undergarment to them.
Jules then yells for help, as the quicksand starts sucking up his thighs. It doesn't help the dismay for Chuck or Zeke, as the living ground soon touches their groins. Travis soon starts touching his crotch while laying on his knees. The quicksand soon reaches Jules' crotch, and now three of them can barely move their legs. It will soon include Travis as he's ready to unload his bladder. The sinking Locust were taken by surprise, as the kantus would do something so very, very different!!! Chuck, Jules and Zeke then realizes that they have to pee! The two tried to resist the temptation, but Zeke points out that it's pointless to fight. The savage grenadier soon let's it go, as his crotch is already suck under by the sands. The piss was hard to ignore, and soon the buildup within Chuck's and Jules' became too great. They unload their fill, as the quicksand begins to suck up their stomaches. Travis was happy to watch his crotch sinking within the hungry sands.
Chuck, and Jules were so deep in, the hunter even tried to pull himself out. It didn't work, and now he can't even pull up his arms from the sands. Jules uses up his remaining arm that was still up the surface, to activate his full helmet. It didn't help, now that three of them are up to their shoulders and continues to sink. Travis was thrilled to see their despair, even if the quicksand is reaching up to his chest. Travis explains to them, that it's not the end for them. Chuck believes that it is, as the sands begins to start sucking up his head down. The kantus soon says they will not be forgotten, only going to sleep for a long time. Zeke was soon sucked under, and soon claims Chuck too. Jules did give out a question, saying if they'll be alright. Travis says yes, before the miner was sucked under.
As for Travis now up to his armpits within his quicksand, he begins playing around until it was time to enchant their long sleep within the sands. Before he was submerged along with the others, Travis conjures his spell. They disappeared from the earth, but somehow they will survive everything.
For over twenty-five years, Travis will revive himself as well as the others since the quicksand has dried up. Chuck, Jules and Zeke wake up to find themselves only up to their armpits in just dirt. They dug themselves out, and notices Travis up to his neck within the solid sands. Since he preserved them for all this time, the remaining Locust dug him up and climbed out. Upon getting out, they soon noticed that everything has changed! Travis soon wakes up, and was willing to help find a way to survive their new world. They are also unaware that the essence of the Queen still exist somewhere. Perhaps they are lucky than they thought.

Binary file not shown.

View File

@@ -1,9 +0,0 @@
class Domain::Fa::PostTest < ActiveSupport::TestCase
# test "remove buggy prefixes" do
# TODO - implement this
# Some posts have a title prefixed with "Font size adjustment: smallerlarger"
# which should be removed
# Legacy::Fa::Post.where("title like ?", "Font size adjustment: smallerlarger%").count
# => 7056
# end
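# A minimal sketch of how such a prefix could be stripped (hypothetical helper
# and constant names, not part of the legacy code):
#
#   BUGGY_PREFIX = "Font size adjustment: smallerlarger"
#
#   def strip_buggy_prefix(title)
#     title.to_s.delete_prefix(BUGGY_PREFIX).lstrip
#   end
#
#   strip_buggy_prefix("Font size adjustment: smallerlarger\nLocust vs. Nature")
#   # => "Locust vs. Nature"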
end

View File

@@ -1 +0,0 @@