Remove legacy related code
This commit is contained in:
@@ -1,73 +0,0 @@
|
||||
class Domain::Fa::UserAvatarFixer < LegacyImport::BulkImportJob
|
||||
def initialize(start_at:, limit: nil, url_name: nil)
|
||||
@start_at = start_at
|
||||
@limit = limit
|
||||
@url_name = url_name
|
||||
end
|
||||
|
||||
def name
|
||||
"user_avatar_fixer"
|
||||
end
|
||||
|
||||
def run_impl
|
||||
@processed = 0
|
||||
|
||||
if @url_name
|
||||
user =
|
||||
Domain::Fa::User.find_by(url_name: @url_name) || raise("user not found")
|
||||
process_avatar(user.avatar)
|
||||
else
|
||||
Domain::Fa::UserAvatar
|
||||
.where(state: "no_file_on_guessed_user_page_error")
|
||||
.find_each(start: @start_at, batch_size: 5) do |avatar|
|
||||
@processed += 1
|
||||
break if @limit && @processed > @limit
|
||||
process_avatar(avatar)
|
||||
end
|
||||
end
|
||||
|
||||
@processed
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def process_avatar(avatar)
|
||||
user = avatar.user
|
||||
logger.prefix =
|
||||
proc do
|
||||
"[avatar #{avatar.id.to_s.bold}, user #{user.url_name.to_s.bold}]"
|
||||
end
|
||||
|
||||
logger.info("guessing...")
|
||||
|
||||
if avatar.file
|
||||
logger.warn("have file, skipping")
|
||||
return
|
||||
end
|
||||
|
||||
if !avatar.file_url_str.blank?
|
||||
if avatar.file
|
||||
logger.warn("have file_url_str, skipping")
|
||||
else
|
||||
avatar.state = :ok
|
||||
avatar.save!
|
||||
Domain::Fa::Job::UserAvatarJob.perform_later({ user: user })
|
||||
logger.info("existing file_url_str: #{avatar.file_url_str}")
|
||||
logger.warn("have url, but no file, enqueue job")
|
||||
end
|
||||
return
|
||||
end
|
||||
|
||||
guessed_avatar_uri = avatar.guess_file_uri_from_hles_with_info
|
||||
if guessed_avatar_uri.first == :not_found
|
||||
logger.error("did not find avatar url: #{guessed_avatar_uri.to_s.bold}")
|
||||
else
|
||||
logger.info("found uri, enqueue job: #{guessed_avatar_uri.to_s.bold}")
|
||||
avatar.state = :ok
|
||||
avatar.file_uri = guessed_avatar_uri[1]
|
||||
avatar.state_detail["user_avatar_fixer_job"] = guessed_avatar_uri
|
||||
avatar.save!
|
||||
Domain::Fa::Job::UserAvatarJob.perform_later({ user: user })
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,64 +0,0 @@
|
||||
class LegacyImport::AdaptiveCache
|
||||
Entry = Struct.new(:score, :id, :obj, :extra)
|
||||
|
||||
def initialize(max_size = 32, reward = 1.0, punish = 0.1)
|
||||
@max_size = max_size
|
||||
@candidates = []
|
||||
@reward = reward
|
||||
@punish = punish
|
||||
end
|
||||
|
||||
def at_capacity?
|
||||
@candidates.count == @max_size
|
||||
end
|
||||
|
||||
def candidates
|
||||
@candidates.map { |c| c.obj }
|
||||
end
|
||||
|
||||
def scores
|
||||
@candidates.map { |c| c.score }
|
||||
end
|
||||
|
||||
def reward(candidate_id)
|
||||
@candidates.each do |entry|
|
||||
if entry.id == candidate_id
|
||||
entry.score += @reward
|
||||
else
|
||||
entry.score -= @punish
|
||||
end
|
||||
end
|
||||
sort!
|
||||
end
|
||||
|
||||
def contains?(candidate_id)
|
||||
!!@candidates.find { |entry| entry.id == candidate_id }
|
||||
end
|
||||
|
||||
def insert(id, candidate, extra = nil)
|
||||
new_entry = Entry.new(0.0, id, candidate, extra)
|
||||
idx = @candidates.bsearch_index { |entry| entry.score <= 0 }
|
||||
|
||||
if idx == nil
|
||||
@candidates.push(new_entry)
|
||||
else
|
||||
@candidates.insert(idx, new_entry)
|
||||
end
|
||||
|
||||
@candidates.pop while @candidates.size > @max_size
|
||||
end
|
||||
|
||||
def to_s
|
||||
@candidates
|
||||
.map do |entry|
|
||||
" - #{entry.score.round(1)} score, id #{entry.id} - #{entry.extra}"
|
||||
end
|
||||
.join("\n")
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def sort!
|
||||
@candidates.sort_by! { |entry| -entry.score }
|
||||
end
|
||||
end
|
||||
@@ -1,51 +0,0 @@
|
||||
class LegacyImport::BulkImportJob
|
||||
attr_reader :logger_prefix
|
||||
include HasColorLogger
|
||||
|
||||
def name
|
||||
raise NotImplementedError.new("implement #name")
|
||||
end
|
||||
|
||||
def run_impl
|
||||
raise NotImplementedError.new("implement #run_impl")
|
||||
end
|
||||
|
||||
def profile?
|
||||
false
|
||||
end
|
||||
|
||||
def run
|
||||
start_profiling!
|
||||
start_at = Time.now
|
||||
total_work = run_impl
|
||||
duration = (Time.now - start_at)
|
||||
logger.info "finish, total #{duration.round(1)}s, #{total_work} items, #{(total_work / duration).round(1)} items/s"
|
||||
end_profiling!
|
||||
end
|
||||
|
||||
def write_last_id(last_id)
|
||||
logger.prefix = proc { "[last_id #{last_id.to_s.bold}]" }
|
||||
File.write("tmp/#{name}_progress", last_id.to_s)
|
||||
end
|
||||
|
||||
def start_profiling!
|
||||
RubyProf.start if profile?
|
||||
end
|
||||
|
||||
def end_profiling!
|
||||
if profile?
|
||||
base = "profiler/#{name}"
|
||||
Dir.mkdir_p(base) unless File.exist?(base)
|
||||
result = RubyProf.stop
|
||||
File.open("#{base}/profile.txt", "w") do |f|
|
||||
RubyProf::GraphPrinter.new(result).print(f, { min_percent: 1 })
|
||||
end
|
||||
File.open("#{base}/profile.html", "w") do |f|
|
||||
RubyProf::CallStackPrinter.new(result).print(f, { min_percent: 1 })
|
||||
end
|
||||
File.open("#{base}/profile.rubyprof", "w") do |f|
|
||||
RubyProf::SpeedscopePrinter.new(result).print(f, { min_percent: 1 })
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,90 +0,0 @@
|
||||
class LegacyImport::E621LegacyPostImporter < LegacyImport::BulkImportJob
|
||||
def initialize(batch_size:, forks:, start_at:)
|
||||
@forks = forks || 16
|
||||
@batch_size = batch_size || @forks * 32
|
||||
@start_at = start_at || 0
|
||||
@start_time = Time.now
|
||||
|
||||
logger.info "forks=#{@forks} batch_size=#{@batch_size} start_at=#{@start_at}"
|
||||
end
|
||||
|
||||
def name
|
||||
"e621_legacy_post_importer"
|
||||
end
|
||||
|
||||
def profile?
|
||||
false
|
||||
end
|
||||
|
||||
def run_impl
|
||||
progress = 0
|
||||
query = ::Legacy::E621::Post.includes(:blob_entry, { taggings: :tag })
|
||||
# finish = @start_at + (2 * 32 * 32)
|
||||
finish = nil
|
||||
query.find_in_batches(
|
||||
start: @start_at,
|
||||
finish: finish,
|
||||
batch_size: @batch_size * @forks
|
||||
) do |batch|
|
||||
last_id = batch.last&.id
|
||||
|
||||
if @forks <= 1
|
||||
progress += import_e621_posts(batch)
|
||||
else
|
||||
progress +=
|
||||
ForkFuture
|
||||
.parallel_map_slice(@forks, batch) do |fork_batch|
|
||||
import_e621_posts(fork_batch)
|
||||
end
|
||||
.sum
|
||||
end
|
||||
|
||||
rate = progress.to_f / (Time.now - @start_time)
|
||||
logger.info "finish batch, last id #{last_id} - #{progress} - #{rate.round(1)} / second"
|
||||
write_last_id last_id
|
||||
end
|
||||
progress
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def import_e621_posts(legacy_posts)
|
||||
progress = 0
|
||||
|
||||
skip_posts_ids =
|
||||
Set.new(
|
||||
::Domain::E621::Post
|
||||
.select(:e621_id)
|
||||
.where(e621_id: legacy_posts.map(&:e621_id))
|
||||
.pluck(:e621_id)
|
||||
)
|
||||
|
||||
legacy_posts.reject! do |legacy_post|
|
||||
skip_posts_ids.include?(legacy_post.e621_id)
|
||||
end
|
||||
|
||||
legacy_posts.each do |legacy_post|
|
||||
retries = 0
|
||||
begin
|
||||
ReduxApplicationRecord.transaction do
|
||||
post = ::Domain::E621::Post.find_or_build_from_legacy(legacy_post)
|
||||
unless post.valid?
|
||||
raise(
|
||||
"error building post #{post.id} / #{post.e621_id}: #{post.errors.full_messages}"
|
||||
)
|
||||
end
|
||||
post.save!
|
||||
progress += 1
|
||||
end
|
||||
rescue StandardError
|
||||
retries += 1
|
||||
sleep 0.1 and retry if retries < 3
|
||||
raise
|
||||
end
|
||||
end
|
||||
|
||||
ReduxApplicationRecord.clear_active_connections!
|
||||
LegacyApplicationRecord.clear_active_connections!
|
||||
progress
|
||||
end
|
||||
end
|
||||
@@ -1,85 +0,0 @@
|
||||
class LegacyImport::FaPostImporter < LegacyImport::BulkImportJob
|
||||
def initialize(batch_size:, forks:, start_at:)
|
||||
@forks = forks || 16
|
||||
@batch_size = batch_size || @forks * 32
|
||||
@start_at = start_at || 0
|
||||
@start_time = Time.now
|
||||
|
||||
puts "FaPostImporter forks=#{@forks} batch_size=#{@batch_size} start_at=#{@start_at}"
|
||||
end
|
||||
|
||||
def name
|
||||
"fa_post_importer"
|
||||
end
|
||||
|
||||
def profile?
|
||||
false
|
||||
end
|
||||
|
||||
def run
|
||||
start_profiling!
|
||||
|
||||
progress = 0
|
||||
query = ::Legacy::Fa::Post.includes(:blob_entry, :description_ref)
|
||||
query.find_in_batches(start: @start_at, batch_size: @batch_size) do |batch|
|
||||
last_id = batch.last&.id
|
||||
|
||||
if @forks <= 1
|
||||
progress += import_fa_posts(batch)
|
||||
else
|
||||
progress +=
|
||||
ForkFuture
|
||||
.parallel_map_slice(@forks, batch) do |fork_batch|
|
||||
import_fa_posts(fork_batch)
|
||||
end
|
||||
.sum
|
||||
end
|
||||
|
||||
rate = progress.to_f / (Time.now - @start_time)
|
||||
puts "finish batch, last id #{last_id} - #{progress} - #{rate.round(1)} / second"
|
||||
write_progress last_id
|
||||
end
|
||||
|
||||
end_profiling!
|
||||
end
|
||||
|
||||
def import_fa_posts(legacy_posts)
|
||||
progress = 0
|
||||
skip_posts_ids =
|
||||
Set.new(
|
||||
::Domain::Fa::Post
|
||||
.select(:fa_id, :creator_id)
|
||||
.where(fa_id: legacy_posts.map(&:fa_id))
|
||||
.where("creator_id is not null")
|
||||
.pluck(:fa_id)
|
||||
)
|
||||
|
||||
legacy_posts.reject! do |legacy_post|
|
||||
skip_posts_ids.include?(legacy_post.fa_id)
|
||||
end
|
||||
|
||||
legacy_posts.each do |legacy_post|
|
||||
retries = 0
|
||||
begin
|
||||
ReduxApplicationRecord.transaction do
|
||||
post = ::Domain::Fa::Post.find_or_build_from_legacy(legacy_post)
|
||||
unless post.valid?
|
||||
raise(
|
||||
" !! error building post #{post.id} / #{post.fa_id}: #{post.errors.full_messages}"
|
||||
)
|
||||
end
|
||||
post.save!
|
||||
progress += 1
|
||||
end
|
||||
rescue StandardError
|
||||
retries += 1
|
||||
sleep 0.1 and retry if retries < 3
|
||||
raise
|
||||
end
|
||||
end
|
||||
|
||||
ReduxApplicationRecord.clear_active_connections!
|
||||
LegacyApplicationRecord.clear_active_connections!
|
||||
progress
|
||||
end
|
||||
end
|
||||
@@ -1,508 +0,0 @@
|
||||
require "set"
|
||||
|
||||
class LegacyImport::HttpLogEntryBulkImporter
|
||||
PROFILE = false
|
||||
|
||||
def initialize(batch_size, cache_size, start_id, end_id)
|
||||
@batch_size = batch_size
|
||||
@cache_size = cache_size
|
||||
@start_id = start_id
|
||||
@end_id = end_id
|
||||
@fork_amount = 10
|
||||
@insert_stats = InsertStats.new
|
||||
@timings = Timings.new
|
||||
|
||||
# key is content_type|domain
|
||||
# value is the adaptive cache
|
||||
@blob_entry_cache =
|
||||
Hash.new do |hash, key|
|
||||
hash[key] = LegacyImport::AdaptiveCache.new(cache_size, 1.0, 0.1)
|
||||
end
|
||||
end
|
||||
|
||||
def run
|
||||
RubyProf.start if PROFILE
|
||||
|
||||
puts "HttpLogEntryBulkImporter: " +
|
||||
"#{@start_id || "(nil)"} -> #{@end_id || "(nil)"}, batch size #{@batch_size}, cache size #{@cache_size}, forking #{@fork_amount}"
|
||||
|
||||
start_at = Time.now
|
||||
|
||||
last_model_id = nil
|
||||
stats_printer =
|
||||
Thread.new do
|
||||
Thread.current.name = "stats-printer"
|
||||
i = 0
|
||||
loop do
|
||||
sleep 3
|
||||
duration = Time.now - start_at
|
||||
rate = @insert_stats.http_entries_inserted / duration
|
||||
hr
|
||||
puts "insert stats: #{@insert_stats} - " +
|
||||
"#{rate.round(2)}/sec (last id: #{last_model_id})"
|
||||
i += 1
|
||||
if i % 5 == 0
|
||||
hr
|
||||
dump_timings
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
query = Legacy::HttpLogEntry.order(id: :asc)
|
||||
|
||||
@timings.start :bulk_load
|
||||
query.find_in_batches(
|
||||
batch_size: @batch_size,
|
||||
start: @start_id,
|
||||
finish: @end_id
|
||||
) do |legacy_models|
|
||||
@timings.finish :bulk_load
|
||||
import_legacy_models(legacy_models)
|
||||
last_model_id = legacy_models.last&.id
|
||||
@timings.start :bulk_load
|
||||
end
|
||||
@timings.finish :bulk_load
|
||||
|
||||
stats_printer.kill if stats_printer
|
||||
|
||||
duration = Time.now - start_at
|
||||
bytes_stored = @insert_stats.bytes_stored
|
||||
bytes_length = @insert_stats.bytes_length
|
||||
ratio = bytes_stored.to_f / bytes_length
|
||||
rate = @insert_stats.http_entries_inserted / duration
|
||||
|
||||
hr
|
||||
dump_timings
|
||||
hr
|
||||
puts "Last id: #{last_model_id}"
|
||||
puts "Cache size: #{@cache_size}"
|
||||
puts "Batch size: #{@batch_size}"
|
||||
puts "Total content stored: #{InsertStats.humansize(bytes_stored)}"
|
||||
puts "Total content length: #{InsertStats.humansize(bytes_length)}"
|
||||
puts "Size ratio: #{ratio.round(2)}"
|
||||
puts "Total http inserted: #{@insert_stats.http_entries_inserted}"
|
||||
puts "Total blobs inserted: #{@insert_stats.blob_entries_inserted}"
|
||||
puts "Total duration: #{duration.round(0)} seconds (#{rate.round(2)}/second)"
|
||||
hr
|
||||
|
||||
if PROFILE
|
||||
Dir.mkdir("profiler") unless File.exist?("profiler")
|
||||
result = RubyProf.stop
|
||||
File.open("profiler/migrate_legacy_http_entries.txt", "w") do |f|
|
||||
RubyProf::GraphPrinter.new(result).print(f, { min_percent: 1 })
|
||||
end
|
||||
File.open("profiler/migrate_legacy_http_entries.html", "w") do |f|
|
||||
RubyProf::CallStackPrinter.new(result).print(f, { min_percent: 1 })
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def dump_timings
|
||||
tp(
|
||||
@timings.entries.map do |entry|
|
||||
{
|
||||
key: entry[:key],
|
||||
duration: "#{entry[:key_secs].round(1)} sec",
|
||||
percent: "#{(100 * entry[:proportion]).round(1)}%".rjust(5)
|
||||
}
|
||||
end
|
||||
)
|
||||
end
|
||||
|
||||
def import_legacy_models(legacy_models)
|
||||
@timings.start :lookup_existing_http
|
||||
already_exist_ids =
|
||||
::HttpLogEntry.where(id: legacy_models.map(&:id)).pluck(:id).to_set
|
||||
@timings.finish :lookup_existing_http
|
||||
|
||||
# ignore the models which have no stored content (for now)
|
||||
@timings.start :reject_empty_legacy
|
||||
legacy_models =
|
||||
ForkFuture.parallel_map(@fork_amount, legacy_models) do |legacy_model|
|
||||
next nil if already_exist_ids.include?(legacy_model.id)
|
||||
begin
|
||||
next nil if legacy_model.response_body.nil?
|
||||
rescue StandardError
|
||||
puts "legacy model #{legacy_model.id} (#{legacy_model.full_path}): error reading response body"
|
||||
next nil
|
||||
end
|
||||
|
||||
# legacy model now has response body loaded
|
||||
legacy_model
|
||||
end
|
||||
legacy_models.reject!(&:nil?)
|
||||
@timings.finish :reject_empty_legacy
|
||||
|
||||
blob_creation_future =
|
||||
ForkFuture.new { bulk_import_blob_entries(legacy_models) }
|
||||
header_creation_future =
|
||||
ForkFuture.new { bulk_import_headers(legacy_models) }
|
||||
|
||||
insert_stats, timings, cache_ops, legacy_model_id_to_response_sha256 =
|
||||
blob_creation_future.join
|
||||
@insert_stats.merge!(insert_stats)
|
||||
@timings.merge!(timings)
|
||||
cache_ops.each { |op| @blob_entry_cache[op[0]].send(op[1], *op[2..]) }
|
||||
|
||||
insert_stats,
|
||||
timings,
|
||||
legacy_model_id_to_header_sha256s,
|
||||
header_sha256_to_header_id =
|
||||
header_creation_future.join
|
||||
@insert_stats.merge!(insert_stats)
|
||||
@timings.merge!(timings)
|
||||
|
||||
@timings.start :build_new_https
|
||||
http_models =
|
||||
legacy_models.map do |legacy_model|
|
||||
request_headers_id =
|
||||
header_sha256_to_header_id[
|
||||
legacy_model_id_to_header_sha256s[legacy_model.id][:req_sha256]
|
||||
]
|
||||
response_headers_id =
|
||||
header_sha256_to_header_id[
|
||||
legacy_model_id_to_header_sha256s[legacy_model.id][:res_sha256]
|
||||
]
|
||||
response_sha256 = legacy_model_id_to_response_sha256[legacy_model.id]
|
||||
request_headers_id || raise("no request header id")
|
||||
response_headers_id || raise("no response header id")
|
||||
response_sha256 || raise("no response sha256")
|
||||
build_http_log_entry(
|
||||
legacy_model,
|
||||
request_headers_id,
|
||||
response_headers_id,
|
||||
response_sha256
|
||||
)
|
||||
end
|
||||
@timings.finish :build_new_https
|
||||
|
||||
@timings.start :insert_new_https
|
||||
if http_models.any?
|
||||
::HttpLogEntry.insert_all!(http_models.map(&:to_bulk_insert_hash))
|
||||
end
|
||||
|
||||
@insert_stats.http_entries_inserted += http_models.size
|
||||
@timings.finish :insert_new_https
|
||||
end
|
||||
|
||||
def bulk_import_blob_entries(legacy_models)
|
||||
insert_stats = InsertStats.new
|
||||
timings = Timings.new
|
||||
merged_cache_ops = []
|
||||
|
||||
# compute all blob entries for the legacy models, removing duplicates
|
||||
timings.start :lookup_existing_bes
|
||||
|
||||
legacy_model_id_to_response_sha256 =
|
||||
legacy_models
|
||||
.map { |m| [m.id, Digest::SHA256.digest(m.response_body)] }
|
||||
.to_h
|
||||
|
||||
sha256_to_existing_blob_entry =
|
||||
::BlobEntryP
|
||||
.where(sha256: legacy_model_id_to_response_sha256.values)
|
||||
.map { |be| [be.sha256, be] }
|
||||
.to_h
|
||||
timings.finish :lookup_existing_bes
|
||||
|
||||
timings.start :build_new_bes
|
||||
blob_entries_to_insert =
|
||||
ForkFuture
|
||||
.parallel_map(@fork_amount, legacy_models) do |legacy_model|
|
||||
sha256 = legacy_model_id_to_response_sha256[legacy_model.id] || raise
|
||||
next nil if sha256_to_existing_blob_entry[sha256]
|
||||
|
||||
content_type = legacy_model.content_type
|
||||
cache_key = "#{legacy_model.host}|#{content_type}"
|
||||
cache = @blob_entry_cache[cache_key]
|
||||
|
||||
# N% chance (if we're not at cache capacity) to not supply any candidates,
|
||||
# to give new entries in the cache a chance to replace poor performing ones
|
||||
candidates =
|
||||
if cache.at_capacity? # && rand(0..100) >= 5
|
||||
cache.candidates
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
blob_entry =
|
||||
::BlobEntryP.build_record(
|
||||
content_type: content_type,
|
||||
sha256: sha256,
|
||||
contents: legacy_model.response_body,
|
||||
candidates: candidates
|
||||
)
|
||||
|
||||
# reward the base if it was used, if not, insert this blob into the
|
||||
# cache so it'll be a future candidate (unless it's not a new model)
|
||||
# cache keys are hex encoded for easier viewing / debugging
|
||||
cache_op = nil
|
||||
if !blob_entry.persisted? && @cache_size > 0
|
||||
if blob_entry.base_sha256
|
||||
cache_op = [
|
||||
cache_key,
|
||||
:reward,
|
||||
HexUtil.bin2hex(blob_entry.base_sha256)[0..8]
|
||||
]
|
||||
else
|
||||
cache_op = [
|
||||
cache_key,
|
||||
:insert,
|
||||
HexUtil.bin2hex(blob_entry.sha256)[0..8],
|
||||
blob_entry,
|
||||
legacy_model.full_path
|
||||
]
|
||||
end
|
||||
end
|
||||
|
||||
blob_entry.valid? ||
|
||||
raise(
|
||||
"invalid blob entry (legacy model id #{legacy_model.id}): #{blob_entry.errors.full_messages}"
|
||||
)
|
||||
cache.send(cache_op[1], *cache_op[2..]) if cache_op
|
||||
[blob_entry, cache_op]
|
||||
end
|
||||
.reject(&:nil?)
|
||||
.map do |pair|
|
||||
blob_entry = pair[0]
|
||||
cache_op = pair[1]
|
||||
merged_cache_ops << cache_op if cache_op
|
||||
blob_entry
|
||||
end
|
||||
.uniq { |blob_entry| blob_entry.sha256 }
|
||||
timings.finish :build_new_bes
|
||||
|
||||
# bulk-insert all the new blob entries
|
||||
timings.start :insert_new_bes
|
||||
slice_size = [(blob_entries_to_insert.size.to_f / @fork_amount).ceil, 1].max
|
||||
if blob_entries_to_insert.any?
|
||||
blob_entries_to_insert
|
||||
.each_slice(slice_size)
|
||||
.map do |slice|
|
||||
ForkFuture.new do
|
||||
if slice.any?
|
||||
BlobEntryP.insert_all!(slice.map(&:to_bulk_insert_hash))
|
||||
end
|
||||
end
|
||||
end
|
||||
.to_a
|
||||
.map(&:join)
|
||||
end
|
||||
insert_stats.blob_entries_inserted += blob_entries_to_insert.size
|
||||
insert_stats.bytes_length +=
|
||||
blob_entries_to_insert.map(&:contents).map(&:size).sum
|
||||
insert_stats.bytes_stored += blob_entries_to_insert.map(&:bytes_stored).sum
|
||||
timings.finish :insert_new_bes
|
||||
|
||||
[
|
||||
insert_stats,
|
||||
timings,
|
||||
merged_cache_ops,
|
||||
legacy_model_id_to_response_sha256
|
||||
]
|
||||
end
|
||||
|
||||
def bulk_import_headers(legacy_models)
|
||||
insert_stats = InsertStats.new
|
||||
timings = Timings.new
|
||||
|
||||
timings.start :build_new_headers
|
||||
header_sha256_to_header_model = {}
|
||||
legacy_model_id_to_header_sha256s =
|
||||
ForkFuture
|
||||
.parallel_map(@fork_amount / 2, legacy_models) do |legacy_model|
|
||||
req_headers =
|
||||
::HttpLogEntryHeader.build_record(headers: legacy_model.req_headers)
|
||||
res_headers =
|
||||
::HttpLogEntryHeader.build_record(headers: legacy_model.res_headers)
|
||||
[legacy_model.id, { req: req_headers, res: res_headers }]
|
||||
end
|
||||
.map do |pair|
|
||||
legacy_model_id = pair[0]
|
||||
req_headers = pair[1][:req]
|
||||
res_headers = pair[1][:res]
|
||||
header_sha256_to_header_model[req_headers.sha256] = req_headers
|
||||
header_sha256_to_header_model[res_headers.sha256] = res_headers
|
||||
[
|
||||
legacy_model_id,
|
||||
{ req_sha256: req_headers.sha256, res_sha256: res_headers.sha256 }
|
||||
]
|
||||
end
|
||||
.to_h
|
||||
timings.finish :build_new_headers
|
||||
|
||||
# excluding existing headers, and bulk-insert the new headers
|
||||
timings.start :insert_new_headers
|
||||
header_sha256_to_header_id =
|
||||
::HttpLogEntryHeader
|
||||
.where(sha256: header_sha256_to_header_model.keys)
|
||||
.map { |model| [model.sha256, model.id] }
|
||||
.to_h
|
||||
|
||||
headers_to_insert =
|
||||
header_sha256_to_header_model
|
||||
.map do |sha256, header_model|
|
||||
next nil if header_sha256_to_header_id[sha256]
|
||||
header_model.valid? || raise("invalid header models")
|
||||
header_model
|
||||
end
|
||||
.reject(&:nil?)
|
||||
.uniq { |header_model| header_model.sha256 }
|
||||
|
||||
::HttpLogEntryHeader
|
||||
.insert_all!(
|
||||
headers_to_insert.map(&:to_bulk_insert_hash),
|
||||
returning: %i[id sha256]
|
||||
)
|
||||
.rows
|
||||
.each do |row|
|
||||
id, sha256 = row
|
||||
# rails does not deserialize the returned sha256 - we have to do that ourselves
|
||||
# postgres prefixes hex-encoded binaries with "\x", must strip that first
|
||||
raise("invariant") unless sha256[0..1] == "\\x"
|
||||
sha256 = ::HexUtil.hex2bin(sha256[2..])
|
||||
header_sha256_to_header_id[sha256] = id
|
||||
end if headers_to_insert.any?
|
||||
insert_stats.header_entries_inserted += headers_to_insert.size
|
||||
timings.finish :insert_new_headers
|
||||
|
||||
[
|
||||
insert_stats,
|
||||
timings,
|
||||
legacy_model_id_to_header_sha256s,
|
||||
header_sha256_to_header_id
|
||||
]
|
||||
end
|
||||
|
||||
def build_http_log_entry(
|
||||
legacy_model,
|
||||
request_headers_id,
|
||||
response_headers_id,
|
||||
response_sha256
|
||||
)
|
||||
model =
|
||||
::HttpLogEntry.new(
|
||||
id: legacy_model.id,
|
||||
uri_scheme: legacy_model.scheme,
|
||||
uri_host: legacy_model.host,
|
||||
uri_path: legacy_model.path,
|
||||
uri_query: legacy_model.query,
|
||||
verb: legacy_model.verb,
|
||||
content_type: legacy_model.content_type,
|
||||
status_code: legacy_model.status,
|
||||
response_time_ms: legacy_model.response_time,
|
||||
request_headers_id: request_headers_id,
|
||||
response_headers_id: response_headers_id,
|
||||
response_sha256: response_sha256,
|
||||
requested_at: legacy_model.requested_at,
|
||||
created_at: legacy_model.created_at,
|
||||
updated_at: legacy_model.updated_at
|
||||
)
|
||||
model
|
||||
end
|
||||
|
||||
def hr
|
||||
puts "-" * 40
|
||||
end
|
||||
|
||||
Timings =
|
||||
Struct.new(:keys, :totals) do
|
||||
def initialize
|
||||
@start_at = Time.now
|
||||
self.keys = []
|
||||
self.totals = {}
|
||||
end
|
||||
|
||||
def merge!(other)
|
||||
raise if other.nil?
|
||||
other.keys.each do |key|
|
||||
self.keys << key unless self.keys.include?(key)
|
||||
self.entry_for(key)[:secs] += other.totals[key][:secs]
|
||||
end
|
||||
end
|
||||
|
||||
def start(key)
|
||||
self.keys << key unless self.keys.include?(key)
|
||||
entry = self.entry_for(key)
|
||||
raise("#{key} already started") if entry[:started]
|
||||
entry[:started] = Time.now
|
||||
end
|
||||
|
||||
def finish(key)
|
||||
entry = self.totals[key]
|
||||
raise("#{key} does not exist") unless entry
|
||||
started = entry[:started]
|
||||
entry[:started] = nil
|
||||
raise("#{key} not started") unless started
|
||||
entry[:secs] += Time.now - started
|
||||
end
|
||||
|
||||
def entries
|
||||
total_secs = Time.now - @start_at
|
||||
total_measured_secs = self.totals.values.map { |e| e[:secs] }.sum
|
||||
self
|
||||
.keys
|
||||
.map do |key|
|
||||
key_secs = self.totals[key][:secs]
|
||||
{
|
||||
key: key,
|
||||
key_secs: key_secs,
|
||||
proportion: key_secs / total_measured_secs
|
||||
}
|
||||
end
|
||||
.chain(
|
||||
[
|
||||
{
|
||||
key: :measured_total,
|
||||
key_secs: total_measured_secs,
|
||||
proportion: total_measured_secs / total_secs
|
||||
},
|
||||
{ key: :actual_total, key_secs: total_secs, proportion: 1.0 }
|
||||
]
|
||||
)
|
||||
end
|
||||
|
||||
def entry_for(key)
|
||||
self.totals[key] ||= { started: nil, secs: 0.0 }
|
||||
end
|
||||
end
|
||||
|
||||
InsertStats =
|
||||
Struct.new(
|
||||
:http_entries_inserted,
|
||||
:blob_entries_inserted,
|
||||
:header_entries_inserted,
|
||||
:bytes_stored,
|
||||
:bytes_length
|
||||
) do
|
||||
def initialize
|
||||
self.http_entries_inserted = 0
|
||||
self.blob_entries_inserted = 0
|
||||
self.header_entries_inserted = 0
|
||||
self.bytes_stored = 0
|
||||
self.bytes_length = 0
|
||||
end
|
||||
|
||||
def merge!(other)
|
||||
self.http_entries_inserted += other.http_entries_inserted
|
||||
self.blob_entries_inserted += other.blob_entries_inserted
|
||||
self.header_entries_inserted += other.header_entries_inserted
|
||||
self.bytes_stored += other.bytes_stored
|
||||
self.bytes_length += other.bytes_length
|
||||
end
|
||||
|
||||
def to_s
|
||||
ratio = self.bytes_stored.to_f / self.bytes_length
|
||||
[
|
||||
"+#{self.http_entries_inserted} requests, +#{self.blob_entries_inserted} blobs, +#{self.header_entries_inserted} headers",
|
||||
"size ratio: #{ratio.round(2)} - #{self.class.humansize(self.bytes_stored)}/#{self.class.humansize(self.bytes_length)}"
|
||||
].join("\n")
|
||||
end
|
||||
|
||||
def self.humansize(size)
|
||||
HexUtil.humansize(size)
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -15,7 +15,7 @@ class Scraper::GalleryDlClient
|
||||
:response_time_ms,
|
||||
:body,
|
||||
:log_entry,
|
||||
keyword_init: true
|
||||
keyword_init: true,
|
||||
)
|
||||
TweetEvent = Struct.new(:tweet, :author)
|
||||
TweetMediaEvent =
|
||||
@@ -26,7 +26,7 @@ class Scraper::GalleryDlClient
|
||||
:media_num,
|
||||
:extension,
|
||||
:height,
|
||||
:width
|
||||
:width,
|
||||
)
|
||||
|
||||
def initialize(name, host)
|
||||
@@ -35,11 +35,6 @@ class Scraper::GalleryDlClient
|
||||
logger.info("build #{name.to_s.green.bold} - #{host.green}")
|
||||
@performed_by = name
|
||||
@client = Ripcord::Client.new(host)
|
||||
@max_cache_size = 8
|
||||
@blob_entry_cache =
|
||||
Hash.new do |hash, key|
|
||||
hash[key] = LegacyImport::AdaptiveCache.new(@max_cache_size, 1.0, 0.1)
|
||||
end
|
||||
end
|
||||
|
||||
def start_twitter_user(username, caused_by_entry: nil)
|
||||
@@ -47,7 +42,7 @@ class Scraper::GalleryDlClient
|
||||
rpc =
|
||||
@client.call(
|
||||
"start_user",
|
||||
[@token, "https://twitter.com/#{username}/tweets"]
|
||||
[@token, "https://twitter.com/#{username}/tweets"],
|
||||
)
|
||||
raise rpc_error_str(rpc) unless rpc.successful?
|
||||
decode_message(rpc.result, caused_by_entry)
|
||||
@@ -87,7 +82,7 @@ class Scraper::GalleryDlClient
|
||||
response_code: http_request[:status_code],
|
||||
response_time_ms: (http_request[:duration] * 1000).to_i,
|
||||
body: Base64.decode64(http_request[:content_base64]),
|
||||
log_entry: nil
|
||||
log_entry: nil,
|
||||
)
|
||||
log_and_set_http_request_event(event, caused_by_entry)
|
||||
event
|
||||
@@ -102,7 +97,7 @@ class Scraper::GalleryDlClient
|
||||
media[:media_num],
|
||||
media[:extension],
|
||||
media[:height],
|
||||
media[:width]
|
||||
media[:width],
|
||||
)
|
||||
end
|
||||
end
|
||||
@@ -124,26 +119,12 @@ class Scraper::GalleryDlClient
|
||||
|
||||
url = uri.to_s
|
||||
|
||||
cache_key = "#{uri.host}|#{content_type}"
|
||||
blob_entry_cache = @blob_entry_cache[cache_key]
|
||||
candidates =
|
||||
if blob_entry_cache.at_capacity? && rand(0..100) >= 5
|
||||
blob_entry_cache.candidates
|
||||
else
|
||||
[]
|
||||
end
|
||||
candidates << caused_by_entry.response if caused_by_entry&.response
|
||||
if caused_by_entry&.response&.base
|
||||
candidates << caused_by_entry.response.base
|
||||
end
|
||||
|
||||
retries = 0
|
||||
begin
|
||||
response_blob_entry =
|
||||
BlobEntryP.find_or_build(
|
||||
content_type: content_type,
|
||||
contents: http_event.body,
|
||||
candidates: candidates
|
||||
)
|
||||
|
||||
log_entry =
|
||||
@@ -161,8 +142,8 @@ class Scraper::GalleryDlClient
|
||||
response_time_ms: http_event.response_time_ms,
|
||||
requested_at: http_event.requested_at,
|
||||
caused_by_entry: caused_by_entry,
|
||||
performed_by: @performed_by
|
||||
}
|
||||
performed_by: @performed_by,
|
||||
},
|
||||
)
|
||||
|
||||
log_entry.save!
|
||||
@@ -174,17 +155,5 @@ class Scraper::GalleryDlClient
|
||||
|
||||
logger.debug "insert http log entry #{log_entry.id.to_s.bold}"
|
||||
http_event.log_entry = log_entry
|
||||
|
||||
if response_blob_entry.base_sha256
|
||||
blob_entry_cache.reward(
|
||||
HexUtil.bin2hex(response_blob_entry.base_sha256)[0..8]
|
||||
)
|
||||
else
|
||||
blob_entry_cache.insert(
|
||||
HexUtil.bin2hex(response_blob_entry.sha256)[0..8],
|
||||
response_blob_entry,
|
||||
url
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -36,97 +36,6 @@ class Domain::E621::Post < ReduxApplicationRecord
|
||||
foreign_key: :e621_id,
|
||||
optional: true
|
||||
|
||||
SKIP_MISMATCH_LEGACY_IDS = Set.new([836_414, 1_070_178])
|
||||
|
||||
def self.find_or_build_from_legacy(legacy_model)
|
||||
model = self.find_by(e621_id: legacy_model.e621_id)
|
||||
return model if model
|
||||
model =
|
||||
self.new(
|
||||
{
|
||||
state: :ok,
|
||||
file_url_str: legacy_model.file_url,
|
||||
rating: legacy_model.rating,
|
||||
sources_array: legacy_model.sources,
|
||||
tags_array: legacy_model.tags.map(&:value),
|
||||
artists_array: legacy_model.artists || [],
|
||||
},
|
||||
)
|
||||
|
||||
if legacy_model.e621_status != "active"
|
||||
model.flags_array << legacy_model.e621_status
|
||||
model.flags_array.uniq!
|
||||
end
|
||||
|
||||
%i[e621_id md5 description score created_at].each do |attr|
|
||||
model.send(:"#{attr}=", legacy_model.send(attr))
|
||||
end
|
||||
|
||||
http_log_entries =
|
||||
::HttpLogEntry.where(
|
||||
uri_host: model.file_uri.host,
|
||||
uri_path: model.file_uri.path,
|
||||
)
|
||||
http_log_entry = http_log_entries.first
|
||||
|
||||
if !http_log_entry && legacy_model.blob_entry
|
||||
legacy_hles =
|
||||
::Legacy::HttpLogEntry.where(
|
||||
host: model.file_uri.host,
|
||||
path: model.file_uri.path,
|
||||
)
|
||||
legacy_hle = legacy_hles.first
|
||||
|
||||
if legacy_hle
|
||||
http_log_entry = ::HttpLogEntry.build_from_legacy(legacy_hle)
|
||||
else
|
||||
http_log_entry =
|
||||
::HttpLogEntry.new(
|
||||
{
|
||||
uri: model.file_uri || raise,
|
||||
status_code: 200,
|
||||
verb: "get",
|
||||
response_time_ms: -1,
|
||||
requested_at: Time.now,
|
||||
request_headers: ::HttpLogEntryHeader.empty,
|
||||
response_headers: ::HttpLogEntryHeader.empty,
|
||||
performed_by: "legacy",
|
||||
},
|
||||
)
|
||||
end
|
||||
|
||||
http_log_entry.response ||=
|
||||
::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
|
||||
blob_entry = http_log_entry.response
|
||||
|
||||
if blob_entry && http_log_entry
|
||||
http_log_entry.content_type ||= blob_entry.content_type
|
||||
else
|
||||
# unable to construct http & blob entries, skip
|
||||
File.write(
|
||||
Rails.root.join("tmp/e621_legacy_post_importer_failures"),
|
||||
"#{model.e621_id} - (no hle) - unable to reconstruct http / blob entry\n",
|
||||
)
|
||||
http_log_entry = nil
|
||||
end
|
||||
end
|
||||
|
||||
if http_log_entry
|
||||
blob_entry = http_log_entry.response
|
||||
|
||||
if model.md5 != Digest::MD5.hexdigest(blob_entry.contents)
|
||||
File.write(
|
||||
Rails.root.join("tmp/e621_legacy_post_importer_failures"),
|
||||
"#{model.e621_id} - #{http_log_entry.status_code} - expected #{model.md5} != actual #{Digest::MD5.hexdigest(blob_entry.contents)}\n",
|
||||
)
|
||||
http_log_entry = nil
|
||||
end
|
||||
end
|
||||
|
||||
model.file = http_log_entry
|
||||
model
|
||||
end
|
||||
|
||||
def file_uri
|
||||
Addressable::URI.parse(self.file_url_str) if self.file_url_str.present?
|
||||
end
|
||||
|
||||
@@ -36,7 +36,7 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
:status_code,
|
||||
:response_time_ms,
|
||||
:content_type,
|
||||
:requested_at
|
||||
:requested_at,
|
||||
)
|
||||
|
||||
def self.find_by_uri_host_path(uri)
|
||||
@@ -44,49 +44,6 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
find_by(uri_host: uri.host, uri_path: uri.path)
|
||||
end
|
||||
|
||||
def self.build_from_legacy(legacy_model)
|
||||
response_body = legacy_model.response_body
|
||||
can_reconstruct_be =
|
||||
response_body.nil? && legacy_model.parent_log_entry_id.nil? &&
|
||||
legacy_model.resp_body.present? && legacy_model.blob_entry.present?
|
||||
|
||||
if can_reconstruct_be
|
||||
blob_entry =
|
||||
::BlobEntryP.find_or_build_from_legacy(legacy_model.blob_entry)
|
||||
blob_sha256 = HexUtil.hex2bin(legacy_model.resp_body)
|
||||
unless blob_entry.sha256 == blob_sha256
|
||||
raise(
|
||||
"mismatch for legacy http entry #{legacy_model.id} / legacy blob entry #{legacy_model.blob_entry.id}"
|
||||
)
|
||||
end
|
||||
else
|
||||
blob_entry = nil
|
||||
end
|
||||
|
||||
uri = Addressable::URI.parse(legacy_model.full_path)
|
||||
uri.scheme ||= "https"
|
||||
uri.path ||= "/"
|
||||
|
||||
::HttpLogEntry.new(
|
||||
{
|
||||
verb: legacy_model.verb,
|
||||
uri: uri,
|
||||
content_type: legacy_model.content_type,
|
||||
status_code: legacy_model.status,
|
||||
response_time_ms: legacy_model.response_time,
|
||||
request_headers:
|
||||
::HttpLogEntryHeader.find_or_build(headers: legacy_model.req_headers),
|
||||
response_headers:
|
||||
::HttpLogEntryHeader.find_or_build(headers: legacy_model.res_headers),
|
||||
response: blob_entry,
|
||||
requested_at: legacy_model.requested_at,
|
||||
created_at: legacy_model.created_at,
|
||||
updated_at: legacy_model.updated_at,
|
||||
performed_by: "legacy"
|
||||
}
|
||||
)
|
||||
end
|
||||
|
||||
def uri=(uri)
|
||||
uri = Addressable::URI.parse(uri)
|
||||
self.uri_scheme = uri.scheme
|
||||
@@ -128,7 +85,7 @@ class HttpLogEntry < ReduxApplicationRecord
|
||||
response_sha256: self.response_sha256,
|
||||
requested_at: self.requested_at,
|
||||
created_at: self.created_at,
|
||||
updated_at: self.updated_at
|
||||
updated_at: self.updated_at,
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
module Legacy
|
||||
autoload(:Fa, "legacy/fa.rb")
|
||||
end
|
||||
@@ -1,131 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: blob_entries
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# created_at :datetime
|
||||
# updated_at :datetime
|
||||
# file_size :integer
|
||||
# refcount :integer
|
||||
# sha256 :string(64)
|
||||
# dir_depth :integer default(2), not null
|
||||
#
|
||||
|
||||
require "digest"
|
||||
|
||||
class Legacy::BlobEntry < LegacyApplicationRecord
|
||||
self.table_name = "blob_entries"
|
||||
|
||||
validates_presence_of :sha256
|
||||
validates_presence_of :refcount
|
||||
validates_presence_of :file_size
|
||||
|
||||
validates_uniqueness_of :sha256
|
||||
validates_length_of :sha256, is: 64
|
||||
|
||||
validates_presence_of :dir_depth
|
||||
|
||||
before_validation do
|
||||
self.dir_depth ||= 2
|
||||
self.file_size = File.size(file_path)
|
||||
self.refcount ||= 0
|
||||
end
|
||||
|
||||
def file_relative_path
|
||||
sha256 || raise
|
||||
dir_depth || raise
|
||||
self.class.file_path_at_depth(sha256: sha256, depth: dir_depth)
|
||||
end
|
||||
|
||||
def ensure_file_path
|
||||
sha256 || raise
|
||||
dir_depth || raise
|
||||
|
||||
unless File.exist?(file_path)
|
||||
found = false
|
||||
(2..5).each do |depth|
|
||||
path =
|
||||
File.join(
|
||||
Legacy::SConfig.blob_static_dir,
|
||||
self.class.file_path_at_depth(sha256: sha256, depth: depth)
|
||||
)
|
||||
next unless File.exist?(path)
|
||||
|
||||
self.dir_depth = depth
|
||||
save!
|
||||
found = true
|
||||
Legacy::SConfig.logger.warn(
|
||||
"found fixed path at #{depth} for BE id #{id}"
|
||||
)
|
||||
break
|
||||
end
|
||||
|
||||
return nil unless found
|
||||
end
|
||||
|
||||
file_path
|
||||
end
|
||||
|
||||
def self.file_path_at_depth(sha256:, depth:, stride: 1, hash_length: 64)
|
||||
# generate something like sha256[0]/sha256[1]/sha256
|
||||
raise("invalid sha256: #{sha256}") unless sha256.length == hash_length
|
||||
|
||||
parts =
|
||||
(0...depth).map { |idx| sha256[(idx * stride)...((idx + 1) * stride)] } +
|
||||
[sha256]
|
||||
File.join(*parts)
|
||||
end
|
||||
|
||||
def file_path
|
||||
File.join Legacy::SConfig.blob_static_dir, file_relative_path
|
||||
end
|
||||
|
||||
def inc_refcount
|
||||
::Legacy::BlobEntry.increment_counter(:refcount, id)
|
||||
end
|
||||
|
||||
def dec_refcount
|
||||
::Legacy::BlobEntry.decrement_counter(:refcount, id)
|
||||
end
|
||||
|
||||
def self.create_from_blob(blob:, opts: {})
|
||||
sha256 = Digest::SHA256.hexdigest blob
|
||||
|
||||
write_out =
|
||||
lambda do |be, _contents|
|
||||
dir = File.dirname be.file_path
|
||||
FileUtils.mkdir_p dir
|
||||
f = File.open(be.file_path, "wb")
|
||||
begin
|
||||
f.write(blob)
|
||||
f.fsync
|
||||
|
||||
unless File.exist?(be.file_path)
|
||||
raise("error ensuring blob exists for #{be.id}")
|
||||
end
|
||||
ensure
|
||||
f.close
|
||||
end
|
||||
end
|
||||
|
||||
be = nil
|
||||
::Legacy::BlobEntry.transaction do
|
||||
be = ::Legacy::BlobEntry.find_by(sha256: sha256)
|
||||
if be && !be.ensure_file_path
|
||||
# correct directory depth as well
|
||||
Legacy::SConfig.logger.warn(
|
||||
"file doesn't exist for #{be.id}, writing again..."
|
||||
)
|
||||
write_out.call(be, blob)
|
||||
elsif !be
|
||||
new_be = ::Legacy::BlobEntry.new(opts.merge(sha256: sha256))
|
||||
write_out.call(new_be, blob)
|
||||
new_be.save!
|
||||
be = new_be
|
||||
end
|
||||
end
|
||||
be
|
||||
end
|
||||
end
|
||||
@@ -1,142 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: e621_posts
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# e621_id :integer not null
|
||||
# md5 :string not null
|
||||
# sources :string
|
||||
# file_url :string not null
|
||||
# file_ext :string not null
|
||||
# description :string
|
||||
# rating :integer
|
||||
# width :integer
|
||||
# height :integer not null
|
||||
# tags_string :string not null
|
||||
# status :integer
|
||||
# score :integer
|
||||
# removed :boolean
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# artists :string
|
||||
# e621_count :integer
|
||||
# author :string
|
||||
# e621_status :string
|
||||
# blob_entry_id :integer
|
||||
# imgsearch_entry_id :integer
|
||||
#
|
||||
|
||||
class Legacy::E621::Post < LegacyApplicationRecord
|
||||
self.table_name = "e621_posts"
|
||||
|
||||
validates_presence_of :e621_id,
|
||||
:md5,
|
||||
:author,
|
||||
:file_url,
|
||||
:file_ext,
|
||||
:rating,
|
||||
:tags_string,
|
||||
:status,
|
||||
:score
|
||||
validates_uniqueness_of :md5, :e621_id
|
||||
|
||||
serialize :sources, coder: JSON
|
||||
serialize :artists, coder: JSON
|
||||
|
||||
belongs_to :blob_entry, class_name: "Legacy::BlobEntry"
|
||||
|
||||
# just inserted into db: :not_processed
|
||||
# we've checked for the existance of its file on the
|
||||
# disk and it isn't there: :should_download
|
||||
# we've made an attempt to download its file: :processed
|
||||
enum :status,
|
||||
%i[not_processed should_download processed processed_404 processed_err]
|
||||
validates_inclusion_of :status, in: statuses.keys
|
||||
|
||||
has_many :taggings, class_name: "Legacy::E621::Tagging"
|
||||
has_many :tags, through: :taggings
|
||||
|
||||
enum :rating, %i[s q e]
|
||||
validates_inclusion_of :rating, in: ratings.keys
|
||||
|
||||
def file_relative_path
|
||||
base = File.basename(file_url)
|
||||
"#{base[0]}/#{base[1]}/#{base}"
|
||||
end
|
||||
|
||||
before_validation { self.file_ext ||= File.extname(file_path)[1..-1] }
|
||||
|
||||
before_destroy { blob_entry.dec_refcount }
|
||||
|
||||
def file_path
|
||||
File.join SConfig.e621_static_dir, file_relative_path
|
||||
end
|
||||
|
||||
def resized_file_path(style)
|
||||
raise("no md5") unless md5
|
||||
|
||||
hashed_path =
|
||||
Legacy::BlobEntry.file_path_at_depth(
|
||||
sha256: md5,
|
||||
depth: 4,
|
||||
stride: 2,
|
||||
hash_length: 32
|
||||
)
|
||||
File.join SConfig.e621_data_dir,
|
||||
"resized",
|
||||
style.to_s,
|
||||
(hashed_path + "." + file_ext)
|
||||
end
|
||||
|
||||
FASource = Struct.new(:type, :id, :url)
|
||||
|
||||
def fa_sources
|
||||
self
|
||||
.sources
|
||||
.flatten
|
||||
.map do |source|
|
||||
if matches = %r{furaffinity.net/view/(\d+)}.match(source)
|
||||
fa_id = matches[1]
|
||||
FASource.new(:post, fa_id.to_i, source)
|
||||
elsif matches = %r{furaffinity.net/(gallery|user)/([^/]+)}.match(source)
|
||||
url_name = FA::User.name_to_url_name(matches[2])
|
||||
FASource.new(:user, url_name, source)
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
.reject(&:nil?)
|
||||
end
|
||||
|
||||
def update_taggings(assume_total_overwrite: false)
|
||||
tags_string_split = tags_string.split(/\s+/).map(&:strip).reject(&:blank?)
|
||||
tags_arr =
|
||||
Legacy::E621::Tag.where(value: tags_string_split).select(:id, :value).to_a
|
||||
|
||||
missing = Set.new(tags_string_split) - Set.new(tags_arr.map(&:value))
|
||||
|
||||
missing.each do |missing_val|
|
||||
tags_arr << Legacy::E621::Tag.find_or_create_by(value: missing_val)
|
||||
end
|
||||
|
||||
# SConfig.logger.info "had to create tags: #{missing.to_a.join(", ")}" if missing.any?
|
||||
if assume_total_overwrite
|
||||
self.tags = tags_arr
|
||||
else
|
||||
should_be = Set.new(tags_arr)
|
||||
but_is = Set.new(tags)
|
||||
|
||||
removed = but_is - should_be
|
||||
added = should_be - but_is
|
||||
|
||||
tags.delete(removed.to_a)
|
||||
tags << added.to_a
|
||||
end
|
||||
|
||||
if Set.new(tags.map(&:value)) != Set.new(tags_string_split)
|
||||
puts "tagging mismatch on #{id} (#{e621_id})"
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,20 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: e621_tags
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# value :string not null
|
||||
# e621_id :integer
|
||||
# type :integer
|
||||
# e621_count :integer
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
|
||||
class Legacy::E621::Tag < LegacyApplicationRecord
|
||||
self.table_name = "e621_tags"
|
||||
self.inheritance_column = nil
|
||||
validates_presence_of :value
|
||||
end
|
||||
@@ -1,22 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: e621_taggings
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# tag_id :integer
|
||||
# post_id :integer
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
|
||||
class Legacy::E621::Tagging < LegacyApplicationRecord
|
||||
self.table_name = "e621_taggings"
|
||||
|
||||
belongs_to :post, class_name: "Legacy::E621::Post"
|
||||
belongs_to :tag, class_name: "Legacy::E621::Tag"
|
||||
|
||||
validates_presence_of :post, :tag
|
||||
validates_uniqueness_of :tag_id, scope: :post_id
|
||||
end
|
||||
@@ -1,7 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Legacy::Fa
|
||||
def self.table_name_prefix
|
||||
"fa_"
|
||||
end
|
||||
end
|
||||
@@ -1,131 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: fa_posts
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# fa_id :integer not null
|
||||
# creator_name :string
|
||||
# creator_id :integer
|
||||
# title :string
|
||||
# category :string
|
||||
# theme :string
|
||||
# species :string
|
||||
# gender :string
|
||||
# keywords :string
|
||||
# file_url :string
|
||||
# blob_entry_id :integer
|
||||
# old_files :string
|
||||
# num_favorites :integer
|
||||
# num_comments :integer
|
||||
# num_views :integer
|
||||
# posted_at :datetime
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# in_gallery :boolean
|
||||
# in_folders :string
|
||||
# state :integer default(0)
|
||||
# state_error :string
|
||||
# folder_name :string
|
||||
# gallery_page_logfile :string
|
||||
# submission_page_logfile :string
|
||||
# submission_page_log_entry_id :integer
|
||||
# description_id :integer
|
||||
# imgsearch_entry_id :integer
|
||||
#
|
||||
|
||||
class Legacy::Fa::Post < LegacyApplicationRecord
|
||||
self.table_name = "fa_posts"
|
||||
|
||||
validates_presence_of :fa_id
|
||||
|
||||
# array of [{be: blob_entry_id, file_url: old_file_url}]
|
||||
serialize :old_files, coder: JSON
|
||||
serialize :keywords, coder: JSON
|
||||
serialize :in_folders, coder: JSON
|
||||
|
||||
enum :state,
|
||||
[
|
||||
:seen_listing, # have seen a reference to this post on a listing page
|
||||
:scanned_submission, # have scanned the actual submission page
|
||||
:scan_error, # error scanning the submission page
|
||||
:have_static, # have the static asset associated with the page
|
||||
:static_error
|
||||
] # error getting the static asset
|
||||
validates_inclusion_of :state, in: Legacy::Fa::Post.states.keys
|
||||
|
||||
# serialize :state_error
|
||||
validates_presence_of :state_error, if: -> { scan_error? || static_error? }
|
||||
|
||||
belongs_to :creator, class_name: "::Legacy::Fa::User"
|
||||
belongs_to :blob_entry, class_name: "::Legacy::BlobEntry"
|
||||
|
||||
belongs_to :submission_page_log_entry, class_name: "::Legacy::HttpLogEntry"
|
||||
|
||||
belongs_to :description_ref,
|
||||
nil,
|
||||
class_name: "::Legacy::Fa::PostDescription",
|
||||
foreign_key: :description_id,
|
||||
inverse_of: :fa_post
|
||||
|
||||
before_destroy do
|
||||
blob_entry.dec_refcount
|
||||
true
|
||||
end
|
||||
|
||||
def description
|
||||
description_ref.try(:value)
|
||||
end
|
||||
|
||||
def self.file_name_filter(part)
|
||||
part.gsub(/[^\w\.\_\-\#\@\(\)\^\[\]\$\{\}\<\>\!\ ]/, "_")
|
||||
end
|
||||
|
||||
def relative_file_path
|
||||
raise("no file_url") unless file_url
|
||||
|
||||
name = ensure_creator_name_filename
|
||||
file = self.class.file_name_filter(File.basename(file_url))
|
||||
File.join(name, file).encode(
|
||||
Encoding.find("UTF-8"),
|
||||
invalid: :replace,
|
||||
undef: :replace,
|
||||
replace: ""
|
||||
)
|
||||
end
|
||||
|
||||
def file_path
|
||||
File.join SConfig.fa_post_static_dir, relative_file_path
|
||||
end
|
||||
|
||||
def file_ext
|
||||
raise("no file_url") unless file_url
|
||||
|
||||
File.extname(self.class.file_name_filter(file_url))
|
||||
end
|
||||
|
||||
def resized_file_path(style)
|
||||
raise("no fa_id") unless fa_id
|
||||
|
||||
hashed_path =
|
||||
Legacy::BlobEntry.file_path_at_depth(
|
||||
sha256: Digest::SHA256.hexdigest(fa_id.to_s),
|
||||
depth: 4,
|
||||
stride: 2
|
||||
)
|
||||
File.join Legacy::SConfig.fa_data_dir,
|
||||
"static",
|
||||
"resized",
|
||||
style.to_s,
|
||||
(hashed_path + file_ext)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def ensure_creator_name_filename
|
||||
raise("no creator name") unless creator_name
|
||||
|
||||
self.class.file_name_filter(Legacy::Fa::User.name_to_url_name(creator_name))
|
||||
end
|
||||
end
|
||||
@@ -1,17 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: fa_post_descriptions
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# value :text not null
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
#
|
||||
|
||||
class Legacy::Fa::PostDescription < LegacyApplicationRecord
|
||||
self.table_name = "fa_post_descriptions"
|
||||
|
||||
has_one :fa_post, class_name: "Legacy::Fa::Post", foreign_key: :description_id
|
||||
end
|
||||
@@ -1,86 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: fa_users
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# name :string not null
|
||||
# full_name :string
|
||||
# artist_type :string
|
||||
# mood :string
|
||||
# profile_html :text
|
||||
# num_pageviews :integer
|
||||
# num_submissions :integer
|
||||
# num_comments_recieved :integer
|
||||
# num_comments_given :integer
|
||||
# num_journals :integer
|
||||
# num_favorites :integer
|
||||
# registered_at :datetime
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# url_name :string not null
|
||||
# scanned_gallery :datetime
|
||||
# scanned_page :datetime
|
||||
# user_page_logfile :string
|
||||
# user_page_log_entry_id :integer
|
||||
#
|
||||
|
||||
class Legacy::Fa::User < LegacyApplicationRecord
|
||||
self.table_name = "fa_users"
|
||||
|
||||
validates :name, uniqueness: true, presence: true
|
||||
|
||||
validates :url_name, uniqueness: true, presence: true
|
||||
|
||||
has_many :posts,
|
||||
class_name: "Legacy::Fa::Post",
|
||||
foreign_key: :creator_name,
|
||||
primary_key: :name
|
||||
|
||||
# if present, this user is being watched
|
||||
has_one :watched_user, foreign_key: :user_id, inverse_of: :user
|
||||
|
||||
belongs_to :user_page_log_entry, class_name: "Legacy::Cache::HttpLogEntry"
|
||||
|
||||
before_validation do
|
||||
self.name ||= url_name
|
||||
self.url_name ||= self.class.name_to_url_name(name)
|
||||
end
|
||||
|
||||
def up_to_date_gallery!
|
||||
now = Time.now
|
||||
|
||||
self.scanned_gallery = now
|
||||
watched_user.scanned_user_gallery = now if watched_user?
|
||||
|
||||
Legacy::Fa::User.transaction do
|
||||
watched_user.save! if watched_user?
|
||||
save!
|
||||
end
|
||||
end
|
||||
|
||||
def up_to_date_gallery?
|
||||
if watched_user? && !!scanned_gallery
|
||||
watched_user.scanned_user_gallery == scanned_gallery
|
||||
else
|
||||
!!scanned_gallery
|
||||
end
|
||||
end
|
||||
|
||||
def up_to_date_page?
|
||||
if watched_user? && !!scanned_page
|
||||
watched_user.scanned_user_page == scanned_page
|
||||
else
|
||||
!!scanned_page
|
||||
end
|
||||
end
|
||||
|
||||
def watched_user?
|
||||
!!watched_user
|
||||
end
|
||||
|
||||
def self.name_to_url_name(name)
|
||||
name.delete("_").downcase
|
||||
end
|
||||
end
|
||||
@@ -1,472 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# == Schema Information
|
||||
#
|
||||
# Table name: cache_http_log_entries
|
||||
#
|
||||
# id :integer not null, primary key
|
||||
# scheme :string
|
||||
# host :string
|
||||
# path :string
|
||||
# query :string
|
||||
# verb :integer
|
||||
# status :integer
|
||||
# response_time :integer
|
||||
# content_type :string
|
||||
# response_size :integer
|
||||
# parent_log_entry_id :integer
|
||||
# blob_entry_id :integer
|
||||
# gzipped :boolean
|
||||
# requested_at :datetime
|
||||
# created_at :datetime not null
|
||||
# updated_at :datetime not null
|
||||
# resp_body :binary
|
||||
# imported_from_file :string
|
||||
# req_headers_id :integer
|
||||
# res_headers_id :integer
|
||||
# diff_type :integer default(0)
|
||||
#
|
||||
|
||||
require "zlib"
|
||||
require "stringio"
|
||||
|
||||
module Diffy
|
||||
class Diff
|
||||
def tempfile(string)
|
||||
t = Tempfile.new("diffy")
|
||||
# ensure tempfiles aren't unlinked when GC runs by maintaining a
|
||||
# reference to them.
|
||||
@tempfiles ||= []
|
||||
@tempfiles.push(t)
|
||||
t.binmode
|
||||
t.print(string)
|
||||
t.flush
|
||||
t.close
|
||||
t.path
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
class Legacy::HttpLogEntry < LegacyApplicationRecord
|
||||
self.table_name = "cache_http_log_entries"
|
||||
|
||||
# threshold or less: store in database directly
|
||||
KEEP_INTERNALLY_THRESHOLD = 1024 * 64
|
||||
|
||||
belongs_to :parent_log_entry, class_name: "Legacy::HttpLogEntry"
|
||||
belongs_to :blob_entry, class_name: "Legacy::BlobEntry"
|
||||
|
||||
validates_presence_of(
|
||||
:scheme,
|
||||
:host,
|
||||
:path,
|
||||
:status,
|
||||
:response_time,
|
||||
:content_type,
|
||||
:response_size
|
||||
)
|
||||
|
||||
enum :verb, %i[get post]
|
||||
validates_inclusion_of :verb, in: Legacy::HttpLogEntry.verbs.keys
|
||||
|
||||
# text: use Diffy diffing
|
||||
# binary: use BSDiff
|
||||
# native: use the native LogStore server to store the entry
|
||||
enum :diff_type, %i[text binary native]
|
||||
validates_inclusion_of :diff_type, in: Legacy::HttpLogEntry.diff_types.keys
|
||||
after_initialize { self.diff_type = "native" if new_record? }
|
||||
|
||||
# out of line req/response headers
|
||||
belongs_to :req_headers_ref,
|
||||
foreign_key: :req_headers_id,
|
||||
class_name: "Legacy::HttpLogEntryHeader"
|
||||
belongs_to :res_headers_ref,
|
||||
foreign_key: :res_headers_id,
|
||||
class_name: "Legacy::HttpLogEntryHeader"
|
||||
|
||||
belongs_to :native_blob_entry,
|
||||
foreign_key: :native_blob_entry_sha256,
|
||||
primary_key: :key,
|
||||
class_name: "::LogStoreSstEntry"
|
||||
|
||||
attr_accessor :can_force_update
|
||||
before_update do
|
||||
if can_force_update
|
||||
true
|
||||
else
|
||||
raise("HttpLogEntry is immutable!")
|
||||
false
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def set_header_impl(method, headers)
|
||||
headers.delete("date")
|
||||
headers.delete("expires")
|
||||
headers.delete("cf-ray")
|
||||
|
||||
send(
|
||||
"#{method}=",
|
||||
Legacy::HttpLogEntryHeader.find_or_create(headers: headers)
|
||||
)
|
||||
end
|
||||
|
||||
public
|
||||
|
||||
def req_headers=(headers)
|
||||
set_header_impl(:req_headers_ref, headers)
|
||||
end
|
||||
|
||||
def res_headers=(headers)
|
||||
set_header_impl(:res_headers_ref, headers)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def get_header_impl(method)
|
||||
ref = send("#{method}_ref")
|
||||
if ref
|
||||
ref.headers
|
||||
else
|
||||
self.class.superclass.instance_method(method).bind(self).call
|
||||
end
|
||||
end
|
||||
|
||||
public
|
||||
|
||||
def req_headers
|
||||
ref = req_headers_ref
|
||||
ref ? ref.headers : {}
|
||||
end
|
||||
|
||||
def res_headers
|
||||
ref = res_headers_ref
|
||||
ref ? ref.headers : {}
|
||||
end
|
||||
|
||||
before_create { self.requested_at ||= DateTime.now }
|
||||
|
||||
def body_stored?
|
||||
!!(blob_entry_id || parent_log_entry_id || resp_body)
|
||||
end
|
||||
|
||||
def response_body=(body_string)
|
||||
if diff_type == "native"
|
||||
set_response_body_native(body_string, {})
|
||||
else
|
||||
set_response_body(body_string)
|
||||
end
|
||||
end
|
||||
|
||||
def full_path
|
||||
"#{scheme}://#{host}#{path}#{query ? "?#{query}" : ""}"
|
||||
end
|
||||
|
||||
def set_response_body_native(body_string, opts = {})
|
||||
raise("legacy can't write")
|
||||
|
||||
# try and find a good HttpLogEntry to diff this against
|
||||
candidate_keys =
|
||||
if !opts[:skip_find_candidates]
|
||||
Legacy::HttpLogEntry
|
||||
.where(
|
||||
host: host,
|
||||
path: path,
|
||||
diff_type: self.class.diff_types["native"]
|
||||
)
|
||||
.limit(5)
|
||||
.to_a
|
||||
.map(&:resp_body)
|
||||
.reject(&:nil?)
|
||||
.reject(&:empty?)
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
SConfig.with_log_store_client do |lsc|
|
||||
ret =
|
||||
lsc.put_binary(
|
||||
LogStore::PutBinaryArgs.new(
|
||||
hint_hashes: candidate_keys,
|
||||
contents: body_string
|
||||
)
|
||||
)
|
||||
self.resp_body = ret.key
|
||||
end
|
||||
|
||||
body_string
|
||||
end
|
||||
|
||||
# string ->
|
||||
def set_response_body(body_string, opts = {})
|
||||
return set_response_body_native(body_string, opts) if diff_type == "native"
|
||||
|
||||
# try and find a good HttpLogEntry to diff this against
|
||||
candidate_entries =
|
||||
Legacy::HttpLogEntry
|
||||
.where(host: host, path: path, parent_log_entry_id: nil)
|
||||
.limit(3)
|
||||
.to_a
|
||||
|
||||
# add or remove trailing slash to each of the paths
|
||||
hint_paths = opts[:similar_content_path_hints] || []
|
||||
hint_paths +=
|
||||
hint_paths.map do |p|
|
||||
if p == "/"
|
||||
p
|
||||
elsif p[-1] == "/"
|
||||
p[0..-2]
|
||||
else
|
||||
p + "/"
|
||||
end
|
||||
end
|
||||
|
||||
body_string = body_string.force_encoding("UTF-8")
|
||||
|
||||
good_ce = nil
|
||||
use_string = body_string
|
||||
gzipped = false
|
||||
|
||||
if body_string.valid_encoding?
|
||||
if hint_paths.any?
|
||||
candidate_entries +=
|
||||
Legacy::HttpLogEntry
|
||||
.where(host: host, path: hint_paths, parent_log_entry_id: nil)
|
||||
.limit(50)
|
||||
.to_a
|
||||
end
|
||||
|
||||
SConfig.logger.info(
|
||||
"Comparing against #{candidate_entries.length} " \
|
||||
"candidates: #{candidate_entries.map(&:path).join(", ")}"
|
||||
)
|
||||
candidate_entries.each do |ce|
|
||||
SConfig.logger.info "Comparing diff against HLE (#{ce.id}: #{ce.path})"
|
||||
ce_body = ce.response_body
|
||||
if !ce_body || (!ce_body.valid_encoding? && diff_type == "text")
|
||||
SConfig.logger.info "HLE #{ce.id} has invalid encoded response body"
|
||||
next
|
||||
end
|
||||
|
||||
ce_diff = self.class.get_diff(ce_body, body_string, diff_type)
|
||||
if (diff_type == "text") &&
|
||||
(/^Binary files .+ and .+ differ/ =~ ce_diff)
|
||||
SConfig.logger.warn(
|
||||
"diff detected HLE #{ce.id} was a binary, skipping..."
|
||||
)
|
||||
next
|
||||
end
|
||||
|
||||
# verify we can reconstruct the original body string
|
||||
if self.class.apply_patch(ce_body, ce_diff, diff_type) != body_string
|
||||
SConfig.logger.error(
|
||||
"couldn't succesfully apply patch to get orig..."
|
||||
)
|
||||
next
|
||||
end
|
||||
|
||||
gzipped_diff = self.class.gzip(ce_diff)
|
||||
|
||||
ce_use_string = nil
|
||||
ce_gzipped = nil
|
||||
if gzipped_diff.length < ce_diff.length
|
||||
ce_gzipped = true
|
||||
ce_use_string = gzipped_diff
|
||||
else
|
||||
ce_gzipped = false
|
||||
ce_use_string = ce_diff
|
||||
end
|
||||
|
||||
# haven't found a smaller use_string
|
||||
if use_string.length < ce_use_string.length
|
||||
SConfig.logger.info(
|
||||
"Previous config was still smaller (#{use_string.length} vs" \
|
||||
" #{ce_use_string.length} bytes)"
|
||||
)
|
||||
next
|
||||
else
|
||||
SConfig.logger.info(
|
||||
"HLE (#{ce.id}) is good candidate: #{ce_use_string.length} bytes " \
|
||||
"(gz: #{ce_gzipped})"
|
||||
)
|
||||
end
|
||||
|
||||
good_ce = ce
|
||||
gzipped = ce_gzipped
|
||||
use_string = ce_use_string
|
||||
end
|
||||
else
|
||||
SConfig.logger.error("Invalid encoding detected, not storing diff")
|
||||
end
|
||||
|
||||
self.parent_log_entry = good_ce # or nil, if none found
|
||||
self.gzipped = gzipped
|
||||
|
||||
if use_string.length < self.class::KEEP_INTERNALLY_THRESHOLD
|
||||
self.resp_body = use_string
|
||||
SConfig.logger.info "Storing data interally"
|
||||
else
|
||||
self.blob_entry =
|
||||
Legacy::BlobEntry.create_from_blob(
|
||||
blob: use_string,
|
||||
opts: {
|
||||
dir_depth: 4
|
||||
}
|
||||
)
|
||||
blob_entry.inc_refcount
|
||||
SConfig.logger.info "Storing data in blob entry #{blob_entry.id}..."
|
||||
end
|
||||
|
||||
if response_body != body_string
|
||||
raise("internal error, response_body != body_string")
|
||||
end
|
||||
|
||||
stored_bytes = use_string.length
|
||||
total_bytes = body_string.length
|
||||
SConfig.logger.info(
|
||||
"Stored #{stored_bytes}/#{total_bytes} bytes" \
|
||||
" (#{(stored_bytes.to_f / total_bytes.to_f * 100.0).round(1)}\% of original)"
|
||||
)
|
||||
|
||||
response_body
|
||||
rescue StandardError
|
||||
blob_entry && blob_entry.dec_refcount
|
||||
raise
|
||||
end
|
||||
|
||||
class NoBEPathException < RuntimeError
|
||||
end
|
||||
|
||||
# -> string
|
||||
def response_body
|
||||
@response_body ||=
|
||||
begin
|
||||
return response_body_native if diff_type == "native"
|
||||
|
||||
our_string =
|
||||
if blob_entry
|
||||
path = blob_entry.ensure_file_path
|
||||
unless path
|
||||
raise NoBEPathException,
|
||||
"no path for blob entry " \
|
||||
"#{blob_entry_id} (HLE id: #{id}) (#{blob_entry.file_path})"
|
||||
end
|
||||
File.read(path)
|
||||
else
|
||||
resp_body
|
||||
end
|
||||
|
||||
our_string = self.class.gunzip(our_string) if gzipped
|
||||
|
||||
return nil if our_string.nil?
|
||||
|
||||
# our_string = our_string.force_encoding("UTF-8")
|
||||
|
||||
if parent_log_entry
|
||||
self.class.apply_patch(
|
||||
parent_log_entry.response_body,
|
||||
our_string,
|
||||
diff_type
|
||||
)
|
||||
else
|
||||
our_string
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def response_body_native
|
||||
raise unless diff_type == "native"
|
||||
return "" unless resp_body
|
||||
|
||||
# new:
|
||||
@response_body_native ||=
|
||||
self.native_blob_entry&.patched_value ||
|
||||
::FlatSstEntry.find_by_hex_key(resp_body)&.contents ||
|
||||
::LogStoreSstEntry.find_by_hex_key(resp_body)&.patched_value
|
||||
end
|
||||
|
||||
def self.encode_str(str)
|
||||
str.encode(
|
||||
Encoding.find("UTF-8"),
|
||||
invalid: :replace,
|
||||
undef: :replace,
|
||||
replace: ""
|
||||
)
|
||||
end
|
||||
|
||||
def self.gunzip(data)
|
||||
io = StringIO.new(data, "rb")
|
||||
Zlib::GzipReader.new(io).read
|
||||
end
|
||||
|
||||
def self.gzip(string)
|
||||
wio = StringIO.new(+"", "w")
|
||||
w_gz = Zlib::GzipWriter.new(wio)
|
||||
w_gz.write(string)
|
||||
w_gz.close
|
||||
wio.string
|
||||
end
|
||||
|
||||
def self.get_diff(old_bytes, new_bytes, diff_type)
|
||||
if diff_type == "text"
|
||||
return Diffy::Diff.new(old_bytes, new_bytes, diff: "-e").to_s
|
||||
end
|
||||
|
||||
raise("unknown diff type '#{diff_type}'") if diff_type != "binary"
|
||||
|
||||
tf_old = Tempfile.new("old-file")
|
||||
tf_new = Tempfile.new("new-file")
|
||||
tf_out = Tempfile.new("patch")
|
||||
files = [tf_old, tf_new, tf_out]
|
||||
begin
|
||||
files.each(&:binmode)
|
||||
tf_old.write(old_bytes)
|
||||
tf_new.write(new_bytes)
|
||||
files.each(&:close)
|
||||
if BSDiff.diff(tf_old.path, tf_new.path, tf_out.path)
|
||||
tf_out.open
|
||||
bytes = tf_out.read
|
||||
tf_out.close
|
||||
return bytes
|
||||
else
|
||||
return nil
|
||||
end
|
||||
ensure
|
||||
files.each(&:unlink)
|
||||
end
|
||||
end
|
||||
|
||||
def self.apply_patch(old_text, patch, diff_type)
|
||||
tf_orig = Tempfile.new("apply-patch", encoding: "ascii-8bit")
|
||||
tf_patch = Tempfile.new("apply-patch", encoding: "ascii-8bit")
|
||||
tf_out = Tempfile.new("applied-patch", encoding: "ascii-8bit")
|
||||
files = [tf_orig, tf_patch, tf_out]
|
||||
|
||||
begin
|
||||
tf_out.close
|
||||
tf_orig.write(old_text)
|
||||
tf_patch.write(patch)
|
||||
tf_orig.close
|
||||
tf_patch.close
|
||||
if diff_type == "text"
|
||||
`patch -e #{tf_orig.path} #{tf_patch.path} -o #{tf_out.path}`
|
||||
tf_out.open
|
||||
ret = tf_out.read
|
||||
tf_out.close
|
||||
ret
|
||||
elsif diff_type == "binary"
|
||||
if BSDiff.patch(tf_orig.path, tf_out.path, tf_patch.path)
|
||||
tf_out.open
|
||||
ret = tf_out.read
|
||||
tf_out.close
|
||||
ret
|
||||
end
|
||||
else
|
||||
raise("invalid diff type #{diff_type}")
|
||||
end
|
||||
ensure
|
||||
files.each(&:unlink)
|
||||
end
|
||||
end
|
||||
end
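The class above stores a response body either verbatim or as an ed-script diff against a similar parent entry, gzipping the diff when that is smaller, and rebuilds the original by shelling out to patch(1). A minimal standalone sketch of that roundtrip, assuming only the Diffy gem and the system patch binary already used by get_diff and apply_patch; the sample strings are illustrative.

require "diffy"
require "zlib"
require "tempfile"

parent = "<html><body>old listing</body></html>\n"
body   = "<html><body>new listing</body></html>\n"

# ed-script diff of the new body against the parent (same flags as get_diff)
ed_diff = Diffy::Diff.new(parent, body, diff: "-e").to_s

# keep the gzipped diff only when it is actually smaller, as set_response_body does
gz = Zlib.gzip(ed_diff)
stored, gzipped = gz.bytesize < ed_diff.bytesize ? [gz, true] : [ed_diff, false]

# reading it back: gunzip if needed, then apply the ed script with patch(1),
# mirroring response_body and apply_patch
patch_text = gzipped ? Zlib.gunzip(stored) : stored
tf_orig  = Tempfile.new("orig")
tf_patch = Tempfile.new("patch")
tf_out   = Tempfile.new("out")
tf_orig.write(parent)
tf_patch.write(patch_text)
[tf_orig, tf_patch, tf_out].each(&:close)
`patch -e #{tf_orig.path} #{tf_patch.path} -o #{tf_out.path}`
puts File.read(tf_out.path) == body # => true
[tf_orig, tf_patch, tf_out].each(&:unlink)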
|
||||
@@ -1,25 +0,0 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: cache_http_log_entry_headers
#
#  id         :integer  not null, primary key
#  headers    :hstore   not null
#  sha256     :binary   not null
#  created_at :datetime
#  updated_at :datetime
#

class Legacy::HttpLogEntryHeader < LegacyApplicationRecord
  self.table_name = "cache_http_log_entry_headers"

  def self.find_or_create(headers:)
    temp = Legacy::HttpLogEntryHeader.new(headers: headers)
    sha256 = Digest::SHA256.digest(temp.headers.to_s)

    Legacy::HttpLogEntryHeader.find_or_create_by!(sha256: sha256) do |c|
      c.headers = temp.headers
    end
  end
end
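Rows are deduplicated by the SHA-256 of the serialized hstore, so identical header sets resolve to a single record. A small usage sketch, assuming the table above exists; the header values are illustrative.

a = Legacy::HttpLogEntryHeader.find_or_create(headers: { "content-type" => "text/html" })
b = Legacy::HttpLogEntryHeader.find_or_create(headers: { "content-type" => "text/html" })
a.id == b.id # => true: identical header sets share one row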
|
||||
@@ -1,105 +0,0 @@
|
||||
class LegacyApplicationRecord < ActiveRecord::Base
|
||||
# self.primary_abstract_class = true
|
||||
self.abstract_class = true
|
||||
connects_to database: { writing: :legacy, reading: :legacy }
|
||||
end
|
||||
|
||||
module Legacy
|
||||
end
|
||||
|
||||
class Legacy::SConfig
|
||||
def self.data_dir
|
||||
if Rails.env.test?
|
||||
Rails.root.join "test/fixtures/legacy/s_config"
|
||||
else
|
||||
"/home/scraper/scraper_data_original/scraper_data"
|
||||
end
|
||||
end
|
||||
|
||||
def self.blob_data_dir
|
||||
File.join data_dir, "blobs"
|
||||
end
|
||||
|
||||
def self.e621_data_dir
|
||||
File.join data_dir, "e621"
|
||||
end
|
||||
|
||||
def self.fa_data_dir
|
||||
File.join data_dir, "fa"
|
||||
end
|
||||
|
||||
def self.ib_data_dir
|
||||
File.join data_dir, "ib"
|
||||
end
|
||||
|
||||
def self.blob_static_dir
|
||||
File.join blob_data_dir, "static"
|
||||
end
|
||||
|
||||
def self.e621_static_dir
|
||||
File.join e621_data_dir, "static"
|
||||
end
|
||||
|
||||
def self.fa_post_static_dir
|
||||
File.join fa_data_dir, "static/posts"
|
||||
end
|
||||
|
||||
def self.fa_icons_static_dir
|
||||
File.join fa_data_dir, "static/icons"
|
||||
end
|
||||
|
||||
def self.ib_post_static_dir
|
||||
File.join ib_data_dir, "static/posts"
|
||||
end
|
||||
|
||||
def self.e621_json_dir
|
||||
File.join e621_data_dir, "json"
|
||||
end
|
||||
|
||||
def self.fa_html_dir
|
||||
File.join fa_data_dir, "html"
|
||||
end
|
||||
|
||||
def self.fa_cookie_jar_dir
|
||||
File.join fa_data_dir, "cookies"
|
||||
end
|
||||
|
||||
def self.ib_logs_dir
|
||||
File.join ib_data_dir, "logs"
|
||||
end
|
||||
|
||||
def self.ib_cookie_jar_dir
|
||||
File.join ib_data_dir, "cookies"
|
||||
end
|
||||
|
||||
def self.http_logger_data_dir
|
||||
File.join data_dir, "http_logger"
|
||||
end
|
||||
|
||||
def self.logger
|
||||
@@logger ||=
|
||||
begin
|
||||
l = Logger.new(STDOUT)
|
||||
l.level = Logger::INFO
|
||||
l.datetime_format = "%Y-%m-%d %H:%M:%S"
|
||||
l.formatter =
|
||||
proc do |sev, datetime, _prog, msg|
|
||||
color =
|
||||
case sev
|
||||
when "INFO"
|
||||
:blue
|
||||
when "ERROR"
|
||||
:red
|
||||
when "DEBUG"
|
||||
:yellow
|
||||
else
|
||||
:white
|
||||
end
|
||||
|
||||
date_format = datetime.strftime("%Y-%m-%d %H:%M:%S")
|
||||
"[#{date_format}] #{sev.ljust(5).send(color)}: #{msg}\n"
|
||||
end
|
||||
l
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,5 +1,4 @@
|
||||
class ReduxApplicationRecord < ActiveRecord::Base
|
||||
self.abstract_class = true
|
||||
connects_to database: { writing: :redux, reading: :redux }
|
||||
logger.level = Logger::ERROR
|
||||
end
|
||||
|
||||
@@ -16,7 +16,6 @@ redux_prod: &redux_prod
|
||||
database: redux_prod
|
||||
username: scraper_redux
|
||||
password: pdkFLqRmQwPUPaDDC4pX
|
||||
migrations_paths: db/redux_migrate
|
||||
pool: 4
|
||||
|
||||
redux_staging: &redux_staging
|
||||
@@ -34,26 +33,8 @@ redux_dev: &redux_dev
|
||||
database: postgres
|
||||
username: postgres
|
||||
password: postgres
|
||||
migrations_paths: db/redux_migrate
|
||||
pool: 4
|
||||
|
||||
legacy_prod: &legacy_prod
|
||||
adapter: postgresql
|
||||
host: 10.166.33.171
|
||||
port: 5432
|
||||
database: legacy_prod
|
||||
username: scraper_redux
|
||||
password: pdkFLqRmQwPUPaDDC4pX
|
||||
migrations_paths: db/legacy_migrate
|
||||
database_tasks: false
|
||||
pool: 2
|
||||
|
||||
legacy_staging: &legacy_staging
|
||||
<<: *legacy_prod
|
||||
host: postgres
|
||||
username: scraper_redux_staging
|
||||
password: q6Jf8mXEUkAxdyHq1tUtCTPa1raX1QAT
|
||||
|
||||
local_redux_test: &local_redux_test
|
||||
adapter: postgresql
|
||||
host: db
|
||||
@@ -63,53 +44,23 @@ local_redux_test: &local_redux_test
|
||||
# password: pdkFLqRmQwPUPaDDC4pX
|
||||
username: postgres
|
||||
password: postgres
|
||||
migrations_paths: db/redux_migrate
|
||||
pool: 4
|
||||
|
||||
local_legacy_test: &local_legacy_test
|
||||
adapter: postgresql
|
||||
host: db
|
||||
port: 5432
|
||||
database: legacy_test
|
||||
# username: scraper_redux
|
||||
# password: pdkFLqRmQwPUPaDDC4pX
|
||||
username: postgres
|
||||
password: postgres
|
||||
migrations_paths: db/legacy_migrate
|
||||
pool: 4
|
||||
|
||||
development:
|
||||
redux:
|
||||
<<: *redux_dev
|
||||
# at the moment, no "real" legacy database. just fixtures.
|
||||
# legacy:
|
||||
# <<: *legacy_prod
|
||||
<<: *redux_dev
|
||||
|
||||
# Warning: The database defined as "test" will be erased and
|
||||
# re-generated from your development database when you run "rake".
|
||||
# Do not set this db to the same as development or production.
|
||||
test:
|
||||
redux:
|
||||
<<: *local_redux_test
|
||||
legacy:
|
||||
<<: *local_legacy_test
|
||||
<<: *local_redux_test
|
||||
|
||||
production:
|
||||
redux:
|
||||
<<: *redux_prod
|
||||
legacy:
|
||||
<<: *legacy_prod
|
||||
<<: *redux_prod
|
||||
|
||||
staging:
|
||||
redux:
|
||||
<<: *redux_staging
|
||||
legacy:
|
||||
<<: *legacy_staging
|
||||
<<: *redux_staging
|
||||
|
||||
worker:
|
||||
redux:
|
||||
<<: *redux_prod
|
||||
pool: 16
|
||||
legacy:
|
||||
<<: *legacy_prod
|
||||
pool: 16
|
||||
<<: *redux_prod
|
||||
pool: 16
|
||||
|
||||
@@ -1,398 +0,0 @@
|
||||
# This file is auto-generated from the current state of the database. Instead
|
||||
# of editing this file, please use the migrations feature of Active Record to
|
||||
# incrementally modify your database, and then regenerate this schema definition.
|
||||
#
|
||||
# This file is the source Rails uses to define your schema when running `bin/rails
|
||||
# db:schema:load`. When creating a new database, `bin/rails db:schema:load` tends to
|
||||
# be faster and is potentially less error prone than running all of your
|
||||
# migrations from scratch. Old migrations may fail to apply correctly if those
|
||||
# migrations use external dependencies or application code.
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 0) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "hstore"
|
||||
enable_extension "intarray"
|
||||
enable_extension "pg_stat_statements"
|
||||
enable_extension "pg_trgm"
|
||||
enable_extension "plpgsql"
|
||||
|
||||
create_table "blob_entries", id: :serial, force: :cascade do |t|
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.integer "file_size"
|
||||
t.integer "refcount"
|
||||
t.string "sha256", limit: 64
|
||||
t.integer "dir_depth", default: 2, null: false
|
||||
t.index ["sha256"], name: "index_blob_entries_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "cache_http_log_entries", id: :serial, force: :cascade do |t|
|
||||
t.string "scheme"
|
||||
t.string "host"
|
||||
t.string "path"
|
||||
t.string "query"
|
||||
t.integer "verb"
|
||||
t.integer "status"
|
||||
t.integer "response_time"
|
||||
t.string "content_type"
|
||||
t.integer "response_size"
|
||||
t.integer "parent_log_entry_id"
|
||||
t.integer "blob_entry_id"
|
||||
t.boolean "gzipped"
|
||||
t.datetime "requested_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.binary "resp_body"
|
||||
t.string "imported_from_file"
|
||||
t.integer "req_headers_id"
|
||||
t.integer "res_headers_id"
|
||||
t.integer "diff_type", default: 0
|
||||
t.binary "native_blob_entry_sha256"
|
||||
t.index ["imported_from_file"], name: "index_cache_http_log_entries_on_imported_from_file"
|
||||
t.index ["path", "host"], name: "index_cache_http_log_entries_on_path_and_host"
|
||||
t.index ["path"], name: "cache_http_log_entries_path_idx", opclass: :gist_trgm_ops, using: :gist
|
||||
t.index ["path"], name: "index_pattern_ops_on_hle_entry_path"
|
||||
t.index ["requested_at"], name: "index_cache_http_log_entries_on_requested_at"
|
||||
end
|
||||
|
||||
create_table "cache_http_log_entry_headers", id: :serial, force: :cascade do |t|
|
||||
t.hstore "headers", null: false
|
||||
t.binary "sha256", null: false
|
||||
t.datetime "created_at"
|
||||
t.datetime "updated_at"
|
||||
t.index ["sha256"], name: "index_cache_http_log_entry_headers_on_sha256", unique: true
|
||||
end
|
||||
|
||||
create_table "e621_posts", id: :serial, force: :cascade do |t|
|
||||
t.integer "e621_id", null: false
|
||||
t.string "md5", null: false
|
||||
t.string "sources"
|
||||
t.string "file_url", null: false
|
||||
t.string "file_ext", null: false
|
||||
t.string "description"
|
||||
t.integer "rating"
|
||||
t.integer "width"
|
||||
t.integer "height", null: false
|
||||
t.string "tags_string", null: false
|
||||
t.integer "status"
|
||||
t.integer "score"
|
||||
t.boolean "removed"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.string "artists"
|
||||
t.integer "e621_count"
|
||||
t.string "author"
|
||||
t.string "e621_status"
|
||||
t.integer "blob_entry_id"
|
||||
t.integer "imgsearch_entry_id"
|
||||
t.index ["blob_entry_id"], name: "index_e621_posts_on_blob_entry_id"
|
||||
t.index ["e621_id"], name: "index_e621_posts_on_e621_id", unique: true
|
||||
t.index ["imgsearch_entry_id"], name: "index_e621_posts_on_imgsearch_entry_id"
|
||||
t.index ["md5"], name: "index_e621_posts_on_md5"
|
||||
end
|
||||
|
||||
create_table "e621_taggings", id: :serial, force: :cascade do |t|
|
||||
t.integer "tag_id"
|
||||
t.integer "post_id"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["post_id"], name: "index_e621_taggings_on_post_id"
|
||||
t.index ["tag_id"], name: "index_e621_taggings_on_tag_id"
|
||||
end
|
||||
|
||||
create_table "e621_tags", id: :serial, force: :cascade do |t|
|
||||
t.string "value", null: false
|
||||
t.integer "e621_id"
|
||||
t.integer "type"
|
||||
t.integer "e621_count"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["value"], name: "index_e621_tags_on_value"
|
||||
end
|
||||
|
||||
create_table "fa_failed_users", id: :serial, force: :cascade do |t|
|
||||
t.string "url_name"
|
||||
t.datetime "created_at"
|
||||
t.datetime "updated_at"
|
||||
end
|
||||
|
||||
create_table "fa_post_descriptions", id: :serial, force: :cascade do |t|
|
||||
t.text "value", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
create_table "fa_posts", id: :serial, force: :cascade do |t|
|
||||
t.integer "fa_id", null: false
|
||||
t.string "creator_name"
|
||||
t.integer "creator_id"
|
||||
t.string "title"
|
||||
t.string "category"
|
||||
t.string "theme"
|
||||
t.string "species"
|
||||
t.string "gender"
|
||||
t.string "keywords"
|
||||
t.string "file_url"
|
||||
t.integer "blob_entry_id"
|
||||
t.string "old_files"
|
||||
t.integer "num_favorites"
|
||||
t.integer "num_comments"
|
||||
t.integer "num_views"
|
||||
t.datetime "posted_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.boolean "in_gallery"
|
||||
t.string "in_folders"
|
||||
t.integer "state", default: 0
|
||||
t.string "state_error"
|
||||
t.string "folder_name"
|
||||
t.string "gallery_page_logfile"
|
||||
t.string "submission_page_logfile"
|
||||
t.integer "submission_page_log_entry_id"
|
||||
t.integer "description_id"
|
||||
t.integer "imgsearch_entry_id"
|
||||
t.index ["blob_entry_id"], name: "index_fa_posts_on_blob_entry_id"
|
||||
t.index ["creator_id"], name: "index_fa_posts_on_creator_id"
|
||||
t.index ["creator_name"], name: "index_fa_posts_on_creator_name"
|
||||
t.index ["fa_id"], name: "index_fa_posts_on_fa_id", unique: true
|
||||
t.index ["imgsearch_entry_id"], name: "index_fa_posts_on_imgsearch_entry_id"
|
||||
end
|
||||
|
||||
create_table "fa_users", id: :serial, force: :cascade do |t|
|
||||
t.string "name", null: false
|
||||
t.string "full_name"
|
||||
t.string "artist_type"
|
||||
t.string "mood"
|
||||
t.text "profile_html"
|
||||
t.integer "num_pageviews"
|
||||
t.integer "num_submissions"
|
||||
t.integer "num_comments_recieved"
|
||||
t.integer "num_comments_given"
|
||||
t.integer "num_journals"
|
||||
t.integer "num_favorites"
|
||||
t.datetime "registered_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.string "url_name", null: false
|
||||
t.datetime "scanned_gallery"
|
||||
t.datetime "scanned_page"
|
||||
t.string "user_page_logfile"
|
||||
t.integer "user_page_log_entry_id"
|
||||
t.index ["name"], name: "index_fa_users_on_name", unique: true
|
||||
t.index ["scanned_gallery"], name: "index_fa_users_on_scanned_gallery"
|
||||
t.index ["url_name"], name: "index_fa_users_on_url_name", unique: true
|
||||
end
|
||||
|
||||
create_table "ib_posts", id: :serial, force: :cascade do |t|
|
||||
t.string "gallery_logfile"
|
||||
t.datetime "created_at"
|
||||
t.datetime "updated_at"
|
||||
end
|
||||
|
||||
create_table "ib_user", id: :serial, force: :cascade do |t|
|
||||
end
|
||||
|
||||
create_table "imgsearch_dbs", id: :serial, force: :cascade do |t|
|
||||
t.string "path", null: false
|
||||
t.string "name", null: false
|
||||
t.integer "hash_bits", null: false
|
||||
t.integer "max_error", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
create_table "lite_ad_hoc_posts", force: :cascade do |t|
|
||||
t.string "file_ext"
|
||||
t.string "file"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
create_table "lite_e621_posts", primary_key: "e621_id", force: :cascade do |t|
|
||||
t.string "e621_file_url"
|
||||
t.string "file_ext"
|
||||
t.integer "main_asset_id"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
create_table "lite_fa_posts", primary_key: "fa_id", force: :cascade do |t|
|
||||
t.string "file_ext"
|
||||
t.integer "main_asset_id"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.string "file_url"
|
||||
t.integer "creator_id"
|
||||
t.string "title"
|
||||
t.index ["creator_id"], name: "index_lite_fa_posts_on_creator_id"
|
||||
end
|
||||
|
||||
create_table "lite_fa_users", force: :cascade do |t|
|
||||
t.string "url_name", null: false
|
||||
t.index ["url_name"], name: "index_lite_fa_users_on_url_name"
|
||||
end
|
||||
|
||||
create_table "lite_media_file_fingerprints", force: :cascade do |t|
|
||||
t.integer "object_type", limit: 2, null: false
|
||||
t.integer "object_id", null: false
|
||||
t.bigint "fingerprints", array: true
|
||||
t.index ["object_type", "object_id"], name: "index_lite_media_file_fingerprints_on_object_type_and_object_id", unique: true
|
||||
end
|
||||
|
||||
create_table "lite_s3_objects", force: :cascade do |t|
|
||||
t.string "sha256"
|
||||
t.string "key"
|
||||
t.integer "size"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["key"], name: "index_lite_s3_objects_on_key", unique: true
|
||||
end
|
||||
|
||||
create_table "pghero_query_stats", id: :serial, force: :cascade do |t|
|
||||
t.text "database"
|
||||
t.text "user"
|
||||
t.text "query"
|
||||
t.bigint "query_hash"
|
||||
t.float "total_time"
|
||||
t.bigint "calls"
|
||||
t.datetime "captured_at"
|
||||
t.index ["database", "captured_at"], name: "index_pghero_query_stats_on_database_and_captured_at"
|
||||
end
|
||||
|
||||
create_table "pghero_space_stats", id: :serial, force: :cascade do |t|
|
||||
t.text "database"
|
||||
t.text "schema"
|
||||
t.text "relation"
|
||||
t.bigint "size"
|
||||
t.datetime "captured_at"
|
||||
t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at"
|
||||
end
|
||||
|
||||
create_table "watch_jobs", id: :serial, force: :cascade do |t|
|
||||
t.string "execute_class"
|
||||
t.string "params"
|
||||
t.string "return"
|
||||
t.integer "state"
|
||||
t.datetime "execute_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.integer "enqueue_strategy", default: 0
|
||||
t.integer "redo_delay_seconds"
|
||||
t.integer "priority", default: 0
|
||||
t.string "http_class"
|
||||
t.index ["execute_at"], name: "index_watch_jobs_on_execute_at"
|
||||
t.index ["execute_class"], name: "index_watch_jobs_on_execute_class"
|
||||
t.index ["priority"], name: "index_watch_jobs_on_priority"
|
||||
t.index ["state"], name: "index_watch_jobs_on_state"
|
||||
end
|
||||
|
||||
create_table "watched_users", id: :serial, force: :cascade do |t|
|
||||
t.integer "user_id", null: false
|
||||
t.integer "watch_job_id", null: false
|
||||
t.datetime "scanned_user_gallery"
|
||||
t.datetime "scanned_user_page"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.integer "scan_delay"
|
||||
end
|
||||
|
||||
create_table "weasyl_attempted_posts", id: :serial, force: :cascade do |t|
|
||||
t.integer "weasyl_id", null: false
|
||||
t.integer "enum_type", null: false
|
||||
t.integer "status"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_attempted_posts_on_enum_type_and_weasyl_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_descriptions", id: :serial, force: :cascade do |t|
|
||||
t.text "value", null: false
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
create_table "weasyl_joins_user_follows", id: :serial, force: :cascade do |t|
|
||||
t.integer "follower_id", null: false
|
||||
t.integer "followed_id", null: false
|
||||
t.index ["follower_id", "followed_id"], name: "index_weasyl_joins_user_follows_on_follower_id_and_followed_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_joins_user_friends", id: :serial, force: :cascade do |t|
|
||||
t.integer "a_id", null: false
|
||||
t.integer "b_id", null: false
|
||||
t.index ["a_id", "b_id"], name: "index_weasyl_joins_user_friends_on_a_id_and_b_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_medias", id: :serial, force: :cascade do |t|
|
||||
t.string "url", null: false
|
||||
t.integer "mediaid"
|
||||
t.integer "blob_entry_id"
|
||||
t.integer "status"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["mediaid"], name: "index_weasyl_medias_on_mediaid", unique: true
|
||||
t.index ["url"], name: "index_weasyl_medias_on_url", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_posts", id: :serial, force: :cascade do |t|
|
||||
t.integer "weasyl_id", null: false
|
||||
t.string "title", null: false
|
||||
t.integer "description_id"
|
||||
t.datetime "posted_at"
|
||||
t.string "tags", array: true
|
||||
t.integer "enum_type", null: false
|
||||
t.integer "enum_subtype"
|
||||
t.integer "enum_rating", null: false
|
||||
t.integer "num_comments"
|
||||
t.integer "num_views"
|
||||
t.integer "num_favorites"
|
||||
t.string "folder_name"
|
||||
t.integer "weasyl_folder_id"
|
||||
t.integer "owner_id", null: false
|
||||
t.integer "submission_media_id"
|
||||
t.datetime "full_scanned_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["enum_type", "weasyl_id"], name: "index_weasyl_posts_on_enum_type_and_weasyl_id", unique: true
|
||||
end
|
||||
|
||||
create_table "weasyl_users", id: :serial, force: :cascade do |t|
|
||||
t.string "full_name"
|
||||
t.string "login_name", null: false
|
||||
t.integer "description_id"
|
||||
t.integer "avatar_media_id"
|
||||
t.datetime "scanned_gallery_at"
|
||||
t.datetime "scanned_userpage_at"
|
||||
t.datetime "scanned_followers_following_at"
|
||||
t.integer "userid"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["login_name"], name: "index_weasyl_users_on_login_name", unique: true
|
||||
end
|
||||
|
||||
create_table "xtwitter_tweets", id: :serial, force: :cascade do |t|
|
||||
t.string "creator_screen_name", null: false
|
||||
t.integer "creator_user_id", null: false
|
||||
t.string "twitter_id", null: false
|
||||
t.json "attrs"
|
||||
t.string "media_uri"
|
||||
t.string "media_ext"
|
||||
t.integer "http_log_entry_page_id"
|
||||
t.integer "http_log_entry_media_id"
|
||||
t.integer "blob_entry_id"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
create_table "xtwitter_users", id: :serial, force: :cascade do |t|
|
||||
t.string "screen_name", null: false
|
||||
t.integer "user_id", null: false
|
||||
t.datetime "scanned_timeline_at"
|
||||
t.datetime "scanned_profile_at"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
end
|
||||
|
||||
end
|
||||
@@ -1,24 +0,0 @@
|
||||
# This migration comes from active_storage (originally 20190112182829)
|
||||
class AddServiceNameToActiveStorageBlobs < ActiveRecord::Migration[6.0]
|
||||
def up
|
||||
return unless table_exists?(:active_storage_blobs)
|
||||
|
||||
unless column_exists?(:active_storage_blobs, :service_name)
|
||||
add_column :active_storage_blobs, :service_name, :string
|
||||
|
||||
if configured_service = ActiveStorage::Blob.service.name
|
||||
ActiveStorage::Blob.unscoped.update_all(
|
||||
service_name: configured_service
|
||||
)
|
||||
end
|
||||
|
||||
change_column :active_storage_blobs, :service_name, :string, null: false
|
||||
end
|
||||
end
|
||||
|
||||
def down
|
||||
return unless table_exists?(:active_storage_blobs)
|
||||
|
||||
remove_column :active_storage_blobs, :service_name
|
||||
end
|
||||
end
|
||||
@@ -1,36 +0,0 @@
|
||||
# This migration comes from active_storage (originally 20191206030411)
|
||||
class CreateActiveStorageVariantRecords < ActiveRecord::Migration[6.0]
|
||||
def change
|
||||
return unless table_exists?(:active_storage_blobs)
|
||||
|
||||
# Use Active Record's configured type for primary key
|
||||
create_table :active_storage_variant_records,
|
||||
id: primary_key_type,
|
||||
if_not_exists: true do |t|
|
||||
t.belongs_to :blob,
|
||||
null: false,
|
||||
index: false,
|
||||
type: blobs_primary_key_type
|
||||
t.string :variation_digest, null: false
|
||||
|
||||
t.index %i[blob_id variation_digest],
|
||||
name: "index_active_storage_variant_records_uniqueness",
|
||||
unique: true
|
||||
t.foreign_key :active_storage_blobs, column: :blob_id
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def primary_key_type
|
||||
config = Rails.configuration.generators
|
||||
config.options[config.orm][:primary_key_type] || :primary_key
|
||||
end
|
||||
|
||||
def blobs_primary_key_type
|
||||
pkey_name = connection.primary_key(:active_storage_blobs)
|
||||
pkey_column =
|
||||
connection.columns(:active_storage_blobs).find { |c| c.name == pkey_name }
|
||||
pkey_column.bigint? ? :bigint : pkey_column.type
|
||||
end
|
||||
end
|
||||
@@ -1,8 +0,0 @@
|
||||
# This migration comes from active_storage (originally 20211119233751)
|
||||
class RemoveNotNullOnActiveStorageBlobsChecksum < ActiveRecord::Migration[6.0]
|
||||
def change
|
||||
return unless table_exists?(:active_storage_blobs)
|
||||
|
||||
change_column_null(:active_storage_blobs, :checksum, true)
|
||||
end
|
||||
end
|
||||
@@ -1,17 +1,6 @@
|
||||
namespace :e621 do
|
||||
desc "import legacy e621 posts"
|
||||
task :import_legacy => :environment do |t, args|
|
||||
batch_size = args[:batch_size]&.to_i || ENV["batch_size"]&.to_i
|
||||
forks = args[:forks]&.to_i || ENV["forks"]&.to_i
|
||||
start_at = args[:start_at]&.to_i || ENV["start_at"]&.to_i
|
||||
|
||||
LegacyImport::E621LegacyPostImporter.
|
||||
new(batch_size: batch_size, forks: forks, start_at: start_at).
|
||||
run
|
||||
end
|
||||
|
||||
desc "import e621 data from csv"
|
||||
task :import_csv => :environment do |t, args|
|
||||
task import_csv: :environment do |t, args|
|
||||
start_at = ENV["start_at"]&.to_i
|
||||
limit = ENV["limit"]&.to_i
|
||||
csv_path = ENV["csv"] || raise("must supply `csv`")
|
||||
@@ -24,9 +13,7 @@ namespace :e621 do
|
||||
end
|
||||
|
||||
desc "run a single e621 posts index job"
|
||||
task :posts_index_job => :environment do
|
||||
Domain::E621::Job::PostsIndexJob.
|
||||
set(priority: -10).
|
||||
perform_later({})
|
||||
task posts_index_job: :environment do
|
||||
Domain::E621::Job::PostsIndexJob.set(priority: -10).perform_later({})
|
||||
end
|
||||
end
|
||||
|
||||
66
rake/fa.rake
@@ -11,7 +11,7 @@ namespace :fa do
|
||||
parser =
|
||||
Domain::Fa::Parser::Page.new(
|
||||
page_log_entry.response.contents,
|
||||
require_logged_in: false
|
||||
require_logged_in: false,
|
||||
)
|
||||
unless parser.probably_user_page?
|
||||
# Rails.logger.error("user #{user.id} / #{user.url_name} page #{page_log_entry.id} is not a user page")
|
||||
@@ -24,7 +24,7 @@ namespace :fa do
|
||||
user.save!
|
||||
puts ""
|
||||
Rails.logger.info(
|
||||
"updated #{user.id} / #{user.url_name} - #{old_name} -> #{new_name}"
|
||||
"updated #{user.id} / #{user.url_name} - #{old_name} -> #{new_name}",
|
||||
)
|
||||
else
|
||||
print "."
|
||||
@@ -55,7 +55,7 @@ namespace :fa do
|
||||
"number of posts the user has favorited, as per the user page",
|
||||
num_pageviews:
|
||||
"number of pageviews of the user's page, as per the user page",
|
||||
registered_at: "when the account was registered, as per the user page"
|
||||
registered_at: "when the account was registered, as per the user page",
|
||||
}
|
||||
model_ids = Domain::Fa::User.order(url_name: :asc).pluck(:id)
|
||||
File.open(out_file, "wt") do |file|
|
||||
@@ -89,7 +89,7 @@ namespace :fa do
|
||||
reverse_scan_holes: false,
|
||||
start_at: start_at,
|
||||
low_water_mark: low_water_mark,
|
||||
high_water_mark: high_water_mark
|
||||
high_water_mark: high_water_mark,
|
||||
)
|
||||
|
||||
loop { sleep poll_duration if enqueuer.run_once == :sleep }
|
||||
@@ -108,7 +108,7 @@ namespace :fa do
|
||||
reverse_scan_holes: true,
|
||||
start_at: start_at,
|
||||
low_water_mark: low_water_mark,
|
||||
high_water_mark: high_water_mark
|
||||
high_water_mark: high_water_mark,
|
||||
)
|
||||
|
||||
loop { sleep poll_duration if enqueuer.run_once == :sleep }
|
||||
@@ -125,7 +125,7 @@ namespace :fa do
|
||||
Domain::Fa::UserEnqueuer.new(
|
||||
start_at: start_at,
|
||||
low_water_mark: low_water_mark,
|
||||
high_water_mark: high_water_mark
|
||||
high_water_mark: high_water_mark,
|
||||
)
|
||||
|
||||
loop { sleep poll_duration if enqueuer.run_once == :sleep }
|
||||
@@ -158,7 +158,7 @@ namespace :fa do
|
||||
total: total,
|
||||
format: "%t: %c/%C %B %p%% %a %e",
|
||||
output: $stderr,
|
||||
throttle_rate: 0.2
|
||||
throttle_rate: 0.2,
|
||||
)
|
||||
|
||||
# make stdout unbuffered
|
||||
@@ -175,24 +175,11 @@ namespace :fa do
|
||||
$stdout.flush
|
||||
end
|
||||
|
||||
desc "Import existing FA posts"
|
||||
task :import_existing, [:start_at] => [:environment] do |t, args|
|
||||
batch_size = args[:batch_size]&.to_i || ENV["batch_size"]&.to_i
|
||||
forks = args[:forks]&.to_i || ENV["forks"]&.to_i
|
||||
start_at = args[:start_at]&.to_i || ENV["start_at"]&.to_i
|
||||
|
||||
LegacyImport::FaPostImporter.new(
|
||||
batch_size: batch_size,
|
||||
forks: forks,
|
||||
start_at: start_at
|
||||
).run
|
||||
end
|
||||
|
||||
desc "run a single browse page job"
|
||||
task browse_page_job: %i[set_logger_stdout environment] do
|
||||
Domain::Fa::Job::BrowsePageJob.set(
|
||||
priority: -20,
|
||||
queue: "manual"
|
||||
queue: "manual",
|
||||
).perform_later({})
|
||||
puts "#{Time.now} - browse_page_job - Domain::Fa::Job::BrowsePageJob"
|
||||
end
|
||||
@@ -201,7 +188,7 @@ namespace :fa do
|
||||
task home_page_job: %i[set_logger_stdout environment] do
|
||||
Domain::Fa::Job::HomePageJob.set(
|
||||
priority: -20,
|
||||
queue: "manual"
|
||||
queue: "manual",
|
||||
).perform_later({})
|
||||
puts "#{Time.now} - home_page_job - Domain::Fa::Job::HomePageJob"
|
||||
end
|
||||
@@ -211,7 +198,7 @@ namespace :fa do
|
||||
fa_id = ENV["fa_id"] || raise("must provide fa_id")
|
||||
Domain::Fa::Job::ScanPostJob.set(
|
||||
priority: -10,
|
||||
queue: "manual"
|
||||
queue: "manual",
|
||||
).perform_later({ fa_id: fa_id, force_scan: true })
|
||||
end
|
||||
|
||||
@@ -241,7 +228,7 @@ namespace :fa do
|
||||
for fa_id in (fa_id_start..fa_id_end)
|
||||
Domain::Fa::Job::ScanPostJob.set(
|
||||
priority: -10,
|
||||
queue: "manual"
|
||||
queue: "manual",
|
||||
).perform_later({ fa_id: fa_id })
|
||||
end
|
||||
end
|
||||
@@ -251,7 +238,7 @@ namespace :fa do
|
||||
url_name = ENV["url_name"] || raise("must provide url_name")
|
||||
Domain::Fa::Job::UserPageJob.set(
|
||||
priority: -10,
|
||||
queue: "manual"
|
||||
queue: "manual",
|
||||
).perform_later({ url_name: url_name, force_scan: true })
|
||||
end
|
||||
|
||||
@@ -260,7 +247,7 @@ namespace :fa do
|
||||
url_name = ENV["url_name"] || raise("must provide url_name")
|
||||
Domain::Fa::Job::UserGalleryJob.set(
|
||||
priority: -10,
|
||||
queue: "manual"
|
||||
queue: "manual",
|
||||
).perform_later({ url_name: url_name, force_scan: true })
|
||||
end
|
||||
|
||||
@@ -283,13 +270,13 @@ namespace :fa do
|
||||
post.save!
|
||||
|
||||
Domain::Fa::Job::UserPageJob.set(priority: -10).perform_later(
|
||||
{ user: real_user }
|
||||
{ user: real_user },
|
||||
)
|
||||
Domain::Fa::Job::UserGalleryJob.set(priority: -10).perform_later(
|
||||
{ user: real_user }
|
||||
{ user: real_user },
|
||||
)
|
||||
Domain::Fa::Job::ScanPostJob.set(priority: -10).perform_later(
|
||||
{ post: post }
|
||||
{ post: post },
|
||||
)
|
||||
end
|
||||
end
|
||||
@@ -302,7 +289,7 @@ namespace :fa do
|
||||
tables =
|
||||
ENV["tables"] ||
|
||||
raise(
|
||||
"'tables' required (all, #{Domain::Fa::SqliteExporter::TABLES.keys.join(", ")})"
|
||||
"'tables' required (all, #{Domain::Fa::SqliteExporter::TABLES.keys.join(", ")})",
|
||||
)
|
||||
tables = tables.split(",").map(&:to_sym)
|
||||
|
||||
@@ -312,23 +299,4 @@ namespace :fa do
|
||||
exporter.run
|
||||
exporter.end_profiling! if profile
|
||||
end
|
||||
|
||||
task fix_fa_user_avatars: %i[environment set_logger_stdout] do
|
||||
url_name = ENV["url_name"]
|
||||
if url_name
|
||||
start_at = 0
|
||||
limit = 1
|
||||
else
|
||||
start_at =
|
||||
ENV["start_at"]&.to_i || raise("need start_at (user avatar id)")
|
||||
limit = ENV["limit"]&.to_i
|
||||
end
|
||||
job =
|
||||
Domain::Fa::UserAvatarFixer.new(
|
||||
start_at: start_at,
|
||||
limit: limit,
|
||||
url_name: url_name
|
||||
)
|
||||
job.run
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
namespace :log_entry do
|
||||
desc "Fix up entries that have '' contents but shouldn't"
|
||||
task :fix_up_empty_response_contents => [:environment] do
|
||||
query = ::HttpLogEntry.where(response_sha256: Digest::SHA256.digest("")).limit(100)
|
||||
query.find_each do |model|
|
||||
puts "uri: #{model.uri_str}"
|
||||
end
|
||||
end
|
||||
|
||||
desc "Find an HttpLogEntry that is missing its response body, ignoring those with a missing blob entry file"
|
||||
task :find_missing_response_body_ignore_file => [:environment] do
|
||||
Legacy::HttpLogEntry.where(
|
||||
"resp_body is not null or blob_entry_id is not null"
|
||||
).find_in_batches do |batch|
|
||||
id_to_legacy = batch.map { |l| [l.id, l] }.to_h
|
||||
legacy_ids = batch.map(&:id)
|
||||
log_ids = HttpLogEntry.select(:id).where(id: legacy_ids).map(&:id).to_a
|
||||
|
||||
missing_ids = legacy_ids - log_ids
|
||||
|
||||
# ignore the ones which have a missing blob entry
|
||||
missing_ids.filter! do |id|
|
||||
model = id_to_legacy[id]
|
||||
model.response_body
|
||||
true
|
||||
rescue Legacy::HttpLogEntry::NoBEPathException
|
||||
false
|
||||
rescue
|
||||
true
|
||||
end
|
||||
|
||||
if missing_ids.any?
|
||||
puts "Found missing: #{missing_ids}"
|
||||
break
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
desc "Bulk import Legacy::HLE -> ::HLE"
|
||||
task :http_log_entry_bulk_importer, [:batch_size, :cache_size, :start_at, :finish_at] => [:environment] do |t, args|
|
||||
batch_size = args[:batch_size]&.to_i || ENV["batch_size"]&.to_i || 8192
|
||||
cache_size = args[:cache_size]&.to_i || ENV["cache_size"]&.to_i || 6
|
||||
start_at = args[:start_at]&.to_i || ENV["start_at"]&.to_i || 0
|
||||
finish_at = args[:finish_at]&.to_i || ENV["finish_at"]&.to_i || nil
|
||||
|
||||
LegacyImport::HttpLogEntryBulkImporter.
|
||||
new(batch_size, cache_size, start_at, finish_at).
|
||||
run
|
||||
end
|
||||
|
||||
task :legacy_http_log_entry_native_blob_entry => [:environment] do
|
||||
batch_size = ENV["batch_size"]&.to_i || 4000
|
||||
parallelism = ENV["parallelism"]&.to_i || 8
|
||||
|
||||
query = Legacy::HttpLogEntry.
|
||||
where(diff_type: :native).
|
||||
where(native_blob_entry_sha256: nil).
|
||||
where("resp_body is not null")
|
||||
|
||||
start_time = Time.now
|
||||
progress = 0
|
||||
|
||||
query.find_in_batches(batch_size: batch_size, start: 0, finish: 10) do |batch|
|
||||
ForkFuture.parallel_each(parallelism, batch) do |http_log_entry|
|
||||
http_log_entry.update_columns(
|
||||
native_blob_entry_sha256: HexUtil.hex2bin(http_log_entry.resp_body),
|
||||
)
|
||||
end
|
||||
|
||||
progress += batch.size
|
||||
rate = progress.to_f / (Time.now - start_time)
|
||||
puts "finish batch, last id #{batch.last&.id} - #{progress} - #{rate.round(1)} / second"
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,35 +0,0 @@
|
||||
require "rails_helper"
|
||||
|
||||
describe LegacyImport::AdaptiveCache do
|
||||
it "works" do
|
||||
cache = LegacyImport::AdaptiveCache.new(4, 1.0, 0.0)
|
||||
refute cache.at_capacity?
|
||||
expect(cache.candidates).to eq([])
|
||||
|
||||
# works even when the candidate doesn't exist
|
||||
cache.reward 1
|
||||
expect(cache.candidates).to eq([])
|
||||
|
||||
cache.insert :a, "a"
|
||||
cache.insert :b, "b"
|
||||
cache.insert :c, "c"
|
||||
cache.insert :d, "d"
|
||||
assert cache.at_capacity?
|
||||
expect(cache.candidates).to eq(%w[d c b a])
|
||||
|
||||
5.times { cache.reward :a }
|
||||
3.times { cache.reward :b }
|
||||
1.times { cache.reward :c }
|
||||
expect(cache.candidates).to eq(%w[a b c d])
|
||||
expect(cache.scores).to eq([5.0, 3.0, 1.0, 0.0])
|
||||
|
||||
3.times { cache.reward :c } # 1 => 4
|
||||
expect(cache.candidates).to eq(%w[a c b d])
|
||||
expect(cache.scores).to eq([5.0, 4.0, 3.0, 0.0])
|
||||
|
||||
# new 'e' should bump off 'd' which has a 0 score
|
||||
cache.insert :e, "e"
|
||||
expect(cache.candidates).to eq(%w[a c b e])
|
||||
expect(cache.scores).to eq([5.0, 4.0, 3.0, 0.0])
|
||||
end
|
||||
end
|
||||
@@ -1,20 +0,0 @@
|
||||
#!/usr/bin/env fish
|
||||
|
||||
set SESSION migration
|
||||
|
||||
set NUM_SHARDS $argv[1]
|
||||
set START_AT $argv[2]
|
||||
|
||||
tmux new-session -d -s $SESSION
|
||||
|
||||
for i in (seq 2 $NUM_SHARDS)
|
||||
tmux select-layout tiled
|
||||
tmux split-window -t $SESSION:0.0 -h -d
|
||||
end
|
||||
tmux select-layout tiled
|
||||
|
||||
for i in (seq 0 $NUM_SHARDS)
|
||||
tmux send-keys -t $SESSION:0.$i 'bin/rake migrate_legacy_http_entries[' $START_AT ', ' $NUM_SHARDS ', ' $i ']' C-m
|
||||
end
|
||||
|
||||
tmux attach-session -t $SESSION
|
||||
32
test/fixtures/legacy/blob_entries.yml
vendored
@@ -1,32 +0,0 @@
|
||||
---
|
||||
id_1370198:
|
||||
id: 1370198
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-02-27 12:00:51.488220000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2016-02-27 12:00:51.488220000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-02-27 12:00:51.488220000 Z
|
||||
zone: *1
|
||||
time: 2016-02-27 12:00:51.488220000 Z
|
||||
file_size: 313065
|
||||
refcount: 1
|
||||
sha256: 5ed3a0400ac50f721123c7a8c638da8b19bf563f8e880f9abb36dcb38395bc82
|
||||
dir_depth: 2
|
||||
|
||||
id_8663902:
|
||||
id: 8663902
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-11-26 22:09:59.707602000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2016-11-26 22:09:59.707602000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-11-26 22:09:59.707602000 Z
|
||||
zone: *1
|
||||
time: 2016-11-26 22:09:59.707602000 Z
|
||||
file_size: 6936
|
||||
refcount: 2
|
||||
sha256: 41f8daf7772d11f80afe56b742087a2d1ab372e08b69e1284be4fefec2ad0c7f
|
||||
dir_depth: 4
|
||||
50
test/fixtures/legacy/fa/post_descriptions.yml
vendored
@@ -1,50 +0,0 @@
|
||||
---
|
||||
id_11551023:
|
||||
id: 11551023
|
||||
value: "<a href=\"/user/kenny-mccormick/\"><img class=\"avatar\" alt=\"kenny-mccormick\"
|
||||
src=\"//a.facdn.net/1485168311/kenny-mccormick.gif\"></a>\n <br><br>\n
|
||||
\ [copied from my DA page]<br>\r\n<br>\r\nLike last time,
|
||||
mid week posts are just being dedicated to images that come from the south park
|
||||
stories, starting with a particular fav of mine from chapter 2.<br>\r\n<br>\r\nIt
|
||||
was very fun to design the outfit for Kenny, though it's very different from his
|
||||
show attire. I think he looks really cute, tempted to make the design part of another
|
||||
character at some point, it's just so cute <3<br>\r\n<br>\r\n-<br>\r\n<br>\r\nImage
|
||||
was made by Tato and colored by me.<br>\r\n<br>\r\nWant to support this content?<br>\r\n<a
|
||||
href=\"https://www.patreon.com/KennyCrusader\" title=\"https://www.patreon.com/KennyCrusader\"
|
||||
class=\"auto_link\">https://www.patreon.com/KennyCrusader</a>"
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-02-01 07:50:22.199757000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2018-02-01 07:50:22.199757000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-02-01 07:50:22.199757000 Z
|
||||
zone: *1
|
||||
time: 2018-02-01 07:50:22.199757000 Z
|
||||
|
||||
id_144:
|
||||
id: 144
|
||||
value: "\n<a href=\"/user/meesh/\"><img class=\"avatar\" alt=\"meesh\" src=\"//a.facdn.net/1424255659/meesh.gif\"></a>\n<br><br>\n<a
|
||||
href=\"http://meesh.pandachan.org/massive-art-collections/\" title=\"http://meesh.pandachan.org/massive-art-collections/\"
|
||||
class=\"auto_link auto_link_shortened\">http://meesh.pandachan.org/massive-.....t-collections/</a><br>\n<br>\nMy
|
||||
latest Art Pack is here! And it’s a doozy. It includes:<br>\n<br>\n-95% explicit
|
||||
adult content, naturally. Gay, straight and some in-between<br>\n<br>\n-143 unique
|
||||
sketch commissions, with a handful that have special edits. 56 of which have never
|
||||
been posted to my account<br>\n<br>\n-69 sketches that never saw the light of day
|
||||
outside of my weekly streams, or not at all<br>\n<br>\n-27 unique Tier 1 and Tier
|
||||
2 commissions. Including Dog Pound, Pills Chronicles and others, with preliminary
|
||||
stages such as sketches, concepts, and inks<br>\n<br>\n-High resolution and resized
|
||||
versions of every single sketch commission, Tier 1 and Tier 2. All personal sketches
|
||||
are in hi-res or have a hi-res version included<br>\n<br>\n-590 image files in total<br>\n<br>\nIt
|
||||
all can be yours for just $10! You can purchase it right here:<a href=\"http://meesh.pandachan.org/massive-art-collections/\"
|
||||
title=\"http://meesh.pandachan.org/massive-art-collections/\" class=\"auto_link
|
||||
auto_link_shortened\">http://meesh.pandachan.org/massive-.....t-collections/</a> "
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-12-22 17:06:09.549412000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2016-12-22 17:06:09.549412000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-12-22 17:06:09.549412000 Z
|
||||
zone: *1
|
||||
time: 2016-12-22 17:06:09.549412000 Z
|
||||
273
test/fixtures/legacy/fa/posts.yml
vendored
@@ -1,273 +0,0 @@
|
||||
---
|
||||
id_13950325:
|
||||
id: 13950325
|
||||
fa_id: 25793413
|
||||
creator_name: Kenny-Mccormick
|
||||
creator_id:
|
||||
title: "[Commissioned] [Collab] Little Kenny - By Tato"
|
||||
category: Artwork (Digital)
|
||||
theme: Baby fur
|
||||
species: Unspecified / Any
|
||||
gender: Male
|
||||
keywords:
|
||||
- south
|
||||
- park
|
||||
- kenny
|
||||
- mccormick
|
||||
- tato
|
||||
- age
|
||||
- regression
|
||||
- diaper
|
||||
file_url: "//d.facdn.net/art/kenny-mccormick/1513691072/1513691072.kenny-mccormick_kenny_s_new_outfitf.png"
|
||||
blob_entry_id:
|
||||
old_files: []
|
||||
num_favorites: 25
|
||||
num_comments: 4
|
||||
num_views: 263
|
||||
posted_at:
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-02-01 07:50:22.177857000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2018-02-01 07:50:22.177857000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-02-01 07:50:22.201057000 Z
|
||||
zone: *1
|
||||
time: 2018-02-01 07:50:22.201057000 Z
|
||||
in_gallery:
|
||||
in_folders: []
|
||||
state: scanned_submission
|
||||
state_error:
|
||||
folder_name:
|
||||
gallery_page_logfile:
|
||||
submission_page_logfile:
|
||||
submission_page_log_entry_id: 19252126
|
||||
description_id: 11551023
|
||||
imgsearch_entry_id:
|
||||
|
||||
id_13950327:
|
||||
id: 13950327
|
||||
fa_id: 25793411
|
||||
creator_name:
|
||||
creator_id:
|
||||
title:
|
||||
category:
|
||||
theme:
|
||||
species:
|
||||
gender:
|
||||
keywords: []
|
||||
file_url:
|
||||
blob_entry_id:
|
||||
old_files: []
|
||||
num_favorites:
|
||||
num_comments:
|
||||
num_views:
|
||||
posted_at:
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-02-01 07:50:23.214173000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2018-02-01 07:50:23.214173000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-02-01 07:50:23.214173000 Z
|
||||
zone: *1
|
||||
time: 2018-02-01 07:50:23.214173000 Z
|
||||
in_gallery:
|
||||
in_folders: []
|
||||
state: scan_error
|
||||
state_error: post removed
|
||||
folder_name:
|
||||
gallery_page_logfile:
|
||||
submission_page_logfile:
|
||||
submission_page_log_entry_id:
|
||||
description_id:
|
||||
imgsearch_entry_id:
|
||||
|
||||
id_4936259:
|
||||
id: 4936259
|
||||
fa_id: 7126769
|
||||
creator_name: Rodrick-Dragon
|
||||
creator_id: 27392
|
||||
title: Profile ID
|
||||
category: Artwork (Digital)
|
||||
theme: Doodle
|
||||
species: Dragon (Other)
|
||||
gender: Male
|
||||
keywords: []
|
||||
file_url: "//d.facdn.net/art/rodrick-dragon/1325297838/1325297838.rodrick-dragon_bio.jpg"
|
||||
blob_entry_id: 11923806
|
||||
old_files: []
|
||||
num_favorites: 5
|
||||
num_comments: 3
|
||||
num_views: 30
|
||||
posted_at:
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-03-09 12:21:45.224977000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2016-03-09 12:21:45.224977000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-01-29 01:14:47.165636000 Z
|
||||
zone: *1
|
||||
time: 2018-01-29 01:14:47.165636000 Z
|
||||
in_gallery:
|
||||
in_folders: []
|
||||
state: have_static
|
||||
state_error:
|
||||
folder_name: Scraps
|
||||
gallery_page_logfile: listing_pages/rodrick-dragon/1457526104-Scraps_1.html
|
||||
submission_page_logfile:
|
||||
submission_page_log_entry_id: 6026946
|
||||
description_id: 4858488
|
||||
imgsearch_entry_id: 6310759
|
||||
|
||||
id_3234144:
|
||||
id: 3234144
|
||||
fa_id: 8489215
|
||||
creator_name: LokiLover
|
||||
creator_id: 14101
|
||||
title: Scribbles with Kier
|
||||
category: Artwork (Digital)
|
||||
theme: Doodle
|
||||
species: Unspecified / Any
|
||||
gender: Any
|
||||
keywords:
|
||||
- doodles
|
||||
- OC
|
||||
- opencanvas
|
||||
- goofy
|
||||
- silly
|
||||
- chubby
|
||||
- things
|
||||
- dinosaurs
|
||||
- cows
|
||||
- kitties
|
||||
- spider
|
||||
- gremlins
|
||||
- chibi
|
||||
- super
|
||||
- sexy
|
||||
- cheerleader
|
||||
file_url: "/full/8489215/"
|
||||
blob_entry_id: 6877172
|
||||
old_files: []
|
||||
num_favorites: 5
|
||||
num_comments: 0
|
||||
num_views: 152
|
||||
posted_at:
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-03-04 20:41:26.393592000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2016-03-04 20:41:26.393592000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-12-23 03:56:27.425752000 Z
|
||||
zone: *1
|
||||
time: 2016-12-23 03:56:27.425752000 Z
|
||||
in_gallery:
|
||||
in_folders: []
|
||||
state: have_static
|
||||
state_error:
|
||||
folder_name: Gallery
|
||||
gallery_page_logfile: listing_pages/lokilover/1476614903-Gallery_2.html
|
||||
submission_page_logfile: submissions/lokilover/1463959274-8489215.html
|
||||
submission_page_log_entry_id:
|
||||
description_id: 3173583
|
||||
imgsearch_entry_id:
|
||||
|
||||
id_10117853:
|
||||
id: 10117853
|
||||
fa_id: 21826851
|
||||
creator_name: Drake_Ergenthal
|
||||
creator_id: 106983
|
||||
title: |-
|
||||
Font size adjustment: smallerlarger
|
||||
|
||||
Locust vs. Nature
|
||||
category: Story
|
||||
theme: All
|
||||
species:
|
||||
gender:
|
||||
keywords:
|
||||
- Locust
|
||||
- Grenadier
|
||||
- Hunter
|
||||
- Elite
|
||||
- Savage
|
||||
- Kantus
|
||||
- Miner
|
||||
- quicksand
|
||||
- Gears
|
||||
- of
|
||||
- War
|
||||
file_url: "//d.facdn.net/art/drakeergenthal/stories/1480188610/1480188610.drakeergenthal_gow_fanfic.txt"
|
||||
blob_entry_id: 8663902
|
||||
old_files: []
|
||||
num_favorites: 1
|
||||
num_comments: 0
|
||||
num_views: 11
|
||||
posted_at:
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-11-26 19:44:43.419488000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2016-11-26 19:44:43.419488000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-12-23 20:32:59.798452000 Z
|
||||
zone: *1
|
||||
time: 2016-12-23 20:32:59.798452000 Z
|
||||
in_gallery:
|
||||
in_folders: []
|
||||
state: have_static
|
||||
state_error: guessing exists
|
||||
folder_name:
|
||||
gallery_page_logfile:
|
||||
submission_page_logfile:
|
||||
submission_page_log_entry_id: 1449
|
||||
description_id: 8039358
|
||||
imgsearch_entry_id:
|
||||
|
||||
fa_id_19177819:
|
||||
id: 1
|
||||
fa_id: 19177819
|
||||
creator_name: Meesh
|
||||
creator_id: 1
|
||||
title: MASSIVE ART PACK 6 - Available now!
|
||||
category: Artwork (Digital)
|
||||
theme: All
|
||||
species: Unspecified / Any
|
||||
gender: Any
|
||||
keywords:
|
||||
- meesh
|
||||
- nsfw
|
||||
- art
|
||||
- pack
|
||||
- adult
|
||||
- boner
|
||||
- touching
|
||||
file_url: "//d.facdn.net/art/meesh/1456189705/1456189705.meesh_production5.png"
|
||||
blob_entry_id: 1370198
|
||||
old_files: []
|
||||
num_favorites: 8
|
||||
num_comments: 2
|
||||
num_views: 904
|
||||
posted_at:
|
||||
created_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2016-02-28 00:26:18.966788000 Z
|
||||
zone: &1 !ruby/object:ActiveSupport::TimeZone
|
||||
name: Etc/UTC
|
||||
time: 2016-02-28 00:26:18.966788000 Z
|
||||
updated_at: !ruby/object:ActiveSupport::TimeWithZone
|
||||
utc: 2018-01-27 23:26:36.605202000 Z
|
||||
zone: *1
|
||||
time: 2018-01-27 23:26:36.605202000 Z
|
||||
in_gallery: true
|
||||
in_folders: []
|
||||
state: have_static
|
||||
state_error:
|
||||
folder_name: Main Gallery
|
||||
gallery_page_logfile: listing_pages/meesh/1480115720-MainGallery_1.html
|
||||
submission_page_logfile:
|
||||
submission_page_log_entry_id:
|
||||
description_id: 144
|
||||
imgsearch_entry_id: 154
|
||||
80
test/fixtures/legacy/fa/users.yml
vendored
@@ -1,80 +0,0 @@
|
||||
---
id_1:
  id: 1
  name: Meesh
  full_name: Meesh
  artist_type: PrOn Artist
  mood: optimistic
  profile_html: "\n<b>Full
    Name:</b> Meesh<br>\n<b>Artist
    Type:</b> PrOn Artist<br>\n<b>Registered
    since:</b> Dec 11th, 2005 11:28<br>\n<b>Current
    mood:</b> optimistic<br>\n<b>Artist
    Profile:</b><br>\n<div
    class=\"bbcode bbcode_center\">Male | 28 | Housecat | Single | Straight</div>\n<br>\r\n<br>\r\n<a
    href=\"/user/patreon\" class=\"iconusername\"><img src=\"//a.facdn.net/20170212/patreon.gif\"
    align=\"middle\" title=\"patreon\" alt=\"patreon\"></a> I have a Patreon - Early
    access for comics starts at $6/month! <a href=\"http://www.patreon.com/meesh\" title=\"http://www.patreon.com/meesh\"
    class=\"auto_link\">www.patreon.com/meesh</a><br>\r\n<br>\r\n<span class=\"bbcode\"
    style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">BUSINESS INQUIRIES:
    MEESHYMEESH@GMAIL.COM</strong></span><br>\r\n<a href=\"/user/oregonfurs\" class=\"iconusername\"><img
    src=\"//a.facdn.net/20170212/oregonfurs.gif\" align=\"middle\" title=\"oregonfurs\"
    alt=\"oregonfurs\">oregonfurs</a><br>\r\n<strong class=\"bbcode bbcode_b\">MY
    STORE:</strong> <a href=\"http://www.meesh.pandachan.org\" title=\"http://www.meesh.pandachan.org\"
    class=\"auto_link\">www.meesh.pandachan.org</a><br>\r\n<br>\r\n<span class=\"bbcode\"
    style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">TUMBLR:</strong></span><br>\r\n<a
    href=\"http://www.meeshmeat.tumblr.com\" title=\"http://www.meeshmeat.tumblr.com\"
    class=\"auto_link\">www.meeshmeat.tumblr.com</a><br>\r\n<br>\r\n<span class=\"bbcode\"
    style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">COMMISSIONS:</strong></span><strong
    class=\"bbcode bbcode_b\">CLOSED.</strong> Email me at <a class=\"auto_link email\"
    href=\"mailto:meeshymeesh@gmail.com\">meeshymeesh[at]gmail.com</a> to inquire about
    commissions. Current turnaround time is approx one month.<br>\r\n<strong class=\"bbcode
    bbcode_b\">Tier 1:</strong> <a href=\"https://www.furaffinity.net/view/17970865/\"
    title=\"https://www.furaffinity.net/view/17970865/\" class=\"auto_link\">https://www.furaffinity.net/view/17970865/</a>
    $600/character<br>\r\n<strong class=\"bbcode bbcode_b\">Tier 2:</strong> <a href=\"https://www.furaffinity.net/view/18356127/\"
    title=\"https://www.furaffinity.net/view/18356127/\" class=\"auto_link\">https://www.furaffinity.net/view/18356127/</a>
    $200/character<br>\r\n<strong class=\"bbcode bbcode_b\">Sketch:</strong> <a href=\"https://www.furaffinity.net/view/18329853/\"
    title=\"https://www.furaffinity.net/view/18329853/\" class=\"auto_link\">https://www.furaffinity.net/view/18329853/</a>
    $80/character<br>\r\n<br>\r\nMy gallery is a good example of what I will and will
    not draw. <br>\r\n<br>\r\n<span class=\"bbcode\" style=\"color: #d92464;\"><strong
    class=\"bbcode bbcode_b\">Terms of Service for commissions:</strong></span> I will
    always have control over what I will and will not draw. I will not work towards
    any illustration that is against the law, or is intended to be slanderous or hateful
    towards any other individual or business. I have the right to change an estimated
    date of completion depending on the situation. I have the right to refuse service
    to anyone. Money must be paid in advance before work can begin on a project. You
    have the right to ask for changes both in the finished product, and mid-process.
    By hiring me, you understand and agree to these terms.<br>\r\n<br>\r\n<span class=\"bbcode\"
    style=\"color: #d92464;\"><strong class=\"bbcode bbcode_b\">Refund policy:</strong></span>
    Refunds will be granted in full only if work on a project has not yet begun. If
    necessary, proof can be provided of the work done."
  num_pageviews: 1715730
  num_submissions: 1225
  num_comments_recieved: 40123
  num_comments_given: 17386
  num_journals: 13
  num_favorites: 893478
  registered_at: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2005-12-11 11:28:00.000000000 Z
    zone: &1 !ruby/object:ActiveSupport::TimeZone
      name: Etc/UTC
    time: 2005-12-11 11:28:00.000000000 Z
  created_at: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2016-02-28 00:26:18.692514000 Z
    zone: *1
    time: 2016-02-28 00:26:18.692514000 Z
  updated_at: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2018-10-01 11:16:55.673308000 Z
    zone: *1
    time: 2018-10-01 11:16:55.673308000 Z
  url_name: meesh
  scanned_gallery: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2018-10-01 11:16:55.671846000 Z
    zone: *1
    time: 2018-10-01 11:16:55.671846000 Z
  scanned_page: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2017-02-12 23:13:56.517709000 Z
    zone: *1
    time: 2017-02-12 23:13:56.517709000 Z
  user_page_logfile: user/meesh/1480115630-meesh.html
  user_page_log_entry_id: 12175910
34 test/fixtures/legacy/http_log_entries.yml vendored
@@ -1,34 +0,0 @@
1766_id:
  id: 1766
  scheme: https
  host: d.facdn.net
  path: "/art/drakeergenthal/stories/1480188610/1480188610.drakeergenthal_gow_fanfic.txt"
  query:
  verb: get
  status: 200
  response_time: 192
  content_type: text/plain
  response_size: 6936
  parent_log_entry_id:
  blob_entry_id: 8663902
  gzipped: false
  requested_at: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2016-11-26 22:09:59.699304000 Z
    zone: &1 !ruby/object:ActiveSupport::TimeZone
      name: Etc/UTC
    time: 2016-11-26 22:09:59.699304000 Z
  created_at: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2016-11-26 22:09:59.753081000 Z
    zone: *1
    time: 2016-11-26 22:09:59.753081000 Z
  updated_at: !ruby/object:ActiveSupport::TimeWithZone
    utc: 2018-04-16 03:35:12.524953000 Z
    zone: *1
    time: 2018-04-16 03:35:12.524953000 Z
  resp_body: 41F8DAF7772D11F80AFE56B742087A2D1AB372E08B69E1284BE4FEFEC2AD0C7F
  imported_from_file:
  req_headers_id: 3
  res_headers_id: 575
  diff_type: native
  native_blob_entry_sha256: !binary |-
    Qfja93ctEfgK/la3Qgh6LRqzcuCLaeEoS+T+/sKtDH8=
@@ -1,19 +0,0 @@
Locust vs. Nature

Not much are known for what has happened to the rest of the Locust, during the war. Some even faced a worse fate than dying from any weapon or from the Imulsion Countermeasure weapon. The Lambent are the actual threat of all, if only the Locust would see it that way and join forces with the humans (aka ground walkers). Let's just say what would happen to four unlucky Locust characters, while around the timeline of Gears of War 3.

We have a Hunter Elite, on a mission to search for other members of the Locust that were apparently lost from the Jacinto incident. Let's call him Chuck for this one. The Locust don't have much for names around the low ranks. The only names that were mentioned are RAAM, Skorge, and Karn. As for Chuck, he has to venture above ground for his search. But he wasn't alone, as he brings around a miner for connection to the lost ones. We're gonna call the miner as Jules. Chuck and Jules are wearing their proper Locust clothes to notify their ranks. As much as they hate the humans living above ground, the two have to focus on searching for the lost Locust. They stayed hidden from the inhabitants from certain cities.

Chuck and Jules did come across some Lambent along the way, which are a nuisance to everyone. Chuck has his Hammerburst with him to take some out, but Jules doesn't have any weapon on him. But he did grab a Lancer from a dead COG soldier, and uses it against the Lambent. Chuck was even surprised about Jules' capabilities. As much as the Lambent are their enemy from the threat, they decide to hide from the humans and COG soldiers who arrived to provide reinforcements. Jules keeps the Lancer with him, as it was his only thing that protected him.

They soon arrive at a place called the Deadlands, and there are no signs of Lambent anywhere. Let alone any Locust survivors to find. Jules starts to have a feeling that they being watched. Chuck notices a trap door around the middle of the arena. For good measures, he shoots the door open and it only shows some ground underneath. Like a pitfall. While they avoided a sudden trap, they soon get jumped by their own Locust militia. Only their outfits are much different compared to their own, when the regular Locust are well organized too. These Locust are acting a bit feral of their way, and doesn't follow the ways of the Locust Queen. They are savages compared to Chuck and Jules, and the two are also surrounded. One of the savages is a grenadier, who soon falls into the trap while rushing towards Chuck and Jules. We'll call him Zeke for that. Despite putting up a good fight, the two were subdued by the Savage Locust.

Chuck and Jules were disarmed and the savages prevents them from moving, as their leader shows up, who happens to be a Savage Kantus. We'll call him Travis for no apparent reason. Anyway, Travis was not amused about the outsider Locust putting up resistance against his own community. As punishment, the two are thrown down where the trap door was to suffer endlessly. The savage drone threw the two in, Travis decides to join them to make sure the outsiders will suffer to no end. The savages waved goodbye to Travis, as he's gonna be joining them for eternity. As for Chuck and Jules, they've reached the bottom of the fall, just to see Zeke with them. They plan on getting out of this, only there's another reason why these characters are doomed here. The pitfall is slowly sucking them under. Zeke is already up to his knees in the sand, so he can't pull himself out. Jules then notices that he can't lift up his feet, as his ankles are already under. Chuck tries to climb out, as he quick to act from the situation. While climbing, Travis falls on top of Chuck and they plummet down to Jules and Zeke. Chuck opens his eyes, only to find himself up to his thighs within the sand. Travis explains to them as this is quicksand, and it's impossible to escape. He's willing to enjoy every minute of it, while the others struggle to get free. Travis decides to take off some of his armor while he's sinking with the others, to reveal his red undergarment to them.

Jules then yells for help, as the quicksand starts sucking up his thighs. It doesn't help the dismay for Chuck or Zeke, as the living ground soon touches their groins. Travis soon starts touching his crotch while laying on his knees. The quicksand soon reaches Jules' crotch, and now three of them can barely move their legs. It will soon include Travis as he's ready to unload his bladder. The sinking Locust were taken by surprise, as the kantus would do something so very, very different!!! Chuck, Jules and Zeke then realizes that they have to pee! The two tried to resist the temptation, but Zeke points out that it's pointless to fight. The savage grenadier soon let's it go, as his crotch is already suck under by the sands. The piss was hard to ignore, and soon the buildup within Chuck's and Jules' became too great. They unload their fill, as the quicksand begins to suck up their stomaches. Travis was happy to watch his crotch sinking within the hungry sands.

Chuck, and Jules were so deep in, the hunter even tried to pull himself out. It didn't work, and now he can't even pull up his arms from the sands. Jules uses up his remaining arm that was still up the surface, to activate his full helmet. It didn't help, now that three of them are up to their shoulders and continues to sink. Travis was thrilled to see their despair, even if the quicksand is reaching up to his chest. Travis explains to them, that it's not the end for them. Chuck believes that it is, as the sands begins to start sucking up his head down. The kantus soon says they will not be forgotten, only going to sleep for a long time. Zeke was soon sucked under, and soon claims Chuck too. Jules did give out a question, saying if they'll be alright. Travis says yes, before the miner was sucked under.

As for Travis now up to his armpits within his quicksand, he begins playing around until it was time to enchant their long sleep within the sands. Before he was submerged along with the others, Travis conjures his spell. They disappeared from the earth, but somehow they will survive everything.

For over twenty-five years, Travis will revive himself as well as the others since the quicksand has dried up. Chuck, Jules and Zeke wake up to find themselves only up to their armpits in just dirt. They dug themselves out, and notices Travis up to his neck within the solid sands. Since he preserved them for all this time, the remaining Locust dug him up and climbed out. Upon getting out, they soon noticed that everything has changed! Travis soon wakes up, and was willing to help find a way to survive their new world. They are also unaware that the essence of the Queen still exist somewhere. Perhaps they are lucky than they thought.
Binary file not shown.
Before Width: | Height: | Size: 306 KiB |
@@ -1,9 +0,0 @@
class Domain::Fa::PostTest < ActiveSupport::TestCase
  # test "remove buggy prefixes" do
  #   TODO - implement this
  #   Some posts have a title prefixed with "Font size adjustment: smallerlarger"
  #   which should be removed
  #   Legacy::Fa::Post.where("title like ?", "Font size adjustment: smallerlarger%").count
  #   => 7056
  # end
end
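The commented-out test above records a cleanup that was never implemented: roughly 7,056 legacy posts carry titles prefixed with "Font size adjustment: smallerlarger". A minimal sketch of how that cleanup might have looked, assuming the Legacy::Fa::Post model and its string title column are still loadable; the constant and method names below are illustrative only and were not part of the removed code:

# Hypothetical one-off cleanup for the buggy title prefix noted in the removed test.
BUGGY_TITLE_PREFIX = "Font size adjustment: smallerlarger"

def strip_buggy_title_prefixes!
  Legacy::Fa::Post
    .where("title LIKE ?", "#{BUGGY_TITLE_PREFIX}%")
    .find_each do |post|
      # delete_prefix leaves unaffected titles untouched, so re-running is safe
      post.update!(title: post.title.delete_prefix(BUGGY_TITLE_PREFIX).lstrip)
    end
end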
@@ -1 +0,0 @@