do not normalize urls
This commit is contained in:
@@ -65,10 +65,12 @@ class Scraper::CurlHttpPerformer
|
||||
curl = get_curl
|
||||
start_at = Time.now
|
||||
|
||||
curl.url = request.uri.normalize.to_s
|
||||
# TODO - normalizing the URL breaks URLs with utf-8 characters
|
||||
# curl.url = request.uri.normalize.to_s
|
||||
curl.url = request.uri.to_s
|
||||
curl.follow_location = request.follow_redirects
|
||||
request.request_headers.each { |key, value| curl.headers[key.to_s] = value }
|
||||
curl.headers["User-Agent"] = "FurryArchiver/1.0 / dhelta"
|
||||
curl.headers["User-Agent"] = "FurryArchiver/1.0 / telegram: @DeltaNoises"
|
||||
case request.http_method
|
||||
when Method::Get
|
||||
curl.get
|
||||
|
||||
44
rake/posts.rake
Normal file
44
rake/posts.rake
Normal file
@@ -0,0 +1,44 @@
|
||||
# typed: true
|
||||
# frozen_string_literal: true
|
||||
T.bind(self, T.all(Rake::DSL, Object))
|
||||
namespace :posts do
|
||||
desc "Find 404 post files with mismatched normalized URLs"
# Prints one line per post file whose download may have failed only because
# its URL was normalized before being fetched. Output format (consumed by the
# redownload task):
#
#   <post param> ::::: <post file param> ::::: <url_str>
task find_404_post_files_with_mismatched_normalized_urls: :environment do
  # Candidates: files in the "terminal_error" state whose URL contains at
  # least one character outside the \x00-\x7F (ASCII) range. `~` is the SQL
  # regex-match operator (PostgreSQL syntax).
  query =
    Domain::PostFile
      .where(state: "terminal_error")
      .where("url_str ~ '[^\\x00-\\x7F]'")
      .includes(:log_entry, post: :files)

  query.find_each(batch_size: 32, order: :desc) do |post_file|
    le = post_file.log_entry
    next if le.nil?
    next if le.status_code != 404
    next if post_file.url_str.blank?

    # Only URLs that normalization actually changes are of interest.
    uri = Addressable::URI.parse(post_file.url_str)
    next if uri.to_s == uri.normalize.to_s

    sibling_files = post_file.post&.files

    # Skip posts that already have at least one file in the ok state.
    next if sibling_files&.any? { |file| file.state_ok? }

    # Skip posts that have a file hosted on furarchiver.net. This is a literal
    # substring check: the previous `/furarchiver.net/` regex left the dot
    # unescaped, so it also matched strings such as "furarchiverXnet".
    next if sibling_files&.any? { |file| file.url_str&.include?("furarchiver.net") }

    puts "#{post_file.post.to_param} ::::: #{post_file.to_param} ::::: #{post_file.url_str}"
  end
end
|
||||
|
||||
desc "Redownload 404 post files with mismatched normalized URLs"
# Reads the file named by ENV["file"], where each line has the form
#
#   <post param> ::::: <post file id> ::::: <url>
#
# and, for every line, marks the post file as pending and runs ScanFileJob on
# it synchronously.
#
# Raises "must provide file" when ENV["file"] is unset and "no file id" when a
# non-blank line has no second field.
task redownload_404_post_files_with_mismatched_normalized_urls: :environment do
  path = ENV["file"] || raise("must provide file")

  # chomp: only the line terminator is removed. Stripping the whole line would
  # eat the leading space of the first delimiter when the first field is empty
  # (" ::::: 12 ::::: url") and shift the URL into the file-id position.
  File.readlines(path, chomp: true).each do |line|
    # Tolerate blank lines instead of aborting the run partway through.
    next if line.strip.empty?

    # Limit of 3 keeps any " ::::: " occurring inside the URL in the last field.
    _, file_id, _ = line.split(" ::::: ", 3)
    file_id = file_id&.strip
    raise("no file id") if file_id.blank?

    file = Domain::PostFile.find(file_id)
    file.state_pending!
    Domain::Fa::Job::ScanFileJob.perform_now(file:)
  end
end
|
||||
end
|
||||
Reference in New Issue
Block a user