do not normalize urls
This commit is contained in:
@@ -65,10 +65,12 @@ class Scraper::CurlHttpPerformer
|
|||||||
curl = get_curl
|
curl = get_curl
|
||||||
start_at = Time.now
|
start_at = Time.now
|
||||||
|
|
||||||
curl.url = request.uri.normalize.to_s
|
# TODO - normalizing the URL breaks URLs with utf-8 characters
|
||||||
|
# curl.url = request.uri.normalize.to_s
|
||||||
|
curl.url = request.uri.to_s
|
||||||
curl.follow_location = request.follow_redirects
|
curl.follow_location = request.follow_redirects
|
||||||
request.request_headers.each { |key, value| curl.headers[key.to_s] = value }
|
request.request_headers.each { |key, value| curl.headers[key.to_s] = value }
|
||||||
curl.headers["User-Agent"] = "FurryArchiver/1.0 / dhelta"
|
curl.headers["User-Agent"] = "FurryArchiver/1.0 / telegram: @DeltaNoises"
|
||||||
case request.http_method
|
case request.http_method
|
||||||
when Method::Get
|
when Method::Get
|
||||||
curl.get
|
curl.get
|
||||||
|
|||||||
44
rake/posts.rake
Normal file
44
rake/posts.rake
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# typed: true
|
||||||
|
# frozen_string_literal: true
|
||||||
|
# Sorbet: declare that top-level `self` in this rake file is both an Object
# and a Rake::DSL, so the `namespace` / `desc` / `task` calls below type-check.
T.bind(self, T.all(Rake::DSL, Object))
|
||||||
|
namespace :posts do
|
||||||
|
# Prints one line per post file that:
#   * is in the "terminal_error" state,
#   * has a URL containing at least one non-ASCII character (SQL regex filter),
#   * has a log entry whose status code is 404,
#   * has a URL whose string form differs from its Addressable-normalized form,
#   * belongs to a post where no file is already OK and no file points at
#     furarchiver.net.
#
# Output format (consumed by the redownload task, which reads the second field):
#   <post param> ::::: <post file param> ::::: <url>
desc "Find 404 post files with mismatched normalized URLs"
task find_404_post_files_with_mismatched_normalized_urls: :environment do
  candidates =
    Domain::PostFile
      .where(state: "terminal_error")
      .where("url_str ~ '[^\\x00-\\x7F]'")
      .includes(:log_entry, post: :files)

  candidates.find_each(batch_size: 32, order: :desc) do |post_file|
    # A missing log entry yields nil here, which is also "not a 404".
    next unless post_file.log_entry&.status_code == 404

    url = post_file.url_str
    next if url.blank?

    # Only URLs that normalization would have rewritten are of interest.
    uri = Addressable::URI.parse(url)
    next if uri.to_s == uri.normalize.to_s

    # The post may be absent; treat that as "no sibling files" rather than
    # crashing part-way through the scan.
    post = post_file.post
    siblings = post&.files || []
    next if siblings.any?(&:state_ok?)
    # Literal substring match: the original pattern's unescaped `.` matched
    # any character.
    next if siblings.any? { |file| file.url_str&.include?("furarchiver.net") }

    # Keep the first field non-empty so the " ::::: " separated line still
    # splits into three fields after being stripped.
    post_label = post&.to_param || "(no post)"
    puts "#{post_label} ::::: #{post_file.to_param} ::::: #{url}"
  end
end
|
||||||
|
|
||||||
|
# Re-queues and immediately re-scans the post files listed in a text file.
#
# Input: the path in ENV["file"]; each non-blank line is expected to look like
#   <post param> ::::: <post file id> ::::: <url>
# Only the second field (the post file id) is used.
#
# Raises "must provide file" when ENV["file"] is unset, and "no file id" when a
# non-blank line has no second field.
desc "Redownload 404 post files with mismatched normalized URLs"
task redownload_404_post_files_with_mismatched_normalized_urls: :environment do
  path = ENV["file"] || raise("must provide file")

  File.foreach(path, chomp: true) do |raw_line|
    line = raw_line.strip
    # Tolerate blank lines (e.g. from hand-editing the list) instead of
    # aborting the whole run.
    next if line.empty?

    _post_param, file_id, _url = line.split(" ::::: ", 3)
    raise("no file id") if file_id.blank?

    file = Domain::PostFile.find(file_id)
    # Reset the state to pending before running the scan job synchronously.
    file.state_pending!
    Domain::Fa::Job::ScanFileJob.perform_now(file:)
  end
end
|
||||||
|
end
|
||||||
Reference in New Issue
Block a user