Files
redux-scraper/app/lib/scraper/curl_http_performer.rb
2025-08-05 18:52:50 +00:00

173 lines
4.3 KiB
Ruby

# typed: strict
class Scraper::CurlHttpPerformer
  extend T::Sig

  # Performs HTTP requests directly via libcurl (curb), optionally through
  # a proxy configured via the HTTP_PROXY_URL environment variable.
  # One Curl::Easy handle is kept per thread and reset before each request.

  # HTTP methods this performer supports.
  class Method < T::Enum
    enums do
      Get = new("GET")
      Post = new("POST")
    end
  end

  # Immutable result of a performed request.
  class Response < T::Struct
    include T::Struct::ActsAsComparable
    const :uri, Addressable::URI
    const :response_code, Integer
    const :response_headers, T::Hash[String, String]
    const :response_time_ms, Integer
    const :body, String
    const :performed_by, String
  end

  # Immutable description of a request to perform.
  class Request < T::Struct
    include T::Struct::ActsAsComparable
    const :http_method, Method
    const :uri, Addressable::URI
    const :request_headers, T::Hash[String, String]
    const :body, T.nilable(String)
    const :follow_redirects, T::Boolean, default: true
  end

  # Human-readable identifier for this performer, used in log output.
  sig { returns(String) }
  def name
    "direct"
  end

  # Performs +request+ and returns a Response. Final so subclasses cannot
  # bypass the shared implementation.
  sig(:final) { params(request: Request).returns(Response) }
  def do_request(request)
    do_request_impl(request)
  end

  private

  # Maps the configured proxy (HTTP_PROXY_URL) to a stable label recorded
  # on each Response. Raises on an unrecognized proxy URL so a
  # misconfiguration fails loudly instead of being mislabeled.
  sig { returns(String) }
  def performed_by
    proxy_url = ENV["HTTP_PROXY_URL"]
    case proxy_url
    when nil
      "direct"
    when /airvpn-netherlands-proxy:(\d+)/
      "airvpn-1-netherlands"
    when /airvpn-san-jose-proxy:(\d+)/
      "airvpn-2-san-jose"
    else
      raise("Unknown proxy URL: #{proxy_url}")
    end
  end

  # Executes the request on the per-thread curl handle, logs a one-line
  # summary, and builds the Response.
  sig { params(request: Request).returns(Response) }
  def do_request_impl(request)
    curl = get_curl
    # Monotonic clock for duration measurement: wall-clock Time.now can
    # jump (NTP sync, clock changes) and skew response_time_ms.
    start_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    # TODO - normalizing the URL breaks URLs with utf-8 characters
    # curl.url = request.uri.normalize.to_s
    curl.url = request.uri.to_s
    curl.follow_location = request.follow_redirects
    request.request_headers.each { |key, value| curl.headers[key.to_s] = value }
    curl.headers["User-Agent"] = "FurryArchiver/1.0 / telegram: @DeltaNoises"
    case request.http_method
    when Method::Get
      curl.get
    when Method::Post
      curl.post_body = request.body
      curl.post
    end
    response_time_ms =
      ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_at) * 1000).to_i
    response_code = curl.response_code
    body_str = curl.body_str
    response_headers = self.class.parse_header_str(curl.header_str).to_h
    log_response(request, response_code, response_headers, body_str, response_time_ms)
    Response.new(
      uri: request.uri,
      response_code:,
      response_headers:,
      response_time_ms:,
      body: body_str,
      performed_by: performed_by,
    )
  end

  # Prints a single colorized log line summarizing a completed request:
  # pid, performer name, method, status (colored by class), body size,
  # elapsed time, optional cf-cache-status, and URL.
  sig do
    params(
      request: Request,
      response_code: Integer,
      response_headers: T::Hash[String, String],
      body_str: String,
      response_time_ms: Integer,
    ).void
  end
  def log_response(request, response_code, response_headers, body_str, response_time_ms)
    rc_color =
      case response_code / 100
      when 2
        [:green]
      when 3
        [:cyan]
      when 4
        [:red]
      when 5
        %i[red bold]
      else
        [:white] # 1xx or anything unexpected
      end
    rc_string = rc_color.reduce(response_code.to_s) { |str, color| str.send(color) }
    cf_cache_status =
      if response_headers["cf-cache-status"]
        "(#{response_headers["cf-cache-status"]&.light_blue})"
      end
    puts [
      "[#{Process.pid.to_s.black.bold} / #{self.name.to_s.black.bold}]",
      "[#{request.http_method.serialize} #{rc_string}",
      "#{self.class.humansize(body_str.size).to_s.bold}",
      (response_time_ms.to_s + "ms").light_blue.bold + "]",
      cf_cache_status,
      "#{request.uri.to_s.bold}",
    ].compact.join(" ")
  end

  # Returns the calling thread's Curl::Easy handle, creating it on first
  # use. A handle is not thread-safe, so each thread gets its own;
  # timeouts, proxy, and headers are reset on every call so state never
  # leaks between requests.
  sig { returns(Curl::Easy) }
  def get_curl
    t = Thread.current
    unless t.thread_variable?(:curl)
      t.thread_variable_set(:curl, Curl::Easy.new)
    end
    curl = t.thread_variable_get(:curl)
    curl.timeout = 30
    curl.connect_timeout = 2
    curl.proxy_url = ENV["HTTP_PROXY_URL"]
    curl.headers = {}
    curl
  end

  # Formats a byte count as a short human-readable string, e.g. "4.3 KiB".
  # Non-positive sizes return "0.0 B" (previously a negative size raised
  # Math::DomainError).
  sig { params(size: Integer).returns(String) }
  def self.humansize(size)
    # NOTE: fixed typo "Pib" -> "PiB" so petabyte-range sizes print correctly.
    units = %w[B KiB MiB GiB TiB PiB EiB ZiB]
    return "0.0 B" if size <= 0
    exp = (Math.log(size) / Math.log(1024)).to_i
    # Bump to the next unit when rounding to one decimal would print >= 1024.0.
    exp += 1 if size.to_f / 1024**exp >= 1024 - 0.05
    exp = units.size - 1 if exp > units.size - 1
    unit = units[exp]
    if unit == "B"
      "%d B" % [size]
    else
      "%.1f %s" % [size.to_f / 1024**exp, unit]
    end
  end

  # Parses curl's raw header string ("Key: Value\r\n...") into [key, value]
  # pairs. The first line (the status line, e.g. "HTTP/1.1 200 OK") and any
  # line without a ": " separator are skipped — this also drops the extra
  # status lines and blank separators curl emits when following redirects.
  sig { params(header_str: String).returns(T::Array[[String, String]]) }
  def self.parse_header_str(header_str)
    header_str.split("\r\n").each_with_index.filter_map do |line, line_idx|
      next if line_idx == 0
      sep = line.index(": ")
      next unless sep
      T.cast([line[0...sep], line[sep + 2..-1]], [String, String])
    end
  end
end