# typed: strict

# Performs HTTP requests with a per-thread Curl::Easy handle and prints a
# one-line summary of every response to stdout.
class Scraper::CurlHttpPerformer
  extend T::Sig

  # User-Agent sent with every request. It is assigned after the caller's
  # headers, so it replaces any "User-Agent" given in Request#request_headers.
  USER_AGENT =
    T.let("FurryArchiver/1.0 / telegram: @DeltaNoises".freeze, String)

  # Binary size units, indexed by power of 1024.
  SIZE_UNITS =
    T.let(%w[B KiB MiB GiB TiB PiB EiB ZiB].freeze, T::Array[String])

  # HTTP verbs this performer knows how to issue.
  class Method < T::Enum
    enums do
      Get = new("GET")
      Post = new("POST")
    end
  end

  # Result of a performed request.
  class Response < T::Struct
    include T::Struct::ActsAsComparable

    const :uri, Addressable::URI
    const :response_code, Integer
    const :response_headers, T::Hash[String, String]
    const :response_time_ms, Integer
    const :body, String
    const :performed_by, String
  end

  # Description of a request to perform.
  class Request < T::Struct
    include T::Struct::ActsAsComparable

    const :http_method, Method
    const :uri, Addressable::URI
    const :request_headers, T::Hash[String, String]
    const :body, T.nilable(String)
    const :follow_redirects, T::Boolean, default: true
  end

  # Formats a byte count as a human readable string using binary units,
  # e.g. 1536 => "1.5 KiB". Counts below 1 KiB are printed as whole bytes
  # ("512 B"); zero is printed as "0.0 B".
  #
  # Raises Math::DomainError for a negative size (from Math.log).
  sig { params(size: Integer).returns(String) }
  def self.humansize(size)
    return "0.0 B" if size.zero?

    exp = (Math.log(size) / Math.log(1024)).to_i
    # Values that would be rounded to "1024.0 <unit>" by the %.1f below are
    # promoted to the next unit so they read "1.0 <next unit>" instead. This
    # also absorbs floating point error in the logarithm above.
    exp += 1 if size.to_f / 1024**exp >= 1024 - 0.05
    exp = [exp, SIZE_UNITS.size - 1].min

    unit = SIZE_UNITS[exp]
    if unit == "B"
      "%d B" % [size]
    else
      "%.1f %s" % [size.to_f / 1024**exp, unit]
    end
  end

  # Splits a raw response header string into [name, value] pairs.
  #
  # The first line (the status line) is skipped, as is any line without a
  # colon (blank separators, status lines of later responses in a redirect
  # chain). Whitespace around names and values is stripped, so both
  # "Name: value" and "Name:value" are accepted. Names keep their original
  # casing.
  sig { params(header_str: String).returns(T::Array[[String, String]]) }
  def self.parse_header_str(header_str)
    header_str
      .split(/\r?\n/)
      .drop(1)
      .filter_map do |line|
        key, separator, value = line.partition(":")
        next if separator.empty?

        [key.strip, value.strip]
      end
  end

  # Label for this performer, used in the log line.
  sig { returns(String) }
  def name
    "direct"
  end

  # Performs +request+ and returns the resulting Response.
  sig(:final) { params(request: Request).returns(Response) }
  def do_request(request)
    do_request_impl(request)
  end

  private

  # Maps the HTTP_PROXY_URL environment variable to a label describing the
  # route requests take. Raises RuntimeError for an unrecognised proxy URL.
  sig { returns(String) }
  def performed_by
    proxy_url = ENV["HTTP_PROXY_URL"]
    case proxy_url
    when nil
      "direct"
    when /airvpn-netherlands-proxy:(\d+)/
      "airvpn-1-netherlands"
    when /airvpn-san-jose-proxy:(\d+)/
      "airvpn-2-san-jose"
    else
      raise "Unknown proxy URL: #{proxy_url}"
    end
  end

  # Executes the request on this thread's curl handle, logs a summary line
  # and wraps the outcome in a Response.
  sig { params(request: Request).returns(Response) }
  def do_request_impl(request)
    curl = get_curl
    # Monotonic clock: wall-clock time may jump (NTP, DST) mid-request.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    curl.url = request.uri.normalize.to_s
    curl.follow_location = request.follow_redirects
    request.request_headers.each do |key, value|
      curl.headers[key.to_s] = value
    end
    curl.headers["User-Agent"] = USER_AGENT

    http_method = request.http_method
    case http_method
    when Method::Get
      curl.get
    when Method::Post
      curl.post_body = request.body
      curl.post
    else
      T.absurd(http_method)
    end

    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    response_time_ms = (elapsed * 1000).to_i
    response_code = curl.response_code
    body_str = curl.body_str
    response_headers = self.class.parse_header_str(curl.header_str).to_h

    log_response(
      request,
      response_code,
      body_str,
      response_time_ms,
      response_headers,
    )

    Response.new(
      uri: request.uri,
      response_code:,
      response_headers:,
      response_time_ms:,
      body: body_str,
      performed_by: performed_by,
    )
  end

  # Prints one summary line for a finished request: pid, performer name,
  # HTTP method, status code, body size, response time, the cf-cache-status
  # header value (when present) and the URL.
  sig do
    params(
      request: Request,
      response_code: Integer,
      body_str: String,
      response_time_ms: Integer,
      response_headers: T::Hash[String, String],
    ).void
  end
  def log_response(
    request,
    response_code,
    body_str,
    response_time_ms,
    response_headers
  )
    # String styling methods to apply to the status code, by status class.
    styles =
      case response_code / 100
      when 2
        %i[green]
      when 3
        %i[cyan]
      when 4
        %i[red]
      when 5
        %i[red bold]
      else
        %i[white]
      end
    rc_string =
      styles.reduce(response_code.to_s) do |text, style|
        text.public_send(style)
      end

    # Header names are case-insensitive, and parse_header_str keeps the
    # casing sent by the server, so match the name without regard to case.
    cache_status =
      response_headers
        .find { |key, _value| key.casecmp?("cf-cache-status") }
        &.last
    cf_cache_status = cache_status && "(#{cache_status.light_blue})"

    puts [
           "[#{Process.pid.to_s.black.bold} / #{name.black.bold}]",
           "[#{request.http_method.serialize} #{rc_string}",
           # bytesize, not size: the units are bytes, not characters.
           self.class.humansize(body_str.bytesize).bold,
           "#{response_time_ms}ms".light_blue.bold + "]",
           cf_cache_status,
           request.uri.to_s.bold,
         ].compact.join(" ")
  end

  # Returns the Curl::Easy handle cached on the current thread, creating it
  # on first use. Timeouts, proxy and headers are reset on every call so no
  # headers set for a previous request are carried over.
  sig { returns(Curl::Easy) }
  def get_curl
    thread = Thread.current
    unless thread.thread_variable?(:curl)
      thread.thread_variable_set(:curl, Curl::Easy.new)
    end

    curl = thread.thread_variable_get(:curl)
    curl.timeout = 30
    curl.connect_timeout = 2
    curl.proxy_url = ENV["HTTP_PROXY_URL"]
    curl.headers = {}
    curl
  end
end