move proxy config into yaml
@@ -3,9 +3,7 @@ class Domain::Fa::Job::FaJobBase < Scraper::JobBase
   def self.build_http_client
     @@fa_base_http_client ||= begin
-      proxy = ENV["proxy"]
-      puts "Building FA HTTP client (proxy: #{proxy})"
-      Scraper::FaHttpClient.new(proxy || Scraper::CurlHttpPerformer.new)
+      Scraper::FaHttpClient.new
     end
   end
@@ -24,10 +24,19 @@ class Domain::Twitter::Job::MediaJob < Domain::Twitter::Job::TwitterJobBase
     "#{response.log_entry.response_time_ms} ms"

     if response.status_code != 200
+      if response.status_code == 404
+        @media.count_404 += 1
+        if @media.count_404 > 2
+          @media.state = "error"
+          @media.save!
+          return
+        end
+      end
+
       fatal_error("#{response.status_code} getting media")
     end

     @media.file = response.log_entry
-    @media.save
+    @media.save!
   end
 end
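Note: the 404 branch above only marks a media record as errored after more than two 404s; the first and second 404 still fall through to fatal_error, so the job gets retried. A minimal plain-Ruby sketch of that decision (MediaStub is a hypothetical stand-in for the ActiveRecord model):

    # Hypothetical stand-in for Domain::Twitter::Media; only the fields the
    # 404 handling touches are modeled.
    MediaStub = Struct.new(:count_404, :state)

    # Mirrors the control flow in the job: give up after the third 404,
    # otherwise leave the record alone and let the job retry.
    def handle_404(media)
      media.count_404 += 1
      if media.count_404 > 2
        media.state = "error"
        :give_up
      else
        :retry
      end
    end

    media = MediaStub.new(0, "pending")
    3.times { puts handle_404(media) } # => retry, retry, give_up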
@@ -3,17 +3,17 @@ class Domain::Twitter::Job::TwitterJobBase < Scraper::JobBase
   def self.build_http_client
     @@twitter_http_client ||= begin
-      proxy = ENV["proxy"]
-      puts "Building Twitter HTTP client (proxy: #{proxy})"
-      Scraper::TwitterHttpClient.new(proxy || Scraper::CurlHttpPerformer.new)
+      Scraper::TwitterHttpClient.new
     end
   end

   def self.build_gallery_dl_client
     @@gallery_dl_client ||= begin
-      proxy = ENV["proxy"]
-      puts "Building GalleryDlClient (TODO: proxy: #{proxy})"
-      Scraper::GalleryDlClient.new("http://localhost:5000/")
+      proxy_config = Rails.application.config.x.proxy
+      if proxy_config[:gallery_dl].blank?
+        raise("no gallery_dl host defined for proxy config #{proxy_config[:name]}")
+      end
+      Scraper::GalleryDlClient.new(proxy_config[:name], proxy_config[:gallery_dl])
     end
   end
@@ -9,7 +9,7 @@ class Scraper::BaseHttpClient
   class InvalidURLError < ArgumentError; end

-  def initialize(http_performer_or_proxy)
+  def initialize(http_performer = nil)
     cookies || raise("Subclass must define non-nil #cookies")
     allowed_domains || raise("Subclass must define non-nil #allowed_domains")
@@ -21,11 +21,17 @@ class Scraper::BaseHttpClient
       hash[key] = LegacyImport::AdaptiveCache.new(@max_cache_size, 1.0, 0.1)
     end

-    if http_performer_or_proxy.is_a?(String)
-      @http_performer = Scraper::ProxyHttpPerformer.new(http_performer_or_proxy)
-    else
-      @http_performer = http_performer_or_proxy
+    @http_performer = http_performer || begin
+      proxy_config = Rails.application.config.x.proxy
+      if proxy_config[:http]
+        logger.info("build #{proxy_config[:name]} - #{"Proxy".green} - #{proxy_config[:http]}")
+        Scraper::ProxyHttpPerformer.new(proxy_config[:name], proxy_config[:http])
+      else
+        logger.info("build #{proxy_config[:name]} - #{"Direct".green} - (localhost implicit)")
+        Scraper::CurlHttpPerformer.new
+      end
     end

     @domain_last_requested_at = {}
     logger.level = :info
   end
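Note: the rewritten initialize prefers an explicitly injected performer and otherwise derives one from the YAML-driven proxy config: an entry with an :http URL gets a ProxyHttpPerformer, a null :http means direct curl. A rough, self-contained sketch of just that selection, with a plain hash standing in for Rails.application.config.x.proxy and symbols standing in for the performer classes (assumptions, not the real wiring):

    # Simplified stand-in for the performer selection in BaseHttpClient#initialize.
    # proxy_config mimics one entry from config/proxies.yml plus the injected :name.
    def pick_performer(http_performer, proxy_config)
      http_performer || begin
        if proxy_config[:http]
          [:proxy_performer, proxy_config[:name], proxy_config[:http]]
        else
          [:curl_performer]
        end
      end
    end

    p pick_performer(nil, { name: "direct", http: nil })
    # => [:curl_performer]
    p pick_performer(nil, { name: "proxy-1", http: "http://proxy-1.local:9292" })
    # => [:proxy_performer, "proxy-1", "http://proxy-1.local:9292"]
    p pick_performer(:injected, { name: "direct", http: nil })
    # => :injected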
@@ -103,8 +109,16 @@ class Scraper::BaseHttpClient
     @domain_last_requested_at[uri.host] = requested_at + (response_time_ms.to_f / 1000)

     content_type = response_headers["Content-Type"] ||
-      response_headers["content-type"] ||
-      raise("no content type provided: #{response_headers}")
+      response_headers["content-type"]
+
+    if content_type.nil?
+      if response_code == 404
+        # fudge it
+        content_type = "plain/fourohfour"
+      else
+        raise("no content type provided: #{response_code} / #{response_headers}")
+      end
+    end

     cache_key = "#{uri.host}|#{content_type}"
     blob_entry_cache = @blob_entry_cache[cache_key]
@@ -5,7 +5,7 @@ class Scraper::FaHttpClient < Scraper::BaseHttpClient
     "ipinfo.io",
   ]

-  def initialize(http_performer_or_proxy)
+  def initialize(http_performer_or_proxy = nil)
     if http_performer_or_proxy.is_a?(String)
       @cookies = Rails.application.config.x.cookies.fa[http_performer_or_proxy]
     else
@@ -29,7 +29,11 @@ class Scraper::GalleryDlClient
     :width,
   )

-  def initialize(host)
+  def initialize(name, host)
+    name || raise("no name provided")
+    host || raise("no host provided")
+    logger.info("build #{name.green.bold} - #{host.green}")
+    @performed_by = name
     @client = Ripcord::Client.new(host)
     @max_cache_size = 8
     @blob_entry_cache = Hash.new do |hash, key|
@@ -38,13 +42,16 @@ class Scraper::GalleryDlClient
   end

   def start_twitter_user(username, caused_by_entry: nil)
-    rpc = @client.call("start_user", ["https://twitter.com/#{username}/tweets"])
+    @token = SecureRandom.uuid
+    rpc = @client.call("start_user", [@token, "https://twitter.com/#{username}/tweets"])
     raise unless rpc.successful?
     decode_message(rpc.result, caused_by_entry)
+  rescue
+    binding.pry
   end

   def next_message(caused_by_entry: nil)
-    rpc = @client.call("next_message", [])
+    rpc = @client.call("next_message", [@token])
     raise unless rpc.successful?
     decode_message(rpc.result, caused_by_entry)
   end
@@ -52,6 +59,9 @@ class Scraper::GalleryDlClient
   private

   def decode_message(response, caused_by_entry)
+    token = response[:token]
+    raise("token mismatch: #{token} != #{@token}") if token != @token
+
     case response[:event]
     when "start" then StartEvent.new(response[:url], response[:extractor])
     when "finish" then FinishEvent.new(nil)
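Note: the token introduced here acts as a per-session correlation id. start_twitter_user generates a UUID, hands it to the gallery-dl RPC service, and decode_message rejects any response whose echoed token does not match, so messages from a stale or foreign session cannot be mixed in. A small isolated sketch of that check (fake response hashes, no Ripcord involved):

    require "securerandom"

    # Minimal illustration of the token round-trip check used by decode_message.
    class TokenCheck
      def start
        @token = SecureRandom.uuid
      end

      # Raises when a response belongs to a different session.
      def verify!(response)
        token = response[:token]
        raise("token mismatch: #{token} != #{@token}") if token != @token
        response[:event]
      end
    end

    session = TokenCheck.new
    token = session.start
    puts session.verify!(token: token, event: "start")   # => start

    begin
      session.verify!(token: "stale", event: "finish")
    rescue RuntimeError => e
      puts e.message # => token mismatch: stale != <uuid>
    end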
@@ -131,7 +141,7 @@ class Scraper::GalleryDlClient
       response_time_ms: http_event.response_time_ms,
       requested_at: http_event.requested_at,
       caused_by_entry: caused_by_entry,
-      performed_by: "direct-gdl",
+      performed_by: @performed_by,
     })

     log_entry.save!
@@ -1,10 +1,11 @@
 require "base64"

 class Scraper::ProxyHttpPerformer < Scraper::HttpPerformer
-  def initialize(proxy_name)
-    @name = proxy_name
-    proxy_url = Rails.application.config.x.proxies[proxy_name]
-    @client = Ripcord::Client.new(proxy_url)
+  def initialize(name, host)
+    name || raise("no name provided")
+    host || raise("no host provided")
+    @name = name
+    @client = Ripcord::Client.new(host)
   end

   def name
@@ -4,15 +4,6 @@ class Scraper::TwitterHttpClient < Scraper::BaseHttpClient
     "ipinfo.io",
   ]

-  def initialize(http_performer_or_proxy)
-    # if http_performer_or_proxy.is_a?(String)
-    #   @cookies = Rails.application.config.x.cookies.fa[http_performer_or_proxy]
-    # else
-    #   @cookies = Rails.application.config.x.cookies.fa["direct"]
-    # end
-    super(http_performer_or_proxy)
-  end
-
   def cookies
     []
   end
@@ -10,4 +10,12 @@ class Domain::Twitter::Media < ReduxApplicationRecord
   belongs_to :tweet, class_name: "Domain::Twitter::Tweet"
   belongs_to :file, class_name: "HttpLogEntry", optional: true
+
+  def count_404
+    self.state_detail["404_count"] ||= 0
+  end
+
+  def count_404=(num)
+    self.state_detail["404_count"] = num
+  end
 end
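Note: count_404 is kept inside the state_detail hash (presumably a serialized or JSON attribute) rather than in its own column, so no migration is needed. A plain-Ruby sketch of how the accessor pair behaves, with an ordinary Hash in place of the persisted attribute:

    # Stand-in mimicking the count_404 accessors on Domain::Twitter::Media;
    # state_detail is just a Hash here instead of a serialized column.
    class MediaLike
      attr_reader :state_detail

      def initialize
        @state_detail = {}
      end

      def count_404
        state_detail["404_count"] ||= 0
      end

      def count_404=(num)
        state_detail["404_count"] = num
      end
    end

    m = MediaLike.new
    m.count_404 += 1            # reads the 0 default, writes 1
    m.count_404 += 1
    p m.state_detail            # => {"404_count"=>2}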
@@ -23,11 +23,11 @@ module ReduxScraper
     # config.time_zone = "Central Time (US & Canada)"
     # config.eager_load_paths << Rails.root.join("extras")
     config.x.cookies.fa = ReduxScraper::Application.config_for("cookies/fa")
-    config.x.proxies = {
-      "proxy-1" => "http://proxy-1.local:9292",
-      "dedipath-1" => "http://10.200.0.6:9292",
-    }
-
+    config.x.proxies = ReduxScraper::Application.config_for("proxies")
     config.x.influxdb = ReduxScraper::Application.config_for("influxdb")
+
+    proxy_name = ENV["proxy"] || "direct"
+    config.x.proxy = Rails.application.config.x.proxies[proxy_name] || raise("no proxy config for '#{proxy_name}'")
+    config.x.proxy[:name] = proxy_name
   end
 end
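Note: config.x.proxy is now resolved once at boot: the proxy env var selects an entry from proxies.yml and the entry is annotated with its own name, which later feeds the log lines and performed_by fields above. A hedged sketch of that lookup using plain hashes (the real code goes through Rails.application.config_for and config.x):

    # Plain-hash approximation of the boot-time lookup in config/application.rb.
    proxies = {
      "direct"     => { http: nil, gallery_dl: "http://localhost:5001" },
      "proxy-1"    => { http: "http://proxy-1.local:9292" },
      "dedipath-1" => { http: "http://10.200.0.6:9292" },
    }

    proxy_name = ENV["proxy"] || "direct"
    proxy = proxies[proxy_name] || raise("no proxy config for '#{proxy_name}'")
    proxy[:name] = proxy_name

    p proxy
    # with no proxy env var set:
    # => {:http=>nil, :gallery_dl=>"http://localhost:5001", :name=>"direct"}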
config/proxies.yml (new file, 23 lines)
@@ -0,0 +1,23 @@
+default: &default
+  direct:
+    http: null # use direct curl instead of proxy-app
+    gallery_dl: http://localhost:5001
+
+  proxy-1:
+    http: http://proxy-1.local:9292
+    # gallery_dl: http://proxy-1.local:5001
+
+  dedipath-1:
+    http: http://10.200.0.6:9292
+    # gallery_dl: http://10.200.0.6:5001
+
+production:
+  <<: *default
+
+development:
+  <<: *default
+
+test:
+  direct:
+  proxy-1:
+  dedipath-1:
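Note: the file follows the usual config_for layout: one section per Rails environment, with a default anchor merged into production and development and bare placeholder keys under test. A quick plain-YAML check of the anchor/merge behaviour (Rails.application.config_for adds environment selection and key handling on top of this, which is not shown here):

    require "yaml"

    # Verifies that the <<: *default merge exposes the proxy entries per environment.
    doc = YAML.safe_load(<<~YML, aliases: true)
      default: &default
        direct:
          http: null
          gallery_dl: http://localhost:5001
      development:
        <<: *default
    YML

    p doc["development"]["direct"]
    # => {"http"=>nil, "gallery_dl"=>"http://localhost:5001"}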
@@ -38,7 +38,7 @@ queues = {
 }

 if ENV["proxy"] == "direct" || ENV["proxy"].nil?
-  queues[:twitter_timeline_tweets] = 1
+  queues[:twitter_timeline_tweets] = 4
 end

 queues.each do |queue, workers|