# typed: false
# Client for a remote scraping service reached through a Ripcord RPC client.
#
# A session is opened with #start_twitter_user, which generates a fresh token
# and sends it with the "start_user" RPC. Further messages are pulled with
# #next_message. Each RPC result is decoded into one of the event structs
# defined below; "http_request" events are additionally persisted as an
# HttpLogEntry, which is attached to the returned event.
class Scraper::GalleryDlClient
  include HasColorLogger

  # Total number of attempts (first try plus retries) made to persist an HTTP
  # log entry before the error is re-raised to the caller.
  MAX_LOG_SAVE_ATTEMPTS = 2
  private_constant :MAX_LOG_SAVE_ATTEMPTS

  # Decoded from a "start" message.
  StartEvent = Struct.new(:url, :extractor)

  # Decoded from a "finish" message; always built with a nil member.
  FinishEvent = Struct.new(:ignore)

  # Decoded from an "http_request" message. `log_entry` starts out nil and is
  # filled in once the matching HttpLogEntry has been saved.
  HttpRequestEvent =
    Struct.new(
      :method,
      :url,
      :kwargs,
      :requested_at,
      :request_headers,
      :response_headers,
      :response_code,
      :response_time_ms,
      :body,
      :log_entry,
      keyword_init: true,
    )

  # Decoded from a "tweet" message.
  TweetEvent = Struct.new(:tweet, :author)

  # Decoded from a "tweet_media" message.
  TweetMediaEvent =
    Struct.new(
      :tweet_id,
      :file_url,
      :filename,
      :media_num,
      :extension,
      :height,
      :width,
    )

  # @param name value recorded as `performed_by` on every saved HttpLogEntry
  # @param host [String] address handed to Ripcord::Client.new
  # @raise [RuntimeError] when either argument is nil or false
  def initialize(name, host)
    raise("no name provided") unless name
    raise("no host provided") unless host

    logger.info("build #{name.to_s.green.bold} - #{host.green}")
    @performed_by = name
    @client = Ripcord::Client.new(host)
  end

  # Starts a new session for a Twitter user's tweets page and returns the
  # first decoded message.
  #
  # @param username [String] interpolated into the twitter.com URL
  # @param caused_by_entry stored on any HttpLogEntry created while decoding
  # @return [Struct, nil] the decoded event (see #decode_message)
  # @raise [RuntimeError] when the RPC fails or the reply token differs
  def start_twitter_user(username, caused_by_entry: nil)
    # A fresh token per session; every reply is checked against it.
    @token = SecureRandom.uuid
    request_message(
      "start_user",
      [@token, "https://twitter.com/#{username}/tweets"],
      caused_by_entry,
    )
  end

  # Fetches and decodes the next message for the current token.
  #
  # @param caused_by_entry stored on any HttpLogEntry created while decoding
  # @return [Struct, nil] the decoded event (see #decode_message)
  # @raise [RuntimeError] when the RPC fails or the reply token differs
  def next_message(caused_by_entry: nil)
    request_message("next_message", [@token], caused_by_entry)
  end

  private

  # Performs one RPC call, raises on failure and decodes the result.
  def request_message(rpc_method, params, caused_by_entry)
    rpc = @client.call(rpc_method, params)
    raise rpc_error_str(rpc) unless rpc.successful?

    decode_message(rpc.result, caused_by_entry)
  end

  # Formats the error carried by a failed RPC response.
  def rpc_error_str(rpc)
    "#{rpc.error.message}: #{rpc.error.data}"
  end

  # Turns a raw RPC result into an event struct.
  #
  # @param response [Hash] symbol-keyed result with :token and :event
  # @param caused_by_entry forwarded to the HTTP log entry for
  #   "http_request" events
  # @return [Struct, nil] nil when the :event value is not recognised
  # @raise [RuntimeError] when the response token is not the session token
  def decode_message(response, caused_by_entry)
    token = response[:token]
    raise("token mismatch: #{token} != #{@token}") if token != @token

    case response[:event]
    when "start"
      StartEvent.new(response[:url], response[:extractor])
    when "finish"
      FinishEvent.new(nil)
    when "http_request"
      event = build_http_request_event(response[:http_request])
      log_and_set_http_request_event(event, caused_by_entry)
      event
    when "tweet"
      TweetEvent.new(response[:tweet], response[:author])
    when "tweet_media"
      build_tweet_media_event(response[:media])
    end
    # Any other event value falls through the case and yields nil.
  end

  # Builds an HttpRequestEvent (without a log entry) from the :http_request
  # payload of a message.
  def build_http_request_event(http_request)
    HttpRequestEvent.new(
      method: http_request[:method],
      url: http_request[:url],
      kwargs: http_request[:kwargs],
      requested_at: Time.at(http_request[:requested_at]),
      # dig tolerates a missing :kwargs entry, matching the nil check that
      # #logged_url already performs on the same field.
      request_headers: http_request.dig(:kwargs, :headers),
      response_headers: http_request[:response_headers],
      response_code: http_request[:status_code],
      # :duration is scaled by 1000 and truncated to an integer.
      response_time_ms: (http_request[:duration] * 1000).to_i,
      body: Base64.decode64(http_request[:content_base64]),
      log_entry: nil,
    )
  end

  # Builds a TweetMediaEvent from the :media payload of a message.
  def build_tweet_media_event(media)
    TweetMediaEvent.new(
      media[:tweet_id],
      media[:file_url],
      media[:filename],
      media[:media_num],
      media[:extension],
      media[:height],
      media[:width],
    )
  end

  # Persists an HttpLogEntry describing `http_event` and stores it on the
  # event's `log_entry` member.
  #
  # @param http_event [HttpRequestEvent]
  # @param caused_by_entry stored on the new log entry
  # @return the saved HttpLogEntry
  # @raise [RuntimeError] when the response carries no content type
  # @raise [StandardError] the last save error once all attempts have failed
  def log_and_set_http_request_event(http_event, caused_by_entry)
    content_type = response_content_type(http_event.response_headers)
    url = logged_url(http_event)

    attempts = 0
    begin
      attempts += 1
      # Every record is rebuilt on each attempt, so a retry starts clean.
      response_blob_entry =
        BlobEntry.find_or_build(
          content_type: content_type,
          contents: http_event.body,
        )

      log_entry =
        HttpLogEntry.new(
          {
            uri: url,
            verb: http_event.method.downcase,
            content_type: content_type,
            status_code: http_event.response_code,
            request_headers:
              HttpLogEntryHeader.find_or_build(
                headers: http_event.request_headers,
              ),
            response_headers:
              HttpLogEntryHeader.find_or_build(
                headers: http_event.response_headers,
              ),
            response: response_blob_entry,
            response_time_ms: http_event.response_time_ms,
            requested_at: http_event.requested_at,
            caused_by_entry: caused_by_entry,
            performed_by: @performed_by,
          },
        )

      log_entry.save!
    rescue StandardError
      retry if attempts < MAX_LOG_SAVE_ATTEMPTS
      raise
    end

    logger.debug "insert http log entry #{log_entry.id.to_s.bold}"
    http_event.log_entry = log_entry
  end

  # Returns the response's content type.
  #
  # The two common spellings are tried first; any other capitalisation of the
  # header name (for example "Content-type") is then matched
  # case-insensitively.
  #
  # @raise [RuntimeError] when no content type header has a value
  def response_content_type(response_headers)
    exact =
      response_headers[:"Content-Type"] || response_headers[:"content-type"]
    return exact if exact

    _name, value =
      response_headers.find do |name, val|
        val && name.to_s.casecmp?("content-type")
      end
    value || raise("no content type provided: #{response_headers}")
  end

  # Returns the URL string to record for the event. When the request kwargs
  # carry params[:variables], that JSON document is parsed and replaces the
  # URL's query string.
  def logged_url(http_event)
    uri = Addressable::URI.parse(http_event.url)
    variables = http_event.kwargs&.dig(:params, :variables)
    uri.query = JSON.parse(variables).to_query if variables
    uri.to_s
  end
end