add basic test for twitter timeline job
@@ -2,7 +2,7 @@ class Domain::Fa::Job::FaJobBase < Scraper::JobBase
   discard_on ActiveJob::DeserializationError
 
   def self.http_factory_method
-    :new_fa_http_client
+    :get_fa_http_client
   end
 
   protected
@@ -1,20 +1,8 @@
 class Domain::Twitter::Job::TwitterJobBase < Scraper::JobBase
   discard_on ActiveJob::DeserializationError
 
-  def self.build_http_client
-    @@twitter_http_client ||= begin
-      Scraper::TwitterHttpClient.new
-    end
-  end
-
-  def self.build_gallery_dl_client
-    @@gallery_dl_client ||= begin
-      proxy_config = Rails.application.config.x.proxy
-      if proxy_config[:gallery_dl].blank?
-        raise("no gallery_dl host defined for proxy config #{proxy_config[:name]}")
-      end
-      Scraper::GalleryDlClient.new(proxy_config[:name], proxy_config[:gallery_dl])
-    end
+  def self.http_factory_method
+    :get_twitter_http_client
   end
 
   def find_or_intitialize_user_from_args(args)
@@ -28,14 +16,4 @@ class Domain::Twitter::Job::TwitterJobBase < Scraper::JobBase
       fatal_error("arg 'name' or 'tw_id' is required if arg 'user' is nil")
     end
   end
-
-  protected
-
-  def http_client
-    @http_client ||= self.class.build_http_client
-  end
-
-  def gallery_dl_client
-    @gallery_dl_client ||= self.class.build_gallery_dl_client
-  end
 end
@@ -1,5 +1,5 @@
 class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::TwitterJobBase
-  GDL = Scraper::GalleryDlClient
+  GDLClient = Scraper::GalleryDlClient
   queue_as :twitter_timeline_tweets
 
   def perform(args)
@@ -35,31 +35,32 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
       event = gallery_dl_client.next_message(
         caused_by_entry: @first_twitter_caused_by || @caused_by_entry,
       )
      fatal_error("nil event from gallery_dl_client") if event.nil?
 
      case event
-      when GDL::StartEvent
-        logger.info("start tweets(#{event.extractor})")
-      when GDL::FinishEvent
-        logger.info("end tweets - #{@num_created_tweets} total")
+      when GDLClient::StartEvent
+        logger.info("start tweets (#{event.extractor.to_s.bold})")
+      when GDLClient::FinishEvent
+        logger.info("finish - #{@num_created_tweets.to_s.bold} saw total")
        break
-      when GDL::HttpRequestEvent
+      when GDLClient::HttpRequestEvent
+        @first_twitter_caused_by ||= event.log_entry
        maybe_extract_user_info(event)
-      when GDL::TweetEvent
+      when GDLClient::TweetEvent
        update_user_from_tweet(event)
        break if handle_tweet_event(event) == :break
-      when GDL::TweetMediaEvent
+      when GDLClient::TweetMediaEvent
        handle_media_event(event)
      else
-        raise("unhandled event #{event}")
+        fatal_error("unhandled event: `#{event.inspect}`")
      end
    end
 
-    logger.info(
-      "created #{@num_created_tweets.to_s.bold} / " +
-      "scanned #{@num_scanned_tweets.to_s.bold} tweets, " +
-      "created #{@num_created_medias} medias"
-    )
+    logger.info([
+      "created #{@num_created_tweets.to_s.bold} tweets",
+      "scanned #{@num_scanned_tweets.to_s.bold} tweets",
+      "created #{@num_created_medias.to_s.bold} medias",
+    ].join(", "))
 
    @user.scanned_timeline_at = Time.now
    @user.state = "ok"
@@ -87,28 +88,31 @@ class Domain::Twitter::Job::UserTimelineTweetsJob < Domain::Twitter::Job::Twitte
   private
 
   def maybe_extract_user_info(http_event)
-    return if @user&.id
     return unless http_event.response_code == 200
     return unless http_event.response_headers[:'content-type'].starts_with?("application/json")
     json = JSON.parse(http_event.body)
     return unless json
     typename = json.dig("data", "user", "result", "__typename")
     return unless typename == "User"
-    user_id = json.dig("data", "user", "result", "rest_id")
-    return unless user_id
-    user_id = user_id.to_i
+    tw_id = json.dig("data", "user", "result", "rest_id")
+    return unless tw_id
+    tw_id = tw_id.to_i
 
-    existing = Domain::Twitter::User.find_by(tw_id: user_id)
+    # tw_id is already up to date, do nothing
+    return if @user.tw_id == tw_id
+
+    existing = Domain::Twitter::User.find_by(tw_id: tw_id)
     if existing
-      logger.info "found existing user model"
+      logger.info "found existing user with tw_id #{tw_id.to_s.bold}"
       @user = existing
       @updated_user_from_tweet = false
     else
       if @user.tw_id
-        raise "tw_id mismatch" unless @user.tw_id == user_id
+        raise "tw_id mismatch" unless @user.tw_id == tw_id
       else
-        logger.info "extracted tw_id for user"
-        @user.tw_id = user_id
+        logger.info "extracted tw_id #{tw_id.to_s.bold}"
+        @user.tw_id = tw_id
       end
       @user.save!
     end
   end
@@ -11,7 +11,11 @@ class Scraper::JobBase < ApplicationJob
   end
 
   def http_client
-    @http_client ||= Scraper::HttpFactory.send(self.class.http_factory_method)
+    @http_client ||= Scraper::ClientFactory.send(self.class.http_factory_method)
   end
 
+  def gallery_dl_client
+    @gallery_dl_client ||= Scraper::ClientFactory.get_gallery_dl_client
+  end
+
   good_job_control_concurrency_with(
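
For reference, a standalone sketch of the factory-method dispatch the hunks above converge on: each job class names its factory method as a symbol, and the base class resolves it with `send`. The `*Sketch` constants and string return values are stand-ins; only the method names mirror the diff.

# Standalone sketch (not part of the commit) of the dispatch pattern above.
module ClientFactorySketch
  def self.get_fa_http_client
    "fa http client"
  end

  def self.get_twitter_http_client
    "twitter http client"
  end
end

class JobBaseSketch
  def http_client
    # memoized per job instance, resolved via the subclass's declared symbol
    @http_client ||= ClientFactorySketch.send(self.class.http_factory_method)
  end
end

class FaJobSketch < JobBaseSketch
  def self.http_factory_method
    :get_fa_http_client
  end
end

class TwitterJobSketch < JobBaseSketch
  def self.http_factory_method
    :get_twitter_http_client
  end
end

puts FaJobSketch.new.http_client      # => "fa http client"
puts TwitterJobSketch.new.http_client # => "twitter http client"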
app/lib/scraper/client_factory.rb (new file, 58 lines)
@@ -0,0 +1,58 @@
+class Scraper::ClientFactory
+  @http_clients = Concurrent::ThreadLocalVar.new() { {} }
+  @gallery_dl_clients = Concurrent::ThreadLocalVar.new(nil)
+
+  # for testing only
+  def self.http_client_mock=(mock)
+    raise unless Rails.env.test?
+    @http_client_mock = mock
+  end
+
+  def self.gallery_dl_client_mock=(mock)
+    raise unless Rails.env.test?
+    @gallery_dl_client_mock = mock
+  end
+
+  # public API
+  def self.get_gallery_dl_client
+    if Rails.env.test?
+      @gallery_dl_client_mock || raise("no gallery dl mock set")
+    else
+      _gallery_dl_client_impl
+    end
+  end
+
+  def self.get_twitter_http_client
+    if Rails.env.test?
+      @http_client_mock || raise("no http client mock set")
+    else
+      _http_client_impl(:twitter, Scraper::TwitterHttpClientConfig)
+    end
+  end
+
+  def self.get_fa_http_client
+    if Rails.env.test?
+      @http_client_mock || raise("no http client mock set")
+    else
+      _http_client_impl(:fa, Scraper::FaHttpClientConfig)
+    end
+  end
+
+  def self._gallery_dl_client_impl
+    @gallery_dl_clients.value ||= begin
+      proxy_config = Rails.application.config.x.proxy || raise("no proxy config")
+      if proxy_config[:gallery_dl].blank?
+        raise("no gallery_dl host defined for proxy config #{proxy_config[:name]}")
+      end
+      Scraper::GalleryDlClient.new(proxy_config[:name], proxy_config[:gallery_dl])
+    end
+  end
+
+  def self._http_client_impl(key, config_klass)
+    @http_clients.value[key] ||= begin
+      proxy_config = Rails.application.config.x.proxy || raise("no proxy config")
+      performer = Scraper::HttpPerformer.new(proxy_config[:name], proxy_config[:http])
+      Scraper::HttpClient.new(config_klass.new, performer)
+    end
+  end
+end
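
A standalone sketch of the per-thread memoization `ClientFactory` relies on (assuming the concurrent-ruby gem, which provides `Concurrent::ThreadLocalVar`); the string values are stand-ins for the real HTTP clients.

# Standalone sketch of the thread-local client cache used above: each thread
# lazily builds and caches its own client per key. Requires concurrent-ruby.
require "concurrent"

class ThreadLocalClientsSketch
  @http_clients = Concurrent::ThreadLocalVar.new { {} }

  def self.get(key)
    # ||= against the thread-local hash: at most one build per (thread, key)
    @http_clients.value[key] ||= "client #{key}/#{Thread.current.object_id}"
  end
end

a = ThreadLocalClientsSketch.get(:fa)
b = ThreadLocalClientsSketch.get(:fa)
c = Thread.new { ThreadLocalClientsSketch.get(:fa) }.value

puts a.equal?(b) # => true  (same thread reuses its cached client)
puts a == c      # => false (a different thread builds its own)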
@@ -14,6 +14,7 @@ class Scraper::GalleryDlClient
     :response_time_ms,
     :body,
+    :log_entry,
     keyword_init: true,
   )
   TweetEvent = Struct.new(
     :tweet,
@@ -1,27 +0,0 @@
|
||||
class Scraper::HttpFactory
|
||||
@http_clients = Concurrent::ThreadLocalVar.new() { {} }
|
||||
|
||||
def self.http_client_mock=(mock)
|
||||
@http_client_mock = mock
|
||||
end
|
||||
|
||||
def self.new_fa_http_client
|
||||
if Rails.env.test?
|
||||
@http_client_mock || raise("no mock set yet")
|
||||
else
|
||||
get_or_create_client(:fa, Scraper::FaHttpClientConfig.new)
|
||||
end
|
||||
end
|
||||
|
||||
def self.new_gallery_dl_client
|
||||
raise("not implemented yet")
|
||||
end
|
||||
|
||||
def self.get_or_create_client(key, config)
|
||||
@http_clients.value[key] ||= begin
|
||||
proxy_config = Rails.application.config.x.proxy || raise("no proxy config")
|
||||
performer = Scraper::HttpPerformer.new(proxy_config[:name], proxy_config[:http])
|
||||
Scraper::HttpClient.new(config, performer)
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,4 +1,4 @@
-class Scraper::TwitterHttpClient < Scraper::BaseHttpClient
+class Scraper::TwitterHttpClientConfig < Scraper::HttpClientConfig
   DEFAULT_ALLOWED_DOMAINS = [
     "*.twimg.com",
     "ipinfo.io",
@@ -18,4 +18,8 @@ class Scraper::TwitterHttpClient < Scraper::BaseHttpClient
   def allowed_domains
     DEFAULT_ALLOWED_DOMAINS
   end
+
+  def redirect_limit
+    4
+  end
 end
db/schema.rb (generated, 2 lines changed)
@@ -195,6 +195,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_25_124842) do
   end
 
   create_table "domain_twitter_users", force: :cascade do |t|
+    t.integer "tw_id"
     t.integer "state"
     t.json "state_detail"
     t.json "raw_data"
@@ -209,6 +210,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_03_25_124842) do
     t.datetime "created_at", null: false
     t.datetime "updated_at", null: false
     t.index ["name"], name: "index_domain_twitter_users_on_name", unique: true
+    t.index ["tw_id"], name: "index_domain_twitter_users_on_tw_id", unique: true
   end
 
   create_table "flat_sst_entries", id: false, force: :cascade do |t|
spec/helpers/twitter_helpers.rb (new file, 56 lines)
@@ -0,0 +1,56 @@
+module TwitterHelpers
+  def set_up_gallery_dl_mock(mock, sequence)
+    sequence.each do |seq|
+      expected = receive(seq[:receive])
+
+      if seq[:with].respond_to?(:call)
+        expected = expected.with(*seq[:with].call(sequence))
+      else
+        expected = expected.with(*seq[:with])
+      end if seq[:with]
+
+      expected = expected.and_return(seq[:return]) if seq[:return]
+
+      expect(mock).to(expected)
+    end
+
+    allow(mock).to(receive(:next_message).and_raise(RuntimeError.new))
+  end
+
+  def gallery_dl_user_with_no_tweets_sequence(mock)
+    set_up_gallery_dl_mock(mock, [
+      {
+        receive: :start_twitter_user,
+        with: ["curtus", { caused_by_entry: nil }],
+      },
+      {
+        receive: :next_message,
+        with: [{ caused_by_entry: nil }],
+        return: GDLClient::HttpRequestEvent.new(
+          log_entry: instance_double("::HttpLogEntry"),
+          response_code: 200,
+          response_headers: {
+            :'content-type' => "application/json",
+          },
+          body: {
+            "data" => {
+              "user" => {
+                "result" => {
+                  "__typename" => "User",
+                  "rest_id" => "1234567",
+                },
+              },
+            },
+          }.to_json,
+        ),
+      },
+      {
+        receive: :next_message,
+        with: proc { |sequence|
+          [{ caused_by_entry: sequence[1][:return].log_entry }]
+        },
+        return: GDLClient::FinishEvent.new,
+      },
+    ])
+  end
+end
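
A standalone sketch of how `set_up_gallery_dl_mock` resolves each entry's `:with`: a proc receives the whole sequence, which is how the final expectation above threads the earlier `log_entry` into `caused_by_entry`; any other value is used as a literal argument list. The symbols below stand in for the real `GalleryDlClient` event objects, and "some_user" is a made-up name.

# Standalone sketch of the :with resolution performed by the helper above.
sequence = [
  { receive: :start_twitter_user, with: ["some_user", { caused_by_entry: nil }] },
  { receive: :next_message, with: [{ caused_by_entry: nil }], return: :http_event },
  {
    receive: :next_message,
    # a proc gets the whole sequence, so it can refer back to earlier returns
    with: proc { |seq| [{ caused_by_entry: seq[1][:return] }] },
    return: :finish_event,
  },
]

sequence.each do |seq|
  args =
    if seq[:with].respond_to?(:call)
      seq[:with].call(sequence)
    else
      seq[:with]
    end
  puts "#{seq[:receive]} with=#{args.inspect} -> #{seq[:return].inspect}"
end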
@@ -3,11 +3,7 @@ require "rails_helper"
|
||||
describe Domain::Fa::Job::BrowsePageJob do
|
||||
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
|
||||
before do
|
||||
Scraper::HttpFactory.http_client_mock = http_client_mock
|
||||
end
|
||||
|
||||
around do |block|
|
||||
ColorLogger.quiet(&block)
|
||||
Scraper::ClientFactory.http_client_mock = http_client_mock
|
||||
end
|
||||
|
||||
shared_context "user and post getters" do
|
||||
|
||||
@@ -0,0 +1,42 @@
+describe Domain::Twitter::Job::UserTimelineTweetsJob do
+  GDLClient = Scraper::GalleryDlClient
+
+  let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
+  let(:gallery_dl_client_mock) { instance_double("::Scraper::GalleryDlClient") }
+  before do
+    Scraper::ClientFactory.http_client_mock = http_client_mock
+    Scraper::ClientFactory.gallery_dl_client_mock = gallery_dl_client_mock
+  end
+
+  context "scanning an unseen user" do
+    context "and the user has no tweets" do
+      it "creates the user" do
+        gallery_dl_user_with_no_tweets_sequence(gallery_dl_client_mock)
+
+        expect do
+          expect(described_class.perform_now({
+            name: "curtus",
+          })).to_not be_a(Exception)
+        end.to change { Domain::Twitter::User.count }.by(1)
+        user = Domain::Twitter::User.find_by(name: "curtus")
+        expect(user).to_not be_nil
+        expect(user.tw_id).to eq(1234567)
+      end
+    end
+  end
+
+  context "scanning an exisitng user with no tw_id" do
+    it "updates the user", quiet: false do
+      gallery_dl_user_with_no_tweets_sequence(gallery_dl_client_mock)
+      user = Domain::Twitter::User.create!(name: "curtus")
+      expect do
+        expect(described_class.perform_now({
+          name: "curtus",
+        })).to_not be_a(Exception)
+      end.not_to change { Domain::Twitter::User.count }
+
+      user.reload
+      expect(user.tw_id).to eq(1234567)
+    end
+  end
+end
@@ -13,7 +13,20 @@
 # it.
 #
 # See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+require "./spec/helpers/twitter_helpers"
+
 RSpec.configure do |config|
+  config.include TwitterHelpers
+
+  # can tag classes with `quiet: false` to make ColorLogger loud
+  config.around(:each) do |example|
+    if example.example.metadata[:quiet].is_a?(FalseClass)
+      example.call
+    else
+      ColorLogger.quiet(&example)
+    end
+  end
+
   # rspec-expectations config goes here. You can use an alternate
   # assertion/expectation library such as wrong or the stdlib/minitest
   # assertions if you prefer.
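
An illustration of the `quiet: false` tag wired up by the hook above; the describe/it names and example body are placeholders, not from the commit.

# With the config.around hook above, a single example can opt out of
# ColorLogger.quiet by tagging itself. Placeholder example body.
RSpec.describe "something under noisy debugging" do
  it "keeps its log output", quiet: false do
    expect(1 + 1).to eq(2)
  end
end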