Add testing utilities and improve test coverage with FactoryBot integration

- Added `shoulda-matchers` for enhanced RSpec testing capabilities.
- Introduced `factory_bot_rails` for easier test data creation.
- Created factories for the `HttpLogEntry`, `HttpLogEntryHeader`, `BlobEntryP`, `Domain::Fa::Post`, and `Domain::Fa::User` models.
- Updated `rails_helper.rb` to include FactoryBot methods and configure Shoulda matchers.
- Enhanced `HttpLogEntry` model with a new `response_size` method.
- Refactored `justfile` to include parallel test execution.
- Improved `Gemfile` and `Gemfile.lock` with new testing gems.
This commit is contained in:
Dylan Knutson
2024-12-27 16:59:27 +00:00
parent d6ff5f2ebf
commit 5c1807711b
23 changed files with 1006 additions and 232 deletions

View File

@@ -87,6 +87,9 @@ group :test do
gem "rails-controller-testing"
gem "selenium-webdriver"
gem "webdrivers"
gem "shoulda-matchers"
gem "factory_bot_rails"
gem "parallel_tests"
end
gem "xdiff", path: "/gems/xdiff-rb"

View File

@@ -153,6 +153,11 @@ GEM
et-orbi (1.2.11)
tzinfo
execjs (2.8.1)
factory_bot (6.5.0)
activesupport (>= 5.0.0)
factory_bot_rails (6.4.4)
factory_bot (~> 6.5)
railties (>= 5.0.0)
faiss (0.2.5)
numo-narray
rice (>= 4.0.2)
@@ -232,6 +237,9 @@ GEM
mini_portile2 (~> 2.8.2)
racc (~> 1.4)
numo-narray (0.9.2.1)
parallel (1.26.3)
parallel_tests (4.7.2)
parallel
pg (1.4.5)
pg_query (4.2.1)
google-protobuf (>= 3.22.3)
@@ -365,6 +373,8 @@ GEM
rack-proxy (>= 0.6.1)
railties (>= 5.2)
semantic_range (>= 2.3.0)
shoulda-matchers (6.4.0)
activesupport (>= 5.2.0)
sprockets (4.2.0)
concurrent-ruby (~> 1.0)
rack (>= 2.2.4, < 4)
@@ -433,6 +443,7 @@ DEPENDENCIES
diffy
discard
disco
factory_bot_rails
faiss
good_job (~> 4.6)
htmlbeautifier
@@ -442,6 +453,7 @@ DEPENDENCIES
memory_profiler
neighbor
nokogiri
parallel_tests
pg
pg_query (>= 2)
pghero!
@@ -469,6 +481,7 @@ DEPENDENCIES
sd_notify
selenium-webdriver
shakapacker
shoulda-matchers
sprockets-rails
sqlite3 (~> 1.4)
stackprof

View File

@@ -124,6 +124,7 @@ class Domain::Fa::PostsController < ApplicationController
# Use callbacks to share common setup or constraints between actions.
def set_domain_fa_post
@post = Domain::Fa::Post.find_by_fa_id!(params[:fa_id])
@post =
Domain::Fa::Post.includes(:creator, :file).find_by_fa_id!(params[:fa_id])
end
end

View File

@@ -36,10 +36,7 @@ class Domain::Fa::Post < ReduxApplicationRecord
autosave: true
# If the file was scraped, this is the blob entry that represents it
belongs_to :file,
class_name: "::HttpLogEntry",
optional: :true,
autosave: true
belongs_to :file, class_name: "::HttpLogEntry", optional: true, autosave: true
has_many :fav_post_joins, class_name: "::Domain::Fa::Fav", inverse_of: :post
@@ -108,7 +105,7 @@ class Domain::Fa::Post < ReduxApplicationRecord
def scanned_at=(time)
unless time.nil?
unless time.is_a?(Time)
raise ArgumentError("time must be Time, was #{time.class}")
raise ArgumentError.new("time must be Time, was #{time.class}")
end
end
self.state_detail["scanned_at"] = time&.to_i

View File

@@ -44,6 +44,14 @@ class HttpLogEntry < ReduxApplicationRecord
find_by(uri_host: uri.host, uri_path: uri.path)
end
# Returns the size recorded on this entry's response blob.
#
# When the `response` association is already loaded, the value is read from
# the in-memory record. Otherwise only the `size` column is fetched from
# BlobEntryP by `response_sha256`, so the blob record itself is not loaded.
def response_size
  return response.size if association(:response).loaded?

  BlobEntryP.where(sha256: response_sha256).pick(:size)
end
def uri=(uri)
uri = Addressable::URI.parse(uri)
self.uri_scheme = uri.scheme
@@ -67,25 +75,4 @@ class HttpLogEntry < ReduxApplicationRecord
the_str += "\##{uri_hash}" if uri_hash
the_str
end
# Builds a symbol-keyed Hash holding this entry's id, URI components,
# request metadata, header/response references and timestamps, in a fixed
# key order.
def to_bulk_insert_hash
  %i[
    id
    uri_scheme
    uri_host
    uri_path
    uri_query
    uri_hash
    verb
    content_type
    status_code
    response_time_ms
    request_headers_id
    response_headers_id
    response_sha256
    requested_at
    created_at
    updated_at
  ].each_with_object({}) { |column, row| row[column] = public_send(column) }
end
end

View File

@@ -49,10 +49,6 @@ class HttpLogEntryHeader < ReduxApplicationRecord
hash[key].gsub!(/\d\d:\d\d:\d\d/, "(scrubbed)") if hash[key]
end
# Builds a symbol-keyed Hash with this record's sha256 and headers.
def to_bulk_insert_hash
  row = {}
  row[:sha256] = sha256
  row[:headers] = headers
  row
end
# Memoized accessor for the record whose headers hash is empty. The record is
# obtained through find_or_create on first use and cached in @empty_model.
def self.empty
  return @empty_model if @empty_model

  @empty_model = find_or_create(headers: {})
end

View File

@@ -58,23 +58,10 @@
<% end %>
</section>
<% if @post.file %>
<section class="sky-section">
<div class="section-header">File Details</div>
<div class="bg-slate-100 p-4">
<div class="flex flex-wrap gap-x-4 text-sm text-slate-600">
<span>
<i class="fa-regular fa-file mr-1"></i>
Type: <%= @post.file.content_type %>
</span>
<span>
<i class="fa-solid fa-weight-hanging mr-1"></i>
Size: <%= number_to_human_size(@post.file.response.size) %>
</span>
</div>
</div>
</section>
<% end %>
<%= render partial: "log_entries/file_details_sky_section",
locals: {
log_entry: @post.file,
} %>
<section class="sky-section">
<% if (post_description_html = @post.description) %>

View File

@@ -7,6 +7,9 @@
<span><%= fa_user_account_status(user) %></span> •
<span><%= user.state %></span>
</div>
<%# registered_at may be nil for some users; time_ago_in_words raises on nil, so render the line only when a value is present. %>
<% if user.registered_at %>
  <div class="text-sm text-slate-500">
    Registered <%= time_ago_in_words(user.registered_at) %> ago
  </div>
<% end %>
</div>
</div>
<a

View File

@@ -0,0 +1,21 @@
<%# "File Details" section for an HTTP log entry.
    Expects a `log_entry` local and renders nothing when it is nil or false.
    Shows the entry's content type, its response size formatted with
    number_to_human_size, and its response time in milliseconds. %>
<% if log_entry %>
<section class="sky-section">
<div class="section-header">File Details</div>
<div class="bg-slate-100 p-4">
<div class="flex flex-wrap gap-x-4 text-sm text-slate-600">
<span>
<i class="fa-regular fa-file mr-1"></i>
Type: <%= log_entry.content_type %>
</span>
<span>
<i class="fa-solid fa-weight-hanging mr-1"></i>
Size: <%= number_to_human_size(log_entry.response_size) %>
</span>
<span>
<i class="fa-solid fa-clock mr-1"></i>
Response Time: <%= log_entry.response_time_ms %>ms
</span>
</div>
</div>
</section>
<% end %>

View File

@@ -39,7 +39,7 @@ local_redux_test: &local_redux_test
adapter: postgresql
host: db
port: 5432
database: redux_test
database: redux_test<%= ENV['TEST_ENV_NUMBER'] %>
# username: scraper_redux
# password: pdkFLqRmQwPUPaDDC4pX
username: postgres

612
db/schema.rb generated

File diff suppressed because it is too large Load Diff

View File

@@ -15,3 +15,7 @@ format-all:
psql-dump-domain-fa-favs:
@psql -P pager=off -c 'select user_id, post_id, 1 from domain_fa_favs limit 10000000;' -d redux_prod -h 10.166.33.171 -U scraper_redux -t -A -F ' '
test:
RAILS_ENV=test bin/rails test
RAILS_ENV=test bin/rake parallel:spec

View File

@@ -34,11 +34,29 @@ module IndexedPostsRake
Rails.logger.info(
"Ensuring indexed post on #{relation.name.bold} (#{total} total to process)",
)
progress = ProgressBar.create(total: total, throttle_rate: 0.2)
relation.find_each do |post|
post.ensure_indexed_post!
post.save!
progress.increment
progress =
ProgressBar.create(
total: total,
throttle_rate: 0.2,
format: "%t: %c/%C |%B| %R/s %P%% %e",
)
pool = Concurrent::FixedThreadPool.new(2)
mutex = Mutex.new
relation.find_in_batches do |batch|
pool.post do
relation.model.transaction do
batch.each do |post|
post.ensure_indexed_post!
post.save!
mutex.synchronize { progress.increment }
end
end
end
end
pool.shutdown
pool.wait_for_termination
end
end

View File

@@ -0,0 +1,19 @@
# Factory for blob entries used in specs.
#
# The transient `content` attribute is the single source of truth: `contents`,
# `sha256` and `size` are all derived from it, so overriding `content` keeps
# the three attributes consistent with each other.
FactoryBot.define do
  factory :blob_entry_p do
    transient { content { "test content" } }

    content_type { "text/plain" }
    # Raw (binary) SHA-256 digest of the content.
    sha256 { Digest::SHA256.digest(content) }
    contents { content }
    # Byte length rather than character count: String#size under-reports
    # multibyte content, while the digest above is computed over bytes.
    size { content.bytesize }

    trait :html do
      content_type { "text/html" }
    end

    trait :json do
      content_type { "application/json" }
      content { "{}" }
    end
  end
end

View File

@@ -0,0 +1,16 @@
# Factory for Domain::Fa::Post records used in specs.
FactoryBot.define do
factory :domain_fa_post, class: "Domain::Fa::Post" do
# Each built post takes the next sequence number as its fa_id.
sequence(:fa_id) { |n| n }
state { :ok }
state_detail { {} }
log_entry_detail { {} }
# Opt-in trait: attaches a creator built from the :domain_fa_user factory.
trait :with_creator do
association :creator, factory: :domain_fa_user
end
# Opt-in trait: attaches a file built from the :http_log_entry factory.
trait :with_file do
association :file, factory: :http_log_entry
end
end
end

View File

@@ -0,0 +1,6 @@
# Factory for Domain::Fa::User records; sequences give every generated user
# its own url_name and name.
FactoryBot.define do
  factory :domain_fa_user, class: "Domain::Fa::User" do
    sequence(:url_name) { |index| "user#{index}" }
    sequence(:name) { |index| "User #{index}" }
  end
end

View File

@@ -0,0 +1,53 @@
# Factory for HttpLogEntry records used in specs. By default it describes a
# GET of https://example<N>.com/path answered with a 200 text/html response,
# together with a response blob and request/response header records.
FactoryBot.define do
  factory :http_log_entry do
    sequence(:uri_host) { |index| "example#{index}.com" }
    uri_scheme { "https" }
    uri_path { "/path" }
    uri_query { nil }
    verb { :get }
    status_code { 200 }
    content_type { "text/html" }
    response_time_ms { 100 }
    requested_at { Time.current }
    created_at { Time.current }
    performed_by { "direct" }

    # Associated records every entry is built with.
    association :response, factory: :blob_entry_p
    association :request_headers, factory: :http_log_entry_header
    association :response_headers, factory: :http_log_entry_header

    # Keep response_sha256 in step with whichever response blob was attached.
    after(:build) do |log_entry|
      blob = log_entry.response
      log_entry.response_sha256 = blob.sha256 if blob
    end

    # Request-shape variations.
    trait(:post_request) { verb { :post } }
    trait(:with_query) { uri_query { "foo=bar&baz=qux" } }
    trait(:with_fragment) { uri_hash { "section1" } }
    trait(:with_error) { status_code { 404 } }

    # Links the entry to another entry as its cause.
    trait(:with_caused_by) do
      association :caused_by_entry, factory: :http_log_entry
    end

    # Alternative performed_by values.
    trait(:legacy) { performed_by { "legacy" } }
    trait(:proxy) { performed_by { "proxy-1" } }
  end
end

View File

@@ -0,0 +1,11 @@
# Factory for HttpLogEntryHeader records. Header values are randomised on
# every build, and sha256 is the raw SHA-256 digest of the string form of the
# key-sorted headers hash.
FactoryBot.define do
  factory :http_log_entry_header do
    headers do
      { "x-request-id" => SecureRandom.hex(8), "x-session-id" => SecureRandom.uuid }
    end

    sha256 do
      canonical = headers.sort.to_h.to_s
      Digest::SHA256.digest(canonical)
    end
  end
end

View File

@@ -1,6 +1,157 @@
require "rails_helper"
describe Domain::Fa::Post do
RSpec.describe Domain::Fa::Post do
describe "validations" do
let(:post) { build(:domain_fa_post) }
it "requires fa_id" do
post.fa_id = nil
expect(post).not_to be_valid
expect(post.errors[:fa_id]).to include("can't be blank")
end
it "requires state" do
post.state = nil
expect(post).not_to be_valid
expect(post.errors[:state]).to include("can't be blank")
end
it "only allows valid states" do
expect { post.state = :invalid_state }.to raise_error(
ArgumentError,
"'invalid_state' is not a valid state",
)
%i[ok removed scan_error file_error].each do |state|
expect { post.state = state }.not_to raise_error
end
end
end
describe "associations" do
it { should belong_to(:creator).class_name("::Domain::Fa::User").optional }
it { should belong_to(:file).class_name("::HttpLogEntry").optional }
it { should have_many(:fav_post_joins).class_name("::Domain::Fa::Fav") }
it { should have_many(:faved_by).through(:fav_post_joins) }
it { should have_one(:disco).dependent(:destroy) }
end
describe "#scanned_at=" do
let(:post) { build(:domain_fa_post) }
it "accepts nil value" do
expect { post.scanned_at = nil }.not_to raise_error
expect(post.state_detail["scanned_at"]).to be_nil
end
it "accepts Time value" do
time = Time.current
post.scanned_at = time
expect(post.state_detail["scanned_at"]).to eq(time.to_i)
end
it "raises ArgumentError for non-Time values" do
expect { post.scanned_at = "2024-03-20" }.to raise_error(ArgumentError)
expect { post.scanned_at = Date.today }.to raise_error(ArgumentError)
end
end
describe "#scanned_at" do
let(:post) { build(:domain_fa_post) }
it "returns nil when no scanned_at is set" do
expect(post.scanned_at).to be_nil
end
it "returns Time object from stored timestamp" do
time = Time.current
post.scanned_at = time
expect(post.scanned_at).to be_within(1.second).of(time)
end
it "falls back to last_submission_page created_at" do
log_entry = create(:http_log_entry)
post.last_submission_page = log_entry
expect(post.scanned_at).to eq(log_entry.created_at)
end
end
describe "#file_uri" do
let(:post) { build(:domain_fa_post) }
it "returns nil when file_url_str is nil" do
post.file_url_str = nil
expect(post.file_uri).to be_nil
end
it "returns parsed URI when file_url_str is present" do
post.file_url_str = "http://example.com/file.jpg"
expect(post.file_uri).to be_a(Addressable::URI)
expect(post.file_uri.to_s).to eq("http://example.com/file.jpg")
end
end
describe "#file_uri=" do
let(:post) { build(:domain_fa_post) }
it "sets file_url_str to nil when given nil" do
post.file_uri = nil
expect(post.file_url_str).to be_nil
end
it "converts string URIs to Addressable::URI" do
post.file_uri = "http://example.com/file.jpg"
expect(post.file_url_str).to eq("http://example.com/file.jpg")
end
it "defaults to https when scheme is blank" do
post.file_uri = "//example.com/file.jpg"
expect(post.file_url_str).to eq("https://example.com/file.jpg")
end
end
describe "#thumbnail_uri" do
let(:post) { build(:domain_fa_post) }
it "returns nil when thumbnail_url_str is not present" do
expect(post.thumbnail_uri).to be_nil
end
it "returns parsed URI when thumbnail_url_str is present" do
post.thumbnail_uri = "http://example.com/thumb.jpg"
expect(post.thumbnail_uri).to be_a(Addressable::URI)
expect(post.thumbnail_uri.to_s).to eq("http://example.com/thumb.jpg")
end
end
describe "#scanned?" do
let(:post) { build(:domain_fa_post) }
it "returns true when file_url_str is present" do
post.file_url_str = "http://example.com/file.jpg"
expect(post).to be_scanned
end
it "returns false when file_url_str is blank" do
post.file_url_str = nil
expect(post).not_to be_scanned
end
end
describe "#have_file?" do
let(:post) { build(:domain_fa_post) }
it "returns true when file_id is present" do
post.file_id = 1
expect(post.have_file?).to be true
end
it "returns false when file_id is nil" do
post.file_id = nil
expect(post.have_file?).to be false
end
end
it "ensures indexed_post is created" do
post = SpecUtil.build_domain_fa_post(created_at: 1.day.ago)
post.save!

View File

@@ -0,0 +1,159 @@
require "rails_helper"
# Model specs for HttpLogEntry: validations, associations, read-only
# behaviour of persisted records, URI accessors, lookup by host/path, and
# #response_size.
RSpec.describe HttpLogEntry, type: :model do
describe "validations" do
# Matchers run against an unsaved entry built from the factory.
subject { build(:http_log_entry) }
it { should validate_presence_of(:uri_scheme) }
it { should validate_presence_of(:uri_host) }
it { should validate_presence_of(:uri_path) }
it { should validate_presence_of(:verb) }
it { should validate_presence_of(:performed_by) }
it { should validate_presence_of(:status_code) }
it { should validate_presence_of(:response_time_ms) }
it { should validate_presence_of(:content_type) }
it { should validate_presence_of(:requested_at) }
it { should validate_length_of(:response_sha256).is_equal_to(32) }
end
describe "associations" do
it { should belong_to(:response).class_name("::BlobEntryP") }
it { should belong_to(:request_headers).class_name("::HttpLogEntryHeader") }
it do
should belong_to(:response_headers).class_name("::HttpLogEntryHeader")
end
it do
should belong_to(:caused_by_entry).class_name("::HttpLogEntry").optional
end
it { should have_many(:triggered_entries).class_name("::HttpLogEntry") }
end
# A persisted entry is expected to reject both updates and deletion.
describe "immutability" do
let(:entry) { create(:http_log_entry) }
it "prevents updates" do
expect { entry.update!(verb: :post) }.to raise_error(
ActiveRecord::ReadOnlyRecord,
)
end
it "prevents deletion" do
expect { entry.destroy }.to raise_error(ActiveRecord::ReadOnlyRecord)
end
end
# The uri= writer splits a URI string into the individual uri_* attributes.
describe "#uri=" do
let(:entry) { build(:http_log_entry) }
it "parses and sets URI components" do
entry.uri = "https://example.com/path?query=value#fragment"
expect(entry.uri_scheme).to eq("https")
expect(entry.uri_host).to eq("example.com")
expect(entry.uri_path).to eq("/path")
expect(entry.uri_query).to eq("query=value")
expect(entry.uri_hash).to eq("fragment")
end
it "handles URIs without optional components" do
entry.uri = "https://example.com/path"
expect(entry.uri_scheme).to eq("https")
expect(entry.uri_host).to eq("example.com")
expect(entry.uri_path).to eq("/path")
expect(entry.uri_query).to be_nil
expect(entry.uri_hash).to be_nil
end
it "parses complex URLs with multiple query parameters" do
entry.uri =
"https://www.example.com/big/path/here?and=query&other=query2#smaz"
expect(entry.uri_scheme).to eq("https")
expect(entry.uri_host).to eq("www.example.com")
expect(entry.uri_path).to eq("/big/path/here")
expect(entry.uri_query).to eq("and=query&other=query2")
expect(entry.uri_hash).to eq("smaz")
end
end
# The host is interpolated because the factory assigns a sequenced uri_host.
describe "#uri" do
let(:entry) { build(:http_log_entry, :with_query, :with_fragment) }
it "reconstructs the full URI" do
expect(entry.uri).to be_a(Addressable::URI)
expect(entry.uri.to_s).to eq(
"https://#{entry.uri_host}/path?foo=bar&baz=qux#section1",
)
end
end
describe "#uri_str" do
it "returns full URI string" do
entry = build(:http_log_entry, :with_query, :with_fragment)
expect(entry.uri_str).to eq(
"https://#{entry.uri_host}/path?foo=bar&baz=qux#section1",
)
end
it "handles URI without optional components" do
entry = build(:http_log_entry)
expect(entry.uri_str).to eq("https://#{entry.uri_host}/path")
end
end
describe "#uri_str_without_host" do
it "returns path and query components" do
entry = build(:http_log_entry, :with_query, :with_fragment)
expect(entry.uri_str_without_host).to eq("/path?foo=bar&baz=qux#section1")
end
it "handles path only" do
entry = build(:http_log_entry)
expect(entry.uri_str_without_host).to eq("/path")
end
end
describe ".find_by_uri_host_path" do
# let! creates the entry eagerly so it exists before each lookup runs.
let!(:entry) { create(:http_log_entry) }
it "finds entry by URI string" do
found =
described_class.find_by_uri_host_path("https://#{entry.uri_host}/path")
expect(found).to eq(entry)
end
it "finds entry by Addressable::URI" do
uri = Addressable::URI.parse("https://#{entry.uri_host}/path")
found = described_class.find_by_uri_host_path(uri)
expect(found).to eq(entry)
end
it "returns nil for non-existent URI" do
found =
described_class.find_by_uri_host_path("https://nonexistent.com/path")
expect(found).to be_nil
end
end
# Covers both branches of #response_size: association loaded vs. not loaded.
describe "#response_size" do
let(:entry) { create(:http_log_entry) }
context "when response association is loaded" do
it "returns size from response object" do
test_content = "test content"
# Assigning a blob puts the response in memory (presumably marking the
# association as loaded), so the size should come from that object.
entry.response = build(:blob_entry_p, content: test_content)
expect(entry.response_size).to eq(test_content.bytesize)
end
end
context "when response association is not loaded" do
it "queries size directly from database" do
# Capture the expected size first, then reset the association so that
# response_size has to take its not-loaded path.
size = entry.response.size
entry.association(:response).reset
expect(entry.response_size).to eq(size)
end
end
end
end

View File

@@ -66,6 +66,16 @@ RSpec.configure do |config|
config.filter_rails_from_backtrace!
# arbitrary gems may also be filtered via:
# config.filter_gems_from_backtrace("gem name")
# Add FactoryBot methods
config.include FactoryBot::Syntax::Methods
end
require "spec_util"
Shoulda::Matchers.configure do |config|
config.integrate do |with|
with.test_framework :rspec
with.library :rails
end
end

View File

@@ -3,6 +3,7 @@ require "test_helper"
class Domain::Fa::UsersControllerTest < ActionDispatch::IntegrationTest
setup do
@user = TestUtil.build_fa_user
@user.registered_at = 1.year.ago
@user.save!
end

View File

@@ -1,60 +0,0 @@
require "test_helper"
# Minitest coverage for HttpLogEntry: required attributes, URI parsing, and
# rejection of updates and deletion on saved records.
class HttpLogEntryTest < ActiveSupport::TestCase
# Returns a fresh attribute hash that the first test asserts is valid. Every
# call creates new header and blob records through TestUtil.
def make_required_params
{
request_headers_id: TestUtil.create_http_headers.id,
response_headers_id: TestUtil.create_http_headers.id,
response_sha256: TestUtil.create_blob_entry.sha256,
response_time_ms: 125,
content_type: "test/text",
verb: :get,
requested_at: Time.now,
uri_scheme: "https",
uri_host: "example.com",
uri_path: "/foo/bar",
status_code: 200,
performed_by: "direct"
}
end
# The full attribute set must be valid; dropping any single key must make
# the model invalid.
test "requires required params" do
model = ::HttpLogEntry.new(make_required_params)
assert model.valid?, model.errors.full_messages
make_required_params.keys.each do |target_key|
params_without_key = make_required_params
params_without_key.delete(target_key)
refute ::HttpLogEntry.new(params_without_key).valid?
end
end
# Assigning `uri` must populate every uri_* attribute and round-trip back to
# the same URI object and string.
test "uri is correct" do
uri_str =
"https://www.example.com/big/path/here?and=query&other=query2#smaz"
uri = Addressable::URI.parse(uri_str)
model = ::HttpLogEntry.new({ uri: uri_str })
assert_equal "https", model.uri_scheme
assert_equal "www.example.com", model.uri_host
assert_equal "/big/path/here", model.uri_path
assert_equal "and=query&other=query2", model.uri_query
assert_equal "smaz", model.uri_hash
assert_equal uri, model.uri
assert_equal uri_str, model.uri_str
end
# Saving a changed record must raise, and the stored verb must be unchanged
# after reload.
test "model cannot be updated" do
model = ::HttpLogEntry.new(make_required_params)
model.save!
model.verb = :post
assert_raises(ActiveRecord::ReadOnlyRecord) { model.save! }
model.reload
assert_equal "get", model.verb
end
# Destroying a saved record must raise ReadOnlyRecord.
test "model cannot be deleted" do
model = ::HttpLogEntry.new(make_required_params)
model.save!
assert_raises(ActiveRecord::ReadOnlyRecord) { model.destroy }
end
end