feat: implement Bluesky scan posts job and enhance user scanning

- Add new ScanPostsJob for scanning Bluesky posts
- Enhance ScanUserJob with improved error handling and logging
- Update BlueskyPost model with new fields and validation
- Add auxiliary tables for Bluesky posts
- Improve job base classes with better color logging
- Update specs with proper HTTP mocking patterns
- Add factory for BlueskyPost testing
This commit is contained in:
Dylan Knutson
2025-08-10 18:41:01 +00:00
parent 5c71fc6b15
commit eba4b58666
20 changed files with 796 additions and 548 deletions

View File

@@ -6,7 +6,7 @@ FactoryBot.define do
sequence(:at_uri) do |n|
"at://did:plc:#{n.to_s.rjust(10, "0")}/app.bsky.feed.post/rkey#{n}"
end
bluesky_created_at { Time.now }
posted_at { Time.now }
state { "ok" }
text { "Hello from Bluesky" }
hashtags { %w[test bluesky] }

View File

@@ -0,0 +1,332 @@
# typed: false
# frozen_string_literal: true
require "rails_helper"
# Spec for the job that lists a Bluesky user's feed posts via
# com.atproto.repo.listRecords and records the ones that carry media.
# All HTTP traffic goes through HttpClientMockHelpers; nothing hits the network.
RSpec.describe Domain::Bluesky::Job::ScanPostsJob do
  include PerformJobHelpers

  # AT-URI used by the fixtures for one of the user's feed posts.
  def post_at_uri(rkey)
    "at://#{user.did}/app.bsky.feed.post/#{rkey}"
  end

  # Blob download URL the examples expect to find on a created post file.
  def expected_blob_url(cid)
    "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=#{cid}"
  end

  let(:user) do
    create(
      :domain_user_bluesky_user,
      did: "did:plc:test123",
      handle: "testuser.bsky.social",
      scanned_posts_at: nil,
      state: "ok",
    )
  end

  # First-page listRecords request (no cursor) that the contexts below stub.
  let(:list_records_uri) do
    "https://bsky.social/xrpc/com.atproto.repo.listRecords?repo=#{user.did}&collection=app.bsky.feed.post&limit=100"
  end

  describe "#perform" do
    context "when user is in ok state" do
      # Post with an `app.bsky.embed.images` embed (one image blob).
      let(:image_post_record) do
        {
          "uri" => post_at_uri("post1"),
          "cid" => "bafyreiapost123",
          "value" => {
            "text" => "Hello world with image!",
            "createdAt" => "2025-01-08T12:00:00.000Z",
            "embed" => {
              "$type" => "app.bsky.embed.images",
              "images" => [
                {
                  "alt" => "Test image",
                  "aspectRatio" => {
                    "width" => 1920,
                    "height" => 1080,
                  },
                  "image" => {
                    "$type" => "blob",
                    "ref" => {
                      "$link" => "bafkreiimage123",
                    },
                    "mimeType" => "image/jpeg",
                    "size" => 256_000,
                  },
                },
              ],
            },
          },
        }
      end

      # Plain text post with no embed at all.
      let(:text_only_record) do
        {
          "uri" => post_at_uri("post2"),
          "cid" => "bafyreiapost456",
          "value" => {
            "text" => "Just a text post",
            "createdAt" => "2025-01-08T11:00:00.000Z",
          },
        }
      end

      # Post with an `app.bsky.embed.external` embed that has a thumbnail blob.
      let(:external_embed_record) do
        {
          "uri" => post_at_uri("post3"),
          "cid" => "bafyreiapost789",
          "value" => {
            "text" => "Post with external embed",
            "createdAt" => "2025-01-08T10:00:00.000Z",
            "embed" => {
              "$type" => "app.bsky.embed.external",
              "external" => {
                "uri" => "https://example.com",
                "title" => "Example Site",
                "description" => "An example website",
                "thumb" => {
                  "$type" => "blob",
                  "ref" => {
                    "$link" => "bafkreithumb123",
                  },
                  "mimeType" => "image/jpeg",
                  "size" => 50_000,
                },
              },
            },
          },
        }
      end

      let(:posts_response_body) do
        {
          "records" => [
            image_post_record,
            text_only_record,
            external_embed_record,
          ],
          "cursor" => nil,
        }.to_json
      end

      let(:client_mock_config) do
        [
          {
            uri: list_records_uri,
            status_code: 200,
            content_type: "application/json",
            contents: posts_response_body,
          },
        ]
      end

      before do
        @log_entries = HttpClientMockHelpers.init_with(client_mock_config)
        # Stub static file job enqueueing so the examples can count the calls
        allow(Domain::StaticFileJob).to receive(:perform_later)
      end

      it "scans user posts and updates scanned_posts_at" do
        perform_now({ user: user })

        expect(user.reload.scanned_posts_at).to be_present
      end

      it "creates posts with media and associated files" do
        expect { perform_now({ user: user }) }.to(
          change(Domain::Post::BlueskyPost, :count).by(2).and(
            change(Domain::PostFile::BlueskyPostFile, :count).by(2),
          ),
        )

        # Image post
        post_with_image = Domain::Post::BlueskyPost.find_by(bluesky_rkey: "post1")
        expect(post_with_image.text).to eq("Hello world with image!")
        expect(post_with_image.creator).to eq(user)

        attachment = post_with_image.files.first
        expect(attachment.alt_text).to eq("Test image")
        expect(attachment.blob_ref).to eq("bafkreiimage123")
        expect(attachment.aspect_ratio_width).to eq(1920)
        expect(attachment.aspect_ratio_height).to eq(1080)
        expect(attachment.url_str).to eq(expected_blob_url("bafkreiimage123"))

        # External embed post
        post_with_link = Domain::Post::BlueskyPost.find_by(bluesky_rkey: "post3")
        expect(post_with_link.text).to eq("Post with external embed")
        expect(post_with_link.creator).to eq(user)

        thumbnail = post_with_link.files.first
        expect(thumbnail.blob_ref).to eq("bafkreithumb123")
        expect(thumbnail.url_str).to eq(expected_blob_url("bafkreithumb123"))
      end

      it "does not create posts without media" do
        perform_now({ user: user })

        # Only the two posts carrying media are stored; the text-only one is not
        expect(Domain::Post::BlueskyPost.count).to eq(2)
        stored_texts = Domain::Post::BlueskyPost.pluck(:text)
        expect(stored_texts).to contain_exactly(
          "Hello world with image!",
          "Post with external embed",
        )
      end

      it "enqueues StaticFileJob for each post file" do
        perform_now({ user: user })

        # One job for the image, one for the external thumbnail
        expect(Domain::StaticFileJob).to have_received(:perform_later).twice
      end

      it "does not create duplicate posts" do
        # A post for rkey "post1" already exists before the scan runs
        preexisting =
          create(
            :domain_post_bluesky_post,
            at_uri: post_at_uri("post1"),
            bluesky_rkey: "post1",
            creator: user,
          )

        # Only the external embed post should be created
        expect { perform_now({ user: user }) }.to(
          change(Domain::Post::BlueskyPost, :count).by(1),
        )

        found = Domain::Post::BlueskyPost.find_by(bluesky_rkey: "post1")
        expect(found).to eq(preexisting)
      end
    end

    context "when user is not in ok state" do
      let(:user) do
        create(
          :domain_user_bluesky_user,
          did: "did:plc:test123",
          handle: "testuser.bsky.social",
          state: "error",
        )
      end

      it "does not scan posts for users in error state" do
        expect(Scraper::ClientFactory.http_client_mock).not_to receive(:get)

        perform_now({ user: user })
      end
    end

    context "when API returns error" do
      let(:client_mock_config) do
        [
          {
            uri: list_records_uri,
            status_code: 404,
            content_type: "application/json",
            contents: { "error" => "RepoNotFound" }.to_json,
          },
        ]
      end

      before do
        @log_entries = HttpClientMockHelpers.init_with(client_mock_config)
      end

      it "handles API errors gracefully" do
        expect { perform_now({ user: user }) }.to raise_error(
          /failed to get user posts/,
        )

        # The scan counts as not performed: no timestamp, no posts
        expect(user.reload.scanned_posts_at).to be_nil
        expect(Domain::Post::BlueskyPost.count).to eq(0)
      end
    end

    context "with pagination" do
      # Serialized listRecords page holding a single image post.
      def single_image_page(rkey:, cid:, text:, created_at:, alt:, blob_cid:, cursor:)
        {
          "records" => [
            {
              "uri" => post_at_uri(rkey),
              "cid" => cid,
              "value" => {
                "text" => text,
                "createdAt" => created_at,
                "embed" => {
                  "$type" => "app.bsky.embed.images",
                  "images" => [
                    {
                      "alt" => alt,
                      "image" => {
                        "$type" => "blob",
                        "ref" => {
                          "$link" => blob_cid,
                        },
                      },
                    },
                  ],
                },
              },
            },
          ],
          "cursor" => cursor,
        }.to_json
      end

      let(:first_page_response) do
        single_image_page(
          rkey: "post1",
          cid: "bafyreiapost123",
          text: "First post with image",
          created_at: "2025-01-08T12:00:00.000Z",
          alt: "First image",
          blob_cid: "bafkreiimage123",
          cursor: "next_page_cursor",
        )
      end

      let(:second_page_response) do
        single_image_page(
          rkey: "post2",
          cid: "bafyreiapost456",
          text: "Second post with image",
          created_at: "2025-01-08T11:00:00.000Z",
          alt: "Second image",
          blob_cid: "bafkreiimage456",
          cursor: nil,
        )
      end

      let(:client_mock_config) do
        [
          {
            uri: list_records_uri,
            status_code: 200,
            content_type: "application/json",
            contents: first_page_response,
          },
          {
            # Follow-up request carrying the cursor returned by the first page
            uri: "#{list_records_uri}&cursor=next_page_cursor",
            status_code: 200,
            content_type: "application/json",
            contents: second_page_response,
            caused_by_entry: :any,
          },
        ]
      end

      before do
        @log_entries = HttpClientMockHelpers.init_with(client_mock_config)
        allow(Domain::StaticFileJob).to receive(:perform_later)
      end

      it "handles pagination correctly" do
        expect { perform_now({ user: user }) }.to(
          change(Domain::Post::BlueskyPost, :count).by(2),
        )

        by_creation = Domain::Post::BlueskyPost.order(:created_at)
        expect(by_creation.first.text).to eq("First post with image")
        expect(by_creation.second.text).to eq("Second post with image")
      end
    end
  end
end

View File

@@ -6,7 +6,6 @@ require "rails_helper"
RSpec.describe Domain::Bluesky::Job::ScanUserJob do
include PerformJobHelpers
let(:http_client_mock) { instance_double("::Scraper::HttpClient") }
let(:user) do
create(
:domain_user_bluesky_user,
@@ -17,8 +16,6 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
)
end
before { Scraper::ClientFactory.http_client_mock = http_client_mock }
describe "#perform" do
context "when user profile scanning is due" do
let(:profile_response_body) do
@@ -39,50 +36,6 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
}.to_json
end
let(:posts_response_body) do
{
"records" => [
{
"uri" => "at://#{user.did}/app.bsky.feed.post/post1",
"cid" => "bafyreiapost123",
"value" => {
"text" => "Hello world with image!",
"createdAt" => "2025-01-08T12:00:00.000Z",
"embed" => {
"$type" => "app.bsky.embed.images",
"images" => [
{
"alt" => "Test image",
"aspectRatio" => {
"width" => 1920,
"height" => 1080,
},
"image" => {
"$type" => "blob",
"ref" => {
"$link" => "bafkreiimage123",
},
"mimeType" => "image/jpeg",
"size" => 256_000,
},
},
],
},
},
},
{
"uri" => "at://#{user.did}/app.bsky.feed.post/post2",
"cid" => "bafyreiapost456",
"value" => {
"text" => "Just a text post",
"createdAt" => "2025-01-08T11:00:00.000Z",
},
},
],
"cursor" => nil,
}.to_json
end
let(:client_mock_config) do
[
{
@@ -92,26 +45,11 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
content_type: "application/json",
contents: profile_response_body,
},
{
uri:
"https://bsky.social/xrpc/com.atproto.repo.listRecords?repo=#{user.did}&collection=app.bsky.feed.post&limit=100",
status_code: 200,
content_type: "application/json",
contents: posts_response_body,
caused_by_entry: :any, # Accept any caused_by_entry for the posts call
},
]
end
before do
@log_entries =
HttpClientMockHelpers.init_http_client_mock(
http_client_mock,
client_mock_config,
)
# Mock static file job enqueueing - allow it but don't require it
allow(Domain::StaticFileJob).to receive(:perform_later)
@log_entries = HttpClientMockHelpers.init_with(client_mock_config)
end
it "scans user profile and updates user data" do
@@ -121,10 +59,21 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
expect(user.display_name).to eq("Test User")
expect(user.description).to eq("A test user profile")
expect(user.scanned_profile_at).to be_present
expect(user.scanned_posts_at).to be_present
expect(user.state).to eq("ok")
end
it "enqueues ScanPostsJob for posts scanning" do
# Clear any existing enqueued jobs first
SpecUtil.clear_enqueued_jobs!
perform_now({ user: user })
# Check that ScanPostsJob was enqueued with correct arguments
enqueued_jobs =
SpecUtil.enqueued_job_args(Domain::Bluesky::Job::ScanPostsJob)
expect(enqueued_jobs).to contain_exactly(hash_including(user: user))
end
it "creates avatar for user with pending state" do
expect { perform_now({ user: user }) }.to change {
user.reload.avatar.present?
@@ -150,37 +99,6 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
enqueued_jobs = SpecUtil.enqueued_job_args(Domain::UserAvatarJob)
expect(enqueued_jobs).to contain_exactly(hash_including(avatar: avatar))
end
it "creates posts with media and associated files" do
expect { perform_now({ user: user }) }.to change(
Domain::Post::BlueskyPost,
:count,
).by(1).and change(Domain::PostFile::BlueskyPostFile, :count).by(1)
post = Domain::Post::BlueskyPost.last
expect(post.text).to eq("Hello world with image!")
expect(post.creator).to eq(user)
expect(post.bluesky_rkey).to eq("post1")
file = post.files.first
expect(file.alt_text).to eq("Test image")
expect(file.blob_ref).to eq("bafkreiimage123")
expect(file.aspect_ratio_width).to eq(1920)
expect(file.aspect_ratio_height).to eq(1080)
expect(file.url_str).to eq(
"https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=bafkreiimage123",
)
end
it "does not create posts without media" do
perform_now({ user: user })
# Should only create 1 post (the one with media), not the text-only post
expect(Domain::Post::BlueskyPost.count).to eq(1)
expect(Domain::Post::BlueskyPost.first.text).to eq(
"Hello world with image!",
)
end
end
context "avatar handling scenarios" do
@@ -202,8 +120,6 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
}.to_json
end
let(:posts_response_body) { { "records" => [], "cursor" => nil }.to_json }
let(:client_mock_config) do
[
{
@@ -213,23 +129,11 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
content_type: "application/json",
contents: profile_response_body,
},
{
uri:
"https://bsky.social/xrpc/com.atproto.repo.listRecords?repo=#{user.did}&collection=app.bsky.feed.post&limit=100",
status_code: 200,
content_type: "application/json",
contents: posts_response_body,
caused_by_entry: :any, # Accept any caused_by_entry for the posts call
},
]
end
before do
@log_entries =
HttpClientMockHelpers.init_http_client_mock(
http_client_mock,
client_mock_config,
)
@log_entries = HttpClientMockHelpers.init_with(client_mock_config)
end
context "when user has existing avatar with same URL" do
@@ -263,19 +167,20 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
)
end
it "updates avatar URL and enqueues job" do
it "creates a new avatar and enqueues job" do
perform_now({ user: user })
new_avatar = user.reload.avatar
existing_avatar.reload
expect(existing_avatar.url_str).to eq(
"https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=bafkreiavatar123",
"https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=oldavatar456",
)
expect(existing_avatar.state).to eq("pending")
expect(new_avatar.state).to eq("pending")
expect(new_avatar).to_not eq(existing_avatar)
# Should enqueue avatar job with the newly created avatar
enqueued_jobs = SpecUtil.enqueued_job_args(Domain::UserAvatarJob)
expect(enqueued_jobs).to contain_exactly(
hash_including(avatar: existing_avatar),
hash_including(avatar: new_avatar),
)
end
end
@@ -310,7 +215,7 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
end
it "skips scanning if not due" do
expect(http_client_mock).not_to receive(:get)
expect(Scraper::ClientFactory.http_client_mock).not_to receive(:get)
perform_now({ user: user })
end
@@ -319,19 +224,27 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
describe "user creation callback" do
it "enqueues scan job when user is created" do
expect(Domain::Bluesky::Job::ScanUserJob).to receive(:perform_later).with(
{ user: instance_of(Domain::User::BlueskyUser) },
)
# Clear any existing enqueued jobs first
SpecUtil.clear_enqueued_jobs!
create(
:domain_user_bluesky_user,
did: "did:plc:newuser123",
handle: "newuser.bsky.social",
created_user =
create(
:domain_user_bluesky_user,
did: "did:plc:newuser123",
handle: "newuser.bsky.social",
)
# Check that ScanUserJob was enqueued with correct arguments
enqueued_jobs =
SpecUtil.enqueued_job_args(Domain::Bluesky::Job::ScanUserJob)
expect(enqueued_jobs).to contain_exactly(
hash_including(user: created_user),
)
end
it "does not enqueue scan job for users in error state" do
expect(Domain::Bluesky::Job::ScanUserJob).not_to receive(:perform_later)
# Clear any existing enqueued jobs first
SpecUtil.clear_enqueued_jobs!
create(
:domain_user_bluesky_user,
@@ -339,6 +252,11 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do
handle: "erroruser.bsky.social",
state: "error",
)
# Check that no ScanUserJob was enqueued
enqueued_jobs =
SpecUtil.enqueued_job_args(Domain::Bluesky::Job::ScanUserJob)
expect(enqueued_jobs).to be_empty
end
end
end

View File

@@ -102,7 +102,7 @@ RSpec.describe Tasks::Bluesky::Monitor do
)
expect(post.text).to eq("Check out this image!")
expect(post.bluesky_rkey).to eq("test123")
expect(post.bluesky_created_at).to eq(base_time)
expect(post.posted_at).to eq(base_time)
files = post.files.order(:file_order)
expect(files.count).to eq(2)