From 43876ef7c44112738d445ae9a717f2c476ff4a12 Mon Sep 17 00:00:00 2001 From: Dylan Knutson Date: Fri, 15 Aug 2025 22:15:09 +0000 Subject: [PATCH] use getProfile for scan user job --- app/jobs/domain/bluesky/job/scan_user_job.rb | 101 ++++++--- .../domain/bluesky/job/scan_user_job_spec.rb | 205 ++++++++++-------- 2 files changed, 184 insertions(+), 122 deletions(-) diff --git a/app/jobs/domain/bluesky/job/scan_user_job.rb b/app/jobs/domain/bluesky/job/scan_user_job.rb index 4df5e87b..f2db20fe 100644 --- a/app/jobs/domain/bluesky/job/scan_user_job.rb +++ b/app/jobs/domain/bluesky/job/scan_user_job.rb @@ -26,37 +26,14 @@ class Domain::Bluesky::Job::ScanUserJob < Domain::Bluesky::Job::Base private - sig { params(user: Domain::User::BlueskyUser).void } - def set_user_registration_time(user) - audit_log = http_client.get("https://plc.directory/#{user.did}/log/audit") - if audit_log.status_code != 200 - fatal_error( - format_tags( - "failed to get user registration time", - make_tags(status_code: audit_log.status_code), - ), - ) - end - - audit_log_data = - T.cast(JSON.parse(audit_log.body), T::Array[T::Hash[String, T.untyped]]) - if (data = audit_log_data.first) - registered_at = Time.parse(data["createdAt"]).in_time_zone("UTC") - user.registered_at = registered_at - logger.info( - format_tags("set user registration time", make_tags(registered_at:)), - ) - end - end - sig { params(user: Domain::User::BlueskyUser).void } def scan_user_profile(user) logger.info(format_tags("scanning user profile")) profile_scan = Domain::UserJobEvent::ProfileScan.create!(user:) - # Use AT Protocol API to get user profile + # Use Bluesky Actor API to get user profile profile_url = - "https://bsky.social/xrpc/com.atproto.repo.getRecord?repo=#{user.did}&collection=app.bsky.actor.profile&rkey=self" + "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=#{user.did}" response = http_client.get(profile_url) user.last_scan_log_entry = response.log_entry @@ -91,20 +68,29 @@ class Domain::Bluesky::Job::ScanUserJob < Domain::Bluesky::Job::Base ) end - record = profile_data["value"] + # The getProfile endpoint returns the profile data directly, not wrapped in "value" + record = profile_data if record # Update user profile information user.description = record["description"] user.display_name = record["displayName"] user.profile_raw = record - # Process avatar if present - if record["avatar"] && record["avatar"]["ref"] - process_user_avatar(user, record["avatar"]) + # Set registration time from profile createdAt + if record["createdAt"] + user.registered_at = Time.parse(record["createdAt"]).in_time_zone("UTC") + logger.info( + format_tags( + "set user registration time", + make_tags(registered_at: user.registered_at), + ), + ) end + + # Process avatar if present + process_user_avatar_url(user, record["avatar"]) if record["avatar"] end - set_user_registration_time(user) user.scanned_profile_at = Time.zone.now end @@ -175,4 +161,59 @@ class Domain::Bluesky::Job::ScanUserJob < Domain::Bluesky::Job::Base ) end end + + sig { params(user: Domain::User::BlueskyUser, avatar_url: String).void } + def process_user_avatar_url(user, avatar_url) + logger.debug( + format_tags("processing user avatar url", make_tags(avatar_url:)), + ) + return if avatar_url.blank? + + # Check if avatar already exists and is downloaded + existing_avatar = user.avatar + if existing_avatar.present? + logger.debug( + format_tags( + "existing avatar found", + make_tags(state: existing_avatar.state), + ), + ) + # Only enqueue if the avatar URL has changed or it's not downloaded yet + if existing_avatar.url_str != avatar_url + avatar = user.avatars.create!(url_str: avatar_url) + logger.info( + format_tags( + "avatar url changed, creating new avatar", + make_arg_tag(avatar), + ), + ) + defer_job( + Domain::UserAvatarJob, + { avatar: avatar }, + { queue: "bluesky", priority: -30 }, + ) + elsif existing_avatar.state_pending? + defer_job( + Domain::UserAvatarJob, + { avatar: existing_avatar }, + { queue: "bluesky", priority: -30 }, + ) + logger.info(format_tags("re-enqueued pending avatar download")) + end + else + # Create new avatar and enqueue download + avatar = user.avatars.create!(url_str: avatar_url) + defer_job( + Domain::UserAvatarJob, + { avatar: }, + { queue: "bluesky", priority: -30 }, + ) + logger.info( + format_tags( + "created avatar and enqueued download", + make_arg_tag(avatar), + ), + ) + end + end end diff --git a/spec/jobs/domain/bluesky/job/scan_user_job_spec.rb b/spec/jobs/domain/bluesky/job/scan_user_job_spec.rb index 635090c7..27fa4bb3 100644 --- a/spec/jobs/domain/bluesky/job/scan_user_job_spec.rb +++ b/spec/jobs/domain/bluesky/job/scan_user_job_spec.rb @@ -20,19 +20,13 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do context "when user profile scanning is due" do let(:profile_response_body) do { - "uri" => "at://#{user.did}/app.bsky.actor.profile/self", - "cid" => "bafyreiabc123", - "value" => { - "displayName" => "Test User", - "description" => "A test user profile", - "avatar" => { - "ref" => { - "$link" => "bafkreiavatar123", - }, - "mimeType" => "image/jpeg", - "size" => 50_000, - }, - }, + "did" => user.did, + "handle" => "testuser.bsky.social", + "displayName" => "Test User", + "description" => "A test user profile", + "avatar" => + "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=bafkreiavatar123", + "createdAt" => "2023-07-03T05:08:27.780Z", }.to_json end @@ -40,41 +34,11 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do [ { uri: - "https://bsky.social/xrpc/com.atproto.repo.getRecord?repo=#{user.did}&collection=app.bsky.actor.profile&rkey=self", + "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=#{user.did}", status_code: 200, content_type: "application/json", contents: profile_response_body, }, - { - uri: "https://plc.directory/#{user.did}/log/audit", - status_code: 200, - content_type: "application/json", - caused_by_entry_idx: 0, - contents: [ - { - did: "did:plc:le66o7kn5k4iqkxbih7gi4w2", - operation: { - sig: "signature", - prev: nil, - type: "plc_operation", - services: { - atproto_pds: { - type: "AtprotoPersonalDataServer", - endpoint: "https://bsky.social", - }, - }, - alsoKnownAs: ["at://handle.bsky.social"], - rotationKeys: ["rotation_key"], - verificationMethods: { - atproto: "verification_method", - }, - }, - cid: "cid", - nullified: false, - createdAt: "2023-07-03T05:08:27.780Z", - }, - ].to_json, - }, ] end @@ -146,19 +110,13 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do context "avatar handling scenarios" do let(:profile_response_body) do { - "uri" => "at://#{user.did}/app.bsky.actor.profile/self", - "cid" => "bafyreiabc123", - "value" => { - "displayName" => "Test User", - "description" => "A test user profile", - "avatar" => { - "ref" => { - "$link" => "bafkreiavatar123", - }, - "mimeType" => "image/jpeg", - "size" => 50_000, - }, - }, + "did" => user.did, + "handle" => "testuser.bsky.social", + "displayName" => "Test User", + "description" => "A test user profile", + "avatar" => + "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=bafkreiavatar123", + "createdAt" => "2023-07-03T05:08:27.780Z", }.to_json end @@ -166,41 +124,11 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do [ { uri: - "https://bsky.social/xrpc/com.atproto.repo.getRecord?repo=#{user.did}&collection=app.bsky.actor.profile&rkey=self", + "https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=#{user.did}", status_code: 200, content_type: "application/json", contents: profile_response_body, }, - { - uri: "https://plc.directory/#{user.did}/log/audit", - status_code: 200, - content_type: "application/json", - caused_by_entry_idx: 0, - contents: [ - { - did: "did:plc:le66o7kn5k4iqkxbih7gi4w2", - operation: { - sig: "signature", - prev: nil, - type: "plc_operation", - services: { - atproto_pds: { - type: "AtprotoPersonalDataServer", - endpoint: "https://bsky.social", - }, - }, - alsoKnownAs: ["at://handle.bsky.social"], - rotationKeys: ["rotation_key"], - verificationMethods: { - atproto: "verification_method", - }, - }, - cid: "cid", - nullified: false, - createdAt: "2023-07-03T05:08:27.780Z", - }, - ].to_json, - }, ] end @@ -240,16 +168,109 @@ RSpec.describe Domain::Bluesky::Job::ScanUserJob do end it "creates a new avatar and enqueues job" do + # Verify initial state + expect(user.avatars.count).to eq(1) + expect(user.avatar).to eq(existing_avatar) + perform_now({ user: user }) - new_avatar = user.reload.avatar + + user.reload existing_avatar.reload + + # Should have 2 avatars now - old one + new one + expect(user.avatars.count).to eq(2) + + # The current avatar should be the new one (most recent) + new_avatar = user.avatar + expect(new_avatar).to_not eq(existing_avatar) + expect(new_avatar.url_str).to eq( + "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=bafkreiavatar123", + ) + expect(new_avatar.state).to eq("pending") + + # Old avatar should remain unchanged expect(existing_avatar.url_str).to eq( "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=oldavatar456", ) - expect(new_avatar.state).to eq("pending") - expect(new_avatar).to_not eq(existing_avatar) + expect(existing_avatar.state).to eq("ok") - # Should enqueue avatar job with the existing avatar + # Should enqueue avatar job for the new avatar + enqueued_jobs = SpecUtil.enqueued_job_args(Domain::UserAvatarJob) + expect(enqueued_jobs).to contain_exactly( + hash_including(avatar: new_avatar), + ) + end + end + + context "when user has existing avatar and profile has no avatar" do + let!(:existing_avatar) do + user.create_avatar!( + url_str: + "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=oldavatar456", + state: "ok", + ) + end + + let(:profile_response_body) do + { + "did" => user.did, + "handle" => "testuser.bsky.social", + "displayName" => "Test User", + "description" => "A test user profile", + "createdAt" => "2023-07-03T05:08:27.780Z", + }.to_json + end + + it "does not create new avatar or enqueue job" do + expect(user.avatars.count).to eq(1) + + perform_now({ user: user }) + + user.reload + expect(user.avatars.count).to eq(1) + expect(user.avatar).to eq(existing_avatar) + + # Should not enqueue any avatar job + enqueued_jobs = SpecUtil.enqueued_job_args(Domain::UserAvatarJob) + expect(enqueued_jobs).to be_empty + end + end + + context "when user has multiple existing avatars" do + let!(:first_avatar) do + user.avatars.create!( + url_str: + "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=firstavatar123", + state: "ok", + created_at: 2.days.ago, + ) + end + + let!(:second_avatar) do + user.avatars.create!( + url_str: + "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=secondavatar456", + state: "ok", + created_at: 1.day.ago, + ) + end + + it "compares against the current (most recent) avatar" do + expect(user.avatar).to eq(second_avatar) + expect(user.avatars.count).to eq(2) + + perform_now({ user: user }) + + user.reload + + # Should create a third avatar since URL is different from current + expect(user.avatars.count).to eq(3) + new_avatar = user.avatar + expect(new_avatar.url_str).to eq( + "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=#{user.did}&cid=bafkreiavatar123", + ) + + # Should enqueue job for new avatar enqueued_jobs = SpecUtil.enqueued_job_args(Domain::UserAvatarJob) expect(enqueued_jobs).to contain_exactly( hash_including(avatar: new_avatar),