extract bsky posts/users from e621

This commit is contained in:
Dylan Knutson
2025-08-16 18:59:21 +00:00
parent 0fd4d13673
commit d06347a771
3 changed files with 101 additions and 1105 deletions

View File

@@ -11,6 +11,20 @@ class Domain::Bluesky::MonitoredObject < ReduxApplicationRecord
after_create_commit :notify_monitor_added
after_destroy_commit :notify_monitor_removed
# Builds a monitored object of kind `:user_did` whose value is the given
# user's DID. The record is only built here; callers in the rake tasks
# persist it themselves via `save` / `save!`.
#
# NOTE(review): `did!` presumably raises when the user has no DID — confirm
# against Domain::User::BlueskyUser.
sig do
  params(user: Domain::User::BlueskyUser).returns(
    Domain::Bluesky::MonitoredObject,
  )
end
def self.build_for_user(user)
  user_did = user.did!
  build(value: user_did, kind: :user_did)
end
# Builds a monitored object of kind `:hashtag` whose value is the given
# hashtag string, passed through unchanged (the `add` rake task strips a
# leading "#" before calling this). Callers persist the result themselves.
sig do
  params(hashtag: String).returns(Domain::Bluesky::MonitoredObject)
end
def self.build_for_hashtag(hashtag)
  attributes = { value: hashtag, kind: :hashtag }
  build(attributes)
end
sig { void }
def notify_monitor_added
self.class.connection.execute(

1084
dids.txt

File diff suppressed because it is too large Load Diff

View File

@@ -15,14 +15,22 @@ module BlueskyRakeHelpers
DIDKit::Resolver.new.resolve_did(did).get_validated_handle
end
sig { returns(T.nilable(Domain::User::BlueskyUser)) }
def self.user_from_env
if (handle = ENV["handle"])
did = self.resolve_did(handle)
Domain::User::BlueskyUser.find_or_create_by!(did:) do |user|
user.handle = handle
end
elsif (did = ENV["did"])
sig do
params(handle: T.nilable(String), did: T.nilable(String)).returns(
T.nilable(Domain::User::BlueskyUser),
)
end
def self.user_from_env(handle: nil, did: nil)
if handle
Domain::User::BlueskyUser.find_by(handle:) ||
begin
did = self.resolve_did(handle)
return nil if did.blank?
Domain::User::BlueskyUser.find_or_create_by!(did:) do |user|
user.handle = handle
end
end
elsif did
Domain::User::BlueskyUser.find_or_create_by!(did:) do |user|
user.handle = resolve_handle(did)
end
@@ -42,17 +50,65 @@ namespace :bluesky do
# Remove # if provided
hashtag = hashtag.gsub(/^#/, "")
Domain::Bluesky::MonitoredObject.create!(value: hashtag, kind: :hashtag)
Domain::Bluesky::MonitoredObject.build_for_hashtag(hashtag).save!
puts "Added hashtag: ##{hashtag}"
elsif (user = BlueskyRakeHelpers.user_from_env)
elsif ENV["handle"] || ENV["did"]
user =
BlueskyRakeHelpers.user_from_env(handle: ENV["handle"], did: ENV["did"])
if user.nil?
puts "user not found"
next
end
Domain::Bluesky::Job::ScanUserJob.perform_later(user:)
Domain::Bluesky::Job::ScanPostsJob.perform_later(user:)
Domain::Bluesky::MonitoredObject.create!(value: user.did, kind: :user_did)
Domain::Bluesky::MonitoredObject.build_for_user(user).save!
else
raise "hashtag, handle, or did is required"
end
end
# Bulk-adds users to the Bluesky monitor.
#
# Reads one identifier per line from the file named by ENV["file"], or from
# STDIN when no file is given. A line starting with "did:" is treated as a
# DID; any other non-blank line is treated as a handle. For every user that
# resolves, profile/posts scan jobs are enqueued only if the corresponding
# `scanned_*_at` timestamp is not yet set, and a monitored object is saved.
#
# Any error while processing a line is reported together with the offending
# line and then re-raised, aborting the task.
desc "Add DID or user handle, bulk from stdin"
task add_bulk: :environment do
  added = 0
  path = ENV["file"]
  input = path ? File.open(path, "r") : STDIN

  begin
    input.each_line do |raw_line|
      # `strip` already removes the trailing newline, so no `chomp` needed.
      line = raw_line.strip
      next if line.blank?

      begin
        user =
          if line.start_with?("did:")
            BlueskyRakeHelpers.user_from_env(did: line)
          else
            BlueskyRakeHelpers.user_from_env(handle: line)
          end

        if user
          unless user.scanned_profile_at.present?
            Domain::Bluesky::Job::ScanUserJob.perform_later(user:)
            puts "enqueued profile scan: #{user.did} / #{user.handle}"
          end

          unless user.scanned_posts_at.present?
            Domain::Bluesky::Job::ScanPostsJob.perform_later(user:)
            puts "enqueued posts scan: #{user.did} / #{user.handle}"
          end

          # `save` (not `save!`) so a record that fails to save is skipped
          # quietly instead of aborting the whole import.
          obj = Domain::Bluesky::MonitoredObject.build_for_user(user)
          if obj.save
            puts "added #{user.did} / #{user.handle} to monitor"
            added += 1
          end
        else
          puts "user not found: #{line}"
        end
      rescue StandardError => e
        puts "error: #{e.message}"
        puts "line: #{line}"
        raise
      end
    end
  ensure
    # Only close handles this task opened itself; never close STDIN.
    input.close if path
  end

  puts "added #{added} users to monitor"
end
desc "Remove a DID or hashtag from the Bluesky monitor"
task remove: :environment do
if (hashtag = ENV["hashtag"])
@@ -88,20 +144,30 @@ namespace :bluesky do
desc "Watch users that user follows"
task watch_follows: :environment do
user =
BlueskyRakeHelpers.user_from_env ||
raise("user is required, use `handle` or `did`")
BlueskyRakeHelpers.user_from_env(
handle: ENV["handle"],
did: ENV["did"],
) || raise("user is required, use `handle` or `did`")
Domain::Bluesky::Job::ScanUserFollowsJob.perform_now(user:)
user.reload
user.user_user_follows_from.each do |follow|
to_user = T.cast(follow.to, Domain::User::BlueskyUser)
model =
Domain::Bluesky::MonitoredObject.create(
value: to_user.did!,
kind: :user_did,
)
if model.persisted?
puts "added #{to_user.did} / #{to_user.handle} to monitor"
end
model = Domain::Bluesky::MonitoredObject.build_for_user(to_user)
puts "added #{to_user.did} / #{to_user.handle} to monitor" if model.save
end
end
# Prints every distinct E621 post source that contains "https://bsky.app".
#
# Posts are pre-filtered in SQL by matching the text form of `sources_array`,
# then each individual source is re-checked in Ruby, since a matching post
# may also carry sources that are not bsky.app links. A Set tracks which
# sources have already been printed so each one is emitted once.
desc "Extract bluesky post rkeys / user dids from E621 sources"
task extract_e621_bluesky_posts_and_users: :environment do
  seen = Set.new
  posts_with_bsky_sources =
    Domain::Post::E621Post.where(
      "sources_array::text LIKE '%https://bsky.app%'",
    )

  posts_with_bsky_sources.find_each(batch_size: 32) do |post|
    post.sources_array.each do |url|
      next unless url.include?("https://bsky.app")
      # `Set#add?` returns nil when the element is already present.
      next unless seen.add?(url)

      puts url
    end
  end
end
end