add taggings to e621 posts

This commit is contained in:
Dylan Knutson
2023-08-22 17:42:11 -07:00
parent d358cdbd7f
commit ea5a2a7d6c
7 changed files with 137 additions and 23 deletions

View File

@@ -49,7 +49,7 @@ task :periodic_tasks => [:environment, :set_logger_stdout] do
Rake::Task["fa:browse_page_job"].execute
Rake::Task["fa:home_page_job"].execute
# Rake::Task["e621:posts_index_job"].execute
puts "emitted browse page and home page job"
puts "enqueue periodic jobs"
sleep 1.minute
end
end

View File

@@ -22,12 +22,44 @@ module Domain::E621::Job
e621_id_to_post = Domain::E621::Post.where(
e621_id: json["posts"].map { |post_json| post_json["id"] },
).index_by(&:e621_id)
).includes(taggings: :tag).index_by(&:e621_id)
@num_updated = 0
@num_created = 0
@num_seen = 0
all_tags = Set.new json["posts"].map { |post_json|
tag_and_cat_for_json(post_json)
}.flatten
all_tag_names = all_tags.map(&:name)
existing_tags = Domain::E621::Tag.where(name: all_tag_names)
@name_to_tag_id = existing_tags.map { |tag| [tag.name, tag.id] }.to_h
e621_id_to_post.each do |e621_id, post|
post.tags.each do |tag|
@name_to_tag_id[tag.name] = tag.id
end
end
missing_tags = all_tag_names - existing_tags.map(&:name)
if missing_tags.any?
logger.info("creating #{missing_tags.size.to_s.bold} missing tags")
upsert_hashes = missing_tags.map do |name|
{ name: name }
end
Domain::E621::Tag.upsert_all(
upsert_hashes,
unique_by: :name,
update_only: :name,
returning: %i[id name],
).each do |row|
@name_to_tag_id[row["name"]] = row["id"]
end
end
json["posts"].each do |post_json|
@num_seen += 1
e621_id = post_json["id"]
post = e621_id_to_post[e621_id] || begin
@num_created += 1
@@ -40,7 +72,7 @@ module Domain::E621::Job
end
logger.prefix = nil
logger.info("#{@num_updated} updated, #{@num_created} created")
logger.info("#{@num_updated} updated, #{@num_created} created, #{@num_seen} seen")
end
private
@@ -78,13 +110,59 @@ module Domain::E621::Job
post.tags_array = post_json["tags"]
post.artists_array = post_json["tags"]["artist"]
is_new = post.new_record?
post.save!
defer_job(Domain::E621::Job::StaticFileJob, {
post: post,
caused_by_entry: @log_entry,
}) if is_new
truth_tag_names = tag_and_cat_for_json(post_json)
existing_tag_names = tag_and_cat_for_model(post)
to_add = truth_tag_names - existing_tag_names
to_remove = existing_tag_names - truth_tag_names
Domain::E621::Post.transaction do
is_new = post.new_record?
post.save!
if to_remove.any?
to_remove_ids = to_remove.
map(&:name).
map { |name| @name_to_tag_id[name] }
post.
taggings.
where(tag_id: to_remove_ids).
delete_all
end
post.taggings.insert_all!(to_add.map do |tag_and_cat|
id = @name_to_tag_id[tag_and_cat.name]
{ tag_id: id, category: tag_and_cat.category }
end) if to_add.any?
defer_job(Domain::E621::Job::StaticFileJob, {
post: post,
caused_by_entry: @log_entry,
}) if is_new
end
true
end
# Resolves tag names to the ids recorded in @name_to_tag_id.
#
# @param names [Enumerable<String>] tag names to look up
# @return [Array] one entry per name, in the same order; a name with no
#   entry yields whatever the hash returns for a missing key
def tag_names_to_ids(names)
  id_by_name = @name_to_tag_id
  names.map { |tag_name| id_by_name[tag_name] }
end
# Category keys read from a post's "tags" JSON object, in the order their
# tags are emitted by tag_and_cat_for_json. Frozen so the shared list cannot
# be mutated at runtime.
# NOTE(review): "invalid" is not listed here although the Tagging enum
# declares cat_invalid - presumably intentional; confirm.
TAG_CATEGORIES = %w[general species character copyright artist lore meta].freeze

# Pairing of a tag name with its category. Struct gives value equality, so
# two lists of pairs can be compared with Array#-.
TagAndCategory = Struct.new(:name, :category)

# Builds the (name, category) pairs for the taggings currently on a record.
#
# @param model [#taggings] record whose taggings respond to +tag+ and +category+
# @return [Array<TagAndCategory>] one pair per tagging
def tag_and_cat_for_model(model)
  model.taggings.map do |tagging|
    TagAndCategory.new(tagging.tag.name, tagging.category)
  end
end

# Builds the (name, category) pairs described by a single post's JSON.
# Category names are prefixed with "cat_". A category that is absent from
# the JSON (or a missing "tags" object) contributes no pairs instead of
# raising on nil.
#
# @param post_json [Hash] post payload with a "tags" hash keyed by category
# @return [Array<TagAndCategory>] pairs in TAG_CATEGORIES order
def tag_and_cat_for_json(post_json)
  tags_by_category = post_json["tags"] || {}
  TAG_CATEGORIES.flat_map do |tc|
    Array(tags_by_category[tc]).map do |name|
      TagAndCategory.new(name, "cat_#{tc}")
    end
  end
end
end
end

View File

@@ -5,15 +5,15 @@ class Domain::E621::Tagging < ReduxApplicationRecord
belongs_to :tag,
class_name: "Domain::E621::Tag"
enum type: [
:type_general,
:type_artist,
:type_copyright,
:type_character,
:type_species,
:type_invalid,
:type_meta,
:type_lore,
enum category: [
:cat_general,
:cat_artist,
:cat_copyright,
:cat_character,
:cat_species,
:cat_invalid,
:cat_meta,
:cat_lore,
]
validates_inclusion_of(:type, in: self.types.keys)
validates_inclusion_of(:category, in: self.categories.keys)
end

View File

@@ -0,0 +1,6 @@
# Drops the e621_id column, and the unique index on it, from domain_e621_tags.
class RemoveE621IdFromTags < ActiveRecord::Migration[7.0]
  def change
    change_table :domain_e621_tags do |t|
      # Index first, then column; the column type is stated so the
      # migration stays reversible.
      t.remove_index :e621_id, unique: true
      t.remove :e621_id, type: :integer
    end
  end
end

View File

@@ -0,0 +1,5 @@
# Renames domain_e621_taggings.type to category.
class RenameE621TaggingType < ActiveRecord::Migration[7.0]
  def change
    change_table :domain_e621_taggings do |t|
      t.rename :type, :category
    end
  end
end

6
db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2023_08_19_011649) do
ActiveRecord::Schema[7.0].define(version: 2023_08_23_002507) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_stat_statements"
enable_extension "pg_trgm"
@@ -736,7 +736,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_08_19_011649) do
end
create_table "domain_e621_taggings", force: :cascade do |t|
t.integer "type"
t.integer "category"
t.bigint "post_id"
t.bigint "tag_id"
t.index ["post_id"], name: "index_domain_e621_taggings_on_post_id"
@@ -745,12 +745,10 @@ ActiveRecord::Schema[7.0].define(version: 2023_08_19_011649) do
create_table "domain_e621_tags", force: :cascade do |t|
t.string "name", null: false
t.integer "e621_id"
t.integer "type"
t.string "description"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["e621_id"], name: "index_domain_e621_tags_on_e621_id", unique: true
t.index ["name"], name: "index_domain_e621_tags_on_name", unique: true
end

View File

@@ -37,5 +37,32 @@ describe Domain::E621::Job::PostsIndexJob do
"https://static1.e621.net/data/1c/61/1c6169aa51668681e9697a48144d7c78.jpg"
)
end
it "fixes tags to reflect reality" do
  # Seed a post whose stored taggings disagree with what the job will read:
  # "tag1" should disappear, and "mammal" starts out under the general category.
  post = Domain::E621::Post.create!({
    e621_id: 4247443,
    md5: "1c6169aa51668681e9697a48144d7c78",
  })
  stale_tag = Domain::E621::Tag.create!(name: "tag1")
  recategorized_tag = Domain::E621::Tag.create!(name: "mammal")
  [stale_tag, recategorized_tag].each do |tag|
    post.taggings.create!(tag: tag, category: "cat_general")
  end
  post.save!

  perform_now({})
  post.reload

  tag_names = post.tags.map(&:name)

  # removes the tag1 tag
  expect(tag_names).not_to include("tag1")

  # keeps tags that are in the json
  expect(tag_names).to include("absurd_res", "mammal")

  # changes existing mammal/general tag to mammal/species
  mammal_tagging = post.taggings.find do |tagging|
    tagging.tag.name == "mammal"
  end
  expect(mammal_tagging.category).to eq("cat_species")
end
end
end