add taggings to e621 posts
This commit is contained in:
2
Rakefile
2
Rakefile
@@ -49,7 +49,7 @@ task :periodic_tasks => [:environment, :set_logger_stdout] do
|
||||
Rake::Task["fa:browse_page_job"].execute
|
||||
Rake::Task["fa:home_page_job"].execute
|
||||
# Rake::Task["e621:posts_index_job"].execute
|
||||
puts "emitted browse page and home page job"
|
||||
puts "enqueue periodic jobs"
|
||||
sleep 1.minute
|
||||
end
|
||||
end
|
||||
|
||||
@@ -22,12 +22,44 @@ module Domain::E621::Job
|
||||
|
||||
e621_id_to_post = Domain::E621::Post.where(
|
||||
e621_id: json["posts"].map { |post_json| post_json["id"] },
|
||||
).index_by(&:e621_id)
|
||||
).includes(taggings: :tag).index_by(&:e621_id)
|
||||
|
||||
@num_updated = 0
|
||||
@num_created = 0
|
||||
@num_seen = 0
|
||||
|
||||
all_tags = Set.new json["posts"].map { |post_json|
|
||||
tag_and_cat_for_json(post_json)
|
||||
}.flatten
|
||||
all_tag_names = all_tags.map(&:name)
|
||||
|
||||
existing_tags = Domain::E621::Tag.where(name: all_tag_names)
|
||||
@name_to_tag_id = existing_tags.map { |tag| [tag.name, tag.id] }.to_h
|
||||
|
||||
e621_id_to_post.each do |e621_id, post|
|
||||
post.tags.each do |tag|
|
||||
@name_to_tag_id[tag.name] = tag.id
|
||||
end
|
||||
end
|
||||
|
||||
missing_tags = all_tag_names - existing_tags.map(&:name)
|
||||
if missing_tags.any?
|
||||
logger.info("creating #{missing_tags.size.to_s.bold} missing tags")
|
||||
upsert_hashes = missing_tags.map do |name|
|
||||
{ name: name }
|
||||
end
|
||||
Domain::E621::Tag.upsert_all(
|
||||
upsert_hashes,
|
||||
unique_by: :name,
|
||||
update_only: :name,
|
||||
returning: %i[id name],
|
||||
).each do |row|
|
||||
@name_to_tag_id[row["name"]] = row["id"]
|
||||
end
|
||||
end
|
||||
|
||||
json["posts"].each do |post_json|
|
||||
@num_seen += 1
|
||||
e621_id = post_json["id"]
|
||||
post = e621_id_to_post[e621_id] || begin
|
||||
@num_created += 1
|
||||
@@ -40,7 +72,7 @@ module Domain::E621::Job
|
||||
end
|
||||
|
||||
logger.prefix = nil
|
||||
logger.info("#{@num_updated} updated, #{@num_created} created")
|
||||
logger.info("#{@num_updated} updated, #{@num_created} created, #{@num_seen} seen")
|
||||
end
|
||||
|
||||
private
|
||||
@@ -78,13 +110,59 @@ module Domain::E621::Job
|
||||
post.tags_array = post_json["tags"]
|
||||
post.artists_array = post_json["tags"]["artist"]
|
||||
|
||||
is_new = post.new_record?
|
||||
post.save!
|
||||
defer_job(Domain::E621::Job::StaticFileJob, {
|
||||
post: post,
|
||||
caused_by_entry: @log_entry,
|
||||
}) if is_new
|
||||
truth_tag_names = tag_and_cat_for_json(post_json)
|
||||
existing_tag_names = tag_and_cat_for_model(post)
|
||||
to_add = truth_tag_names - existing_tag_names
|
||||
to_remove = existing_tag_names - truth_tag_names
|
||||
|
||||
Domain::E621::Post.transaction do
|
||||
is_new = post.new_record?
|
||||
post.save!
|
||||
|
||||
if to_remove.any?
|
||||
to_remove_ids = to_remove.
|
||||
map(&:name).
|
||||
map { |name| @name_to_tag_id[name] }
|
||||
|
||||
post.
|
||||
taggings.
|
||||
where(tag_id: to_remove_ids).
|
||||
delete_all
|
||||
end
|
||||
|
||||
post.taggings.insert_all!(to_add.map do |tag_and_cat|
|
||||
id = @name_to_tag_id[tag_and_cat.name]
|
||||
{ tag_id: id, category: tag_and_cat.category }
|
||||
end) if to_add.any?
|
||||
|
||||
defer_job(Domain::E621::Job::StaticFileJob, {
|
||||
post: post,
|
||||
caused_by_entry: @log_entry,
|
||||
}) if is_new
|
||||
end
|
||||
|
||||
true
|
||||
end
|
||||
|
||||
# Translates tag names into tag ids using the @name_to_tag_id lookup table.
#
# @param names [Enumerable<String>] tag names to resolve
# @return [Array] one entry per name, in the same order; the entry is nil
#   when the name is not present in the lookup table
def tag_names_to_ids(names)
  names.map do |tag_name|
    @name_to_tag_id[tag_name]
  end
end
|
||||
|
||||
# Keys read from a post's "tags" hash. Each one is stored on a tagging as the
# category string "cat_<key>".
# NOTE(review): the Tagging enum also declares :cat_invalid, but "invalid" is
# not listed here -- confirm the omission is intentional.
TAG_CATEGORIES = %w[general species character copyright artist lore meta].freeze

# A tag name paired with its category string. Struct instances compare
# member-wise, so arrays of these can be diffed with Array#-.
TagAndCategory = Struct.new(:name, :category)

# Builds the tag/category pairs currently stored on a post.
#
# @param model [#taggings] record whose taggings respond to `tag.name` and `category`
# @return [Array<TagAndCategory>] one entry per tagging
def tag_and_cat_for_model(model)
  model.taggings.map do |tagging|
    TagAndCategory.new(tagging.tag.name, tagging.category)
  end
end

# Builds the tag/category pairs described by a single post's JSON.
#
# @param post_json [Hash] post payload; "tags" is expected to map a category
#   name to an array of tag names
# @return [Array<TagAndCategory>] pairs ordered by TAG_CATEGORIES, then by
#   position within each category
def tag_and_cat_for_json(post_json)
  TAG_CATEGORIES.flat_map do |tc|
    # A category (or the whole "tags" hash) may be absent from the payload;
    # treat that as "no tags" rather than calling #map on nil.
    Array(post_json.dig("tags", tc)).map do |name|
      TagAndCategory.new(name, "cat_#{tc}")
    end
  end
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -5,15 +5,15 @@ class Domain::E621::Tagging < ReduxApplicationRecord
|
||||
belongs_to :tag,
|
||||
class_name: "Domain::E621::Tag"
|
||||
|
||||
enum type: [
|
||||
:type_general,
|
||||
:type_artist,
|
||||
:type_copyright,
|
||||
:type_character,
|
||||
:type_species,
|
||||
:type_invalid,
|
||||
:type_meta,
|
||||
:type_lore,
|
||||
enum category: [
|
||||
:cat_general,
|
||||
:cat_artist,
|
||||
:cat_copyright,
|
||||
:cat_character,
|
||||
:cat_species,
|
||||
:cat_invalid,
|
||||
:cat_meta,
|
||||
:cat_lore,
|
||||
]
|
||||
validates_inclusion_of(:type, in: self.types.keys)
|
||||
validates_inclusion_of(:category, in: self.categories.keys)
|
||||
end
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
# Drops the e621_id column, and the unique index on it, from domain_e621_tags.
class RemoveE621IdFromTags < ActiveRecord::Migration[7.0]
  def change
    # The column type and index options are spelled out so the migration
    # carries enough information to be rolled back.
    remove_index(:domain_e621_tags, column: :e621_id, unique: true)
    remove_column(:domain_e621_tags, :e621_id, :integer)
  end
end
|
||||
@@ -0,0 +1,5 @@
|
||||
# Renames the `type` column on domain_e621_taggings to `category`.
# NOTE(review): presumably done because `type` is ActiveRecord's default
# single-table-inheritance column name -- confirm.
class RenameE621TaggingType < ActiveRecord::Migration[7.0]
  def change
    rename_column(:domain_e621_taggings, :type, :category)
  end
end
|
||||
6
db/schema.rb
generated
6
db/schema.rb
generated
@@ -10,7 +10,7 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_08_19_011649) do
|
||||
ActiveRecord::Schema[7.0].define(version: 2023_08_23_002507) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pg_stat_statements"
|
||||
enable_extension "pg_trgm"
|
||||
@@ -736,7 +736,7 @@ ActiveRecord::Schema[7.0].define(version: 2023_08_19_011649) do
|
||||
end
|
||||
|
||||
create_table "domain_e621_taggings", force: :cascade do |t|
|
||||
t.integer "type"
|
||||
t.integer "category"
|
||||
t.bigint "post_id"
|
||||
t.bigint "tag_id"
|
||||
t.index ["post_id"], name: "index_domain_e621_taggings_on_post_id"
|
||||
@@ -745,12 +745,10 @@ ActiveRecord::Schema[7.0].define(version: 2023_08_19_011649) do
|
||||
|
||||
create_table "domain_e621_tags", force: :cascade do |t|
|
||||
t.string "name", null: false
|
||||
t.integer "e621_id"
|
||||
t.integer "type"
|
||||
t.string "description"
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.index ["e621_id"], name: "index_domain_e621_tags_on_e621_id", unique: true
|
||||
t.index ["name"], name: "index_domain_e621_tags_on_name", unique: true
|
||||
end
|
||||
|
||||
|
||||
@@ -37,5 +37,32 @@ describe Domain::E621::Job::PostsIndexJob do
|
||||
"https://static1.e621.net/data/1c/61/1c6169aa51668681e9697a48144d7c78.jpg"
|
||||
)
|
||||
end
|
||||
|
||||
it "fixes tags to reflect reality" do
  # Seed a post whose stored taggings disagree with what the job will see:
  # "tag1" is stale, and "mammal" is filed under the wrong category.
  post = Domain::E621::Post.create!(
    e621_id: 4247443,
    md5: "1c6169aa51668681e9697a48144d7c78",
  )
  stale_tag = Domain::E621::Tag.create!(name: "tag1")
  mammal_tag = Domain::E621::Tag.create!(name: "mammal")
  [stale_tag, mammal_tag].each do |tag|
    post.taggings.create!(tag: tag, category: "cat_general")
  end
  post.save!

  perform_now({})

  post.reload
  tag_names = post.tags.map(&:name)

  # removes the tag1 tag
  expect(tag_names).not_to include("tag1")

  # keeps tags that are in the json
  expect(tag_names).to include("absurd_res")
  expect(tag_names).to include("mammal")

  # changes existing mammal/general tag to mammal/species
  mammal_tagging = post.taggings.find do |tagging|
    tagging.tag.name == "mammal"
  end
  expect(mammal_tagging.category).to eq("cat_species")
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user