parallel blob insert

This commit is contained in:
2023-02-03 20:35:31 +00:00
parent a4062d8eb4
commit c5c77b885b
4 changed files with 16 additions and 8 deletions

View File

@@ -249,7 +249,12 @@ class LegacyImport::HttpLogEntryBulkImporter
# bulk-insert all the new blob entries
timings.start :insert_new_bes
BlobEntry.insert_all!(blob_entries_to_insert.map(&:to_bulk_insert_hash)) if blob_entries_to_insert.any?
slice_size = [(blob_entries_to_insert.size.to_f / @fork_amount).ceil, 1].max
blob_entries_to_insert.each_slice(slice_size).map do |slice|
ForkFuture.new do
BlobEntry.insert_all!(slice.map(&:to_bulk_insert_hash)) if slice.any?
end
end.to_a.map(&:join) if blob_entries_to_insert.any?
insert_stats.blob_entries_inserted += blob_entries_to_insert.size
insert_stats.bytes_length += blob_entries_to_insert.map(&:contents).map(&:size).sum
insert_stats.bytes_stored += blob_entries_to_insert.map(&:bytes_stored).sum
@@ -265,7 +270,7 @@ class LegacyImport::HttpLogEntryBulkImporter
timings.start :build_new_headers
header_sha256_to_header_model = {}
legacy_model_id_to_header_sha256s =
ForkFuture.parallel_map(@fork_amount, legacy_models) do |legacy_model|
ForkFuture.parallel_map(@fork_amount / 2, legacy_models) do |legacy_model|
req_headers = ::HttpLogEntryHeader.build_record(headers: legacy_model.req_headers)
res_headers = ::HttpLogEntryHeader.build_record(headers: legacy_model.res_headers)
[legacy_model.id, {

View File

@@ -40,10 +40,5 @@ class CreateHttpLogEntries < ActiveRecord::Migration[7.0]
t.index :sha256, unique: true
end
add_foreign_key :http_log_entries, :blob_entries, column: :response_sha256, primary_key: :sha256
add_foreign_key :http_log_entries, :http_log_entry_headers, column: :request_headers_id, primary_key: :id
add_foreign_key :http_log_entries, :http_log_entry_headers, column: :response_headers_id, primary_key: :id
add_foreign_key :blob_entries, :blob_entries, column: :base_sha256, primary_key: :sha256
end
end

View File

@@ -0,0 +1,8 @@
# Migration that adds four validated foreign key constraints: three from
# http_log_entries (to blob_entries and http_log_entry_headers) and one
# self-referential constraint on blob_entries.
class AddHttpLogForeignKeyConstraints < ActiveRecord::Migration[7.0]
  # Declares each constraint via add_foreign_key, in the order listed below.
  def change
    # Each row: [referencing table, referenced table, referencing column, referenced key].
    constraints = [
      %i[http_log_entries blob_entries response_sha256 sha256],
      %i[http_log_entries http_log_entry_headers request_headers_id id],
      %i[http_log_entries http_log_entry_headers response_headers_id id],
      %i[blob_entries blob_entries base_sha256 sha256]
    ]

    constraints.each do |from_table, to_table, fk_column, referenced_key|
      add_foreign_key(
        from_table,
        to_table,
        column: fk_column,
        primary_key: referenced_key,
        validate: true
      )
    end
  end
end

2
db/schema.rb generated
View File

@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2023_01_31_012417) do
ActiveRecord::Schema[7.0].define(version: 2023_02_03_203205) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_stat_statements"
enable_extension "plpgsql"