Files
redux-scraper/lib/tasks/stats.rake
2025-07-12 08:53:49 +00:00

104 lines
3.6 KiB
Ruby

# typed: strict
# frozen_string_literal: true
T.bind(self, T.all(Rake::DSL, Object))
require "unicode_plot"
require "rumale"
require "rumale/linear_model/linear_regression"
require "rumale/preprocessing/polynomial_features"
require "rumale/pipeline/pipeline"
namespace :stats do
  # Samples FaUserPostFav records, fits the regression models produced by
  # Stats::RegressionAnalyzer, renders terminal plots via Stats::Plotter, and
  # replaces the persisted TrainedRegressionModel rows named
  # "fa_fav_id_and_date" with the freshly trained models.
  #
  # Task arguments:
  #   max_points - optional; converted with `to_i` and passed to
  #                Stats::Helpers.sample_records. When omitted, nil is passed.
  #                NOTE(review): non-numeric input becomes 0 via `to_i`; how
  #                sample_records treats 0 is not visible here - confirm.
  desc "Generate graphs of FaUserPostFav models with linear, quadratic, logarithmic, and square root regression lines. Usage: rake stats:fa_fav_graph[max_points]"
  task :fa_fav_graph, [:max_points] => :environment do |_task, args|
    puts "🔍 Analyzing FaUserPostFav data..."

    # Parse max_points parameter (nil when the argument is not supplied)
    max_points = args[:max_points]&.to_i

    # Query and sample data
    records_array = Stats::Helpers.sample_records(max_points)

    # Normalizer over the full sample; used for the displayed axis ranges and
    # for the combined plot further down.
    base_normalizer = Stats::LinearNormalizer.new(records_array)
    puts "📈 X-axis range (fav_id): #{base_normalizer.x_range}"
    puts "📈 Y-axis range (explicit_time): #{base_normalizer.y_range}"

    # Split data for the train/eval scatter plot and for the record counts
    # stored alongside each saved model.
    split = Stats::Helpers.split_train_test(records_array)
    train_normalizer = Stats::LinearNormalizer.new(split.training_records)
    eval_normalizer = Stats::LinearNormalizer.new(split.evaluation_records)

    # Run the regressions. The analyzer receives the full sample, not the
    # split above; presumably it performs its own train/test split and
    # normalization internally - TODO confirm against Stats::RegressionAnalyzer.
    regressions = Stats::RegressionAnalyzer.new(records_array).analyze

    # Display results
    regressions.each do |model_type, result|
      puts "\n📊 #{model_type.serialize.humanize} Regression Results:"
      puts " #{result.equation_string}"
      puts " #{result.score_summary}"
    end

    # Generate visualizations
    puts "\n🎨 Generating visualizations with UnicodePlot..."
    plotter = Stats::Plotter.new
    plotter.plot_train_eval_scatter(
      "Original Data (Train/Eval)",
      train_normalizer.x_values,
      train_normalizer.y_values,
      eval_normalizer.x_values,
      eval_normalizer.y_values,
    )

    # Plot individual regression results
    regressions.each do |model_type, result|
      plotter.plot_regression(
        "#{model_type.serialize.humanize} Regression",
        result,
      )
    end

    plotter.plot_combined(
      base_normalizer.x_values,
      base_normalizer.y_values,
      regressions,
    )
    puts "\n✅ Graph generation completed!"

    model_name = "fa_fav_id_and_date"

    # Replace the stored models atomically: if any create! raises, the
    # transaction rolls back and the previously saved models are preserved
    # instead of being left deleted or only partially replaced.
    TrainedRegressionModel.transaction do
      # remove old regressions for this model
      TrainedRegressionModel.where(name: model_name).destroy_all

      # Save each regression model to the database
      regressions.each do |model_type, result|
        equation = result.equation
        TrainedRegressionModel.create!(
          name: model_name,
          model_type: model_type.serialize,
          description:
            "Trained on FaUserPostFav with #{model_type.serialize} regression.",
          total_records_count: records_array.size,
          training_records_count: split.training_records.size,
          evaluation_records_count: split.evaluation_records.size,
          train_test_split_ratio: 0.8, # hardcoded, see split_train_test default
          random_seed: 42, # hardcoded, see split_train_test
          max_points_limit: max_points,
          x_min: equation.x.min,
          x_max: equation.x.max,
          y_min: equation.y.min,
          y_max: equation.y.max,
          coefficients: equation.coefficients,
          training_r_squared: result.training_r_squared,
          evaluation_r_squared: result.evaluation_r_squared,
          equation_string: result.equation_string,
          metadata: {
            x_range: equation.x.range,
            y_range: equation.y.range,
          },
        )
      end
    end

    # Report success only after the transaction has committed, so a rolled-back
    # run never claims that a model was saved.
    regressions.each do |model_type, _result|
      puts "💾 Saved #{model_type.serialize.humanize} regression model to DB."
    end
  end
end