104 lines
3.6 KiB
Ruby
104 lines
3.6 KiB
Ruby
# typed: strict
|
|
# frozen_string_literal: true
|
|
# Sorbet annotation: this file is `typed: strict`, so declare the type of the
# top-level `self` as an Object that also includes Rake::DSL. Presumably this
# is what lets the `namespace` / `desc` / `task` calls below type-check.
T.bind(self, T.all(Rake::DSL, Object))
|
|
|
|
require "unicode_plot"
|
|
require "rumale"
|
|
require "rumale/linear_model/linear_regression"
|
|
require "rumale/preprocessing/polynomial_features"
|
|
require "rumale/pipeline/pipeline"
|
|
|
|
namespace :stats do
  # stats:fa_fav_graph
  #
  # Samples FaUserPostFav records, runs the regression analyzer over them,
  # prints and plots every result, and finally replaces the stored
  # TrainedRegressionModel rows named "fa_fav_id_and_date" with the freshly
  # fitted models.
  #
  # Arguments:
  #   max_points - optional; converted with `to_i` and passed to
  #                Stats::Helpers.sample_records. When omitted, nil is passed
  #                (documented by the original author as "no limit").
  desc "Generate graphs of FaUserPostFav models with linear, quadratic, logarithmic, and square root regression lines. Usage: rake stats:fa_fav_graph[max_points]"
  task :fa_fav_graph, [:max_points] => :environment do |_task, args|
    puts "🔍 Analyzing FaUserPostFav data..."

    # Parse max_points parameter (default to no limit)
    max_points = args[:max_points]&.to_i

    # Query and sample data
    records_array = Stats::Helpers.sample_records(max_points)

    # Create base normalizer for display ranges
    base_normalizer = Stats::LinearNormalizer.new(records_array)
    puts "📈 X-axis range (fav_id): #{base_normalizer.x_range}"
    puts "📈 Y-axis range (explicit_time): #{base_normalizer.y_range}"

    # Split data for plotting; the split sizes are also recorded with each
    # saved model further down.
    split = Stats::Helpers.split_train_test(records_array)
    train_normalizer = Stats::LinearNormalizer.new(split.training_records)
    eval_normalizer = Stats::LinearNormalizer.new(split.evaluation_records)

    # Run the regressions. Note the analyzer is handed the full sampled set,
    # not the train/eval split computed above.
    regressions = Stats::RegressionAnalyzer.new(records_array).analyze

    # Display results (automatically denormalized)
    regressions.each do |model_type, result|
      puts "\n📊 #{model_type.serialize.humanize} Regression Results:"
      puts "  #{result.equation_string}"
      puts "  #{result.score_summary}"
    end

    # Generate visualizations
    puts "\n🎨 Generating visualizations with UnicodePlot..."
    plotter = Stats::Plotter.new

    plotter.plot_train_eval_scatter(
      "Original Data (Train/Eval)",
      train_normalizer.x_values,
      train_normalizer.y_values,
      eval_normalizer.x_values,
      eval_normalizer.y_values,
    )

    # Plot individual regression results
    regressions.each do |model_type, result|
      plotter.plot_regression(
        "#{model_type.serialize.humanize} Regression",
        result,
      )
    end

    plotter.plot_combined(
      base_normalizer.x_values,
      base_normalizer.y_values,
      regressions,
    )

    puts "\n✅ Graph generation completed!"

    model_name = "fa_fav_id_and_date"

    # Replace the stored models atomically: removing the old rows and
    # inserting the new ones happen in a single transaction, so a failing
    # `create!` rolls back the delete instead of leaving the table with the
    # previous models gone and only some of the new ones written.
    TrainedRegressionModel.transaction do
      # remove old regressions for this model
      TrainedRegressionModel.where(name: model_name).destroy_all

      # Save each regression model to the database
      regressions.each do |model_type, result|
        type_name = model_type.serialize
        equation = result.equation

        TrainedRegressionModel.create!(
          name: model_name,
          model_type: type_name,
          description:
            "Trained on FaUserPostFav with #{type_name} regression.",
          total_records_count: records_array.size,
          training_records_count: split.training_records.size,
          evaluation_records_count: split.evaluation_records.size,
          train_test_split_ratio: 0.8, # hardcoded, see split_train_test default
          random_seed: 42, # hardcoded, see split_train_test
          max_points_limit: max_points,
          x_min: equation.x.min,
          x_max: equation.x.max,
          y_min: equation.y.min,
          y_max: equation.y.max,
          coefficients: equation.coefficients,
          training_r_squared: result.training_r_squared,
          evaluation_r_squared: result.evaluation_r_squared,
          equation_string: result.equation_string,
          metadata: {
            x_range: equation.x.range,
            y_range: equation.y.range,
          },
        )
      end
    end

    # Report only after the transaction has completed, so "Saved" is never
    # printed for a row that is subsequently rolled back.
    regressions.each do |model_type, _result|
      puts "💾 Saved #{model_type.serialize.humanize} regression model to DB."
    end
  end
end
|