sqrt regression
@@ -9,7 +9,7 @@ require "rumale/preprocessing/polynomial_features"
 require "rumale/pipeline/pipeline"

 namespace :stats do
-  desc "Generate graphs of FaFavIdAndDate models with linear and quadratic regression lines. Usage: rake stats:fa_fav_graph[max_points]"
+  desc "Generate graphs of FaFavIdAndDate models with linear, quadratic, logarithmic, and square root regression lines. Usage: rake stats:fa_fav_graph[max_points]"
   task :fa_fav_graph, [:max_points] => :environment do |task, args|
     puts "🔍 Analyzing FaFavIdAndDate data..."

@@ -19,23 +19,29 @@ namespace :stats do
     # Query and sample data
     records_array = StatsHelpers.sample_records(max_points)

-    # Create normalizer with raw data
-    normalizer = DataNormalizer.new(records_array)
+    # Create base normalizer for display ranges
+    base_normalizer = DataNormalizer.new(records_array)

-    puts "📈 X-axis range (fav_fa_id): #{normalizer.x_range}"
-    puts "📈 Y-axis range (date): #{normalizer.y_range}"
+    puts "📈 X-axis range (fav_fa_id): #{base_normalizer.x_range}"
+    puts "📈 Y-axis range (date): #{base_normalizer.y_range}"

-    # Run regressions using normalized data
-    results = RegressionAnalyzer.new(normalizer).analyze
+    # Run regressions using specialized normalizers
+    results = RegressionAnalyzer.new(records_array).analyze

+    # Define regression types for reuse across display and plotting
+    regressions = [
+      ["Linear", results.linear],
+      ["Quadratic", results.quadratic],
+      ["Logarithmic", results.logarithmic],
+      ["Square Root", results.square_root],
+    ]
+
     # Display results (automatically denormalized)
-    puts "\n📊 Linear Regression Results:"
-    puts " #{results.linear.equation}"
-    puts " R² = #{StatsHelpers.format_r_squared(results.linear.r_squared)}"
-
-    puts "\n📊 Quadratic Regression Results:"
-    puts " #{results.quadratic.equation}"
-    puts " R² = #{StatsHelpers.format_r_squared(results.quadratic.r_squared)}"
+    regressions.each do |name, result|
+      puts "\n📊 #{name} Regression Results:"
+      puts " #{result.equation_string}"
+      puts " R² = #{StatsHelpers.format_r_squared(result.r_squared)}"
+    end

     # Generate visualizations
     puts "\n🎨 Generating visualizations with UnicodePlot..."
@@ -43,12 +49,19 @@ namespace :stats do

     plotter.plot_scatter(
       "Original Data",
-      normalizer.x_values,
-      normalizer.y_values,
+      base_normalizer.x_values,
+      base_normalizer.y_values,
     )

+    # Plot individual regression results
+    regressions.each do |name, result|
+      plotter.plot_regression("#{name} Regression", result)
+    end
+    plotter.plot_combined(
+      base_normalizer.x_values,
+      base_normalizer.y_values,
+      regressions,
+    )
-    plotter.plot_regression("Linear Regression", results.linear)
-    plotter.plot_regression("Quadratic Regression", results.quadratic)
-    plotter.plot_combined(normalizer.x_values, normalizer.y_values, results)

     puts "\n✅ Graph generation completed!"
   end
@@ -106,7 +119,7 @@ module StatsHelpers
   end
 end

-# Handles data normalization and denormalization to prevent numerical instability
+# Base class for data normalization and denormalization
 class DataNormalizer
   extend T::Sig

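The @x / @y axis objects are not shown in this diff, but the coefficient denormalization further down implies the usual min/scale mapping. A minimal sketch of that assumed interface, with illustrative names and numbers only:

    # norm = (value - min) / scale, and back again
    def normalize(value, min, scale)
      (value - min) / scale
    end

    def denormalize(norm, min, scale)
      norm * scale + min
    end

    v = normalize(1_500_000.0, 1_000_000.0, 2_000_000.0) # => 0.25
    denormalize(v, 1_000_000.0, 2_000_000.0)             # => 1500000.0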
@@ -184,6 +197,27 @@ class DataNormalizer
     @y.as_string { |x| Time.at(x) }
   end

+  # Accessors for equation classes
+  sig { returns(Float) }
+  def x_scale
+    @x.scale
+  end
+
+  sig { returns(Float) }
+  def y_scale
+    @y.scale
+  end
+
+  sig { returns(Float) }
+  def x_min
+    @x.min
+  end
+
+  sig { returns(Float) }
+  def y_min
+    @y.min
+  end
+
   # Convert raw data to normalized [0,1] scale for Rumale
   sig { returns(T::Array[T::Array[Float]]) }
   def normalized_x_matrix
@@ -202,6 +236,22 @@ class DataNormalizer
     @x.range.step(step_size).to_a
   end

+  # Default transformation matrix (identity for linear/quadratic)
+  sig { returns(T::Array[T::Array[Float]]) }
+  def transformed_x_matrix
+    normalized_x_matrix
+  end
+
+  protected
+
+  sig { returns(Range) }
+  attr_reader :x, :y
+end
+
+# Linear regression specific normalizer
+class LinearNormalizer < DataNormalizer
+  extend T::Sig
+
   # Denormalize linear regression results back to original scale
   sig do
     params(
@@ -210,7 +260,7 @@ class DataNormalizer
       norm_intercept: Float,
     ).returns(T::Array[Float])
   end
-  def denormalize_linear(regression_x, norm_slope, norm_intercept)
+  def denormalize_regression(regression_x, norm_slope, norm_intercept)
     regression_x.map do |x|
       x_norm = @x.normalize(x)
       y_norm = norm_slope * x_norm + norm_intercept
@@ -218,6 +268,22 @@ class DataNormalizer
     end
   end

+  # Denormalize linear regression coefficients back to original scale
+  sig do
+    params(norm_intercept: Float, norm_slope: Float).returns(T::Array[Float])
+  end
+  def denormalize_coefficients(norm_intercept, norm_slope)
+    slope_orig = norm_slope * @y.scale / @x.scale
+    intercept_orig = (norm_intercept * @y.scale + @y.min) - slope_orig * @x.min
+
+    [intercept_orig, slope_orig]
+  end
+end
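The coefficient mapping above is just the substitution x_norm = (x - x_min) / x_scale and y = y_min + y_scale * y_norm applied to y_norm = a * x_norm + b. A quick standalone check with made-up numbers (none of these values come from the task's data):

    x_min, x_scale, y_min, y_scale = 100.0, 400.0, 10.0, 20.0
    a_n, b_n = 0.5, 0.1 # slope/intercept fitted in normalized space

    slope_orig = a_n * y_scale / x_scale
    intercept_orig = (b_n * y_scale + y_min) - slope_orig * x_min

    x = 250.0
    via_normalized = y_min + y_scale * (a_n * (x - x_min) / x_scale + b_n)
    via_original = slope_orig * x + intercept_orig
    raise "mismatch" unless (via_normalized - via_original).abs < 1e-9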
+
+# Quadratic regression specific normalizer
+class QuadraticNormalizer < DataNormalizer
+  extend T::Sig
+
   # Denormalize quadratic regression results back to original scale
   sig do
     params(
@@ -227,7 +293,7 @@ class DataNormalizer
       norm_c: Float,
     ).returns(T::Array[Float])
   end
-  def denormalize_quadratic(regression_x, norm_a, norm_b, norm_c)
+  def denormalize_regression(regression_x, norm_a, norm_b, norm_c)
     regression_x.map do |x|
       x_norm = @x.normalize(x)
       y_norm = norm_a * x_norm * x_norm + norm_b * x_norm + norm_c
@@ -235,44 +301,106 @@ class DataNormalizer
     end
   end

-  # Generate equation strings with coefficients in original scale
-  sig { params(norm_slope: Float, norm_intercept: Float).returns(String) }
-  def linear_equation(norm_slope, norm_intercept)
-    slope_orig = norm_slope * @y.scale / @x.scale
-    intercept_orig = (norm_intercept * @y.scale + @y.min) - slope_orig * @x.min
-
-    "y = #{polynomial_equation([slope_orig, intercept_orig])}"
+  # Denormalize quadratic regression coefficients back to original scale
+  sig do
+    params(norm_c: Float, norm_b: Float, norm_a: Float).returns(T::Array[Float])
   end
-
-  sig { params(norm_a: Float, norm_b: Float, norm_c: Float).returns(String) }
-  def quadratic_equation(norm_a, norm_b, norm_c)
+  def denormalize_coefficients(norm_c, norm_b, norm_a)
     a_orig = norm_a * @y.scale / (@x.scale * @x.scale)
     b_orig = norm_b * @y.scale / @x.scale - 2 * a_orig * @x.min
     c_orig =
       (norm_c * @y.scale + @y.min) - b_orig * @x.min - a_orig * @x.min * @x.min

-    "y = #{polynomial_equation([a_orig, b_orig, c_orig])}"
+    [c_orig, b_orig, a_orig]
   end
+end
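As with the linear case, these formulas come from expanding y = y_min + y_scale * (a_n * u**2 + b_n * u + c_n) with u = (x - x_min) / x_scale and collecting powers of x. A standalone check with made-up numbers:

    x_min, x_scale, y_min, y_scale = 100.0, 400.0, 10.0, 20.0
    a_n, b_n, c_n = 0.2, -0.3, 0.4

    a = a_n * y_scale / (x_scale * x_scale)
    b = b_n * y_scale / x_scale - 2 * a * x_min
    c = (c_n * y_scale + y_min) - b * x_min - a * x_min * x_min

    x = 250.0
    u = (x - x_min) / x_scale
    expected = y_min + y_scale * (a_n * u * u + b_n * u + c_n)
    raise "mismatch" unless (expected - (a * x * x + b * x + c)).abs < 1e-9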

+# Base class for transformations that follow y = a * f(x) + b pattern
+# where f(x) is a transformation function and denormalization only requires y-scaling
+class TransformedNormalizer < DataNormalizer
+  extend T::Sig
+
+  # Denormalize coefficients for simple transformations (only y-scaling needed)
+  sig do
+    params(norm_intercept: Float, norm_slope: Float).returns(T::Array[Float])
+  end
+  def denormalize_coefficients(norm_intercept, norm_slope)
+    slope_orig = norm_slope * @y.scale
+    intercept_orig = norm_intercept * @y.scale + @y.min
+
+    [intercept_orig, slope_orig]
+  end
+
-  # Convert array of coefficients into polynomial equation string
-  sig { params(coefficients: T::Array[Float]).returns(String) }
-  def polynomial_equation(coefficients)
-    terms =
-      coefficients.each_with_index.map do |coeff, power|
-        next if coeff.zero?
+  # Common denormalization logic using the transformation function
+  sig do
+    params(
+      regression_x: T::Array[Float],
+      norm_slope: Float,
+      norm_intercept: Float,
+    ).returns(T::Array[Float])
+  end
+  def denormalize_regression(regression_x, norm_slope, norm_intercept)
+    regression_x.map do |x|
+      # y = a * f(x) + b, where coefficients are in normalized space
+      y_norm = norm_slope * transform_x(x) + norm_intercept
+      @y.denormalize(y_norm)
+    end
+  end

-        term = format_number(coeff)
-        case power
-        when 0
-          term
-        when 1
-          "#{term}x"
-        else
-          "#{term}x#{power.to_s.tr("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")}"
-        end
-      end
+  protected

-    terms.compact.reverse.join(" + ").gsub("+ -", "- ")
+  # Abstract method for applying the transformation function
+  sig { params(x: Float).returns(Float) }
+  def transform_x(x)
+    raise NotImplementedError, "Subclasses must implement transform_x"
   end
+end
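Only the y axis needs rescaling in this class because the transformed matrices below feed raw x values through f(x); x itself is never normalized for these fits. So y = y_min + y_scale * (a_n * f(x) + b_n) collapses to (a_n * y_scale) * f(x) + (b_n * y_scale + y_min). A small check with made-up numbers:

    f = ->(x) { Math.sqrt(x) } # stand-in for transform_x
    y_min, y_scale = 10.0, 20.0
    a_n, b_n = 0.5, 0.1

    a = a_n * y_scale           # slope_orig
    b = b_n * y_scale + y_min   # intercept_orig

    x = 81.0
    expected = y_min + y_scale * (a_n * f.call(x) + b_n)
    raise "mismatch" unless (expected - (a * f.call(x) + b)).abs < 1e-9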
+
+# Logarithmic regression specific normalizer
+class LogarithmicNormalizer < TransformedNormalizer
+  extend T::Sig
+
+  # Convert x values to log-transformed matrix for logarithmic regression
+  sig { returns(T::Array[T::Array[Float]]) }
+  def transformed_x_matrix
+    @x_values.map { |x| [Math.log(x)] }
+  end
+
+  protected
+
+  # Apply logarithmic transformation
+  sig { params(x: Float).returns(Float) }
+  def transform_x(x)
+    Math.log(x)
+  end
+end
+
+# Square root regression specific normalizer
+class SquareRootNormalizer < TransformedNormalizer
+  extend T::Sig
+
+  # Convert x values to square root transformed matrix for square root regression
+  sig { returns(T::Array[T::Array[Float]]) }
+  def transformed_x_matrix
+    @x_values.map { |x| [Math.sqrt(x)] }
+  end
+
+  protected
+
+  # Apply square root transformation
+  sig { params(x: Float).returns(Float) }
+  def transform_x(x)
+    Math.sqrt(x)
+  end
+end
+
+# Base class for regression equations with common formatting logic
+class Equation
+  extend T::Sig
+
+  sig { params(normalizer: DataNormalizer).void }
+  def initialize(normalizer)
+    @normalizer = normalizer
+  end

   # Format a number with significant figures and scientific notation when needed
@@ -298,16 +426,130 @@ class DataNormalizer
       num.round(decimal_places).to_s
     end
   end
+
+  sig { returns(String) }
+  def to_s
+    format_equation
+  end
+
+  protected
+
+  sig { returns(String) }
+  def format_equation
+    raise NotImplementedError, "Subclasses must implement format_equation"
+  end
+
+  sig { returns(DataNormalizer) }
+  attr_reader :normalizer
 end

+class PolynomialEquation < Equation
+  extend T::Sig
+
+  sig { params(normalizer: DataNormalizer, coefficients: T::Array[Float]).void }
+  def initialize(normalizer, coefficients)
+    super(normalizer)
+    @coefficients = coefficients
+  end
+
+  protected
+
+  sig { returns(String) }
+  def format_equation
+    "y = #{polynomial_equation(@coefficients)}"
+  end
+
+  private
+
+  # Convert array of coefficients into polynomial equation string
+  sig { params(coefficients: T::Array[Float]).returns(String) }
+  def polynomial_equation(coefficients)
+    terms =
+      coefficients.each_with_index.map do |coeff, power|
+        next if coeff.zero?
+
+        term = format_number(coeff)
+        case power
+        when 0
+          term
+        when 1
+          "#{term}x"
+        else
+          "#{term}x#{power.to_s.tr("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")}"
+        end
+      end
+
+    terms.compact.reverse.join(" + ").gsub("+ -", "- ")
+  end
+end
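The superscript exponents in polynomial_equation come from String#tr mapping ASCII digits onto Unicode superscript characters, for example:

    12.to_s.tr("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")          # => "¹²"
    "0.5x" + 2.to_s.tr("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")  # => "0.5x²"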
+
+class LogarithmicEquation < Equation
+  extend T::Sig
+
+  sig do
+    params(
+      normalizer: DataNormalizer,
+      norm_slope: Float,
+      norm_intercept: Float,
+    ).void
+  end
+  def initialize(normalizer, norm_slope, norm_intercept)
+    super(normalizer)
+    @norm_slope = norm_slope
+    @norm_intercept = norm_intercept
+  end
+
+  protected
+
+  sig { returns(String) }
+  def format_equation
+    slope_orig = @norm_slope * @normalizer.y_scale
+    intercept_orig = @norm_intercept * @normalizer.y_scale + @normalizer.y_min
+
+    "y = #{format_number(slope_orig)} * ln(x) + #{format_number(intercept_orig)}"
+  end
+end
+
+class SquareRootEquation < Equation
+  extend T::Sig
+
+  sig do
+    params(
+      normalizer: DataNormalizer,
+      norm_slope: Float,
+      norm_intercept: Float,
+    ).void
+  end
+  def initialize(normalizer, norm_slope, norm_intercept)
+    super(normalizer)
+    @norm_slope = norm_slope
+    @norm_intercept = norm_intercept
+  end
+
+  protected
+
+  sig { returns(String) }
+  def format_equation
+    slope_orig = @norm_slope * @normalizer.y_scale
+    intercept_orig = @norm_intercept * @normalizer.y_scale + @normalizer.y_min
+
+    "y = #{format_number(slope_orig)} * √x + #{format_number(intercept_orig)}"
+  end
+end
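The two concrete subclasses above differ only in the function label they print; a new fit type would add another subclass overriding format_equation (plus a matching TransformedNormalizer whose transform_x applies the new function). A hypothetical exponential variant, not part of this commit, just to show the shape:

    class ExponentialEquation < Equation
      extend T::Sig

      sig { params(normalizer: DataNormalizer, norm_slope: Float, norm_intercept: Float).void }
      def initialize(normalizer, norm_slope, norm_intercept)
        super(normalizer)
        @norm_slope = norm_slope
        @norm_intercept = norm_intercept
      end

      protected

      sig { returns(String) }
      def format_equation
        slope_orig = @norm_slope * @normalizer.y_scale
        intercept_orig = @norm_intercept * @normalizer.y_scale + @normalizer.y_min

        "y = #{format_number(slope_orig)} * e^x + #{format_number(intercept_orig)}"
      end
    end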

 # Immutable struct representing a single regression analysis result
 class RegressionResult < T::ImmutableStruct
   extend T::Sig

-  const :equation, String
+  const :equation, Equation
   const :r_squared, Float
   const :x_values, T::Array[Float]
   const :y_values, T::Array[Float]
+
+  sig { returns(String) }
+  def equation_string
+    equation.to_s
+  end
 end

 # Immutable struct representing the complete analysis results
@@ -316,40 +558,38 @@ class AnalysisResults < T::ImmutableStruct

   const :linear, RegressionResult
   const :quadratic, RegressionResult
+  const :logarithmic, RegressionResult
+  const :square_root, RegressionResult
 end

 # Handles regression analysis using Rumale with normalized data
 class RegressionAnalyzer
   extend T::Sig

-  sig { params(normalizer: DataNormalizer).void }
-  def initialize(normalizer)
-    @normalizer = normalizer
+  sig { params(records: T::Array[Domain::FaFavIdAndDate]).void }
+  def initialize(records)
+    @records = records
   end

   sig { returns(AnalysisResults) }
   def analyze
-    # Use normalized data for Rumale calculations to prevent numerical instability
-    x_matrix = @normalizer.normalized_x_matrix
-    y_vector = @normalizer.normalized_y_vector
-    regression_x = @normalizer.regression_x_range
-
     AnalysisResults.new(
-      linear: analyze_linear(x_matrix, y_vector, regression_x),
-      quadratic: analyze_quadratic(x_matrix, y_vector, regression_x),
+      linear: analyze_linear,
+      quadratic: analyze_quadratic,
+      logarithmic: analyze_logarithmic,
+      square_root: analyze_square_root,
     )
   end

   private

-  sig do
-    params(
-      x_matrix: T::Array[T::Array[Float]],
-      y_vector: T::Array[Float],
-      regression_x: T::Array[Float],
-    ).returns(RegressionResult)
-  end
-  def analyze_linear(x_matrix, y_vector, regression_x)
+  sig { returns(RegressionResult) }
+  def analyze_linear
+    normalizer = LinearNormalizer.new(@records)
+    x_matrix = normalizer.normalized_x_matrix
+    y_vector = normalizer.normalized_y_vector
+    regression_x = normalizer.regression_x_range
+
     poly_features = Rumale::Preprocessing::PolynomialFeatures.new(degree: 1)
     regressor = Rumale::LinearModel::LinearRegression.new
     pipeline =
@@ -370,24 +610,31 @@ class RegressionAnalyzer

     # Generate regression line data in original scale
     linear_y =
-      @normalizer.denormalize_linear(regression_x, norm_slope, norm_intercept)
+      normalizer.denormalize_regression(
+        regression_x,
+        norm_slope,
+        norm_intercept,
+      )
+
+    # Denormalize coefficients for equation display
+    coefficients =
+      normalizer.denormalize_coefficients(norm_intercept, norm_slope)

     RegressionResult.new(
-      equation: @normalizer.linear_equation(norm_slope, norm_intercept),
+      equation: PolynomialEquation.new(normalizer, coefficients),
       r_squared: r_squared,
       x_values: regression_x,
       y_values: linear_y,
     )
   end

-  sig do
-    params(
-      x_matrix: T::Array[T::Array[Float]],
-      y_vector: T::Array[Float],
-      regression_x: T::Array[Float],
-    ).returns(RegressionResult)
-  end
-  def analyze_quadratic(x_matrix, y_vector, regression_x)
+  sig { returns(RegressionResult) }
+  def analyze_quadratic
+    normalizer = QuadraticNormalizer.new(@records)
+    x_matrix = normalizer.normalized_x_matrix
+    y_vector = normalizer.normalized_y_vector
+    regression_x = normalizer.regression_x_range
+
     # Use pipeline approach as recommended in documentation
     poly_features = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)
     regressor = Rumale::LinearModel::LinearRegression.new(fit_bias: true)
@@ -410,15 +657,108 @@ class RegressionAnalyzer

     # Generate regression line data in original scale
     quadratic_y =
-      @normalizer.denormalize_quadratic(regression_x, norm_a, norm_b, norm_c)
+      normalizer.denormalize_regression(regression_x, norm_a, norm_b, norm_c)

+    # Denormalize coefficients for equation display
+    coefficients = normalizer.denormalize_coefficients(norm_c, norm_b, norm_a)
+
     RegressionResult.new(
-      equation: @normalizer.quadratic_equation(norm_a, norm_b, norm_c),
+      equation: PolynomialEquation.new(normalizer, coefficients),
       r_squared: r_squared,
       x_values: regression_x,
       y_values: quadratic_y,
     )
   end
+
+  sig { returns(RegressionResult) }
+  def analyze_logarithmic
+    normalizer = LogarithmicNormalizer.new(@records)
+    y_vector = normalizer.normalized_y_vector
+    regression_x = normalizer.regression_x_range
+
+    # Transform x values using natural log for logarithmic regression
+    # y = a * ln(x) + b
+    log_x_matrix = normalizer.transformed_x_matrix
+    poly_features = Rumale::Preprocessing::PolynomialFeatures.new(degree: 1)
+    regressor = Rumale::LinearModel::LinearRegression.new
+
+    pipeline =
+      Rumale::Pipeline::Pipeline.new(
+        steps: {
+          transformer: poly_features,
+          estimator: regressor,
+        },
+      )
+
+    # Fit the regression on log-transformed x values
+    pipeline.fit(log_x_matrix, y_vector)
+    r_squared = pipeline.score(log_x_matrix, y_vector)
+
+    # Extract coefficients (same pattern as linear regression)
+    weight_vec = pipeline.steps[:estimator].weight_vec
+    norm_intercept = weight_vec[0]
+    norm_slope = weight_vec[1]
+
+    # Generate regression line data in original scale
+    logarithmic_y =
+      normalizer.denormalize_regression(
+        regression_x,
+        norm_slope,
+        norm_intercept,
+      )
+
+    RegressionResult.new(
+      equation: LogarithmicEquation.new(normalizer, norm_slope, norm_intercept),
+      r_squared: r_squared,
+      x_values: regression_x,
+      y_values: logarithmic_y,
+    )
+  end
+
+  sig { returns(RegressionResult) }
+  def analyze_square_root
+    normalizer = SquareRootNormalizer.new(@records)
+    y_vector = normalizer.normalized_y_vector
+    regression_x = normalizer.regression_x_range
+
+    # Transform x values using square root for square root regression
+    # y = a * √x + b
+    sqrt_x_matrix = normalizer.transformed_x_matrix
+    poly_features = Rumale::Preprocessing::PolynomialFeatures.new(degree: 1)
+    regressor = Rumale::LinearModel::LinearRegression.new
+
+    pipeline =
+      Rumale::Pipeline::Pipeline.new(
+        steps: {
+          transformer: poly_features,
+          estimator: regressor,
+        },
+      )
+
+    # Fit the regression on square root transformed x values
+    pipeline.fit(sqrt_x_matrix, y_vector)
+    r_squared = pipeline.score(sqrt_x_matrix, y_vector)
+
+    # Extract coefficients (same pattern as other regressions)
+    weight_vec = pipeline.steps[:estimator].weight_vec
+    norm_intercept = weight_vec[0]
+    norm_slope = weight_vec[1]
+
+    # Generate regression line data in original scale
+    square_root_y =
+      normalizer.denormalize_regression(
+        regression_x,
+        norm_slope,
+        norm_intercept,
+      )
+
+    RegressionResult.new(
+      equation: SquareRootEquation.new(normalizer, norm_slope, norm_intercept),
+      r_squared: r_squared,
+      x_values: regression_x,
+      y_values: square_root_y,
+    )
+  end
 end
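Both analyze_logarithmic and analyze_square_root are ordinary least squares on a single transformed feature (ln(x) or √x); Rumale only supplies the fitting machinery. A minimal pure-Ruby illustration of the same idea on synthetic data (no Rumale, values made up):

    xs = [1.0, 2.0, 4.0, 8.0, 16.0]
    ys = xs.map { |x| 3.0 * Math.log(x) + 1.0 } # exact fit expected

    fx = xs.map { |x| Math.log(x) }             # swap in Math.sqrt for the √x fit
    mean_f = fx.sum / fx.size
    mean_y = ys.sum / ys.size
    slope = fx.zip(ys).sum { |f, y| (f - mean_f) * (y - mean_y) } /
            fx.sum { |f| (f - mean_f)**2 }
    intercept = mean_y - slope * mean_f
    # slope ≈ 3.0, intercept ≈ 1.0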

 # Simplified plotting class with extracted common functionality
@@ -466,10 +806,10 @@ class StatsPlotter
     params(
       x_values: T::Array[Float],
       y_values: T::Array[Float],
-      results: AnalysisResults,
+      regressions: T::Array[[String, RegressionResult]],
     ).void
   end
-  def plot_combined(x_values, y_values, results)
+  def plot_combined(x_values, y_values, regressions)
     plot_with_error_handling("📈 Combined Visualization:") do
       # Base scatter plot
       plot =
@@ -485,18 +825,14 @@ class StatsPlotter
       )

       # Add regression lines
-      UnicodePlot.lineplot!(
-        plot,
-        results.linear.x_values,
-        results.linear.y_values,
-        name: "Linear (R²=#{results.linear.r_squared.round(3)})",
-      )
-      UnicodePlot.lineplot!(
-        plot,
-        results.quadratic.x_values,
-        results.quadratic.y_values,
-        name: "Quadratic (R²=#{results.quadratic.r_squared.round(3)})",
-      )
+      regressions.each do |name, result|
+        UnicodePlot.lineplot!(
+          plot,
+          result.x_values,
+          result.y_values,
+          name: "#{name} (R²=#{result.r_squared.round(3)})",
+        )
+      end
       plot
     end
   end