# typed: true

# DO NOT EDIT MANUALLY
# This is an autogenerated file for types exported from the `rumale-model_selection` gem.
# Please instead update this file by running `bin/tapioca gem rumale-model_selection`.

# Top-level namespace stub for the Rumale gems.
#
# source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#5
module Rumale; end

# This module consists of the classes for model validation techniques.
#
# source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#6
module Rumale::ModelSelection
  private

  # Randomly split the data set into test and train data.
  #
  # @example
  #   require 'rumale/model_selection/function'
  #
  #   x_train, x_test, y_train, y_test = Rumale::ModelSelection.train_test_split(x, y, test_size: 0.2, stratify: true, random_seed: 1)
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The dataset to be used to generate data indices.
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used to generate data indices for stratified random permutation.
  #   If stratify = false, this parameter is ignored.
  # @param test_size [Float] The ratio of number of samples for test data.
  # @param train_size [Float] The ratio of number of samples for train data.
  #   If nil is given, it is set to 1 - test_size.
  # @param stratify [Boolean] The flag indicating whether to perform a stratified split.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # @return [Array<Numo::NArray>] The set of training and testing data.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/function.rb#29
  def train_test_split(x, y = T.unsafe(nil), test_size: T.unsafe(nil), train_size: T.unsafe(nil), stratify: T.unsafe(nil), random_seed: T.unsafe(nil)); end

  class << self
    # Randomly split the data set into test and train data.
    #
    # @example
    #   require 'rumale/model_selection/function'
    #
    #   x_train, x_test, y_train, y_test = Rumale::ModelSelection.train_test_split(x, y, test_size: 0.2, stratify: true, random_seed: 1)
    # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The dataset to be used to generate data indices.
    # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used to generate data indices for stratified random permutation.
    #   If stratify = false, this parameter is ignored.
    # @param test_size [Float] The ratio of number of samples for test data.
    # @param train_size [Float] The ratio of number of samples for train data.
    #   If nil is given, it is set to 1 - test_size.
    # @param stratify [Boolean] The flag indicating whether to perform a stratified split.
    # @param random_seed [Integer] The seed value used to initialize the random generator.
    # @return [Array<Numo::NArray>] The set of training and testing data.
    #
    # source://rumale-model_selection//lib/rumale/model_selection/function.rb#29
    def train_test_split(x, y = T.unsafe(nil), test_size: T.unsafe(nil), train_size: T.unsafe(nil), stratify: T.unsafe(nil), random_seed: T.unsafe(nil)); end
  end
end

# CrossValidation is a class that evaluates a given classifier with cross-validation method.
#
# @example
#   require 'rumale/linear_model'
#   require 'rumale/model_selection/stratified_k_fold'
#   require 'rumale/model_selection/cross_validation'
#
#   svc = Rumale::LinearModel::SVC.new
#   kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5)
#   cv = Rumale::ModelSelection::CrossValidation.new(estimator: svc, splitter: kf)
#   report = cv.perform(samples, labels)
#   mean_test_score = report[:test_score].inject(:+) / kf.n_splits
#
# source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#21
class Rumale::ModelSelection::CrossValidation
  # Create a new evaluator with cross-validation method.
  #
  # @param estimator [Classifier] The classifier of which performance is evaluated.
  # @param splitter [Splitter] The splitter that divides dataset to training and testing dataset.
  # @param evaluator [Evaluator] The evaluator that calculates score of estimator results.
  # @param return_train_score [Boolean] The flag indicating whether to calculate the score of training dataset.
  # @return [CrossValidation] a new instance of CrossValidation
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#44
  def initialize(estimator: T.unsafe(nil), splitter: T.unsafe(nil), evaluator: T.unsafe(nil), return_train_score: T.unsafe(nil)); end

  # Return the classifier of which performance is evaluated.
  #
  # @return [Classifier]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#24
  def estimator; end

  # Return the evaluator that calculates score.
  #
  # @return [Evaluator]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#32
  def evaluator; end

  # Perform the evaluation of given classifier with cross-validation method.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
  #   The dataset to be used to evaluate the estimator.
  # @param y [Numo::Int32 / Numo::DFloat] (shape: [n_samples] / [n_samples, n_outputs])
  #   The labels to be used to evaluate the classifier / The target values to be used to evaluate the regressor.
  # @return [Hash] The report summarizing the results of cross-validation.
  #   * :fit_time (Array<Float>) The calculation times of fitting the estimator for each split.
  #   * :test_score (Array<Float>) The scores of testing dataset for each split.
  #   * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
  #     the return_train_score is false.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#62
  def perform(x, y); end

  # Return the flag indicating whether to calculate the score of training dataset.
  #
  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#36
  def return_train_score; end

  # Return the splitter that divides dataset.
  #
  # @return [Splitter]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#28
  def splitter; end

  private

  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#98
  def kernel_machine?; end

  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/cross_validation.rb#102
  def log_loss?; end
end

# GridSearchCV is a class that performs hyperparameter optimization with grid search method.
#
# @example
#   require 'rumale/ensemble'
#   require 'rumale/model_selection/stratified_k_fold'
#   require 'rumale/model_selection/grid_search_cv'
#
#   rfc = Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
#   pg = { n_estimators: [5, 10], max_depth: [3, 5], max_leaf_nodes: [15, 31] }
#   kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5)
#   gs = Rumale::ModelSelection::GridSearchCV.new(estimator: rfc, param_grid: pg, splitter: kf)
#   gs.fit(samples, labels)
#   p gs.cv_results
#   p gs.best_params
# @example
#   rbf = Rumale::KernelApproximation::RBF.new(random_seed: 1)
#   svc = Rumale::LinearModel::SVC.new
#   pipe = Rumale::Pipeline::Pipeline.new(steps: { rbf: rbf, svc: svc })
#   pg = { rbf__gamma: [32.0, 1.0], rbf__n_components: [4, 128], svc__reg_param: [16.0, 0.1] }
#   kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5)
#   gs = Rumale::ModelSelection::GridSearchCV.new(estimator: pipe, param_grid: pg, splitter: kf)
#   gs.fit(samples, labels)
#   p gs.cv_results
#   p gs.best_params
#
# source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#34
class Rumale::ModelSelection::GridSearchCV < ::Rumale::Base::Estimator
  # Create a new grid search method.
  #
  # @param estimator [Classifier/Regressor] The estimator to be searched for optimal parameters with grid search method.
  # @param param_grid [Array<Hash>] The parameter sets, represented as an array of hashes that
  #   consist of parameter names as keys and arrays of parameter values as values.
  # @param splitter [Splitter] The splitter that divides dataset to training and testing dataset on cross validation.
  # @param evaluator [Evaluator] The evaluator that calculates score of estimator results on cross validation.
  #   If nil is given, the score method of estimator is used for evaluation.
  # @param greater_is_better [Boolean] The flag that indicates whether the estimator is better as
  #   evaluation score is larger.
  # @return [GridSearchCV] a new instance of GridSearchCV
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#65
  def initialize(estimator: T.unsafe(nil), param_grid: T.unsafe(nil), splitter: T.unsafe(nil), evaluator: T.unsafe(nil), greater_is_better: T.unsafe(nil)); end

  # Return the estimator learned with the best parameter.
  #
  # @return [Estimator]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#53
  def best_estimator; end

  # Return the index of the best parameter.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#49
  def best_index; end

  # Return the best parameter set.
  #
  # @return [Hash]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#45
  def best_params; end

  # Return the score of the estimator learned with the best parameter.
  #
  # @return [Float]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#41
  def best_score; end

  # Return the result of cross validation for each parameter.
  #
  # @return [Hash]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#37
  def cv_results; end

  # Call the decision_function method of learned estimator with the best parameter.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#102
  def decision_function(x); end

  # Fit the model with given training data and all sets of parameters.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
  # @return [GridSearchCV] The learned estimator with grid search.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#81
  def fit(x, y); end

  # Call the predict method of learned estimator with the best parameter.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
  # @return [Numo::NArray] Predicted results.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#110
  def predict(x); end

  # Call the predict_log_proba method of learned estimator with the best parameter.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#118
  def predict_log_proba(x); end

  # Call the predict_proba method of learned estimator with the best parameter.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#126
  def predict_proba(x); end

  # Call the score method of learned estimator with the best parameter.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
  # @return [Float] The score of estimator.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#135
  def score(x, y); end

  private

  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#166
  def configurated_estimator(prms); end

  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#202
  def find_best_params; end

  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#179
  def init_attrs; end

  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#152
  def param_combinations; end

  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#159
  def perform_cross_validation(x, y, prms); end

  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#208
  def pipeline?; end

  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#189
  def store_cv_result(prms, report); end

  # @raise [TypeError]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/grid_search_cv.rb#141
  def valid_param_grid(grid); end
end

# GroupKFold is a class that generates the set of data indices for K-fold cross-validation.
# The data points belonging to the same group are not split into different folds.
# The number of groups should be greater than or equal to the number of splits.
#
# @example
#   require 'rumale/model_selection/group_k_fold'
#
#   cv = Rumale::ModelSelection::GroupKFold.new(n_splits: 3)
#   x = Numo::DFloat.new(8, 2).rand
#   groups = Numo::Int32[1, 1, 1, 2, 2, 3, 3, 3]
#   cv.split(x, nil, groups).each do |train_ids, test_ids|
#     puts '---'
#     pp train_ids
#     pp test_ids
#   end
#
#   # ---
#   # [0, 1, 2, 3, 4]
#   # [5, 6, 7]
#   # ---
#   # [3, 4, 5, 6, 7]
#   # [0, 1, 2]
#   # ---
#   # [0, 1, 2, 5, 6, 7]
#   # [3, 4]
#
# source://rumale-model_selection//lib/rumale/model_selection/group_k_fold.rb#34
class Rumale::ModelSelection::GroupKFold
  include ::Rumale::Base::Splitter

  # Create a new data splitter for grouped K-fold cross validation.
  #
  # @param n_splits [Integer] The number of folds.
  # @return [GroupKFold] a new instance of GroupKFold
  #
  # source://rumale-model_selection//lib/rumale/model_selection/group_k_fold.rb#44
  def initialize(n_splits: T.unsafe(nil)); end

  # Return the number of folds.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/group_k_fold.rb#39
  def n_splits; end

  # Generate data indices for grouped K-fold cross validation.
  #
  # @overload split
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/group_k_fold.rb#58
  def split(x, _y, groups); end
end

# GroupShuffleSplit is a class that generates the set of data indices
# for random permutation cross-validation by randomly selecting group labels.
#
# @example
#   require 'rumale/model_selection/group_shuffle_split'
#
#   cv = Rumale::ModelSelection::GroupShuffleSplit.new(n_splits: 2, test_size: 0.2, random_seed: 1)
#   x = Numo::DFloat.new(8, 2).rand
#   groups = Numo::Int32[1, 1, 1, 2, 2, 3, 3, 3]
#   cv.split(x, nil, groups).each do |train_ids, test_ids|
#     puts '---'
#     pp train_ids
#     pp test_ids
#   end
#
#   # ---
#   # [0, 1, 2, 5, 6, 7]
#   # [3, 4]
#   # ---
#   # [3, 4, 5, 6, 7]
#   # [0, 1, 2]
#
# source://rumale-model_selection//lib/rumale/model_selection/group_shuffle_split.rb#29
class Rumale::ModelSelection::GroupShuffleSplit
  include ::Rumale::Base::Splitter

  # Create a new data splitter for random permutation cross validation with given group labels.
  #
  # @param n_splits [Integer] The number of folds.
  # @param test_size [Float] The ratio of number of groups for test data.
  # @param train_size [Float/Nil] The ratio of number of groups for train data.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # @return [GroupShuffleSplit] a new instance of GroupShuffleSplit
  #
  # source://rumale-model_selection//lib/rumale/model_selection/group_shuffle_split.rb#46
  def initialize(n_splits: T.unsafe(nil), test_size: T.unsafe(nil), train_size: T.unsafe(nil), random_seed: T.unsafe(nil)); end

  # Return the number of folds.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/group_shuffle_split.rb#34
  def n_splits; end

  # Return the random generator for shuffling the dataset.
  #
  # @return [Random]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/group_shuffle_split.rb#38
  def rng; end

  # Generate train and test data indices by randomly selecting group labels.
  #
  # @overload split
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/group_shuffle_split.rb#65
  def split(_x, _y, groups); end

  private

  # source://rumale-model_selection//lib/rumale/model_selection/group_shuffle_split.rb#101
  def in1d(a, b); end
end

# KFold is a class that generates the set of data indices for K-fold cross-validation.
#
# @example
#   require 'rumale/model_selection/k_fold'
#
#   kf = Rumale::ModelSelection::KFold.new(n_splits: 3, shuffle: true, random_seed: 1)
#   kf.split(samples, labels).each do |train_ids, test_ids|
#     train_samples = samples[train_ids, true]
#     test_samples = samples[test_ids, true]
#     ...
#   end
#
# source://rumale-model_selection//lib/rumale/model_selection/k_fold.rb#20
class Rumale::ModelSelection::KFold
  include ::Rumale::Base::Splitter

  # Create a new data splitter for K-fold cross validation.
  #
  # @param n_splits [Integer] The number of folds.
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # @return [KFold] a new instance of KFold
  #
  # source://rumale-model_selection//lib/rumale/model_selection/k_fold.rb#40
  def initialize(n_splits: T.unsafe(nil), shuffle: T.unsafe(nil), random_seed: T.unsafe(nil)); end

  # Return the number of folds.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/k_fold.rb#25
  def n_splits; end

  # Return the random generator for shuffling the dataset.
  #
  # @return [Random]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/k_fold.rb#33
  def rng; end

  # Return the flag indicating whether to shuffle the dataset.
  #
  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/k_fold.rb#29
  def shuffle; end

  # Generate data indices for K-fold cross validation.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
  #   The dataset to be used to generate data indices for K-fold cross validation.
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/k_fold.rb#53
  def split(x, _y = T.unsafe(nil)); end
end

# ShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
#
# @example
#   require 'rumale/model_selection/shuffle_split'
#
#   ss = Rumale::ModelSelection::ShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
#   ss.split(samples, labels).each do |train_ids, test_ids|
#     train_samples = samples[train_ids, true]
#     test_samples = samples[test_ids, true]
#     ...
#   end
#
# source://rumale-model_selection//lib/rumale/model_selection/shuffle_split.rb#19
class Rumale::ModelSelection::ShuffleSplit
  include ::Rumale::Base::Splitter

  # Create a new data splitter for random permutation cross validation.
  #
  # @param n_splits [Integer] The number of folds.
  # @param test_size [Float] The ratio of number of samples for test data.
  # @param train_size [Float] The ratio of number of samples for train data.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # @return [ShuffleSplit] a new instance of ShuffleSplit
  #
  # source://rumale-model_selection//lib/rumale/model_selection/shuffle_split.rb#36
  def initialize(n_splits: T.unsafe(nil), test_size: T.unsafe(nil), train_size: T.unsafe(nil), random_seed: T.unsafe(nil)); end

  # Return the number of folds.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/shuffle_split.rb#24
  def n_splits; end

  # Return the random generator for shuffling the dataset.
  #
  # @return [Random]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/shuffle_split.rb#28
  def rng; end

  # Generate data indices for random permutation cross validation.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
  #   The dataset to be used to generate data indices for random permutation cross validation.
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/shuffle_split.rb#50
  def split(x, _y = T.unsafe(nil)); end
end

# StratifiedKFold is a class that generates the set of data indices for K-fold cross-validation.
# The proportion of the number of samples in each class will be almost equal for each fold.
#
# @example
#   require 'rumale/model_selection/stratified_k_fold'
#
#   kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 3, shuffle: true, random_seed: 1)
#   kf.split(samples, labels).each do |train_ids, test_ids|
#     train_samples = samples[train_ids, true]
#     test_samples = samples[test_ids, true]
#     ...
#   end
#
# source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#20
class Rumale::ModelSelection::StratifiedKFold
  include ::Rumale::Base::Splitter

  # Create a new data splitter for stratified K-fold cross validation.
  #
  # @param n_splits [Integer] The number of folds.
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # @return [StratifiedKFold] a new instance of StratifiedKFold
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#40
  def initialize(n_splits: T.unsafe(nil), shuffle: T.unsafe(nil), random_seed: T.unsafe(nil)); end

  # Return the number of folds.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#25
  def n_splits; end

  # Return the random generator for shuffling the dataset.
  #
  # @return [Random]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#33
  def rng; end

  # Return the flag indicating whether to shuffle the dataset.
  #
  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#29
  def shuffle; end

  # Generate data indices for stratified K-fold cross validation.
  #
  # @overload split
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#57
  def split(_x, y); end

  private

  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#76
  def fold_sets(y, label, sub_rng); end

  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#87
  def train_test_sets(fold_sets_each_class, fold_id); end

  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_k_fold.rb#72
  def valid_n_splits?(y); end
end

# StratifiedShuffleSplit is a class that generates the set of data indices for random permutation cross-validation.
# The proportion of the number of samples in each class will be almost equal for each fold.
#
# @example
#   require 'rumale/model_selection/stratified_shuffle_split'
#
#   ss = Rumale::ModelSelection::StratifiedShuffleSplit.new(n_splits: 3, test_size: 0.2, random_seed: 1)
#   ss.split(samples, labels).each do |train_ids, test_ids|
#     train_samples = samples[train_ids, true]
#     test_samples = samples[test_ids, true]
#     ...
#   end
#
# source://rumale-model_selection//lib/rumale/model_selection/stratified_shuffle_split.rb#20
class Rumale::ModelSelection::StratifiedShuffleSplit
  include ::Rumale::Base::Splitter

  # Create a new data splitter for random permutation cross validation.
  #
  # @param n_splits [Integer] The number of folds.
  # @param test_size [Float] The ratio of number of samples for test data.
  # @param train_size [Float] The ratio of number of samples for train data.
  # @param random_seed [Integer] The seed value used to initialize the random generator.
  # @return [StratifiedShuffleSplit] a new instance of StratifiedShuffleSplit
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_shuffle_split.rb#37
  def initialize(n_splits: T.unsafe(nil), test_size: T.unsafe(nil), train_size: T.unsafe(nil), random_seed: T.unsafe(nil)); end

  # Return the number of folds.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_shuffle_split.rb#25
  def n_splits; end

  # Return the random generator for shuffling the dataset.
  #
  # @return [Random]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_shuffle_split.rb#29
  def rng; end

  # Generate data indices for stratified random permutation cross validation.
  #
  # @overload split
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_shuffle_split.rb#55
  def split(_x, y); end

  private

  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_shuffle_split.rb#104
  def enough_data_size_each_class?(y, data_size, data_type); end

  # @return [Boolean]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/stratified_shuffle_split.rb#100
  def valid_n_splits?(y); end
end

# TimeSeriesSplit is a class that generates the set of data indices for time series cross-validation.
# It is assumed that the given dataset is already ordered by time information.
#
# @example
#   require 'rumale/model_selection/time_series_split'
#
#   cv = Rumale::ModelSelection::TimeSeriesSplit.new(n_splits: 5)
#   x = Numo::DFloat.new(6, 2).rand
#   cv.split(x, nil).each do |train_ids, test_ids|
#     puts '---'
#     pp train_ids
#     pp test_ids
#   end
#
#   # ---
#   # [0]
#   # [1]
#   # ---
#   # [0, 1]
#   # [2]
#   # ---
#   # [0, 1, 2]
#   # [3]
#   # ---
#   # [0, 1, 2, 3]
#   # [4]
#   # ---
#   # [0, 1, 2, 3, 4]
#   # [5]
#
# source://rumale-model_selection//lib/rumale/model_selection/time_series_split.rb#37
class Rumale::ModelSelection::TimeSeriesSplit
  include ::Rumale::Base::Splitter

  # Create a new data splitter for time series cross-validation.
  #
  # @param n_splits [Integer] The number of splits.
  # @param max_train_size [Integer/Nil] The maximum number of training samples in a split.
  # @return [TimeSeriesSplit] a new instance of TimeSeriesSplit
  #
  # source://rumale-model_selection//lib/rumale/model_selection/time_series_split.rb#52
  def initialize(n_splits: T.unsafe(nil), max_train_size: T.unsafe(nil)); end

  # Return the maximum number of training samples in a split.
  #
  # @return [Integer/Nil]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/time_series_split.rb#46
  def max_train_size; end

  # Return the number of splits.
  #
  # @return [Integer]
  #
  # source://rumale-model_selection//lib/rumale/model_selection/time_series_split.rb#42
  def n_splits; end

  # Generate data indices for time series cross-validation.
  #
  # @overload split
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  #
  # source://rumale-model_selection//lib/rumale/model_selection/time_series_split.rb#66
  def split(x, _y); end
end

# Version string constant of the rumale-model_selection gem (value elided in this RBI stub).
#
# source://rumale-model_selection//lib/rumale/model_selection/version.rb#6
Rumale::ModelSelection::VERSION = T.let(T.unsafe(nil), String)