cargo clippy

Dylan Knutson
2024-12-28 03:46:30 +00:00
parent 6ebbd6aaa9
commit 2738b8469b
4 changed files with 29 additions and 33 deletions

View File

@@ -181,8 +181,8 @@ async fn main() -> Result<()> {
     // For each cluster, select a set of archetypal items that define the cluster
     for cluster_id in 0..args.item_clusters {
-        let start_idx = (cluster_id * args.num_items as i32 / args.item_clusters) as usize;
-        let end_idx = ((cluster_id + 1) * args.num_items as i32 / args.item_clusters) as usize;
+        let start_idx = (cluster_id * args.num_items / args.item_clusters) as usize;
+        let end_idx = ((cluster_id + 1) * args.num_items / args.item_clusters) as usize;
         // Set high affinity for items in this cluster's range
         for i in start_idx..end_idx {
@@ -192,17 +192,15 @@ async fn main() -> Result<()> {
         // Add some lower affinity to neighboring clusters' items
         let noise = args.noise_level;
         if cluster_id > 0 {
-            let prev_start =
-                ((cluster_id - 1) * args.num_items as i32 / args.item_clusters) as usize;
-            let prev_end = (cluster_id * args.num_items as i32 / args.item_clusters) as usize;
+            let prev_start = ((cluster_id - 1) * args.num_items / args.item_clusters) as usize;
+            let prev_end = (cluster_id * args.num_items / args.item_clusters) as usize;
             for i in prev_start..prev_end {
                 cluster_affinities[cluster_id as usize][i] = noise;
             }
         }
         if cluster_id < args.item_clusters - 1 {
-            let next_start =
-                ((cluster_id + 1) * args.num_items as i32 / args.item_clusters) as usize;
-            let next_end = ((cluster_id + 2) * args.num_items as i32 / args.item_clusters) as usize;
+            let next_start = ((cluster_id + 1) * args.num_items / args.item_clusters) as usize;
+            let next_end = ((cluster_id + 2) * args.num_items / args.item_clusters) as usize;
             for i in next_start..next_end {
                 cluster_affinities[cluster_id as usize][i] = noise;
             }
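
Note: both hunks above fix clippy's unnecessary_cast lint. The cast was a no-op because the Args fields involved are evidently already i32; only the final conversion to usize is needed for indexing. A minimal sketch of the pattern (this Args struct is a hypothetical stand-in for the clap struct in this repo):

struct Args {
    num_items: i32,
    item_clusters: i32,
}

fn main() {
    let args = Args { num_items: 100, item_clusters: 5 };
    for cluster_id in 0..args.item_clusters {
        // `args.num_items as i32` would trip clippy::unnecessary_cast,
        // since the field is already i32; `as usize` is still required
        // because Rust does not index slices with i32.
        let start_idx = (cluster_id * args.num_items / args.item_clusters) as usize;
        let end_idx = ((cluster_id + 1) * args.num_items / args.item_clusters) as usize;
        println!("cluster {cluster_id}: items {start_idx}..{end_idx}");
    }
}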

View File

@@ -38,7 +38,7 @@ async fn create_pool() -> Result<Pool> {
     Ok(config.create_pool(Some(Runtime::Tokio1), NoTls)?)
 }
-async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args) -> Result<()> {
+async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, _args: &Args) -> Result<()> {
     info!("Analyzing cluster cohesion...");
     // Calculate cosine similarity between affinity vectors
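
Note: args is no longer read inside this function, so the unused_variables warning fires under cargo clippy; the underscore prefix silences it while keeping the signature (and every call site) unchanged. The pattern in isolation, with &str standing in for &Args:

// Prefixing with `_` marks the parameter as intentionally unused.
fn analyze(_args: &str) {
    println!("Analyzing cluster cohesion...");
}

fn main() {
    analyze("unused for now");
}
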
@@ -131,8 +131,7 @@ async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args)
     }
     // Calculate separation between specific cluster pairs
-    let query = format!(
-        "WITH similarities AS (
+    let query = "WITH similarities AS (
         SELECT
             a.cluster_id as cluster1,
             b.cluster_id as cluster2,
@@ -150,8 +149,8 @@ async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args)
             COUNT(*) as num_pairs
         FROM similarities
         GROUP BY cluster1, cluster2
-        ORDER BY cluster1, cluster2",
-    );
+        ORDER BY cluster1, cluster2"
+        .to_string();
     info!("\nBetween-cluster separation:");
     let rows = client.query(&query, &[]).await?;
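
Note: the old code wrapped a literal with no {} placeholders in format!, which clippy reports as useless_format; a plain .to_string() says the same thing without the formatting machinery. Sketch:

fn main() {
    // Before: `let query = format!("SELECT 1");` -- clippy::useless_format.
    let query = "SELECT 1".to_string();

    // format! remains the right tool once interpolation is involved:
    let table = "items";
    let counted = format!("SELECT count(*) FROM {table}");
    println!("{query}\n{counted}");
}
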
@@ -222,7 +221,7 @@ async fn analyze_embedding_stats(client: &tokio_postgres::Client, args: &Args) -
     Ok(())
 }
-async fn analyze_cluster_correlation(client: &tokio_postgres::Client, args: &Args) -> Result<()> {
+async fn analyze_cluster_correlation(client: &tokio_postgres::Client, _args: &Args) -> Result<()> {
     info!("Analyzing correlation between cluster affinities and embedding similarities...");
     // Calculate correlation between affinity similarities and embedding similarities

View File

@@ -50,7 +50,7 @@ async fn create_pool() -> Result<Pool> {
 fn perform_pca(data: &Array2<f64>, n_components: usize) -> Result<Array2<f64>> {
     // Center the data
     let means = data.mean_axis(ndarray::Axis(0)).unwrap();
-    let centered = data.clone() - &means.view().insert_axis(ndarray::Axis(0));
+    let centered = data.clone() - means.view().insert_axis(ndarray::Axis(0));
     // Perform SVD
     let svd = centered.svd(true, true)?;
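
Note: ndarray implements the arithmetic operators for views taken by value as well as by reference, so the `&` in front of means.view().insert_axis(...) was a needless borrow (clippy::needless_borrow). A self-contained sketch of the centering step, assuming only the ndarray crate:

use ndarray::{array, Axis};

fn main() {
    let data = array![[1.0, 2.0], [3.0, 4.0]];
    // Column means: [2.0, 3.0]
    let means = data.mean_axis(Axis(0)).unwrap();
    // The (1, n) view broadcasts across the rows; no `&` needed.
    let centered = data.clone() - means.view().insert_axis(Axis(0));
    assert_eq!(centered, array![[-1.0, -1.0], [1.0, 1.0]]);
}
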
@@ -81,26 +81,24 @@ async fn main() -> Result<()> {
     let rows = client.query(&query, &[]).await?;
     let n_items = rows.len();
-    let n_dims = if let Some(first_row) = rows.first() {
+    let (n_dims, affinity_dims) = if let Some(first_row) = rows.first() {
         let embedding: Vec<f64> = first_row.get(1);
-        embedding.len()
+        let affinities: Vec<f64> = first_row.get(3);
+        (embedding.len(), affinities.len())
     } else {
         return Ok(());
     };
-    // Get affinity dimension (should be number of items)
-    let affinity_dims = if let Some(first_row) = rows.first() {
-        let affinities: Vec<f64> = first_row.get(3);
-        affinities.len()
-    } else {
-        return Ok(());
-    };
+    info!(
+        "Embedding dimension: {}, Affinity dimension: {}",
+        n_dims, affinity_dims
+    );
     // Convert data to ndarray format
-    let mut data = Array2::zeros((n_items, n_dims));
+    let mut embedding_data = Array2::zeros((n_items, n_dims));
+    let mut affinity_data = Array2::zeros((n_items, affinity_dims));
     let mut item_ids = Vec::with_capacity(n_items);
     let mut cluster_ids = Vec::with_capacity(n_items);
-    let mut affinity_data = Array2::zeros((n_items, affinity_dims)); // Use full affinity dimension
     for (i, row) in rows.iter().enumerate() {
         let item_id: i32 = row.get(0);
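
Note: besides renaming data to embedding_data, this hunk collapses two identical if-let/else-return blocks into one that yields both dimensions as a tuple, and logs them. One early-exit point instead of two keeps the bail-out logic in a single place. The shape of the refactor, sketched with hypothetical (embedding, affinities) tuples standing in for the postgres rows:

// Single early exit covering both dimensions at once.
fn dims(rows: &[(Vec<f64>, Vec<f64>)]) -> Option<(usize, usize)> {
    let (embedding, affinities) = rows.first()?;
    Some((embedding.len(), affinities.len()))
}

fn main() {
    let rows = vec![(vec![0.1, 0.2, 0.3], vec![1.0, 0.0])];
    if let Some((n_dims, affinity_dims)) = dims(&rows) {
        println!("Embedding dimension: {n_dims}, Affinity dimension: {affinity_dims}");
    }
}
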
@@ -110,7 +108,9 @@ async fn main() -> Result<()> {
         item_ids.push(item_id);
         cluster_ids.push(cluster_id);
-        data.row_mut(i).assign(&ArrayView1::from(&embedding));
+        embedding_data
+            .row_mut(i)
+            .assign(&ArrayView1::from(&embedding));
         affinity_data
             .row_mut(i)
             .assign(&ArrayView1::from(&affinities));
@@ -118,7 +118,7 @@ async fn main() -> Result<()> {
     // Perform PCA on both embeddings and affinity vectors
     info!("Performing PCA...");
-    let projected_embeddings = perform_pca(&data, 3)?;
+    let projected_embeddings = perform_pca(&embedding_data, 3)?;
     let projected_affinities = perform_pca(&affinity_data, 3)?;
     // Create scatter plot for each cluster using embeddings
@@ -157,7 +157,7 @@ async fn main() -> Result<()> {
             .collect();
         let trace = Scatter3D::new(x, y, z)
-            .name(&format!("Cluster {} (Embeddings)", cluster_id))
+            .name(format!("Cluster {} (Embeddings)", cluster_id))
             .mode(Mode::Markers)
             .text_array(text)
             .marker(
@@ -199,7 +199,7 @@ async fn main() -> Result<()> {
             .collect();
         let trace = Scatter3D::new(x, y, z)
-            .name(&format!("Cluster {} (Affinities)", cluster_id))
+            .name(format!("Cluster {} (Affinities)", cluster_id))
             .mode(Mode::Markers)
             .text_array(text)
             .marker(
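
Note: both Scatter3D hunks drop a needless borrow. The name setter accepts the string by value (the exact plotly signature is not quoted here, but it is generic enough that the freshly built String can be moved in), so &format!(...) earned a clippy::needless_borrow. The pattern in isolation, with a hypothetical setter in the builder style plotly uses:

fn set_name(name: impl AsRef<str>) {
    println!("trace name: {}", name.as_ref());
}

fn main() {
    let cluster_id = 3;
    // `set_name(&format!(...))` compiles too, but the borrow is needless:
    // the String can be handed over directly.
    set_name(format!("Cluster {} (Embeddings)", cluster_id));
}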

View File

@@ -4,7 +4,6 @@ use deadpool_postgres::{Config, Pool, Runtime};
 use dotenv::dotenv;
 use libmf::{Loss, Matrix, Model};
 use log::info;
-use num_cpus;
 use std::env;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
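
Note: `use num_cpus;` imports a single path segment that the 2018+ editions already provide through the extern prelude, so clippy flags it as single_component_path_imports. With the dependency in Cargo.toml, the crate stays usable without any use line:

fn main() {
    // No `use num_cpus;` needed to call into the crate.
    println!("logical cores: {}", num_cpus::get());
}
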
@@ -224,7 +223,7 @@ async fn main() -> Result<()> {
         // Process batch
         for (user_id, item_id) in batch {
-            matrix.push(user_id as i32, item_id as i32, 1.0f32);
+            matrix.push(user_id, item_id, 1.0f32);
         }
     }
@@ -243,7 +242,7 @@ async fn main() -> Result<()> {
     // Set up training parameters
     let model = Model::params()
-        .factors(args.factors as i32)
+        .factors(args.factors)
         .lambda_p1(args.lambda1)
         .lambda_q1(args.lambda1)
         .lambda_p2(args.lambda2)