From 2738b8469b56ccf7b1934d1c362cea2d1715e908 Mon Sep 17 00:00:00 2001 From: Dylan Knutson Date: Sat, 28 Dec 2024 03:46:30 +0000 Subject: [PATCH] cargo clippy --- src/bin/generate_test_data.rs | 14 ++++++-------- src/bin/validate_embeddings.rs | 11 +++++------ src/bin/visualize_embeddings.rs | 32 ++++++++++++++++---------------- src/main.rs | 5 ++--- 4 files changed, 29 insertions(+), 33 deletions(-) diff --git a/src/bin/generate_test_data.rs b/src/bin/generate_test_data.rs index f532432..49e033c 100644 --- a/src/bin/generate_test_data.rs +++ b/src/bin/generate_test_data.rs @@ -181,8 +181,8 @@ async fn main() -> Result<()> { // For each cluster, select a set of archetypal items that define the cluster for cluster_id in 0..args.item_clusters { - let start_idx = (cluster_id * args.num_items as i32 / args.item_clusters) as usize; - let end_idx = ((cluster_id + 1) * args.num_items as i32 / args.item_clusters) as usize; + let start_idx = (cluster_id * args.num_items / args.item_clusters) as usize; + let end_idx = ((cluster_id + 1) * args.num_items / args.item_clusters) as usize; // Set high affinity for items in this cluster's range for i in start_idx..end_idx { @@ -192,17 +192,15 @@ async fn main() -> Result<()> { // Add some lower affinity to neighboring clusters' items let noise = args.noise_level; if cluster_id > 0 { - let prev_start = - ((cluster_id - 1) * args.num_items as i32 / args.item_clusters) as usize; - let prev_end = (cluster_id * args.num_items as i32 / args.item_clusters) as usize; + let prev_start = ((cluster_id - 1) * args.num_items / args.item_clusters) as usize; + let prev_end = (cluster_id * args.num_items / args.item_clusters) as usize; for i in prev_start..prev_end { cluster_affinities[cluster_id as usize][i] = noise; } } if cluster_id < args.item_clusters - 1 { - let next_start = - ((cluster_id + 1) * args.num_items as i32 / args.item_clusters) as usize; - let next_end = ((cluster_id + 2) * args.num_items as i32 / args.item_clusters) as usize; + let next_start = ((cluster_id + 1) * args.num_items / args.item_clusters) as usize; + let next_end = ((cluster_id + 2) * args.num_items / args.item_clusters) as usize; for i in next_start..next_end { cluster_affinities[cluster_id as usize][i] = noise; } diff --git a/src/bin/validate_embeddings.rs b/src/bin/validate_embeddings.rs index 92de794..1434de4 100644 --- a/src/bin/validate_embeddings.rs +++ b/src/bin/validate_embeddings.rs @@ -38,7 +38,7 @@ async fn create_pool() -> Result { Ok(config.create_pool(Some(Runtime::Tokio1), NoTls)?) } -async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args) -> Result<()> { +async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, _args: &Args) -> Result<()> { info!("Analyzing cluster cohesion..."); // Calculate cosine similarity between affinity vectors @@ -131,8 +131,7 @@ async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args) } // Calculate separation between specific cluster pairs - let query = format!( - "WITH similarities AS ( + let query = "WITH similarities AS ( SELECT a.cluster_id as cluster1, b.cluster_id as cluster2, @@ -150,8 +149,8 @@ async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args) COUNT(*) as num_pairs FROM similarities GROUP BY cluster1, cluster2 - ORDER BY cluster1, cluster2", - ); + ORDER BY cluster1, cluster2" + .to_string(); info!("\nBetween-cluster separation:"); let rows = client.query(&query, &[]).await?; @@ -222,7 +221,7 @@ async fn analyze_embedding_stats(client: &tokio_postgres::Client, args: &Args) - Ok(()) } -async fn analyze_cluster_correlation(client: &tokio_postgres::Client, args: &Args) -> Result<()> { +async fn analyze_cluster_correlation(client: &tokio_postgres::Client, _args: &Args) -> Result<()> { info!("Analyzing correlation between cluster affinities and embedding similarities..."); // Calculate correlation between affinity similarities and embedding similarities diff --git a/src/bin/visualize_embeddings.rs b/src/bin/visualize_embeddings.rs index 14e39a4..0726c2f 100644 --- a/src/bin/visualize_embeddings.rs +++ b/src/bin/visualize_embeddings.rs @@ -50,7 +50,7 @@ async fn create_pool() -> Result { fn perform_pca(data: &Array2, n_components: usize) -> Result> { // Center the data let means = data.mean_axis(ndarray::Axis(0)).unwrap(); - let centered = data.clone() - &means.view().insert_axis(ndarray::Axis(0)); + let centered = data.clone() - means.view().insert_axis(ndarray::Axis(0)); // Perform SVD let svd = centered.svd(true, true)?; @@ -81,26 +81,24 @@ async fn main() -> Result<()> { let rows = client.query(&query, &[]).await?; let n_items = rows.len(); - let n_dims = if let Some(first_row) = rows.first() { + let (n_dims, affinity_dims) = if let Some(first_row) = rows.first() { let embedding: Vec = first_row.get(1); - embedding.len() + let affinities: Vec = first_row.get(3); + (embedding.len(), affinities.len()) } else { return Ok(()); }; - // Get affinity dimension (should be number of items) - let affinity_dims = if let Some(first_row) = rows.first() { - let affinities: Vec = first_row.get(3); - affinities.len() - } else { - return Ok(()); - }; + info!( + "Embedding dimension: {}, Affinity dimension: {}", + n_dims, affinity_dims + ); // Convert data to ndarray format - let mut data = Array2::zeros((n_items, n_dims)); + let mut embedding_data = Array2::zeros((n_items, n_dims)); + let mut affinity_data = Array2::zeros((n_items, affinity_dims)); let mut item_ids = Vec::with_capacity(n_items); let mut cluster_ids = Vec::with_capacity(n_items); - let mut affinity_data = Array2::zeros((n_items, affinity_dims)); // Use full affinity dimension for (i, row) in rows.iter().enumerate() { let item_id: i32 = row.get(0); @@ -110,7 +108,9 @@ async fn main() -> Result<()> { item_ids.push(item_id); cluster_ids.push(cluster_id); - data.row_mut(i).assign(&ArrayView1::from(&embedding)); + embedding_data + .row_mut(i) + .assign(&ArrayView1::from(&embedding)); affinity_data .row_mut(i) .assign(&ArrayView1::from(&affinities)); @@ -118,7 +118,7 @@ async fn main() -> Result<()> { // Perform PCA on both embeddings and affinity vectors info!("Performing PCA..."); - let projected_embeddings = perform_pca(&data, 3)?; + let projected_embeddings = perform_pca(&embedding_data, 3)?; let projected_affinities = perform_pca(&affinity_data, 3)?; // Create scatter plot for each cluster using embeddings @@ -157,7 +157,7 @@ async fn main() -> Result<()> { .collect(); let trace = Scatter3D::new(x, y, z) - .name(&format!("Cluster {} (Embeddings)", cluster_id)) + .name(format!("Cluster {} (Embeddings)", cluster_id)) .mode(Mode::Markers) .text_array(text) .marker( @@ -199,7 +199,7 @@ async fn main() -> Result<()> { .collect(); let trace = Scatter3D::new(x, y, z) - .name(&format!("Cluster {} (Affinities)", cluster_id)) + .name(format!("Cluster {} (Affinities)", cluster_id)) .mode(Mode::Markers) .text_array(text) .marker( diff --git a/src/main.rs b/src/main.rs index 39f169e..19c7e76 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,7 +4,6 @@ use deadpool_postgres::{Config, Pool, Runtime}; use dotenv::dotenv; use libmf::{Loss, Matrix, Model}; use log::info; -use num_cpus; use std::env; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -224,7 +223,7 @@ async fn main() -> Result<()> { // Process batch for (user_id, item_id) in batch { - matrix.push(user_id as i32, item_id as i32, 1.0f32); + matrix.push(user_id, item_id, 1.0f32); } } @@ -243,7 +242,7 @@ async fn main() -> Result<()> { // Set up training parameters let model = Model::params() - .factors(args.factors as i32) + .factors(args.factors) .lambda_p1(args.lambda1) .lambda_q1(args.lambda1) .lambda_p2(args.lambda2)