cargo clippy
@@ -181,8 +181,8 @@ async fn main() -> Result<()> {
     // For each cluster, select a set of archetypal items that define the cluster
     for cluster_id in 0..args.item_clusters {
-        let start_idx = (cluster_id * args.num_items as i32 / args.item_clusters) as usize;
-        let end_idx = ((cluster_id + 1) * args.num_items as i32 / args.item_clusters) as usize;
+        let start_idx = (cluster_id * args.num_items / args.item_clusters) as usize;
+        let end_idx = ((cluster_id + 1) * args.num_items / args.item_clusters) as usize;

         // Set high affinity for items in this cluster's range
         for i in start_idx..end_idx {
@@ -192,17 +192,15 @@ async fn main() -> Result<()> {
         // Add some lower affinity to neighboring clusters' items
         let noise = args.noise_level;
         if cluster_id > 0 {
-            let prev_start =
-                ((cluster_id - 1) * args.num_items as i32 / args.item_clusters) as usize;
-            let prev_end = (cluster_id * args.num_items as i32 / args.item_clusters) as usize;
+            let prev_start = ((cluster_id - 1) * args.num_items / args.item_clusters) as usize;
+            let prev_end = (cluster_id * args.num_items / args.item_clusters) as usize;
             for i in prev_start..prev_end {
                 cluster_affinities[cluster_id as usize][i] = noise;
             }
         }
         if cluster_id < args.item_clusters - 1 {
-            let next_start =
-                ((cluster_id + 1) * args.num_items as i32 / args.item_clusters) as usize;
-            let next_end = ((cluster_id + 2) * args.num_items as i32 / args.item_clusters) as usize;
+            let next_start = ((cluster_id + 1) * args.num_items / args.item_clusters) as usize;
+            let next_end = ((cluster_id + 2) * args.num_items / args.item_clusters) as usize;
             for i in next_start..next_end {
                 cluster_affinities[cluster_id as usize][i] = noise;
             }
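The two hunks above drop no-op casts: args.num_items is already an i32, so writing args.num_items as i32 casts a value to its own type, which clippy reports as unnecessary_cast. A minimal sketch of the pattern, assuming i32 fields as the fix implies:

    fn cluster_bounds(cluster_id: i32, num_items: i32, item_clusters: i32) -> (usize, usize) {
        // num_items is already i32, so "num_items as i32" would be a
        // no-op cast that clippy::unnecessary_cast flags.
        let start = (cluster_id * num_items / item_clusters) as usize;
        let end = ((cluster_id + 1) * num_items / item_clusters) as usize;
        (start, end)
    }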
@@ -38,7 +38,7 @@ async fn create_pool() -> Result<Pool> {
     Ok(config.create_pool(Some(Runtime::Tokio1), NoTls)?)
 }

-async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args) -> Result<()> {
+async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, _args: &Args) -> Result<()> {
     info!("Analyzing cluster cohesion...");

     // Calculate cosine similarity between affinity vectors
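Renaming args to _args silences the unused-variable warning without changing the function's signature for callers: a leading underscore tells the compiler the binding is intentionally unused. A minimal sketch:

    // The underscore prefix marks the parameter as deliberately unused,
    // so rustc's unused_variables warning stays quiet.
    fn report(title: &str, _details: &str) {
        println!("{}", title);
    }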
@@ -131,8 +131,7 @@ async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args)
     }

     // Calculate separation between specific cluster pairs
-    let query = format!(
-        "WITH similarities AS (
+    let query = "WITH similarities AS (
         SELECT
             a.cluster_id as cluster1,
             b.cluster_id as cluster2,
@@ -150,8 +149,8 @@ async fn analyze_cluster_cohesion(client: &tokio_postgres::Client, args: &Args)
             COUNT(*) as num_pairs
         FROM similarities
         GROUP BY cluster1, cluster2
-        ORDER BY cluster1, cluster2",
-    );
+        ORDER BY cluster1, cluster2"
+    .to_string();

     info!("\nBetween-cluster separation:");
     let rows = client.query(&query, &[]).await?;
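format! with nothing to interpolate just allocates a String from a literal; clippy reports that as useless_format, and the fix builds the query as a literal plus .to_string(). A minimal sketch (the hypothetical build_query is illustrative, not from this codebase):

    fn build_query() -> String {
        // format!("SELECT 1") would allocate the same String but trips
        // clippy::useless_format because nothing is interpolated.
        "SELECT 1".to_string()
    }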
@@ -222,7 +221,7 @@ async fn analyze_embedding_stats(client: &tokio_postgres::Client, args: &Args) -
     Ok(())
 }

-async fn analyze_cluster_correlation(client: &tokio_postgres::Client, args: &Args) -> Result<()> {
+async fn analyze_cluster_correlation(client: &tokio_postgres::Client, _args: &Args) -> Result<()> {
     info!("Analyzing correlation between cluster affinities and embedding similarities...");

     // Calculate correlation between affinity similarities and embedding similarities
@@ -50,7 +50,7 @@ async fn create_pool() -> Result<Pool> {
 fn perform_pca(data: &Array2<f64>, n_components: usize) -> Result<Array2<f64>> {
     // Center the data
     let means = data.mean_axis(ndarray::Axis(0)).unwrap();
-    let centered = data.clone() - &means.view().insert_axis(ndarray::Axis(0));
+    let centered = data.clone() - means.view().insert_axis(ndarray::Axis(0));

     // Perform SVD
     let svd = centered.svd(true, true)?;
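Removing the & in front of means.view().insert_axis(...) is clippy's needless_borrow fix: the reference was taken only for the compiler to dereference it straight away. A minimal standalone sketch of the same lint, using plain strings instead of ndarray types:

    fn print_upper(s: &str) {
        println!("{}", s.to_uppercase());
    }

    fn main() {
        let msg = String::from("hello");
        // print_upper(&msg.as_str()) also compiles, but it borrows a
        // reference only for it to be dereferenced again;
        // clippy::needless_borrow suggests the direct form.
        print_upper(msg.as_str());
    }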
@@ -81,26 +81,24 @@ async fn main() -> Result<()> {

     let rows = client.query(&query, &[]).await?;
     let n_items = rows.len();
-    let n_dims = if let Some(first_row) = rows.first() {
+    let (n_dims, affinity_dims) = if let Some(first_row) = rows.first() {
         let embedding: Vec<f64> = first_row.get(1);
-        embedding.len()
-    } else {
-        return Ok(());
-    };
-
-    // Get affinity dimension (should be number of items)
-    let affinity_dims = if let Some(first_row) = rows.first() {
         let affinities: Vec<f64> = first_row.get(3);
-        affinities.len()
+        (embedding.len(), affinities.len())
     } else {
         return Ok(());
     };

+    info!(
+        "Embedding dimension: {}, Affinity dimension: {}",
+        n_dims, affinity_dims
+    );
+
     // Convert data to ndarray format
-    let mut data = Array2::zeros((n_items, n_dims));
+    let mut embedding_data = Array2::zeros((n_items, n_dims));
+    let mut affinity_data = Array2::zeros((n_items, affinity_dims));
     let mut item_ids = Vec::with_capacity(n_items);
     let mut cluster_ids = Vec::with_capacity(n_items);
-    let mut affinity_data = Array2::zeros((n_items, affinity_dims)); // Use full affinity dimension

     for (i, row) in rows.iter().enumerate() {
         let item_id: i32 = row.get(0);
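This hunk folds two separate if let Some(first_row) blocks over the same result set into one block that returns both dimensions as a tuple, so the empty-result early return is written once. A minimal sketch of the shape, with the hypothetical dims standing in for the real row accessors:

    fn dims(rows: &[(Vec<f64>, Vec<f64>)]) -> Option<(usize, usize)> {
        // One look at the first row yields both lengths at once, instead
        // of two if-let blocks each carrying its own early return.
        let (embedding, affinities) = rows.first()?;
        Some((embedding.len(), affinities.len()))
    }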
@@ -110,7 +108,9 @@ async fn main() -> Result<()> {

         item_ids.push(item_id);
         cluster_ids.push(cluster_id);
-        data.row_mut(i).assign(&ArrayView1::from(&embedding));
+        embedding_data
+            .row_mut(i)
+            .assign(&ArrayView1::from(&embedding));
         affinity_data
             .row_mut(i)
             .assign(&ArrayView1::from(&affinities));
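Both matrices are filled with the same ndarray idiom: row_mut(i) yields a mutable view of row i, and assign copies a one-dimensional view into it. A minimal self-contained sketch (fill_rows is illustrative, not from this codebase):

    use ndarray::{Array2, ArrayView1};

    fn fill_rows(rows: &[Vec<f64>], n_dims: usize) -> Array2<f64> {
        let mut data = Array2::zeros((rows.len(), n_dims));
        for (i, row) in rows.iter().enumerate() {
            // Copy the Vec's contents into row i of the matrix.
            data.row_mut(i).assign(&ArrayView1::from(row.as_slice()));
        }
        data
    }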
@@ -118,7 +118,7 @@ async fn main() -> Result<()> {

     // Perform PCA on both embeddings and affinity vectors
     info!("Performing PCA...");
-    let projected_embeddings = perform_pca(&data, 3)?;
+    let projected_embeddings = perform_pca(&embedding_data, 3)?;
     let projected_affinities = perform_pca(&affinity_data, 3)?;

     // Create scatter plot for each cluster using embeddings
@@ -157,7 +157,7 @@ async fn main() -> Result<()> {
         .collect();

     let trace = Scatter3D::new(x, y, z)
-        .name(&format!("Cluster {} (Embeddings)", cluster_id))
+        .name(format!("Cluster {} (Embeddings)", cluster_id))
         .mode(Mode::Markers)
         .text_array(text)
         .marker(
@@ -199,7 +199,7 @@ async fn main() -> Result<()> {
         .collect();

     let trace = Scatter3D::new(x, y, z)
-        .name(&format!("Cluster {} (Affinities)", cluster_id))
+        .name(format!("Cluster {} (Affinities)", cluster_id))
         .mode(Mode::Markers)
         .text_array(text)
         .marker(
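The two scatter-trace hunks make the same change: format! already yields an owned String, and borrowing that temporary just for the callee to re-borrow it is another needless_borrow case. A sketch under the assumption that the builder method accepts anything AsRef<str>, with the hypothetical label standing in for .name():

    fn label(name: impl AsRef<str>) -> String {
        name.as_ref().to_owned()
    }

    fn main() {
        let cluster_id = 3;
        // label(&format!(...)) also satisfies AsRef<str>, but the extra
        // borrow of a temporary is what clippy::needless_borrow removes.
        let tag = label(format!("Cluster {} (Embeddings)", cluster_id));
        println!("{}", tag);
    }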
@@ -4,7 +4,6 @@ use deadpool_postgres::{Config, Pool, Runtime};
 use dotenv::dotenv;
 use libmf::{Loss, Matrix, Model};
 use log::info;
-use num_cpus;
 use std::env;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
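Since the 2018 edition a dependency's crate name resolves without a use declaration, so use num_cpus; imports nothing; clippy reports it as single_component_path_imports and the hunk deletes it. A minimal sketch, assuming num_cpus stays declared in Cargo.toml:

    // No "use num_cpus;" needed: the crate path resolves directly.
    fn worker_count() -> usize {
        num_cpus::get()
    }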
@@ -224,7 +223,7 @@ async fn main() -> Result<()> {

     // Process batch
     for (user_id, item_id) in batch {
-        matrix.push(user_id as i32, item_id as i32, 1.0f32);
+        matrix.push(user_id, item_id, 1.0f32);
     }
 }

@@ -243,7 +242,7 @@ async fn main() -> Result<()> {

     // Set up training parameters
     let model = Model::params()
-        .factors(args.factors as i32)
+        .factors(args.factors)
         .lambda_p1(args.lambda1)
         .lambda_q1(args.lambda1)
         .lambda_p2(args.lambda2)
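The last two hunks remove casts at the call sites by relying on the argument fields already having the types libmf takes (i32 item indices and factor counts). One way to get there is to declare the clap fields with those types up front; a hedged sketch with a hypothetical Args derive, since the project's actual struct is not shown in this diff:

    use clap::Parser;

    #[derive(Parser)]
    struct Args {
        // Declared as i32 so .factors(args.factors) needs no cast.
        #[arg(long, default_value_t = 32)]
        factors: i32,
    }

    fn main() {
        let args = Args::parse();
        println!("factors = {}", args.factors);
    }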