Skip to content

Commit 7a8ea6b

Browse files
author
Ian
committed
Version bump
- Changed orientation for testing operations - Implemented speedup for testing
1 parent e86353b commit 7a8ea6b

File tree

5 files changed

+72
-61
lines changed

5 files changed

+72
-61
lines changed

Cargo.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "single-statistics"
3-
version = "0.2.0"
3+
version = "0.3.0"
44
edition = "2024"
55
license-file = "LICENSE.md"
66
readme = "README.md"
@@ -15,8 +15,8 @@ anyhow = "1.0.98"
1515
nalgebra-sparse = "0.10.0"
1616
num-traits = "0.2.19"
1717
rayon = "1.10.0"
18-
single-utilities = "0.3.0"
18+
single-utilities = "0.6.0"
1919
statrs = "0.18.0"
2020

2121
[dev-dependencies]
22-
approx = "0.5.1"
22+
approx = "0.5.1"

src/testing/effect/mod.rs

Lines changed: 54 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
use nalgebra_sparse::CsrMatrix;
2-
use num_traits::{Float, NumCast, FromPrimitive};
3-
use std::fmt::Debug;
2+
use num_traits::{Float, FromPrimitive, NumCast};
43
use single_utilities::traits::FloatOpsTS;
4+
use std::fmt::Debug;
55

66
/// Calculate log2 fold change between two groups
77
pub fn calculate_log2_fold_change<T>(
88
matrix: &CsrMatrix<T>,
9-
row: usize,
10-
group1_indices: &[usize],
11-
group2_indices: &[usize],
12-
pseudo_count: f64,
9+
col: usize,
10+
group1_indices: &[usize], // Group of interest
11+
group2_indices: &[usize], // Reference group
12+
pseudo_count: f64, // Small value like 1e-9 or 1.0
1313
) -> anyhow::Result<f64>
1414
where
1515
T: FloatOpsTS,
@@ -18,30 +18,30 @@ where
1818
return Err(anyhow::anyhow!("Group indices cannot be empty"));
1919
}
2020

21-
// Calculate mean for group 1
2221
let mut sum1 = 0.0;
23-
for &col in group1_indices {
22+
let mut count1 = 0;
23+
for &row in group1_indices {
2424
if let Some(entry) = matrix.get_entry(row, col) {
2525
let value = entry.into_value();
26-
sum1 += value.to_f64().unwrap();
26+
sum1 += value.to_f64().unwrap_or(0.0);
27+
count1 += 1;
2728
}
28-
// Missing entries are treated as zero
2929
}
30-
let mean1 = sum1 / group1_indices.len() as f64 + pseudo_count;
3130

32-
// Calculate mean for group 2
3331
let mut sum2 = 0.0;
34-
for &col in group2_indices {
32+
let mut count2 = 0;
33+
for &row in group2_indices {
3534
if let Some(entry) = matrix.get_entry(row, col) {
3635
let value = entry.into_value();
37-
sum2 += value.to_f64().unwrap();
36+
sum2 += value.to_f64().unwrap_or(0.0);
37+
count2 += 1;
3838
}
39-
// Missing entries are treated as zero
4039
}
40+
41+
let mean1 = sum1 / group1_indices.len() as f64 + pseudo_count;
4142
let mean2 = sum2 / group2_indices.len() as f64 + pseudo_count;
4243

43-
// Calculate log2 fold change
44-
let log2_fc = (mean2 / mean1).log2();
44+
let log2_fc = (mean1 / mean2).log2();
4545

4646
Ok(log2_fc)
4747
}
@@ -57,7 +57,9 @@ where
5757
T: FloatOpsTS,
5858
{
5959
if group1_indices.len() < 2 || group2_indices.len() < 2 {
60-
return Err(anyhow::anyhow!("Each group must have at least 2 samples for Cohen's d"));
60+
return Err(anyhow::anyhow!(
61+
"Each group must have at least 2 samples for Cohen's d"
62+
));
6163
}
6264

6365
// Extract values for group 1
@@ -87,13 +89,17 @@ where
8789
let mean2 = group2_values.iter().sum::<f64>() / group2_values.len() as f64;
8890

8991
// Calculate variances
90-
let var1 = group1_values.iter()
92+
let var1 = group1_values
93+
.iter()
9194
.map(|&x| (x - mean1).powi(2))
92-
.sum::<f64>() / (group1_values.len() - 1) as f64;
95+
.sum::<f64>()
96+
/ (group1_values.len() - 1) as f64;
9397

94-
let var2 = group2_values.iter()
98+
let var2 = group2_values
99+
.iter()
95100
.map(|&x| (x - mean2).powi(2))
96-
.sum::<f64>() / (group2_values.len() - 1) as f64;
101+
.sum::<f64>()
102+
/ (group2_values.len() - 1) as f64;
97103

98104
// Calculate pooled standard deviation
99105
let n1 = group1_values.len() as f64;
@@ -148,35 +154,33 @@ mod tests {
148154
// Row 3: Extreme difference
149155
// Row 4: All zeros in group 1
150156
let rows = vec![
151-
0, 0, 0, 0, 0, 0, // Row 0: all positions filled
152-
1, 1, 1, 1, 1, 1, // Row 1: all positions filled
153-
2, 2, 2, 2, 2, 2, // Row 2: all positions filled
154-
3, 3, 3, 3, 3, 3, // Row 3: all positions filled
155-
4, 4, 4 // Row 4: sparse (no entries for group 1)
157+
0, 0, 0, 0, 0, 0, // Row 0: all positions filled
158+
1, 1, 1, 1, 1, 1, // Row 1: all positions filled
159+
2, 2, 2, 2, 2, 2, // Row 2: all positions filled
160+
3, 3, 3, 3, 3, 3, // Row 3: all positions filled
161+
4, 4, 4, // Row 4: sparse (no entries for group 1)
156162
];
157163
let cols = vec![
158-
0, 1, 2, 3, 4, 5, // Row 0
159-
0, 1, 2, 3, 4, 5, // Row 1
160-
0, 1, 2, 3, 4, 5, // Row 2
161-
0, 1, 2, 3, 4, 5, // Row 3
162-
3, 4, 5 // Row 4 (only group 2 values)
164+
0, 1, 2, 3, 4, 5, // Row 0
165+
0, 1, 2, 3, 4, 5, // Row 1
166+
0, 1, 2, 3, 4, 5, // Row 2
167+
0, 1, 2, 3, 4, 5, // Row 3
168+
3, 4, 5, // Row 4 (only group 2 values)
163169
];
164170
let vals = vec![
165-
2.0, 2.2, 1.8, 8.0, 7.5, 8.5, // Row 0: ~2 vs ~8 = big difference
166-
5.0, 5.1, 4.9, 5.0, 5.1, 4.9, // Row 1: ~5 vs ~5 = no difference
167-
3.0, 3.3, 2.7, 5.0, 4.7, 5.3, // Row 2: ~3 vs ~5 = moderate
171+
2.0, 2.2, 1.8, 8.0, 7.5, 8.5, // Row 0: ~2 vs ~8 = big difference
172+
5.0, 5.1, 4.9, 5.0, 5.1, 4.9, // Row 1: ~5 vs ~5 = no difference
173+
3.0, 3.3, 2.7, 5.0, 4.7, 5.3, // Row 2: ~3 vs ~5 = moderate
168174
0.1, 0.2, 0.1, 20.0, 19.0, 21.0, // Row 3: ~0.1 vs ~20 = extreme
169-
10.0, 8.0, 12.0 // Row 4: 0 vs ~10 = missing data test
175+
10.0, 8.0, 12.0, // Row 4: 0 vs ~10 = missing data test
170176
];
171177

172178
let coo = CooMatrix::try_from_triplets(
173-
5, // 5 rows
174-
6, // 6 columns
175-
rows,
176-
cols,
177-
vals,
179+
5, // 5 rows
180+
6, // 6 columns
181+
rows, cols, vals,
178182
)
179-
.unwrap();
183+
.unwrap();
180184

181185
CsrMatrix::from(&coo)
182186
}
@@ -186,7 +190,7 @@ mod tests {
186190
let matrix = create_test_matrix();
187191
let group1 = vec![0, 1, 2]; // First group indices
188192
let group2 = vec![3, 4, 5]; // Second group indices
189-
let pseudo_count = 0.01; // Small pseudo count
193+
let pseudo_count = 0.01; // Small pseudo count
190194

191195
// Row 0: Clear difference (~2 vs ~8)
192196
let fc0 = calculate_log2_fold_change(&matrix, 0, &group1, &group2, pseudo_count).unwrap();
@@ -281,8 +285,10 @@ mod tests {
281285
// This will fail if sample sizes are out of range, but the test is to show
282286
// that larger sample sizes bring Hedge's g closer to Cohen's d
283287
if matrix.ncols() >= 16 {
284-
let d_large = calculate_cohens_d(&matrix, 0, &large_group1, &large_group2).unwrap_or(d0);
285-
let g_large = calculate_hedges_g(&matrix, 0, &large_group1, &large_group2).unwrap_or(g0);
288+
let d_large =
289+
calculate_cohens_d(&matrix, 0, &large_group1, &large_group2).unwrap_or(d0);
290+
let g_large =
291+
calculate_hedges_g(&matrix, 0, &large_group1, &large_group2).unwrap_or(g0);
286292

287293
// With more samples, g should be closer to d
288294
assert!(g_large / d_large > g0 / d0);
@@ -295,8 +301,8 @@ mod tests {
295301
let rows = vec![0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1];
296302
let cols = vec![0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5];
297303
let vals = vec![
298-
5.0, 5.0, 5.0, 10.0, 10.0, 10.0, // Row 0: all values identical within groups
299-
1.0, 2.0, 3.0, 5.0, 5.0, 5.0 // Row 1: variance in group 1, none in group 2
304+
5.0, 5.0, 5.0, 10.0, 10.0, 10.0, // Row 0: all values identical within groups
305+
1.0, 2.0, 3.0, 5.0, 5.0, 5.0, // Row 1: variance in group 1, none in group 2
300306
];
301307

302308
let coo = CooMatrix::try_from_triplets(2, 6, rows, cols, vals).unwrap();
@@ -354,4 +360,4 @@ mod tests {
354360
assert_abs_diff_eq!(value.abs(), 15.76, epsilon = 0.1);
355361
}
356362
}
357-
}
363+
}

src/testing/inference/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ where
2121
alternative: Alternative,
2222
) -> anyhow::Result<Vec<TestResult>>;
2323

24+
2425
fn mann_whitney_test(
2526
&self,
2627
group1_indices: &[usize],

src/testing/inference/parametric.rs

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
use rayon::iter::ParallelIterator;
1+
use crate::testing::{Alternative, TTestType, TestResult};
22
use nalgebra_sparse::CsrMatrix;
3+
use rayon::iter::ParallelIterator;
34
use rayon::prelude::IntoParallelIterator;
45
use single_utilities::traits::FloatOpsTS;
56
use statrs::distribution::{ContinuousCDF, StudentsT};
6-
use crate::testing::{Alternative, TTestType, TestResult};
77

88
pub fn t_test_matrix_groups<T>(
99
matrix: &CsrMatrix<T>,
@@ -20,28 +20,32 @@ where
2020
return Err(anyhow::anyhow!("Group indices cannot be empty"));
2121
}
2222

23-
let nrows = matrix.nrows();
23+
let ncols = matrix.ncols();
2424

25-
let results: Vec<_> = (0..nrows)
25+
let results: Vec<_> = (0..ncols)
2626
.into_par_iter()
27-
.map(|row| {
27+
.map(|col| {
28+
// Iterate through genes (columns)
2829
let mut group1_values: Vec<f64> = Vec::with_capacity(group1_indices.len());
2930
let mut group2_values: Vec<f64> = Vec::with_capacity(group2_indices.len());
3031

31-
for &col in group1_indices {
32+
// Get expression of this gene in group 1 cells
33+
for &row in group1_indices {
3234
if let Some(entry) = matrix.get_entry(row, col) {
3335
let value = entry.into_value();
3436
group1_values.push(value.into());
3537
}
3638
}
3739

38-
for &col in group2_indices {
40+
// Get expression of this gene in group 2 cells
41+
for &row in group2_indices {
3942
if let Some(entry) = matrix.get_entry(row, col) {
4043
let value = entry.into_value();
4144
group2_values.push(value.into());
4245
}
4346
}
4447

48+
// Run t-test for this gene
4549
t_test(&group1_values, &group2_values, test_type, alternative)
4650
})
4751
.collect();

0 commit comments

Comments
 (0)