11use nalgebra_sparse:: CsrMatrix ;
2- use num_traits:: { Float , NumCast , FromPrimitive } ;
3- use std:: fmt:: Debug ;
2+ use num_traits:: { Float , FromPrimitive , NumCast } ;
43use single_utilities:: traits:: FloatOpsTS ;
4+ use std:: fmt:: Debug ;
55
66/// Calculate log2 fold change between two groups
77pub fn calculate_log2_fold_change < T > (
88 matrix : & CsrMatrix < T > ,
9- row : usize ,
10- group1_indices : & [ usize ] ,
11- group2_indices : & [ usize ] ,
12- pseudo_count : f64 ,
9+ col : usize ,
10+ group1_indices : & [ usize ] , // Group of interest
11+ group2_indices : & [ usize ] , // Reference group
12+ pseudo_count : f64 , // Small value like 1e-9 or 1.0
1313) -> anyhow:: Result < f64 >
1414where
1515 T : FloatOpsTS ,
@@ -18,30 +18,30 @@ where
1818 return Err ( anyhow:: anyhow!( "Group indices cannot be empty" ) ) ;
1919 }
2020
21- // Calculate mean for group 1
2221 let mut sum1 = 0.0 ;
23- for & col in group1_indices {
22+ let mut count1 = 0 ;
23+ for & row in group1_indices {
2424 if let Some ( entry) = matrix. get_entry ( row, col) {
2525 let value = entry. into_value ( ) ;
26- sum1 += value. to_f64 ( ) . unwrap ( ) ;
26+ sum1 += value. to_f64 ( ) . unwrap_or ( 0.0 ) ;
27+ count1 += 1 ;
2728 }
28- // Missing entries are treated as zero
2929 }
30- let mean1 = sum1 / group1_indices. len ( ) as f64 + pseudo_count;
3130
32- // Calculate mean for group 2
3331 let mut sum2 = 0.0 ;
34- for & col in group2_indices {
32+ let mut count2 = 0 ;
33+ for & row in group2_indices {
3534 if let Some ( entry) = matrix. get_entry ( row, col) {
3635 let value = entry. into_value ( ) ;
37- sum2 += value. to_f64 ( ) . unwrap ( ) ;
36+ sum2 += value. to_f64 ( ) . unwrap_or ( 0.0 ) ;
37+ count2 += 1 ;
3838 }
39- // Missing entries are treated as zero
4039 }
40+
41+ let mean1 = sum1 / group1_indices. len ( ) as f64 + pseudo_count;
4142 let mean2 = sum2 / group2_indices. len ( ) as f64 + pseudo_count;
4243
43- // Calculate log2 fold change
44- let log2_fc = ( mean2 / mean1) . log2 ( ) ;
44+ let log2_fc = ( mean1 / mean2) . log2 ( ) ;
4545
4646 Ok ( log2_fc)
4747}
5757 T : FloatOpsTS ,
5858{
5959 if group1_indices. len ( ) < 2 || group2_indices. len ( ) < 2 {
60- return Err ( anyhow:: anyhow!( "Each group must have at least 2 samples for Cohen's d" ) ) ;
60+ return Err ( anyhow:: anyhow!(
61+ "Each group must have at least 2 samples for Cohen's d"
62+ ) ) ;
6163 }
6264
6365 // Extract values for group 1
@@ -87,13 +89,17 @@ where
8789 let mean2 = group2_values. iter ( ) . sum :: < f64 > ( ) / group2_values. len ( ) as f64 ;
8890
8991 // Calculate variances
90- let var1 = group1_values. iter ( )
92+ let var1 = group1_values
93+ . iter ( )
9194 . map ( |& x| ( x - mean1) . powi ( 2 ) )
92- . sum :: < f64 > ( ) / ( group1_values. len ( ) - 1 ) as f64 ;
95+ . sum :: < f64 > ( )
96+ / ( group1_values. len ( ) - 1 ) as f64 ;
9397
94- let var2 = group2_values. iter ( )
98+ let var2 = group2_values
99+ . iter ( )
95100 . map ( |& x| ( x - mean2) . powi ( 2 ) )
96- . sum :: < f64 > ( ) / ( group2_values. len ( ) - 1 ) as f64 ;
101+ . sum :: < f64 > ( )
102+ / ( group2_values. len ( ) - 1 ) as f64 ;
97103
98104 // Calculate pooled standard deviation
99105 let n1 = group1_values. len ( ) as f64 ;
@@ -148,35 +154,33 @@ mod tests {
148154 // Row 3: Extreme difference
149155 // Row 4: All zeros in group 1
150156 let rows = vec ! [
151- 0 , 0 , 0 , 0 , 0 , 0 , // Row 0: all positions filled
152- 1 , 1 , 1 , 1 , 1 , 1 , // Row 1: all positions filled
153- 2 , 2 , 2 , 2 , 2 , 2 , // Row 2: all positions filled
154- 3 , 3 , 3 , 3 , 3 , 3 , // Row 3: all positions filled
155- 4 , 4 , 4 // Row 4: sparse (no entries for group 1)
157+ 0 , 0 , 0 , 0 , 0 , 0 , // Row 0: all positions filled
158+ 1 , 1 , 1 , 1 , 1 , 1 , // Row 1: all positions filled
159+ 2 , 2 , 2 , 2 , 2 , 2 , // Row 2: all positions filled
160+ 3 , 3 , 3 , 3 , 3 , 3 , // Row 3: all positions filled
161+ 4 , 4 , 4 , // Row 4: sparse (no entries for group 1)
156162 ] ;
157163 let cols = vec ! [
158- 0 , 1 , 2 , 3 , 4 , 5 , // Row 0
159- 0 , 1 , 2 , 3 , 4 , 5 , // Row 1
160- 0 , 1 , 2 , 3 , 4 , 5 , // Row 2
161- 0 , 1 , 2 , 3 , 4 , 5 , // Row 3
162- 3 , 4 , 5 // Row 4 (only group 2 values)
164+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 0
165+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 1
166+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 2
167+ 0 , 1 , 2 , 3 , 4 , 5 , // Row 3
168+ 3 , 4 , 5 , // Row 4 (only group 2 values)
163169 ] ;
164170 let vals = vec ! [
165- 2.0 , 2.2 , 1.8 , 8.0 , 7.5 , 8.5 , // Row 0: ~2 vs ~8 = big difference
166- 5.0 , 5.1 , 4.9 , 5.0 , 5.1 , 4.9 , // Row 1: ~5 vs ~5 = no difference
167- 3.0 , 3.3 , 2.7 , 5.0 , 4.7 , 5.3 , // Row 2: ~3 vs ~5 = moderate
171+ 2.0 , 2.2 , 1.8 , 8.0 , 7.5 , 8.5 , // Row 0: ~2 vs ~8 = big difference
172+ 5.0 , 5.1 , 4.9 , 5.0 , 5.1 , 4.9 , // Row 1: ~5 vs ~5 = no difference
173+ 3.0 , 3.3 , 2.7 , 5.0 , 4.7 , 5.3 , // Row 2: ~3 vs ~5 = moderate
168174 0.1 , 0.2 , 0.1 , 20.0 , 19.0 , 21.0 , // Row 3: ~0.1 vs ~20 = extreme
169- 10.0 , 8.0 , 12.0 // Row 4: 0 vs ~10 = missing data test
175+ 10.0 , 8.0 , 12.0 , // Row 4: 0 vs ~10 = missing data test
170176 ] ;
171177
172178 let coo = CooMatrix :: try_from_triplets (
173- 5 , // 5 rows
174- 6 , // 6 columns
175- rows,
176- cols,
177- vals,
179+ 5 , // 5 rows
180+ 6 , // 6 columns
181+ rows, cols, vals,
178182 )
179- . unwrap ( ) ;
183+ . unwrap ( ) ;
180184
181185 CsrMatrix :: from ( & coo)
182186 }
@@ -186,7 +190,7 @@ mod tests {
186190 let matrix = create_test_matrix ( ) ;
187191 let group1 = vec ! [ 0 , 1 , 2 ] ; // First group indices
188192 let group2 = vec ! [ 3 , 4 , 5 ] ; // Second group indices
189- let pseudo_count = 0.01 ; // Small pseudo count
193+ let pseudo_count = 0.01 ; // Small pseudo count
190194
191195 // Row 0: Clear difference (~2 vs ~8)
192196 let fc0 = calculate_log2_fold_change ( & matrix, 0 , & group1, & group2, pseudo_count) . unwrap ( ) ;
@@ -281,8 +285,10 @@ mod tests {
281285 // This will fail if sample sizes are out of range, but the test is to show
282286 // that larger sample sizes bring Hedge's g closer to Cohen's d
283287 if matrix. ncols ( ) >= 16 {
284- let d_large = calculate_cohens_d ( & matrix, 0 , & large_group1, & large_group2) . unwrap_or ( d0) ;
285- let g_large = calculate_hedges_g ( & matrix, 0 , & large_group1, & large_group2) . unwrap_or ( g0) ;
288+ let d_large =
289+ calculate_cohens_d ( & matrix, 0 , & large_group1, & large_group2) . unwrap_or ( d0) ;
290+ let g_large =
291+ calculate_hedges_g ( & matrix, 0 , & large_group1, & large_group2) . unwrap_or ( g0) ;
286292
287293 // With more samples, g should be closer to d
288294 assert ! ( g_large / d_large > g0 / d0) ;
@@ -295,8 +301,8 @@ mod tests {
295301 let rows = vec ! [ 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 1 , 1 ] ;
296302 let cols = vec ! [ 0 , 1 , 2 , 3 , 4 , 5 , 0 , 1 , 2 , 3 , 4 , 5 ] ;
297303 let vals = vec ! [
298- 5.0 , 5.0 , 5.0 , 10.0 , 10.0 , 10.0 , // Row 0: all values identical within groups
299- 1.0 , 2.0 , 3.0 , 5.0 , 5.0 , 5.0 // Row 1: variance in group 1, none in group 2
304+ 5.0 , 5.0 , 5.0 , 10.0 , 10.0 , 10.0 , // Row 0: all values identical within groups
305+ 1.0 , 2.0 , 3.0 , 5.0 , 5.0 , 5.0 , // Row 1: variance in group 1, none in group 2
300306 ] ;
301307
302308 let coo = CooMatrix :: try_from_triplets ( 2 , 6 , rows, cols, vals) . unwrap ( ) ;
@@ -354,4 +360,4 @@ mod tests {
354360 assert_abs_diff_eq ! ( value. abs( ) , 15.76 , epsilon = 0.1 ) ;
355361 }
356362 }
357- }
363+ }
0 commit comments