Mds.pl
Jump to navigation
Jump to search
#!/usr/bin/perl use strict; use warnings; sub CalculateMDS(\@); sub PrintOutStatistics(\@); sub main(); #run program &main; ############################################################ sub main { open IN, "/Users/chris/projects/John_gene_MDS/average_class_probabilities.tsv" or die; my $linecount = 0; my @mds_result_array; my $line; my @column_averages = (); # read in file line by line while( $line = <IN>) { ++$linecount; # skip introductory lines if( $linecount < 5 ){ next; } # strip off the carriage return at the end of the line chomp $line; my @columns = split( "\t", $line ); if( $#columns < 16 ) { die "incorrect number of columns in avg class prob file. check file and try again.\n"; } # column 0 is a label, get rid of it shift @columns; # add each column to a master list for( my $i = 0; $i <= $#columns; $i++ ) { $column_averages[$i] += $columns[$i]; } # calculate the morphological divergence score and store it in an array push @mds_result_array, &CalculateMDS( \@columns ); } close IN; &PrintOutStatistics( \@mds_result_array ); for(my $i = 0; $i <= $#column_averages; $i++) { $column_averages[$i] /= $linecount - 5; } print "method 2:\n"; &CalculateMDS( \@column_averages ); return 1; } ################################################################### sub CalculateMDS(\@) { # adding doxycyclin keeps ESC in embryonic state, therefore dox+ is gene OFF my @columns = @{ $_[0] }; my @gene1_OFF = (); # line 1 - gene1+dox my @gene1_ON = (); # line 2 - gene1-dox my @gene2_OFF = (); # line 3 - gene2+dox my @gene2_ON = (); #line 4 - gene2-dox $gene1_OFF[0] = $columns[0]; $gene1_OFF[1] = $columns[1]; $gene1_OFF[2] = $columns[2]; $gene1_OFF[3] = $columns[3]; $gene1_ON[0] = $columns[4]; $gene1_ON[1] = $columns[5]; $gene1_ON[2] = $columns[6]; $gene1_ON[3] = $columns[7]; $gene2_OFF[0] = $columns[8]; $gene2_OFF[1] = $columns[9]; $gene2_OFF[2] = $columns[10]; $gene2_OFF[3] = $columns[11]; $gene2_ON[0] = $columns[12]; $gene2_ON[1] = $columns[13]; $gene2_ON[2] = $columns[14]; $gene2_ON[3] = $columns[15]; # vectorON is line 4 - line 2 (gene2-dox - gene1-dox) my @vectorON = ( ($gene2_ON[0]-$gene1_ON[0]), ($gene2_ON[1]-$gene1_ON[1]), ($gene2_ON[2]-$gene1_ON[2]), ($gene2_ON[3]-$gene1_ON[3]) ); # vectorOFF is line 3 - line 1 (gene2+dox - gene1+dox) my @vectorOFF = ( ($gene2_OFF[0]-$gene1_OFF[0]), ($gene2_OFF[1]-$gene1_OFF[1]), ($gene2_OFF[2]-$gene1_OFF[2]), ($gene2_OFF[3]-$gene1_OFF[3]) ); my @cv = ( ($vectorON[0]-$vectorOFF[0]), ($vectorON[1]-$vectorOFF[1]), ($vectorON[2]-$vectorOFF[2]), ($vectorON[3]-$vectorOFF[3]) ); print "\tresult vector (ON-OFF): "; foreach (@cv) { print $_ . ", "; } print "\n"; my $correctedvector_MAG = sqrt( $cv[0]**2 + $cv[1]**2 + $cv[2]**2 + $cv[3]**2 ); print "\tmagnitude of result vector: " . $correctedvector_MAG . "\n"; return $correctedvector_MAG; } ################################################################### sub PrintOutStatistics(\@) { my @result_array = @{ $_[0] }; my $mean = 0; foreach ( @result_array ) { $mean += $_; } $mean /= ($#result_array +1 ); my $variance = 0; foreach ( @result_array ) { $variance += ($_ - $mean)**2; } $variance /= ($#result_array+1); my $std_dev = sqrt($variance); my $std_error = $std_dev / sqrt( $#result_array+1 ); # z-score for 95% interval is ~ 1.96 my $confidence_interval = 1.95966 * $std_error; print "\n===============================\n"; print "count\tmean\tvar\tstd dev\tstd err\tconfidence interval\n"; printf( "%d\t%0.4f\t%0.4f\t%0.4f\t%0.4f\t%0.4f\n", ($#result_array +1 ), $mean, $variance, $std_dev, $std_error, $confidence_interval); } __END__