Create master dend file
Jump to navigation
Jump to search
#!/usr/bin/perl
#
# Phase 3.pl
# Custom script written for John Delaney's pairwise wndchrm distance gene experiment
#
# Pseudocode:
# 1. Scoop up all the dendfiles
# 2. Parse the filename to ascertain which pairwise experiment it is
# 3. Parse the file itself for the information
# 4. Perform the calculation, and store in an orderly way
# 5. After all files have been parsed, generate master dendfile from data
#
# Input:  every file under the current directory whose name contains
#         "dend_file" and follows "..._<gene1>-D_VS_<gene2>-D_dend_file.txt".
# Output: master_dend_file.txt in the current directory: a count line followed
#         by one tab-separated row of pairwise distances per gene.

use strict;
use warnings;

my $DEBUG0      = 1;
my $MASTER_FILE = 'master_dend_file.txt';

# The command string is constant (nothing is interpolated into the shell).
my @dendfile_list = split( "\n", `find . -name "*dend_file*"` );

# Two dimensional hash stores the names and pairwise distances:
# $class_info{geneA}{geneB} == $class_info{geneB}{geneA}
my %class_info;

for my $path (@dendfile_list) {

    # Get rid of path
    ( my $dend_file = $path ) =~ s{.+/}{};

    # Our own output also matches the find pattern; skip it so the script
    # can be re-run in the same directory without dying on the name check.
    next if $dend_file eq $MASTER_FILE;

    # Grab the captures straight away instead of relying on $1/$2 surviving
    # later statements.
    my ( $gene1, $gene2 ) = $dend_file =~ /_(.+)-D_VS_(.+)-D_dend_file\.txt/
        or die "$dend_file doesn't follow naming convention of \"...<gene1>-D_VS_<gene2>-D\"\n";

    print "dend file $dend_file has genes $gene1 and $gene2.\n" if $DEBUG0;

    my $distance1;
    my $distance2;

    # Three-arg open: the path comes from the filesystem and must never be
    # interpreted as an open mode.
    open my $dend_fh, '<', $path or die "Can't open $path: $!\n";

    my $line_number = 0;
    while ( my $line = <$dend_fh> ) {
        $line_number++;

        # Line 1 is the entry count (we know it's a "4"); nothing to read there.
        next if $line_number == 1;

        if ( $line_number == 2 ) {

            # Arrangement should be Genename #.## #.## #.## #.##
            # Capture the third number (fourth whitespace-separated token)
            if ( $line =~ /\S+\s+\S+\s+\S+\s+(\S+)\s+\S+/ ) {
                $distance1 = $1;
                print "\tPulled $distance1 from row one.\n" if $DEBUG0;
            }
            else {
                die "Wasn't able to pull the third number from row 1 of dendrogram.\n";
            }
        }
        elsif ( $line_number == 3 ) {

            # Here we capture the fourth number (fifth token)
            if ( $line =~ /\S+\s+\S+\s+\S+\s+\S+\s+(\S+)/ ) {
                $distance2 = $1;
                print "\tPulled $distance2 from row two.\n" if $DEBUG0;
            }
            else {
                die "Wasn't able to pull the fourth number from row 2 of dendrogram.\n";
            }

            # Nothing past the second matrix row is used.
            last;
        }
    }
    close $dend_fh;

    # A truncated file would otherwise silently contribute a distance of 0.
    defined $distance1 && defined $distance2
        or die "$dend_file is too short: expected a count line and at least two matrix rows.\n";

    # Combine the two extracted values as a Euclidean norm and store the
    # result symmetrically so either gene can be used as the row key.
    my $distance = sqrt( $distance1 * $distance1 + $distance2 * $distance2 );
    $class_info{$gene1}{$gene2} = $distance;
    $class_info{$gene2}{$gene1} = $distance;
}

open my $out_fh, '>', $MASTER_FILE or die "Can't open output file: $!\n";

my @master_gene_list = sort keys %class_info;

# Header line is the NUMBER of genes; $#master_gene_list would be one short.
print {$out_fh} scalar(@master_gene_list) . "\n";

foreach my $row (@master_gene_list) {
    print {$out_fh} $row . "\t";
    foreach my $col (@master_gene_list) {

        # Pairs that were never measured (including the diagonal) print as 0.000
        my $value = defined $class_info{$row}{$col} ? $class_info{$row}{$col} : 0;
        printf {$out_fh} "%0.3f\t", $value;
    }
    print {$out_fh} "\n";
}

# Buffered write errors only surface at close time.
close $out_fh or die "Can't close output file: $!\n";