Interior image finder.pl
Jump to navigation
Jump to search
#!/usr/bin/perl
#
# Interior image finder.
#
# Assumes that you already went through and cleaned out the completely
# blank slides.
# The point list csv and the images have to be in the same directory; both
# are read from the current working directory.
#
# This script operates on a directory of Zeiss created microscope images
# and a Zeiss point list describing their relative positions. It classifies
# every image by the number of neighboring tiles it has, so that the
# outermost (edge) images can be excluded from wndchrm processing.
#
# The format of the .csv file read is as follows:
#
#   "Slide"
#   "Name","Width","Height","Description"
#   "Slide2",76000,24000,"Slide - 76 mm x 24 mm (3 x 1)"
#
#
#   "Positions"
#   "Comments","PositionX","PositionY","PositionZ","Color","Classification"
#   "1",26638,48717,23995.750,"blue","blue"
#   "2",26638,48919,23995.750,"blue","blue"
#   ...
#
# Command line options:
#   --slide=ID    slide identifier used to build the csv and image file names
#                 (default "3883")
#   --16bit=VAL   a true VAL selects the "_16bit" csv / "16bit_" image names
#   --n=COUNT     print the file names belonging to COUNT randomly chosen
#                 interior images (border images if there are no interior
#                 ones); COUNT is clamped to the number available
#
# Output: the csv file name is always printed first. Everything else is
# either the --n file list or debug output enabled by the $DEBUGn flags.

use strict;
use warnings;
use Getopt::Long;
use List::Util qw(shuffle);

# Returns the indices of every point-list entry lying within one stage step
# ($max_dx, $max_dy) of image $index, excluding the image itself.
#   $points      reference to the master point list (array of hash refs,
#                possibly with undefined holes)
#   $index       index of the image whose neighbors are wanted
#   $skip_edges  when true, entries already flagged {Edge} are ignored
#   $debug       when true, print a trace of every comparison
sub find_neighbors {
    my ( $points, $index, $max_dx, $max_dy, $skip_edges, $debug ) = @_;

    my $this_image = $points->[$index];
    my @neighbors;

    # "0 .. $#{$points}" includes the last index; the last image is a
    # legitimate neighbor candidate too.
    for my $j ( 0 .. $#{$points} ) {
        my $that_image = $points->[$j];
        next if !defined $that_image;
        next if $skip_edges && defined $that_image->{Edge};

        if ( $j == $index ) {
            print "\t\t\tWon't compare Image $index to itself.\n" if $debug;
            next;
        }
        print "\t\t\tComparing images $index and $j.\n" if $debug;

        my $XDist = abs( $this_image->{XPos} - $that_image->{XPos} );
        my $YDist = abs( $this_image->{YPos} - $that_image->{YPos} );
        print "\t\t\t\tX Dist: $XDist, Y Dist: $YDist\n" if $debug;

        push @neighbors, $j if $XDist <= $max_dx && $YDist <= $max_dy;
    }

    return @neighbors;
}

my $SlideNumber   = "3883";
my $_16bit        = 0;
my $select_number = undef;

GetOptions(
    "slide=s" => \$SlideNumber,    # =s implies the value is a string
    "16bit=s" => \$_16bit,
    "n=i"     => \$select_number
);

my $CSVFileName = "slide" . $SlideNumber . ( $_16bit ? "_16bit" : "" ) . ".csv";
print $CSVFileName . "\n";

my $DEBUG0 = 0;

# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and the handle is scoped to this script.
open( my $csv_fh, '<', $CSVFileName )
    or die "Cannot open $CSVFileName: $!";

# All information is stored in the @point_list master array.
# @point_list is an array of references to hashes with key-value pairs
# associated with XPos, YPos, ZPos, Color, FileExists, FileList, and
# NeighborList. The array is indexed by image number, so it can contain
# undefined holes (index 0 in particular).
my @point_list;
my %color_hash;          # color name => number of points with that color
my $line_num         = 0;
my $total_line_count = 0;
my $total_file_count = 0;

while ( my $line = <$csv_fh> ) {
    $line_num++;
    if ($DEBUG0) { print $line_num . ": " . $line; }

    # Read in the first seven lines and disregard, as they are comments
    next if $line_num <= 7;

    if ( $line =~ /"(\d+)",(\d+\.?\d*),(\d+\.?\d*),(\d+\.?\d*),"(\w+)"/ ) {
        my ( $image_num, $x, $y, $z, $color ) = ( $1, $2, $3, $4, $5 );
        if ($DEBUG0) {
            print "matched first $image_num; second $x; third $y; fourth $z; fifth $color\n";
        }
        $total_line_count++;
        $point_list[$image_num] =
            { XPos => $x, YPos => $y, ZPos => $z, Color => $color };
        $color_hash{$color}++;
    }
}
close($csv_fh) or die "Cannot close $CSVFileName: $!";

if ($DEBUG0) {
    for my $point (@point_list) {
        # Holes in the list are printed as empty records without being
        # turned into real entries.
        my %record = defined $point ? %{$point} : ();
        print "{";
        for my $key ( keys %record ) { print "$key=$record{$key} "; }
        print "}\n";
    }
}

# Read in the tiffs from the current directory and cross-reference that list
# against the csv list we just imported. Usually, tiffs that contain a
# completely blank image are deleted immediately after the imaging run.
# Therefore, there may not be an actual file for each line in the point list.
my $DEBUG1 = 0;
my $prefix = "monkey_muscle_" . ( $_16bit ? "16bit_" : "" ) . $SlideNumber;
if ($DEBUG1) { print "Looking for files that match the prefix $prefix...\n"; }

for my $filename ( glob('*') ) {

    # files can be in the following formats:
    #   H_monkey_muscle_3883_92.tif
    #   monkey_muscle_3883_104.tif
    #   E_monkey_muscle_3883_57.tif
    # Also rarely :
    #   monkey_muscle_171A_0001_69.tif
    # a case which we'll ignore for now
    #
    # \Q...\E: the prefix contains the user supplied slide id and must be
    # matched literally, not as a regular expression.
    next if $filename !~ /\Q${prefix}\E_(\d+)/;
    my $image_num = $1;

    # A file whose image number has no csv record cannot be positioned.
    # Skipping it also keeps the lookup from creating an empty,
    # coordinate-less entry in @point_list.
    next if !defined $point_list[$image_num];

    if ($DEBUG1) { print "\tFile $filename corresponts with Image $image_num.\n"; }

    my $image = $point_list[$image_num];
    if ( !defined $image->{FileExists} ) {
        $image->{FileExists} = 1;

        # If the deconvolved images exist in the directory side by side
        # just count them as one.
        $total_file_count++;
    }
    push @{ $image->{FileList} }, $filename;
}

# A non-sequential list (image number => 1) of images that are in the csv
# but have no file; filled out in the loop below.
my %file_doesnt_exist_list;

my $DEBUG2 = 0;
if ($DEBUG2) {
    print "Begin iterating over the colors: " . join( ", ", %color_hash ) . "\n";
}

# A critical piece of information is the delta x and delta y values that the
# microscope moves to go to the next image.
my $deltaX = 282;
my $deltaY = 202;

# FIRST ROUND: for every image that has a file, collect all point-list
# entries within one stage step of it.
# Iterate over each color, because each color represents a contiguous
# slab of specimen on the slide
for my $color ( keys %color_hash ) {
    if ($DEBUG2) { print "\tFor Color: " . $color . "\n"; }

    for my $i ( 0 .. $#point_list ) {
        my $this_image = $point_list[$i];
        next if !defined $this_image;

        if ( $this_image->{Color} ne $color ) {
            if ($DEBUG2) {
                print "\t\tImage $i is not $color, but is $this_image->{Color}. Skipping for now...\n";
            }
            next;
        }
        if ( !defined $this_image->{FileExists} ) {
            if ($DEBUG2) {
                print "\t\tImage $i does not have files associated with it. Skipping...\n";
            }
            $file_doesnt_exist_list{$i} = 1;
            next;
        }
        if ($DEBUG2) {
            print "\t\tImage $i is $color. Proceeding with proximity analysis.\n";
        }

        # Always store a list, even an empty one, so that an isolated image
        # (zero neighbors) is still classified as an edge image below.
        $this_image->{NeighborList} =
            [ find_neighbors( \@point_list, $i, $deltaX, $deltaY, 0, $DEBUG2 ) ];

        if ($DEBUG2) {
            print "\t\t\tNeighbor List for Image $i: "
                . join( " & ", @{ $this_image->{NeighborList} } ) . "\n";
        }
    }    # end iterating over @point_list
}    # end iterating over %color_hash

my $DEBUG3 = 0;
if ($DEBUG3) { print "First round of image neighbor analysis:\n"; }

# Exactly 8 neighbors => "Border" (fully surrounded in this round);
# fewer than 8 => "Edge".
# NOTE(review): an image with more than 8 neighbors receives neither flag;
# that is the original behavior and is kept as is.
for my $i ( 0 .. $#point_list ) {
    my $image = $point_list[$i];
    next if !defined $image || !defined $image->{NeighborList};

    my $neighbor_count = scalar @{ $image->{NeighborList} };
    if ($DEBUG3) {
        print "\tImage $i neighbors these images "
            . join( " & ", @{ $image->{NeighborList} } ) . "\n";
        print "Number of neighbors Image $i has: " . $neighbor_count . "\n";
    }
    if ( $neighbor_count == 8 ) { $image->{Border} = 1; }
    if ( $neighbor_count < 8 )  { $image->{Edge}   = 1; }
}

my $DEBUG4 = 0;
my $round2 = 1;

# Image numbers are numeric, so sort them numerically.
my @file_doesnt_exist_array =
    sort { $a <=> $b } keys %file_doesnt_exist_list;
my @border_images;
my @interior_images;
my @exterior_images;

if ( !$round2 ) {

    # If there isn't a second round of edge analysis, just go straight to
    # results: first-round "Border" images are reported as interior.
    for my $i ( 0 .. $#point_list ) {
        my $image = $point_list[$i];
        next if !defined $image;
        push( @interior_images, $i ) if defined $image->{Border};
        push( @exterior_images, $i ) if defined $image->{Edge};
    }
}
else {
    if ($DEBUG4) { print "SECOND ROUND OF INTERIOR ANALYSIS...\n"; }

    # FOR THIS ROUND, IMAGES LABELLED "EDGE" IN THE LAST ROUND ARE IGNORED,
    # BOTH AS SUBJECTS AND AS NEIGHBORS. AN IMAGE THAT STILL HAS 8 NEIGHBORS
    # IN THIS REDUCED POOL GETS THE LABEL "INTERIOR" INSTEAD OF "BORDER".
    # Iterate over each color, because each color represents a contiguous
    # slab of specimen on the slide
    for my $color ( keys %color_hash ) {
        if ($DEBUG4) { print "\tFor Color: " . $color . "\n"; }

        for my $i ( 0 .. $#point_list ) {
            my $this_image = $point_list[$i];
            next if !defined $this_image;

            if ( $this_image->{Color} ne $color ) {
                if ($DEBUG4) {
                    print "\t\tImage $i is not $color, but is $this_image->{Color}. Skipping for now...\n";
                }
                next;
            }
            if ( !defined $this_image->{FileExists} ) {
                if ($DEBUG4) {
                    print "\t\tImage $i does not have files associated with it. Skipping...\n";
                }
                next;
            }
            if ( defined $this_image->{Edge} ) {
                if ($DEBUG4) {
                    print "\t\tImage $i was determined last round to be an edge image. Skipping...\n";
                }
                next;
            }
            if ($DEBUG4) {
                print "\t\tImage $i is $color and is a not an edge image. Proceeding with proximity analysis.\n";
            }

            $this_image->{InteriorNeighborList} =
                [ find_neighbors( \@point_list, $i, $deltaX, $deltaY, 1, $DEBUG4 ) ];

            if ($DEBUG4) {
                print "\t\t\tNeighbor List for Image $i: "
                    . join( " & ", @{ $this_image->{InteriorNeighborList} } ) . "\n";
            }
        }    # end iterating over @point_list
    }    # end iterating over %color_hash

    my $DEBUG5 = 0;
    if ($DEBUG5) { print "Second round of image neighbor analysis:\n"; }

    for my $i ( 0 .. $#point_list ) {
        my $image = $point_list[$i];
        next if !defined $image || !defined $image->{InteriorNeighborList};

        my $neighbor_count = scalar @{ $image->{InteriorNeighborList} };
        if ($DEBUG5) {
            print "\tImage $i neighbors these images "
                . join( " & ", @{ $image->{InteriorNeighborList} } ) . "\n";
            print "\tNumber of neighbors Image $i has: " . $neighbor_count . "\n";
        }
        if ( $neighbor_count == 8 ) {
            $image->{Interior} = 1;
            $image->{Border}   = undef;    # promoted: no longer a border image
        }
    }

    # Collect the final classification of every image.
    for my $i ( 0 .. $#point_list ) {
        my $image = $point_list[$i];
        next if !defined $image;
        push( @interior_images, $i ) if defined $image->{Interior};
        push( @border_images,   $i ) if defined $image->{Border};
        push( @exterior_images, $i ) if defined $image->{Edge};
    }
}

my $DEBUG6 = 0;
if ($DEBUG6) {
    print "The following images do not have a file associated with them:\n";
    foreach (@file_doesnt_exist_array) { print $_ . ", "; }
    print "\n\n";

    print "List of exterior images: \n";
    foreach ( sort { $a <=> $b } @exterior_images ) { print $_ . ", "; }
    print "\n\n";

    if ($round2) {
        print "List of border images: \n";
        foreach ( sort { $a <=> $b } @border_images ) { print $_ . ", "; }
        print "\n\n";
    }

    print "List of interior images: \n";
    foreach ( sort { $a <=> $b } @interior_images ) { print $_ . ", "; }
    print "\n\n";
}

my $DEBUG7 = 0;
if ($DEBUG7) {
    print "Statistics: \n";
    print "Total images mentioned in csv: $total_line_count \n";
    print "Total number of corresponding files: $total_file_count \n";
    print "Total number of images without files: " . scalar(@file_doesnt_exist_array) . "\n";
    print "Total number of exterior images: " . scalar(@exterior_images) . "\n";
    print( "Total number of border images: " . scalar(@border_images) . "\n" ) if ($round2);
    print "Total number of interior images: " . scalar(@interior_images) . "\n";
}

if ( defined $select_number ) {
    my $DEBUG8 = 0;

    if ( !@interior_images ) {

        # If there are no interior images, go to the border images
        @interior_images = @border_images;
    }

    # Clamp the request to what is available. The element count is
    # scalar(@interior_images); $#interior_images would be one too few.
    if ( $select_number > scalar(@interior_images) ) {
        $select_number = scalar(@interior_images);
    }

    if ($DEBUG8) {
        print "Original order: \n";
        for my $i ( 0 .. $#interior_images ) {
            print "($i -> $interior_images[$i]) ";
        }
        print "\n\n";
    }

    # Random permutation; unlike a hand-rolled countdown loop this is also
    # safe when the list is empty.
    @interior_images = shuffle(@interior_images);

    if ($DEBUG8) {
        print "New order: \n";
        for my $i ( 0 .. $#interior_images ) {
            print "($i -> $interior_images[$i]) ";
        }
        print "\n\n";
    }

    # Output a list of files associated with the first $select_number images
    # in the randomized array
    for my $i ( 0 .. $select_number - 1 ) {
        my $files = $point_list[ $interior_images[$i] ]->{FileList};
        next if !defined $files;
        print $_ . "\n" for @{$files};
    }
}

#1;