# Interior image finder.pl
#
# From Colettapedia
# Jump to navigation Jump to search
#!/usr/bin/perl


# Assumes that you already went through and cleaned out the completely
# blank slides.

# The point list csv and the images have to be in the same directory.

# This script operates on a directory of Zeiss created microscope images
# and a Zeiss point list describing their relative positions, and returns a list
# of images that are the outermost edge images.

# The purpose of this script is to determine which images are the outermost,
# so that they can be excluded from wndchrm processing.

# The format of the .csv file read is as follows:
#  "Slide"
#  "Name","Width","Height","Description"
#  "Slide2",76000,24000,"Slide - 76 mm x 24 mm (3 x 1)"
#  
#  
#  "Positions"
#  "Comments","PositionX","PositionY","PositionZ","Color","Classification"
#  "1",26638,48717,23995.750,"blue","blue"
#  "2",26638,48919,23995.750,"blue","blue"
#  ...
#  ...

use strict;
use warnings;

use Getopt::Long;
# Command-line options:
#   --slide=ID   slide identifier used in the CSV and image file names
#                (default "3883")
#   --16bit=VAL  any true value selects the "_16bit" CSV file and the
#                "16bit_" image files
#   --n=COUNT    print the files of COUNT randomly chosen images; when this
#                option is omitted no file list is printed
my $SlideNumber = "3883";
my $_16bit = 0;
my $select_number = undef;

# GetOptions returns false when it meets an unknown or malformed option.
# Stop there instead of silently running with the default values.
GetOptions( "slide=s" => \$SlideNumber,    # =s implies the value is a string
            "16bit=s" => \$_16bit,
            "n=i"     => \$select_number ) # =i implies the value is an integer
  or die "Usage: $0 [--slide=ID] [--16bit=VAL] [--n=COUNT]\n";

# The point list is named slide<ID>.csv, or slide<ID>_16bit.csv in 16 bit mode.
my $CSVFileName = "slide" . $SlideNumber . ( $_16bit ? "_16bit" : "" ) . ".csv";
print $CSVFileName . "\n";

my $DEBUG0 = 0;

# Open the point list read-only. The three-argument form with a lexical handle
# keeps characters in the file name from being interpreted as an open mode,
# and the error message names the file that could not be opened.
open( my $csv_fh, '<', $CSVFileName )
  or die "Cannot open $CSVFileName: $!";

# All information is stored in the @point_list master array.
# @point_list is an array of references to hashes with key-value pairs
# associated with XPos, YPos, ZPos, Color, FileExists, FileList, and NeighborList.
# The array index is the image number read from the first CSV column, so
# slots that have no matching CSV row stay undefined.
my @point_list;

my %color_hash;            # color name => number of points with that color
my $line_num = 0;          # 1-based number of the CSV line being read
my $href;                  # loop variable of the debug dump that follows
my $total_line_count = 0;  # number of position rows parsed from the CSV
my $total_file_count = 0;  # number of points that have at least one image file

while ( my $line = <$csv_fh> )
{
  $line_num++;

  if( $DEBUG0 ) {
    print $line_num . ":   " . $line;
  }

  # Read in the first seven lines and disregard them: they hold the slide
  # description and the column headers, not positions.
  next if $line_num <= 7;

  # A position row looks like: "1",26638,48717,23995.750,"blue","blue"
  my ($index, $x_pos, $y_pos, $z_pos, $color) =
    $line =~ /"(\d+)",(\d+\.?\d*),(\d+\.?\d*),(\d+\.?\d*),"(\w+)"/;
  next if !defined $index;   # not a position row

  if( $DEBUG0 ) {
    print "matched first $index; second $x_pos; third $y_pos; fourth $z_pos; fifth $color\n";
  }
  $total_line_count++;
  $point_list[$index] = {
    XPos  => $x_pos,
    YPos  => $y_pos,
    ZPos  => $z_pos,
    Color => $color
  };
  $color_hash{$color}++;
}

close( $csv_fh ) or die "Cannot close $CSVFileName: $!";

# Debug aid: print every parsed point as "{key=value key=value ...}".
if( $DEBUG0 ) {
  for my $point (@point_list) {
    # Slots with no CSV row are undefined (e.g. index 0 when the CSV numbers
    # its images from 1). Dereferencing them would either abort under
    # "use strict" or vivify an empty hash into @point_list, so skip them.
    next if !defined $point;

    print "{";
    for my $key ( sort keys %$point ) {
      print "$key=$point->{$key} ";
    }
    print "}\n";
  }

}

# Read in the tiffs from the current directory and cross-reference that list against
# the csv list we just imported. Usually, tiffs that contain a completely blank image
# are deleted immediately after the imaging run. Therefore, there may not be an actual
# file for each line in the point list.

my $DEBUG1 = 0;

# Every image file name of this slide contains this prefix, followed by an
# underscore and the image number.
my $prefix = "monkey_muscle_" . ($_16bit ? "16bit_" : "" ) . $SlideNumber;

if( $DEBUG1 ) {
  print "Looking for files that match the prefix $prefix...\n";
}

# Every entry of the current directory
my @files = <*>;

foreach my $filename (@files) {
  # files can be in the following formats:
  # H_monkey_muscle_3883_92.tif
  # monkey_muscle_3883_104.tif
  # E_monkey_muscle_3883_57.tif
  # Also rarely :
  # monkey_muscle_171A_0001_69.tif
  # a case which we'll ignore for now

  # \Q...\E makes the slide identifier match literally, even if it contains
  # characters that are special in a regular expression.
  next unless $filename =~ /\Q$prefix\E_(\d+)/;
  my $image_index = $1;

  if( $DEBUG1 ) {
    print "\tFile $filename corresponts with Image $image_index.\n";
  }

  # A file whose number has no row in the point list has no known position.
  # Skip it instead of creating an empty, position-less entry in @point_list.
  my $point = $point_list[$image_index];
  if( !defined $point || !defined $point->{XPos} ) {
    if( $DEBUG1 ) {
      print "\tImage $image_index is not in the point list. Skipping $filename...\n";
    }
    next;
  }

  if( !defined $point->{FileExists} ) {
    $point->{FileExists} = 1;
    # If the deconvolved images exist in the directory side by side
    # just count them as one.
    $total_file_count++;
  }
  push @{ $point->{FileList} }, $filename;
}
# Image numbers that are listed in the point list but have no file on disk.
# Kept as a hash because the numbers are not sequential; it is filled in by
# the loop below.
my %file_doesnt_exist_list;
my $DEBUG2 = 0;

if( $DEBUG2 ) {
  print "Begin iterating over the colors: " . join(", ", sort keys %color_hash) . "\n";
}

# A critical piece of information is the delta x and delta y values that the
# microscope moves to go to the next image.
my $deltaX = 282;
my $deltaY = 202;
# my ($maxX, $maxY);
my ($XDist, $YDist);
my ($i, $j);
my ($this_image, $that_image);

# First round: for every image that has a file, record in NeighborList the
# numbers of all points that lie within one microscope step in both X and Y.
#
# NOTE(review): every point in the CSV counts as a neighbor here, whether or
# not its own image file exists and whatever its color - confirm that this is
# intended.
#
# Iterate over each color, because each color represents a contiguous
# slab of specimen on the slide
for my $color ( keys %color_hash ) {
  if( $DEBUG2 ) {
    print "\tFor Color: " . $color . "\n";
  }
  # $#point_list is the LAST valid index, so the bound has to be inclusive;
  # otherwise the highest-numbered image is never analysed.
  for( $i = 0; $i <= $#point_list; $i++ ) {
    $this_image = $point_list[$i];
    # Skip slots that have no CSV row behind them
    if( !defined $this_image || !defined $this_image->{Color} ) {
      next;
    }
    if( $this_image->{Color} ne $color ) {
      if( $DEBUG2 ) {
        print "\t\tImage $i is not $color, but is $this_image->{Color}. Skipping for now...\n";
      }
      next;
    }
    if( !defined( $this_image->{FileExists} ) ) {
      if( $DEBUG2 ) {
        print "\t\tImage $i does not have files associated with it. Skipping...\n";
      }
      $file_doesnt_exist_list{$i} = 1;
      next;
    }
    if( $DEBUG2 ) {
      print "\t\tImage $i is $color. Proceeding with proximity analysis.\n";
    }
    for( $j = 0; $j <= $#point_list; $j++ ) {
      $that_image = $point_list[$j];
      # Only points with a known position can be compared
      if( !defined $that_image || !defined $that_image->{XPos} ) {
        next;
      }
      if( $i == $j ) {
        if( $DEBUG2 ) {
          print "\t\t\tWon't compare Image $i to itself.\n";
        }
        next;
      }
      if( $DEBUG2 ) {
        print "\t\t\tComparing images $i and $j.\n";
      }
      # Assumes all coordinates are positive
      $XDist = abs( $this_image->{XPos} - $that_image->{XPos} );
      $YDist = abs( $this_image->{YPos} - $that_image->{YPos} );
      if( $DEBUG2 ) {
        print "\t\t\t\tX Dist: $XDist, Y Dist: $YDist\n";
      }
      if( $XDist <= $deltaX && $YDist <= $deltaY ) {
        push @{ $this_image->{NeighborList} }, $j;
      }
    }
    if( $DEBUG2 ) {
      # An image without any neighbor has no NeighborList at all
      print "\t\t\tNeighbor List for Image $i: " .
        join(   " & ",   @{ $this_image->{NeighborList} || [] }   ) . "\n";
    }
  } # end iterating over @point_list
} # end iterating over %color_hash

my $DEBUG3 = 0;

if( $DEBUG3 ) {
  print "First round of image neighbor analysis:\n";
}

# Label every image that received a neighbor list in the first round:
#   exactly 8 neighbors    -> Border (surrounded on all sides)
#   fewer than 8 neighbors -> Edge
# NOTE(review): an image with no neighbor at all has no NeighborList and is
# therefore given neither label - confirm that this is intended.
#
# $#point_list is the LAST valid index, so the bound has to be inclusive;
# otherwise the highest-numbered image is never labelled.
for( $i = 0; $i <= $#point_list; $i++ ) {
  # Test the slot itself first: asking an undefined slot for a hash key
  # would create an empty hash there.
  next if !defined $point_list[$i];

  my $neighbors = $point_list[$i]->{NeighborList};
  next if !defined $neighbors;

  my $neighbor_count = scalar @{ $neighbors };
  if( $DEBUG3 ) {
    print "\tImage $i neighbors these images " . join( " & ", @{ $neighbors } ) . "\n";
    print "Number of neighbors Image $i has: " . $neighbor_count . "\n" ;
  }
  if( $neighbor_count == 8 ) {
    $point_list[$i]->{Border} = 1;
  }
  elsif( $neighbor_count < 8 ) {
    $point_list[$i]->{Edge} = 1;
  }
}


my $DEBUG4 = 0;
my $round2 = 1;                 # 0 = report the first-round labels only
my @file_doesnt_exist_array;    # filled in once the analysis is complete
my @border_images;
my @interior_images;
my @exterior_images;

# Note on every loop below: $#point_list is the LAST valid index, so the
# bounds are inclusive; otherwise the highest-numbered image is left out.

if( ! $round2 ) {
  # If there isn't a second round of edge analysis, just go straight to results:
  # images with a full set of 8 neighbors are reported as interior.
  for( $i = 0; $i <= $#point_list; $i++ ) {
    next if !defined $point_list[$i];
    push( @interior_images, $i) if( defined $point_list[$i]->{Border} );
    push( @exterior_images, $i) if( defined $point_list[$i]->{Edge} );
  }
}
else {
  if( $DEBUG4 ) {
    print "SECOND ROUND OF INTERIOR ANALYSIS...\n";
  }

  # For this round the images labelled Edge in the first round are left out,
  # both as the image being examined and as a possible neighbor. An image
  # that still has 8 neighbors in this reduced pool gets the additional
  # label Interior and loses its Border label.

  # This round reuses $deltaX, $deltaY, $XDist, $YDist, $i, $j, $this_image
  # and $that_image, which were declared for the first round.

  # Iterate over each color, because each color represents a contiguous
  # slab of specimen on the slide
  for my $color ( keys %color_hash ) {
    if( $DEBUG4 ) {
      print "\tFor Color: " . $color . "\n";
    }
    for( $i = 0; $i <= $#point_list; $i++ ) {
      $this_image = $point_list[$i];
      # Skip slots that have no CSV row behind them
      if( !defined $this_image || !defined $this_image->{Color} ) {
        next;
      }
      if( $this_image->{Color} ne $color ) {
        if( $DEBUG4 ) {
          print "\t\tImage $i is not $color, but is $this_image->{Color}. Skipping for now...\n";
        }
        next;
      }
      if( !defined( $this_image->{FileExists} ) ) {
        if( $DEBUG4 ) {
          print "\t\tImage $i does not have files associated with it. Skipping...\n";
        }
        $file_doesnt_exist_list{$i} = 1;
        next;
      }
      if( defined( $this_image->{Edge} ) ) {
        if( $DEBUG4 ) {
          print "\t\tImage $i was determined last round to be an edge image. Skipping...\n";
        }
        next;
      }
      if( $DEBUG4 ) {
        print "\t\tImage $i is $color and is a not an edge image. Proceeding with proximity analysis.\n";
      }
      for( $j = 0; $j <= $#point_list; $j++ ) {
        $that_image = $point_list[$j];
        # Only points with a known position can be compared
        if( !defined $that_image || !defined $that_image->{XPos} ) {
          next;
        }
        # Edge images no longer count as neighbors in this round
        if( defined $that_image->{Edge} ) {
          next;
        }
        if( $i == $j ) {
          if( $DEBUG4 ) {
            print "\t\t\tWon't compare Image $i to itself.\n";
          }
          next;
        }
        if( $DEBUG4 ) {
          print "\t\t\tComparing images $i and $j.\n";
        }
        # Assumes all coordinates are positive
        $XDist = abs( $this_image->{XPos} - $that_image->{XPos} );
        $YDist = abs( $this_image->{YPos} - $that_image->{YPos} );
        if( $DEBUG4 ) {
          print "\t\t\t\tX Dist: $XDist, Y Dist: $YDist\n";
        }
        if( $XDist <= $deltaX && $YDist <= $deltaY ) {
          push @{ $this_image->{InteriorNeighborList} }, $j;
        }
      }
      if( $DEBUG4 ) {
        if( defined $this_image->{InteriorNeighborList} ) {
          print "\t\t\tNeighbor List for Image $i: " .
            join(   " & ",   @{ $this_image->{InteriorNeighborList} }   ) . "\n";
        }
      }
    } # end iterating over @point_list
  } # end iterating over %color_hash

  my $DEBUG5 = 0;

  if( $DEBUG5 ) {
    print "Second round of image neighbor analysis:\n";
  }
  for( $i = 0; $i <= $#point_list; $i++ ) {
    # Test the slot itself first so that empty slots are not turned into
    # empty hashes by the key lookup.
    next if !defined $point_list[$i];

    my $neighbors = $point_list[$i]->{InteriorNeighborList};
    next if !defined $neighbors;

    my $neighbor_count = scalar @{ $neighbors };
    if( $DEBUG5 ) {
      print "\tImage $i neighbors these images " . join( " & ", @{ $neighbors } ) . "\n";
      print "\tNumber of neighbors Image $i has: " . $neighbor_count . "\n" ;
    }
    if( $neighbor_count == 8 ) {
      # Promote from Border to Interior
      $point_list[$i]->{Interior} = 1;
      delete $point_list[$i]->{Border};
    }
  }

  # Collect the final label of every image into the three result lists
  for( $i = 0; $i <= $#point_list; $i++ ) {
    next if !defined $point_list[$i];
    push( @interior_images, $i) if( defined $point_list[$i]->{Interior} );
    push( @border_images, $i) if( defined $point_list[$i]->{Border} );
    push( @exterior_images, $i) if( defined $point_list[$i]->{Edge} );
  }

}

# Image numbers are integers, so sort them numerically; the list is built
# here so that it also contains anything recorded during the second round.
@file_doesnt_exist_array = sort { $a <=> $b } keys %file_doesnt_exist_list;


my $DEBUG6 = 0;

# Debug aid: print the image numbers of every result list. The numbers are
# sorted numerically; a plain "sort" compares them as strings and would put
# 10 before 2.
if( $DEBUG6 ) {
  print "The following images do not have a file associated with them:\n";
  foreach my $image ( sort { $a <=> $b } @file_doesnt_exist_array ) {
    print $image . ", ";
  }
  print "\n\n";

  print "List of exterior images: \n";
  foreach my $image ( sort { $a <=> $b } @exterior_images ) {
    print $image . ", ";
  }
  print "\n\n";

  # The border list is only filled when the second round has run
  if( $round2 ) {
    print "List of border images: \n";
    foreach my $image ( sort { $a <=> $b } @border_images ) {
      print $image . ", ";
    }
    print "\n\n";
  }

  print "List of interior images: \n";
  foreach my $image ( sort { $a <=> $b } @interior_images ) {
    print $image . ", ";
  }
  print "\n\n";

}

my $DEBUG7 = 0;

# Debug aid: print a summary of how many images fell into each category.
if( $DEBUG7 ) {
  my $missing_count  = scalar @file_doesnt_exist_array;
  my $exterior_count = scalar @exterior_images;
  my $border_count   = scalar @border_images;
  my $interior_count = scalar @interior_images;

  # Assemble the report line by line, then print it in one go
  my @report;
  push @report, "Statistics: \n";
  push @report, "Total images mentioned in csv: $total_line_count \n";
  push @report, "Total number of corresponding files: $total_file_count \n";
  push @report, "Total number of images without files: $missing_count\n";
  push @report, "Total number of exterior images: $exterior_count\n";
  # The border count is only meaningful when the second round has run
  if( $round2 ) {
    push @report, "Total number of border images: $border_count\n";
  }
  push @report, "Total number of interior images: $interior_count\n";

  print join( "", @report );
}


# When -n was given: shuffle the interior images and print the file names
# that belong to the first $select_number of them, one per line.
if( defined $select_number ) {

  my $DEBUG8 = 0;
  if( !@interior_images ) {
    # If there are no interior images, go to the border images
    @interior_images = @border_images;
  }

  # Never ask for more images than exist. The limit is the element COUNT;
  # $#interior_images is the last index, which is one less.
  my $available = scalar @interior_images;
  if( $select_number > $available ) {
    $select_number = $available;
  }

  if( $DEBUG8 ) {
    print "Original order: \n";
    for( $i = 0; $i <= $#interior_images; $i++ ) {
      print "($i -> $interior_images[$i]) ";
    }
    print "\n\n";
  }

  # Fisher-Yates shuffle, walking from the last element down to the second.
  # The explicit "$i > 0" test also ends the loop at once for an empty list;
  # a bare "--$i" condition would start at -1 there and die on the swap.
  for( $i = $#interior_images; $i > 0; $i-- ) {
    my $swap_with = int rand( $i + 1 );
    next if $i == $swap_with;
    @interior_images[$i, $swap_with] = @interior_images[$swap_with, $i];
  }

  if( $DEBUG8 ) {
    print "New order: \n";
    for( $i = 0; $i <= $#interior_images; $i++ ) {
      print "($i -> $interior_images[$i]) ";
    }
    print "\n\n";
  }

  # Output a list of files associated with the first $select_number images
  # in the randomized array
  for( $i = 0; $i < $select_number; $i++ ) {
    my $file_list = $point_list[ $interior_images[$i] ]->{FileList};
    next if !defined $file_list;
    foreach my $file ( @{ $file_list } ) {
      print $file . "\n";
    }
  }
}


#1;