Create full image coverage working set.pl

From Colettapedia
Revision as of 17:48, 19 April 2010 by Ccoletta (talk | contribs) (New page: <pre> #!/usr/bin/perl use strict; use warnings; # takes in a list of images that have full or almost full coverage # as well as a list of images which have had sigs calculated already # f...)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
#!/usr/bin/perl
use strict;
use warnings;

# takes in a list of images that have full or almost full coverage
# as well as a list of images which have had sigs calculated already
# finds the intersection of the two lists and stores it in a 3rd (intersection) list
# tops up with more images from the full coverage list, taken in list order from the
# highest coverage level downwards (not randomly), and stores the result in a 4th (master) list

# creates the directory ~/data/monkeys_16bit/master_working_set/full_coverage/eosin

# populates that directory with slide directories.

# creates hard links to intersection images and their sigs from the other master_working_set
# creates hard links to top-up images from originals


################################################
# stage 1: read in the master working list file, i.e., the list of preused images/sigs
#################################################
# Inclusive bounds on the coverage levels (tile counts) examined by later stages.
my $max_num_tiles = 16;
my $min_num_tiles = 12;
# First command-line argument: path of the list of preused images/sigs.
my $mws_path = shift;
# Slide directory name => array ref of the file names listed under it.
my %mws_list;
# Second command-line argument: path of the coverage list (read in stage 2).
my $coverage_path = shift;
# Slide directory name => array indexed by coverage level => array ref of file names.
my %coverage_list;
# Scratch variables shared by all stages.
my $dir_name;
my $file_name;

# Fail early on missing arguments: a three-arg open of an undefined path would
# otherwise succeed on an anonymous temporary file and yield an empty list.
die "Usage: $0 <preused_images_list> <coverage_list>\n"
  unless defined $mws_path && defined $coverage_path;

# Three-arg open with a lexical handle, so the path from the command line can
# never be interpreted as an open mode or a pipe command.
open( my $mws_fh, '<', $mws_path ) or die "Cant open preused imgs file $mws_path: $!\n";
while( my $line = <$mws_fh> )
{
  # Expected form: "./<dir>/<file>". Lines that do not match (e.g. blank lines)
  # are skipped; without this check $1/$2 would still hold the previous line's
  # captures and that entry would be recorded a second time.
  next unless $line =~ /\.\/(\S+)\/(\S+)/;
  $dir_name = $1;
  $file_name = $2;
  # print "MWS: Found dir $dir_name ||| file $file_name \n";
  push @{ $mws_list{ $dir_name } }, $file_name;
}
close $mws_fh;

################################################
# stage 2: read in the full coverage list file
#################################################

# Coverage level of the entry being processed; also reused as the loop counter
# of the later stages.
my $coverage_level;

# A three-arg open of an undefined path would succeed on an anonymous temporary
# file, so reject a missing argument explicitly.
defined $coverage_path or die "Cant open coverage file: no path given\n";

# Three-arg open with a lexical handle, so the path from the command line can
# never be interpreted as an open mode or a pipe command.
open( my $coverage_fh, '<', $coverage_path ) or die "Cant open coverage file: $!\n";
while( my $line = <$coverage_fh> )
{
  # Expected form: "<level> <dir>/<file>". Lines that do not match are skipped;
  # without this check the previous line's captures would be reused and that
  # entry recorded a second time.
  # NOTE(review): the directory capture keeps any leading "./", whereas the
  # stage 1 pattern strips it -- confirm the coverage file has no such prefix,
  # otherwise the slide names of the two lists will never match.
  next unless $line =~ /(\d+)\s*(\S+)\/(\S+)/;
  $coverage_level = $1;
  $dir_name = $2;
  $file_name = $3;
  # print "COV: $coverage_level, dir $dir_name ||| file $file_name \n";
  push @{ $coverage_list{ $dir_name }[$coverage_level] }, $file_name;
}
close $coverage_fh;

################################################
# stage 3: find the intersection of the two list files
#################################################
# Slide directory name => array indexed by coverage level => array ref of the
# file names that appear in both the coverage list and the preused list.
my %intersection_list;
# Running counter used by stage 4.
my $count;

foreach my $slide ( keys %coverage_list )
{
  print "Find intersection set of images in slide $slide\n";

  # Lookup table of the precalculated file names for this slide (empty when the
  # slide does not appear in the preused list at all).
  my %is_precalculated = map { $_ => 1 } @{ $mws_list{$slide} || [] };

  foreach my $level ( reverse $min_num_tiles .. $max_num_tiles )
  {
    print "\tcoverage level: $level\n";

    # Nothing recorded for this slide at this coverage level.
    my $level_files = $coverage_list{$slide}[$level] or next;

    # Partition the level's files into "already calculated" and "the rest".
    # The list is rebuilt rather than spliced in place: removing elements from
    # an array while foreach is walking it makes the loop skip the element that
    # follows each removal, so adjacent matches would be missed.
    my @remaining;
    foreach my $fully_covered_file ( @{ $level_files } )
    {
      if( $is_precalculated{$fully_covered_file} )
      {
        push @{ $intersection_list{$slide}[$level] }, $fully_covered_file;
        print "\t\tintersection found: $fully_covered_file\n";
      }
      else
      {
        push @remaining, $fully_covered_file;
      }
    }

    # Intersection files are removed from the coverage list so that stage 4
    # does not add them a second time.
    @{ $level_files } = @remaining;
  }
}

################################################
# stage 4: create a master list, with priority given to images in intersection set
#################################################

# Slide name => files from the intersection list that were actually selected.
my %reuse_list;
# Slide name => every selected file (reused first, then top-ups) that will make
# up the full coverage master working set.
my %master_list;

# Quota of images per slide directory.
my $num_images_in_class = 60;

foreach $dir_name ( keys %coverage_list )
{
  # $count tracks how many images have been selected for this slide so far.
  $count = 0;
  $coverage_level = $max_num_tiles;

  # Walk the coverage levels from the highest down to the lowest allowed one.
  LEVEL: while( $coverage_level >= $min_num_tiles )
  {
    # Candidates for this level in priority order: files whose sigs already
    # exist (flag 1) come before the other fully covered files (flag 0).
    my @candidates = (
      ( map { [ $_, 1 ] } @{ $intersection_list{$dir_name}[$coverage_level] || [] } ),
      ( map { [ $_, 0 ] } @{ $coverage_list{$dir_name}[$coverage_level] || [] } ),
    );

    foreach my $candidate ( @candidates )
    {
      # Stop looking at this slide altogether once the quota is filled.
      last LEVEL if $count >= $num_images_in_class;

      my( $image, $has_sigs ) = @{ $candidate };
      push @{ $reuse_list{$dir_name} }, $image if $has_sigs;
      push @{ $master_list{$dir_name} }, $image;
      $count++;
    }

    $coverage_level--;
  }
}

################################################
# stage 5: create the directories and hard links for images only
#################################################
use File::Path qw(make_path);

# Where the original images live, where the new working set is built, and where
# the previously computed sigs live (the last one is used by stage 6).
my $top_source_dir = "/home/colettace/data/monkeys_16bit/originals";
my $top_target_dir = "/home/colettace/data/monkeys_16bit/master_working_set/full_coverage/eosin";
my $top_overlap_dir = "/home/colettace/data/monkeys_16bit/master_working_set/white_level_corrected/eosin";

# create directory structure in target
foreach $dir_name ( keys %master_list )
{
  # Create the slide directory in the target tree. make_path also creates any
  # missing parents (including $top_target_dir itself), which a plain mkdir
  # cannot do, and a directory left over from an earlier run is accepted.
  my $slide_dir = "$top_target_dir/$dir_name";
  # print "Creating directory $slide_dir\n";
  unless( -d $slide_dir )
  {
    make_path( $slide_dir, { error => \my $errors } );
    if( @{ $errors } )
    {
      # Each error entry is a single-pair hash: path => message.
      my( $failed_path, $message ) = %{ $errors->[0] };
      die "Can't create dir $dir_name: $message";
    }
  }

  foreach $file_name ( @{ $master_list{$dir_name} } )
  {
    # make hard links to images
    # print "\linking $top_source_dir/$dir_name/$file_name with $top_target_dir/$dir_name/$file_name\n";
    link( "$top_source_dir/$dir_name/$file_name", "$top_target_dir/$dir_name/$file_name")
      or die "Couldn't make link $top_target_dir/$dir_name/$file_name: $!\n";
  }
}

################################################
# stage 6: create the hard links to the precomputed sigs
#################################################
# Image file name without its ".tif" suffix, the sig files found for it, and
# the sig path currently being linked.
my $file_stem;
my @sig_list;
my $sig;

foreach $dir_name ( keys %reuse_list )
{
  print "precomputed sigs for $dir_name\n";
  foreach $file_name ( @{ $reuse_list{$dir_name} } )
  {
    # Derive the stem shared by the image and its sig files. A name that does
    # not match is skipped: $1 would otherwise still hold the capture of an
    # earlier match and the wrong sigs would be linked.
    unless( $file_name =~ /(\S+)\.tif/ )
    {
      warn "Skipping $file_name: cannot derive a sig file stem from it\n";
      next;
    }
    $file_stem = $1;
    print "\t file stem: $file_stem\n";

    # Collect the sigs already calculated for this image. find is run through
    # a list-form pipe open so that no shell is involved and names containing
    # spaces or shell metacharacters are passed through unchanged.
    @sig_list = ();
    if( open( my $find_fh, '-|', 'find', "$top_overlap_dir/$dir_name", '-name', "$file_stem*.sig" ) )
    {
      chomp( @sig_list = <$find_fh> );
      close $find_fh;
    }
    else
    {
      warn "Couldn't run find for $file_stem: $!\n";
    }

    # make hard links to the sigs already calculated in reuse list
    foreach $sig ( @sig_list )
    {
      # The link keeps the sig's base name (everything after the last slash).
      next unless $sig =~ m{([^/]+)\z};
      my $sig_target = "$top_target_dir/$dir_name/$1";
      print "\t\tlinking $sig to $sig_target\n";
      # Report a failed link instead of ignoring it, but carry on with the
      # remaining sigs.
      link( $sig, $sig_target )
        or warn "Couldn't make link $sig_target: $!\n";
    }
  }
}