Create full image coverage working set.pl
Jump to navigation
Jump to search
#!/usr/bin/perl
use strict;
use warnings;

use File::Find;

# Usage: <this script> PREUSED_IMAGES_LIST COVERAGE_LIST
#
# Takes in a list of images that have full or almost full coverage
# as well as a list of images which have had sigs calculated already.
#   - finds the intersection of the two lists (images whose sigs can be reused)
#   - tops each slide up to $num_images_in_class images with further images
#     from the coverage list, taken in list order with the highest coverage
#     level first (no randomisation is performed)
#   - creates one slide directory per slide under $top_target_dir
#     (the target directory itself must already exist)
#   - creates hard links to every selected image from $top_source_dir
#   - creates hard links to the already-computed sigs of the reused images
#     from $top_overlap_dir

################################################
# stage 1: read in the master working list file, i.e., the list of preused images/sigs
################################################

my $max_num_tiles = 16;    # highest coverage level considered
my $min_num_tiles = 12;    # lowest coverage level considered

my $mws_path      = shift;
my $coverage_path = shift;
die "Usage: $0 <preused imgs list> <coverage list>\n"
    unless defined $mws_path && defined $coverage_path;

my %mws_list;         # slide dir => [ image file names that already have sigs ]
my %coverage_list;    # slide dir => [ per coverage level: [ image file names ] ]

# Three-arg open with a lexical handle: the path can never be read as a mode.
open( my $mws_fh, '<', $mws_path )
    or die "Cant open preused imgs file $mws_path: $!\n";
while ( my $line = <$mws_fh> ) {

    # Expected form: ./<slide dir>/<image file>
    # Lines that do not match are skipped rather than reusing the captures
    # left over from a previous line.
    my ( $dir_name, $file_name ) = $line =~ /\.\/(\S+)\/(\S+)/
        or next;

    # print "MWS: Found dir $dir_name ||| file $file_name \n";
    push @{ $mws_list{$dir_name} }, $file_name;
}
close $mws_fh;

################################################
# stage 2: read in the full coverage list file
################################################

open( my $coverage_fh, '<', $coverage_path )
    or die "Cant open coverage file: $!\n";
while ( my $line = <$coverage_fh> ) {

    # Expected form: <coverage level> <slide dir>/<image file>
    my ( $coverage_level, $dir_name, $file_name )
        = $line =~ /(\d+)\s*(\S+)\/(\S+)/
        or next;

    # print "COV: $coverage_level, dir $dir_name ||| file $file_name \n";
    push @{ $coverage_list{$dir_name}[$coverage_level] }, $file_name;
}
close $coverage_fh;

################################################
# stage 3: find the intersection of the two list files
################################################

my %intersection_list;    # slide dir => [ per coverage level: [ reusable files ] ]

foreach my $dir_name ( keys %coverage_list ) {
    print "Find intersection set of images in slide $dir_name\n";

    # Lookup table of the files in this slide that already have sigs.
    my %is_precalculated = map { $_ => 1 } @{ $mws_list{$dir_name} || [] };

    for my $coverage_level ( reverse $min_num_tiles .. $max_num_tiles ) {
        print "\tcoverage level: $coverage_level\n";

        # Partition into two new arrays instead of splicing entries out of
        # the array being iterated: removing elements from an array inside a
        # foreach over that same array makes the loop skip the element that
        # follows each removal.
        my ( @reusable, @remaining );
        foreach my $fully_covered_file (
            @{ $coverage_list{$dir_name}[$coverage_level] || [] } )
        {
            if ( $is_precalculated{$fully_covered_file} ) {
                push @reusable, $fully_covered_file;
                print "\t\tintersection found: $fully_covered_file\n";
            }
            else {
                push @remaining, $fully_covered_file;
            }
        }

        # Reusable files move to the intersection list; the coverage list
        # keeps only the files that still need sigs computed.
        $intersection_list{$dir_name}[$coverage_level] = \@reusable;
        $coverage_list{$dir_name}[$coverage_level]     = \@remaining;
    }
}

################################################
# stage 4: create a master list, with priority given to images in intersection set
################################################

my %reuse_list;     # a subset of the intersection list that will be used
my %master_list;    # the full set of images that will become the full coverage master w.s.
my $num_images_in_class = 60;    # quota of images per slide

foreach my $dir_name ( keys %coverage_list ) {

    # print "Creating master list for $dir_name\n";
    my $count = 0;    # how close we are to filling our quota of imgs

ITERATOR:
    for my $coverage_level ( reverse $min_num_tiles .. $max_num_tiles ) {

        # print "\tCoverage level $coverage_level\n";

        # At each level, images whose sigs can be reused are taken first ...
        foreach my $file_name (
            @{ $intersection_list{$dir_name}[$coverage_level] || [] } )
        {
            last ITERATOR if $count >= $num_images_in_class;
            push @{ $reuse_list{$dir_name} },  $file_name;
            push @{ $master_list{$dir_name} }, $file_name;

            # print "\t\tcount $count: Reusing $file_name\n";
            $count++;
        }

        # ... then the remaining images of that level top up the quota.
        foreach my $fully_covered_file (
            @{ $coverage_list{$dir_name}[$coverage_level] || [] } )
        {
            last ITERATOR if $count >= $num_images_in_class;
            push @{ $master_list{$dir_name} }, $fully_covered_file;

            # print "\t\tcount $count: Adding $fully_covered_file\n";
            $count++;
        }
    }
}

################################################
# stage 5: create the directories and hard links for images only
################################################

my $top_source_dir  = "/home/colettace/data/monkeys_16bit/originals";
my $top_target_dir  = "/home/colettace/data/monkeys_16bit/master_working_set/full_coverage/eosin";
my $top_overlap_dir = "/home/colettace/data/monkeys_16bit/master_working_set/white_level_corrected/eosin";

# create directory structure in target
foreach my $dir_name ( keys %master_list ) {

    # create the slide directory in the target directory
    # print "Creating directory $top_target_dir/$dir_name\n";
    mkdir "$top_target_dir/$dir_name"
        or die "Can't create dir $dir_name: $!";

    foreach my $file_name ( @{ $master_list{$dir_name} } ) {

        # make hard links to images
        # print "linking $top_source_dir/$dir_name/$file_name with $top_target_dir/$dir_name/$file_name\n";
        link( "$top_source_dir/$dir_name/$file_name",
            "$top_target_dir/$dir_name/$file_name" )
            or die "Couldn't make link $top_target_dir/$dir_name/$file_name: $!\n";
    }
}

################################################
# stage 6: create the hard links to the precomputed sigs
################################################

foreach my $dir_name ( keys %reuse_list ) {
    print "precomputed sigs for $dir_name\n";

    my $sig_source_dir = "$top_overlap_dir/$dir_name";

    foreach my $file_name ( @{ $reuse_list{$dir_name} } ) {

        # make hard links to the sigs already calculated in reuse list
        my ($file_stem) = $file_name =~ /(\S+)\.tif/;
        if ( !defined $file_stem ) {
            warn "Skipping $file_name: not a .tif file name\n";
            next;
        }
        print "\t file stem: $file_stem\n";

        # Collect every entry below the slide's sig directory whose name is
        # "<file stem><anything>.sig". Done in-process so that file names
        # never pass through a shell.
        my @sig_list;
        if ( -d $sig_source_dir ) {
            find(
                sub {
                    push @sig_list, $File::Find::name
                        if length($_) >= length($file_stem) + 4
                        && index( $_, $file_stem ) == 0
                        && /\.sig\z/;
                },
                $sig_source_dir
            );
        }

        foreach my $sig_path ( sort @sig_list ) {
            my ($sig_name) = $sig_path =~ m{([^/]+)\z};
            print "\t\tlinking $sig_path to $top_target_dir/$dir_name/$sig_name\n";

            # Best effort: a sig that cannot be linked is reported, not fatal.
            link( $sig_path, "$top_target_dir/$dir_name/$sig_name" )
                or warn "Couldn't make link $top_target_dir/$dir_name/$sig_name: $!\n";
        }
    }
}