Parse html.pl

From Colettapedia
Jump to navigation Jump to search
#!/usr/bin/perl
use strict;
use warnings;
require HTML::Tree;
# Takes a wndchrm output html file as input.
# HTML file should have been procured as part of a wndchrm test operation
# where the operation derives interpolated values

# This code parses the HTML output file and, for every image in the test set, prints
# the predicted class and interpolated value, grouped by actual class and file name.
# Path of the wndchrm HTML report to parse: the first command-line argument.
my $output_file = shift;

# Fail early with a clear message rather than handing undef or a bad path
# to the HTML parser.
die "Usage: $0 <wndchrm_output.html>\n" if !defined $output_file;
die "Cannot read input file \"$output_file\"\n"
  if !( -f $output_file && -r _ );

print "Loading $output_file\n";
my $tree = HTML::TreeBuilder->new_from_file( $output_file );
# Guard in case the constructor reports failure by returning undef instead of
# dying (behaviour may differ between HTML::TreeBuilder versions - TODO confirm).
die "Could not parse \"$output_file\"\n" if !defined $tree;
print "Done loading $output_file\n";

#$tree->dump; 
# Collect every TABLE element whose ID attribute contains
# "IndividualImages_split" (presumably one table per test split - verify
# against the report format). The callback returns 1 to accept an element
# and 0 to reject it.
my @table_elements = $tree->look_down(
  "_tag", "TABLE",
  sub {
    my ($candidate) = @_;
    my $table_id = $candidate->attr("ID");

    # Tables without an ID attribute can never be a results table.
    return 0 unless defined $table_id;

    return $table_id =~ /IndividualImages_split/ ? 1 : 0;
  }
);

# Nothing to parse if the report contains no matching table.
@table_elements
  or die "Couldn't find the test results table element\n";
# print "Number of splits found: $#table_elements\n";

# Nested store filled by the loop below:
#   $results_hash{ actual class }{ file name } = [ { val => ..., class => ... }, ... ]
# One array entry is pushed for every table row that refers to that file.
my %results_hash;

# Scratch variables for the current row. They stay at file scope because the
# reporting loop further down in the script assigns to them as well.
my ($actual_class, $predicted_class, $interpolated_value);

# Set to 1 to echo the text of every cell of every row while parsing.
my $DEBUG1 = 0;

# Zero-based positions of the cells read from each data row.
my $ACTUAL_CLASS_COL       = 10;
my $PREDICTED_CLASS_COL    = 11;
my $INTERPOLATED_VALUE_COL = 13;
# A row must have at least this many cells for all three reads to succeed.
my $MIN_CELLS = $INTERPOLATED_VALUE_COL + 1;

foreach my $split_table_element (@table_elements)
{
  my @rows = $split_table_element->look_down("_tag", "tr");
  my $table_id = $split_table_element->attr("ID");

  print "Parsing $output_file, " . $table_id ."\n";
# The first row is the heading row, so skip it by starting at 1 instead of 0
  ROW:
  foreach my $row_index (1 .. $#rows) {
    my @row = $rows[$row_index]->look_down("_tag","td");
    if( $DEBUG1 ) {
      foreach my $cell (@row) {
        print "  " . $cell->as_text;
      }
      print "\n";
    }

    # Rows with too few cells (e.g. rows holding no <td> at all) cannot be
    # read at the fixed positions above; skip them instead of crashing on an
    # undefined cell.
    if( @row < $MIN_CELLS ) {
      warn "Skipping row $row_index of $table_id: expected at least $MIN_CELLS cells, found "
        . scalar(@row) . "\n";
      next ROW;
    }

    # The link to the image is looked up in the last cell of the row.
    my $img_link_element = $row[$#row]->look_down( "_tag", "A" );
    if( !defined $img_link_element ) {
      warn "Skipping row $row_index of $table_id: no link in the last cell\n";
      next ROW;
    }

    my $fullpath = $img_link_element->attr("HREF");
    if( !defined $fullpath ) {
      warn "Skipping row $row_index of $table_id: link has no HREF attribute\n";
      next ROW;
    }

    # For a path without whitespace this captures the text after the last
    # "/". Paths that contain no "/" do not match and the row is ignored.
    if( $fullpath =~ /\S*\/(\S+)/ ) {
      my $filename = $1;

      $actual_class = $row[$ACTUAL_CLASS_COL]->as_text;
      $predicted_class = $row[$PREDICTED_CLASS_COL]->as_text;
      $interpolated_value = $row[$INTERPOLATED_VALUE_COL]->as_text;
      push @{ $results_hash{ $actual_class }{ $filename } },
        { "val" => $interpolated_value, "class" => $predicted_class };
    }
  }
}

# Print every recorded prediction, grouped by actual class and then by file.
# The class and file keys are sorted so that repeated runs over the same input
# produce the report in the same order; the order returned by a plain "keys"
# is not stable from run to run.
print "RESULTS:\n";
foreach my $class (sort keys %results_hash) {
  print "\tClass \"$class\"\n";
  foreach my $file (sort keys %{ $results_hash{ $class } }) {
    print "\t\tFile \"$file\"\n";
    # Each entry is a hash ref with the keys "val" and "class"; entries are
    # printed in the order in which they were recorded.
    foreach my $prediction ( @{ $results_hash{ $class }{ $file } } ) {
      $interpolated_value = $prediction->{ "val" };
      $predicted_class = $prediction->{ "class" };
      print "\t\t\tprediction: $predicted_class - val $interpolated_value\n";
    }
  }
}
# Explicitly dispose of the parse tree once the report has been printed.
$tree->delete();