#!/usr/bin/perl

# This is something I was hacking on to parse the HTML files produced by
# the Meeting Maker program on Windows and extract useful schedule info
# from them.  I never finished it, but I think the basics are here in case
# anyone wants it.

$|=1;
use strict;
use HTML::Parser;
use Date::Parse;
use Date::Format;

# Event-driven HTML parser.  The start/text/end handlers defined later in
# this file record what they see in %state while a file is being parsed.
my $parser = HTML::Parser->new( api_version   => 3,
                                start_h       => [\&start, "tagname, attr"],
                                text_h        => [\&text,  "dtext"],
                                end_h         => [\&end,   "tagname"],
                                unbroken_text => 1 );

my %results;    # not used anywhere in the code visible here
my %state;      # per-file scratch space shared with the parser handlers

# The handlers number tables 1, 2, 3, ... in the order their <table> tags
# appear; the schedule rows are read from this one.
my $schedule_table = 3;

foreach my $file (glob "export.mmd/d*htm") {
    %state = ();

    # parse_file returns undef (and sets $!) when the file cannot be opened.
    unless (defined $parser->parse_file($file)) {
        warn "$file: cannot parse: $!\n";
        next;
    }

    my $rows = $state{tdata}[$schedule_table];
    next unless ref($rows) eq "ARRAY";

    # The date comes from the page <title> and is the same for every row.
    # If it could not be parsed, show the raw title text rather than
    # formatting an undefined timestamp.
    my $date = defined $state{date}    ? time2str("%D", $state{date})
             : defined $state{datestr} ? $state{datestr}
             :                           "unknown date";

    foreach my $row (@$rows) {
        # Row numbering starts at 1, so slot 0 (and any gap) is not a row.
        next unless ref($row) eq "ARRAY";

        my ($times, $descr) = ($row->[1], $row->[2]);
        next unless defined $times && $times =~ /(AM|PM)/;
        $descr = "" unless defined $descr;

        # Drop every trailing blank/dash so split does not see a dangling
        # separator.
        $times =~ s/[\s\-]+$//;
        my ($start, $end) = split /-+/, $times;

        $start = clock_to_24h($start);
        $end   = clock_to_24h($end);

        print "$date: $start - $end: $descr\n";
    }
}

# Turn the first "H:MM AM" / "H:MM PM" found in the argument into a
# zero-padded 24-hour "HH:MM", then strip everything that is not a digit or
# a colon.  12 AM becomes 00 and 12 PM stays 12.  An undefined argument
# yields the empty string.
sub clock_to_24h {
    my ($clock) = @_;
    return "" unless defined $clock;

    # $1/$2/$3 are read-only, so work on copies inside the replacement.
    $clock =~ s{(\d+):(\d+)\s*(AM|PM)}{
        my ($hour, $min, $meridiem) = ($1, $2, $3);
        $hour %= 12;
        $hour += 12 if $meridiem eq "PM";
        sprintf("%02d:%s", $hour, $min)
    }e;

    $clock =~ s/[^\d:]//g;
    return $clock;
}



# Start-tag handler for HTML::Parser.
#
# Arguments: the tag name, followed by the tag's attributes (not needed
# here).  Bumps the open-element counter for the tag and keeps the
# table/row/column position in %state current:
#   <table>  next table number, row and column back to 0
#   <tr>     next row number, column back to 0
#   <td>     next column number, collected cell text emptied
sub start {
    my ($tag) = @_;

    $state{in}{$tag}++;

    if ($tag eq "table") {
        $state{table}++;
        @state{qw(row col)} = (0, 0);
    }
    elsif ($tag eq "tr") {
        $state{row}++;
        $state{col} = 0;
    }
    elsif ($tag eq "td") {
        $state{col}++;
        $state{text} = "";
    }

    return;
}


# End-tag handler for HTML::Parser.
#
# Argument: the tag name.  Lowers the open-element counter for that tag,
# tidies the text collected so far (newlines removed, leading and trailing
# whitespace trimmed) and files it under the current table/row/column in
# $state{tdata}.  Note that this is done for every closing tag, not only
# for </td>.
sub end {
    my ($tag) = @_;

    $state{in}{$tag} -= 1;

    my $cell = $state{text};
    $cell =~ s/\n//g;
    $cell =~ s/^\s*//g;
    $cell =~ s/\s*$//g;

    # The tidied text is written back as well as stored in the grid.
    $state{text} = $cell;
    $state{tdata}[ $state{table} ][ $state{row} ][ $state{col} ] = $cell;
}

# Text handler for HTML::Parser.
#
# Argument: a chunk of document text.  Inside <title> the chunk is taken to
# be the page date: everything up to and including the first ", " is
# dropped, the remainder is kept in $state{datestr} and its str2time()
# result in $state{date}.  Anywhere else the chunk is appended to the
# current cell text, but only while a <td> is open.
sub text {
    my ($chunk) = @_;

    if ($state{in}{title}) {
        (my $stamp = $chunk) =~ s/^[^,]*, //g;
        $state{datestr} = $stamp;
        $state{date}    = str2time($stamp);
        return;
    }

    $state{in}{td} and $state{text} .= $chunk;
}
