#!/usr/bin/perl
#
# Parse the HTML files written by the Meeting Maker program on Windows and
# print the schedule entries found in them, one per line:
#
#     <date>: <start> - <end>: <description>
#
# This was never finished, but the basics are here in case anyone wants it.
#
# Input:  every file matching export.mmd/d*htm, relative to the current
#         directory.
# Output: one line per schedule entry on STDOUT, with times converted from
#         12-hour AM/PM notation to 24-hour HH:MM.

use strict;
use warnings;

use HTML::Parser;
use Date::Parse;
use Date::Format;

$| = 1;    # unbuffered STDOUT

my $parser = HTML::Parser->new(
    api_version   => 3,
    start_h       => [ \&start, "tagname, attr" ],
    text_h        => [ \&text,  "dtext" ],
    end_h         => [ \&end,   "tagname" ],
    unbroken_text => 1,
);

my %results;    # not used yet; kept for the unfinished parts of the script

# Per-file parse state, reset before each file and filled in by the handlers:
#   in      - hash of tag name => current nesting depth
#   table   - number of <table> start tags seen so far
#   row     - number of <tr> start tags seen in the current table
#   col     - number of <td> start tags seen in the current row
#   text    - text accumulated for the current cell
#   tdata   - cell text, indexed as tdata[table][row][col]
#   datestr - text of the <title> after its first ", "
#   date    - datestr converted to epoch seconds by str2time (may be undef)
my %state;

foreach my $file (glob "export.mmd/d*htm") {
    %state = ();

    # parse_file returns undef (and sets $!) when the file cannot be opened.
    unless (defined $parser->parse_file($file)) {
        warn "cannot parse $file: $!\n";
        next;
    }

    # The schedule entries are read from the third table of the page.
    my $rows = $state{tdata}[3];
    next unless ref($rows) eq "ARRAY";

    # Fall back to the raw title text when it could not be parsed as a date.
    my $date =
          defined $state{date}    ? time2str("%D", $state{date})
        : defined $state{datestr} ? $state{datestr}
        :                           "unknown date";

    foreach my $row (@$rows) {
        next unless ref($row) eq "ARRAY";

        # Column 1 holds the time range, column 2 the description.
        my ($times, $descr) = @{$row}[ 1, 2 ];
        next unless defined $times && $times =~ /(AM|PM)/;
        $descr = "" unless defined $descr;

        # Drop the whole run of trailing blanks/dashes, then split the range.
        $times =~ s/[\s\-]+$//;
        my ($start, $end) = split /-+/, $times;

        print "$date: ", to_24h($start), " - ", to_24h($end), ": $descr\n";
    }
}

# to_24h($time)
#
# Convert a 12-hour "H:MM AM" / "H:MM PM" time to 24-hour "HH:MM" and remove
# every character that is not a digit or a colon.  12 AM becomes 00 and 12 PM
# stays 12.  A string without an AM/PM suffix is only stripped, not converted.
# Returns the empty string for an undefined argument.
sub to_24h {
    my ($time) = @_;
    return "" unless defined $time;

    $time =~ s{(\d+):(\d+)\s*(AM|PM)}{
        # Copy the captures first: $1 and friends are read-only.
        my ($hour, $min, $meridiem) = ($1, $2, $3);
        $hour = 0    if $hour == 12;
        $hour += 12  if $meridiem eq "PM" && $hour < 12;
        sprintf("%02d:%s", $hour, $min);
    }e;
    $time =~ s/[^\d:]//g;

    return $time;
}

# start($tagname, $attr)
#
# HTML::Parser start-tag handler.  Counts tag nesting and keeps the
# table/row/column counters that address the current cell.
sub start {
    my ($tagname, $attr) = @_;

    $state{in}{$tagname}++;

    if ($tagname eq "table") {
        $state{table}++;
        $state{row} = 0;
        $state{col} = 0;
    }
    elsif ($tagname eq "tr") {
        $state{row}++;
        $state{col} = 0;
    }
    elsif ($tagname eq "td") {
        $state{col}++;
        $state{text} = "";
    }

    return;
}

# end($tagname)
#
# HTML::Parser end-tag handler.  Stores the text collected so far for the
# current cell.  This runs for every end tag, not only </td>, so a cell
# whose </td> is omitted is still recorded when a later end tag arrives.
sub end {
    my ($tagname) = @_;

    # Never let a stray end tag drive the count negative: a negative count
    # is true and would make text() believe it is still inside that tag.
    $state{in}{$tagname}-- if $state{in}{$tagname};

    # Nothing to record before the first table or the first cell.
    return unless $state{table} && defined $state{text};

    # Clean a copy so that text arriving after an inner end tag (</b>, </a>)
    # keeps the whitespace that separates it from what came before.
    my $cell = $state{text};
    $cell =~ s/\n//g;
    $cell =~ s/^\s+//;
    $cell =~ s/\s+$//;

    $state{tdata}[ $state{table} ][ $state{row} ][ $state{col} ] = $cell;

    return;
}

# text($text)
#
# HTML::Parser text handler.  Inside <title>, everything after the first
# ", " is taken as the date of the page; inside <td>, the text is appended
# to the current cell.
sub text {
    my ($text) = @_;

    if ($state{in}{title}) {
        (my $date = $text) =~ s/^[^,]*, //;
        $state{datestr} = $date;
        $state{date}    = str2time($date);
        return;
    }

    $state{text} .= $text if $state{in}{td};

    return;
}