#!/usr/bin/perl

# MyParse: an HTML::Parser subclass that converts the rows of a course
# schedule table into fixed-width text lines printed on STDOUT.
#
# NOTE: every statement is on its own line on purpose.  When this file is
# collapsed onto a single line, the "#!" and the inline "# print ..." debug
# comments turn all code that follows them on that line into a comment.
package MyParse;

use HTML::Parser;
use strict;
use vars qw(@ISA);
@ISA = qw(HTML::Parser);

# Parser state is kept in file-scoped lexicals, not in the parser object, so
# it is shared by every MyParse instance created in this file.
#   @cols     - header-cell texts collected while $needcols is true
#   @data     - data-cell texts of the course being collected (only written
#               and cleared in this file)
#   %line     - flags for the current row: startline, header, data, skip
#   $needcols - true until the first data cell has been seen
#   $idx      - index into @cols of the next data cell
#   %course   - column name => array of cell texts for the current course
my (@cols, @data, %line, $needcols, $idx, %course);
$needcols = 1;
$idx      = 0;

# printf layout shared by the main course row and its continuation rows:
# CRN, SUBJ, CRSE, SEC, TITLE, CREDITS, DAYS, TIMES, BLDG, ROOM.
my $ROW_FORMAT =
    "%-5.5s %-4.4s %-4.4s %-2.2s %-25.25s %-2.2s %-8.8s %-11.11s %-5.5s %-5.5s\n";

# Print one output row; undefined fields are printed as empty strings.
sub _print_row {
    my (@fields) = @_;
    printf($ROW_FORMAT, map { defined $_ ? $_ : "" } @fields);
    return;
}

# Return entry $n of the values collected for column $name (undef if none).
sub _course_value {
    my ($name, $n) = @_;
    my $values = $course{$name} || [];
    return $values->[$n];
}

# Normalise and print the course collected in %course, then reset the
# per-course state and mark the remainder of the current row as skipped.
sub _flush_course {
    # Strip spaces and am/pm markers from every collected time, in place.
    foreach my $time (@{ $course{TIMES} || [] }) {
        $time =~ s/ //g;
        $time =~ s/am//g;
        $time =~ s/pm//g;
    }

    # Rewrite each day string as five fixed positions (M T W R F), with a
    # space for every day that does not occur.  "TBA" is left untouched.
    foreach my $when (@{ $course{DAYS} || [] }) {
        next if $when eq "TBA";
        $when = join "", map { $when =~ /$_/ ? $_ : " " } qw(M T W R F);
    }

    # Main row: the first value of every column.
    _print_row(
        map { _course_value($_, 0) }
            qw(CRN SUBJ CRSE SEC TITLE CREDITS DAYS TIMES BLDG ROOM)
    );

    # Continuation rows: one per additional DAYS entry, showing only the
    # schedule columns; the six leading columns are left blank.
    my $numsess = scalar @{ $course{DAYS} || [] };
    foreach my $num (1 .. $numsess - 1) {
        _print_row(
            ("") x 6,
            map { _course_value($_, $num) } qw(DAYS TIMES BLDG ROOM)
        );
    }

    undef %course;
    undef @data;
    $idx = 0;
    $line{skip} = 1;
    return;
}

# HTML::Parser start-tag callback.
#   <tr> begins a new row and clears the skip flag.
#   <th> switches to header mode; a colspan above 5 sets skip to 2, which
#        makes text() print the cell text as a banner instead of storing it.
#   <i>  switches from header mode to data mode.
sub start {
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;

    if ($tag eq "tr") {
        $line{startline} = 1;
        delete $line{skip};
    }

    if ($tag eq "th") {
        delete $line{data};
        $line{header} = 1;
        delete $line{startline};
        # A missing colspan attribute counts as 0.
        if (($attr->{colspan} || 0) > 5) {
            $line{skip} = 2;
            # print "Skipping a probable section heading.\n";
        }
    }

    if ($tag eq "i") {
        delete $line{header};
        $line{data} = 1;
    }
}

# HTML::Parser text callback.
#   Text containing "Instructor" ends the current course: the course is
#   printed and the rest of the row is skipped.  Otherwise the text is
#   printed as a banner (skip == 2), recorded as a column name (header
#   mode, until the first data cell) or stored as the next data cell.
sub text {
    my ($self, $text) = @_;
    # print "GOT TEXT \"$text\" (header=$line{header}, data=$line{data})\n";

    if ($text =~ /Instructor/) {
        # print "New course!\n";
        _flush_course();
    }

    if (($line{skip} || 0) == 2) {
        print "\n:::::::: $text ::::::::\n\n";
    }
    return if $line{skip};

    if ($line{header} && $needcols) {
        push @cols, $text;
        # print "newcol $#cols: $text\n";
    }

    if ($line{data}) {
        $needcols = 0;
        push @data, $text;
        push @{ $course{ $cols[$idx] } }, $text;
        # print "$idx) $cols[$idx]: $text\n";
        $idx++;
        # Hack for repeated schedule sections: after column 12, further
        # cells wrap back to column 7.
        if ($idx > 12) {
            $idx = 7;
        }
    }
}

# HTML::Parser end-tag callback; end tags are intentionally ignored.
sub end {
    my ($self, $tag, $origtext) = @_;
}

package main;

use vars qw(@times);
use HTTP::Request;
use LWP::UserAgent;

$| = 1;    # unbuffered output

print "Content-Type: text/html\n\n";
# NOTE(review): this prints two bare newlines.  Given the text/html content
# type and the column-aligned output, an opening tag such as <pre> may have
# been lost from this string - confirm against the deployed script.
print "\n\n";
# Fetch each schedule page from sis.rpi.edu and feed it to MyParse one line
# at a time.  A fresh parser is created (and finished with eof) per line.
my $url;
my $ua = LWP::UserAgent->new;    # one user agent is enough for all pages
foreach $url ("susrscha.htm") {
    # Alternative page list kept from the original:
    # "hwsrscha.htm","hwsrschb.htm","hwsrschc.htm","hwsrschd.htm") {
    # print "Get $url\n";
    my $request  = HTTP::Request->new(GET => "http://sis.rpi.edu/$url");
    my $response = $ua->request($request);

    # Do not parse the body of a failed request as if it were a schedule.
    # The status line goes to STDERR only, since it is remote-supplied text
    # and the page is served as text/html.
    unless ($response->is_success) {
        warn "Unable to fetch $url: " . $response->status_line . "\n";
        print "Unable to fetch $url\n";
        next;
    }

    foreach my $html_line (split /[\r\n]/, $response->content()) {
        my $p = MyParse->new;
        $p->parse($html_line);
        $p->eof;
    }
}
print "\n";