#!/usr/bin/perl -w
###############################################################################
#    Copyright (C) 2002-2004 by Eric Gerbier
#    Bug reports to: gerbier@users.sourceforge.net
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
###############################################################################

use strict;

use LWP::UserAgent;
use HTTP::Request;
use HTTP::Response;

my $fic = $ARGV[0];

die "fichier $fic absent : $!\n" unless ( -f $fic );

open( BOOK, '<', $fic ) or die "can not open $fic : $!\n";

my $ua = LWP::UserAgent->new();
$ua->agent("perlzilla/v9.9");

my $nb_url  = 0;
my $nb_skip = 0;
my $nb_bad  = 0;
my @folder_stack;
my %code;
my %errors;
while (<BOOK>) {
	if (/<DT><H3 ADD_DATE=.*>(.*?)<\/H3>/) {
		my $fold = $1;
		push( @folder_stack, $fold );
	}
	elsif (m/<\/DL>/) {
		pop(@folder_stack);
	}
	elsif (m/HREF="(.*)" ADD_DATE=.*>(.*)<\/A>/) {
		my $url   = $1;
		my $title = $2;
		print "@folder_stack title = $title url = $url ";

		if (   ( $url =~ m/^https:/ )
			or ( $url =~ m/^file:/ )
			or ( $url =~ m/^mailto/ )
			or ( $url =~ m/^ftp:/ ) )
		{
			$nb_skip++;
			print " SKIP\n";
		}
		else {

			my $req = HTTP::Request->new( GET => $url );
			$req->referer("http://www.perlzilla.org");
			my $reponse = $ua->request($req);
			if ( $reponse->is_error() ) {
				my $ret_code = $reponse->code();
				$code{$ret_code}++;
				push (@{ $errors{$ret_code} }, $url);
				print " WARNING " . $reponse->status_line() . "\n";
				$nb_bad++;
			}
			else {
				print " OK\n";
			}
			$nb_url++;
		}
	}
}
close BOOK;

# statistiques
print "nombre d'url sautees : $nb_skip\n";
print "nombre d'url testees : $nb_url\n";
print "nombre de problemes  : $nb_bad\n";

foreach my $key (keys %code) {
	print "erreur $key : $code{$key}\n";
	foreach my $i ( 0 .. $#{ $errors{$key} } ) {
		print "  " . $errors{$key}[$i]  . "\n";
	}
}
