#!/usr/bin/perl

# histogram - a utility to solve Scan of the Month No.16

# Usage: histogram [-g] -f infile > outfile
#      -g generates graphic output (html tables)


use Getopt::Std;
# Parse the command line: -g is a flag and sets $opt_g, -f takes a value
# and sets $opt_f.  getopts returns false when it meets an unknown switch
# or a switch whose required argument is missing; stop with a usage
# message in that case rather than running on half-parsed options.
getopts('gf:') || die "Usage: $0 [-g] -f infile > outfile\n";

# Comparator for "sort numerically LIST": orders keys of the package
# global %histogram by descending count.  Keys with the same count are
# ordered by ascending byte value; without this tie-break their order
# would simply be the order in which the list was handed to sort (for
# "keys %histogram" that order is not fixed), making the report differ
# from run to run.
sub numerically {
	$histogram{$b} <=> $histogram{$a}
		|| $a cmp $b;
}

# Print the opening of the HTML document: <html>, a <head> whose <title>
# names the input file, and the opening <body> tag.
#
# Reads the package global $opt_f (the -f value from the command line).
# The name is entity-escaped on a private copy before being placed in
# the markup, because a file name may legitimately contain '&', '<' or
# '>' and would otherwise produce malformed HTML.  Takes no arguments;
# writes to the currently selected output handle.
sub html_header {
	my $title = defined($opt_f) ? $opt_f : '';
	$title =~ s/&/&amp;/g;	# must run first so the entities added below survive
	$title =~ s/</&lt;/g;
	$title =~ s/>/&gt;/g;

	print "<html>\n<head>\n\t<title>Histogram for $title</title>\n</head>\n\n";
	print "<body>\n";
}

# Print the closing </body> and </html> tags, one per line, to the
# currently selected output handle.  Takes no arguments.
sub html_footer {
	my @closing_tags = ('</body>', '</html>');
	print join('', map { "$_\n" } @closing_tags);
}


# ----------------------------------------------------------------------
# Main program.
#
# Counts how often each byte value occurs in the file named by -f
# ($opt_f), then prints a short summary followed by two reports:
#   1. "Frequency by Character" - one row for every byte value 0..255
#   2. "Character by Frequency" - one row per byte value that actually
#      occurred, ordered by the sort comparator "numerically"
# Output is plain text, or an HTML document when -g ($opt_g) was given.
#
# %histogram is deliberately left as a package global: the comparator
# "numerically" looks counts up in it.
# ----------------------------------------------------------------------

# Print the start of one report: a heading plus the two table header
# rows in HTML mode, or a plain "Title:" caption in text mode.
#   $html  - true for HTML output
#   $title - heading text
sub _report_start {
	my ($html, $title) = @_;

	if (!$html) {
		print "$title:\n";
		return;
	}

	print "<h2>$title</h2>\n";
	print "<table border=\"1\">\n";
	print "\t<tr>\n";
	print "\t\t<th colspan=\"2\">ASCII</th><th colspan=\"2\">Frequency</th>\n";
	print "\t</tr>\n\t<tr>\n";
	print "\t\t<th>dec</th><th>hex</th>\n";
	print "\t\t<th>abs</th><th>rel</th>\n";
	print "\t</tr>\n";
}

# Print one report row for a single byte value.
#   $html  - true for HTML output
#   $code  - byte value (0..255)
#   $count - number of occurrences (undef is treated as 0)
#   $total - total number of bytes read
sub _report_row {
	my ($html, $code, $count, $total) = @_;

	$count ||= 0;
	# An empty input file has $total == 0; report a relative frequency
	# of 0 instead of dying with "Illegal division by zero".
	my $relative = $total ? $count / $total : 0;

	if ($html) {
		print  "\t<tr>\n";
		printf "\t\t<td align=\"right\">%d</td>\n", $code;
		printf "\t\t<td align=\"right\">%2X</td>\n", $code;
		printf "\t\t<td align=\"right\">%d</td>\n", $count;
		printf "\t\t<td align=\"right\">%f</td>\n", $relative;
		# Bar length: 1000 pixels would correspond to 100% of the input.
		printf "\t\t<td align=\"left\"><img src=\"bar.gif\" height=\"10\" width=\"%d\"></td>\n",
			$relative * 1000;
		print  "\t</tr>\n";
	} else {
		printf "%3d (%2X) : %10d (%f)\n", $code, $code, $count, $relative;
	}
}

# Print the end of one report (closes the table in HTML mode; nothing to
# do in text mode).
sub _report_end {
	my ($html) = @_;
	print "</table>\n" if $html;
}

# -f is mandatory; without it there is nothing to read.
die "Usage: $0 [-g] -f infile > outfile\n"
	unless defined($opt_f) && $opt_f ne '';

# Open the input.  The three-argument form of open takes the name
# literally (no whitespace trimming, no special meaning for characters
# such as '&' or '|').  The one special name the old two-argument call
# honoured that is worth keeping, "-" for standard input, is handled
# explicitly.
my $input;
if ($opt_f eq '-') {
	$input = \*STDIN;
} else {
	open($input, '<', $opt_f) || die "$0: unable to open $opt_f: $!\n";
}
binmode($input);	# count raw bytes, whatever the platform or default layers

# Read the file in large chunks rather than one system call per byte.
my $characters = 0;
my $chunk;
while (1) {
	my $got = sysread($input, $chunk, 65536);
	# undef means a read error, which must not be mistaken for end of file.
	die "$0: error reading $opt_f: $!\n" unless defined $got;
	last if $got == 0;

	$characters += $got;
	$histogram{$_}++ for split(//, $chunk);
}

close($input);

html_header() if $opt_g;

# Summary.
if ($opt_g) {
	# Entity-escape a copy of the file name for use inside the markup.
	my $shown = $opt_f;
	$shown =~ s/&/&amp;/g;
	$shown =~ s/</&lt;/g;
	$shown =~ s/>/&gt;/g;

	print "<h1>Histogram for $shown</h1>\n";
	print "$characters characters read.<p>\n";
} else {
	print "File read: $opt_f\n";
	print "Characters read: $characters\n\n";
}

# Report 1: every byte value in numeric order, including those that
# never occurred.
_report_start($opt_g, 'Frequency by Character');
for my $code (0 .. 255) {
	# Plain read access: this does not create hash entries for bytes
	# that were never seen, so report 2 still lists only real ones.
	my $count = $histogram{chr($code)} || 0;
	_report_row($opt_g, $code, $count, $characters);
}
_report_end($opt_g);

# Report 2: only the byte values that occurred, most frequent first.
print "\n" unless $opt_g;	# blank line between the two text reports
_report_start($opt_g, 'Character by Frequency');
for my $char (sort numerically keys(%histogram)) {
	_report_row($opt_g, ord($char), $histogram{$char}, $characters);
}
_report_end($opt_g);

html_footer() if $opt_g;
