#!/usr/bin/perl -w

# decomp_xref_jumps : A decomp filter to cross reference jump instructions.
# Copyright (C) 2002 Dion Mendel
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.

# decomp_xref_jumps visually links jumps with the destination address
# This makes reading the dump much easier.
#
# Sample usage:
# % gendump the-binary | decomp_xref_jumps > dump

use Getopt::Long "GetOptions";
use FileHandle;
use strict 'vars';
use vars '$VERSION', '$Verbose';

$VERSION = "1.0";                            # version of this program
$Verbose = 1;


&parse_command_line_for_options();
&usage if (scalar @ARGV != 0);

# do the filtering
&filter();


exit 0;


################################ Obj Functions ################################

# -----------------------------------------------------------------------------
# Creates a new jump object.  An jump object is a representation of the
# end points of a jump instruction.
# Params: $end1, $end2 - the two endpoints of the jump
# Returns: reference to an object

sub new_jump($$)
{
   my ($end1, $end2) = @_;
   my ($a, $b);

   if ($end1 < $end2) {
      ($a, $b) = ($end1, $end2);
   }
   else {
      ($a, $b) = ($end2, $end1);
   }

   return { start => $a, end => $b };
}

# -----------------------------------------------------------------------------
# Returns whether the two given jump objects intersect with each other or not.
# Params: $obj1, $obj2 objects to test
# Returns: 1 if the objects intersect, 0 otherwise.

sub jumps_intersect($$)
{
   my ($a, $b) = @_;
   my ($j1, $j2);

   # order so that the start of $j1 is before the start of $j1
   if ($a->{start} < $b->{start}) {
      ($j1, $j2) = ($a, $b);
   }
   else {
      ($j1, $j2) = ($b, $a);
   }

   # intersection occurs if the start of $j2 is before the end of $j1
   return ($j2->{start} < $j1->{end});
}

# -----------------------------------------------------------------------------
# For the given object and a list of objects, return the first index of
# an object in the list which intersects with the given object.
# Params: $obj - object to check
#         \@list - list of objects
# Returns: index of intersecting object, or -1 if there are no intersections.

sub intersection_index($\@)
{
   my ($jmp, $ref_list) = @_;
   my ($i, $index);

   $index = -1;
   for ($i = 0; $i < scalar @$ref_list; $i++) {
      if (&jumps_intersect($jmp, $ref_list->[$i])) {
         $index = $i;
         last;
      }
   }

   return $index;
}

# -----------------------------------------------------------------------------
# For the given object and a list of objects, returns whether the given object
# intersects with any of the objects in the list.
# Params: $obj - object to check
#         \@list - list of objects
# Returns: 0 if no intersections, 1 if there exists an intersection.

sub intersects($\@)
{
   my ($jmp, $ref_list) = @_;

   return (&intersection_index($jmp, $ref_list) != -1);
}

############################### Filter Functions ##############################

# Note:  the jump list is a list of arrays.  Each array contains jump objects
# that overlap with each other.

sub add_jump($\@)
{
   my ($jmp, $ref_jumps) = @_;
   my ($i, $ref_list);

   my @matches = ();

   for ($i = 0; $i < scalar @$ref_jumps; $i++) {
      if (&intersects($jmp, $ref_jumps->[$i])) {
         # needed so that matches is in descending order
         unshift @matches, $i;
      }
   }

   # create a new list consisting of the element to add
   my @new_list = ($jmp);

   # append all matching jumps
   foreach $i (@matches) {
      # remove list from jumps list, and append to new list
      $ref_list = splice(@$ref_jumps, $i, 1);
      push @new_list, @$ref_list;
   }

   # store the new list in the jumps list
   push(@$ref_jumps, \@new_list);
}

# -----------------------------------------------------------------------------
# Reads in data from stdin, performs the filter, then write result to stdout.
# Params: none
# Returns: none

sub filter()
{
   my ($i, $line, $str, $cur_offset, $jmp_end);

   my @jumps = ();           # jump endpoints
   my %offsets = ();         # key: offset, value: index into @lines
   my @lines = ();           # accumulated lines from stdin

   $i = 0;
   # accumulate each line
   while ($line = <STDIN>) {
      chomp($line);
      # skip empty lines and comment lines
      unless ($line =~ /^$/ or $line =~ /^#/ or $line =~ /^;/) {
         # get offset of current line
         $str = '0x' . substr($line, 0, 8);
         $cur_offset = oct($str);

         # remember position of this offset in the @lines array
         $offsets{$cur_offset} = $i;

         # if line contains a jump opcode
         if ($line =~ /j[a-z]+\s+(0x[0-9a-f]*)/) {
            $jmp_end = oct($1);

            # store the jump endpoints
            &add_jump(&new_jump($cur_offset, $jmp_end), \@jumps);
         }
      }

      # accumulate lines
      push @lines, $line;

      # increment counter
      $i++;
   }

   my @new_jumps = &process_jumps(\@jumps);

   my ($level, $chunk, $start, $end, $max, $line_length, $list_ref, $ref);

   my %marked_offsets = ();

   # extra processing
   foreach $chunk (@new_jumps) {
      $start = $offsets{$chunk->{min}};
      $end = $offsets{$chunk->{max}};

      # find max line length in this range
      $max = 0;
      for ($i = $start; $i <= $end; $i++) {
         $line = $lines[$i];
         unless ($line =~ /^$/ or $line =~ /^#/ or $line =~ /^;/) {
            if (length($line) > $max) {
               $max = length($line);
            }
         }
      }

      $start = $chunk->{min};
      $end = $chunk->{max};
      $list_ref = $chunk->{list};

      $line_length = $max + 2;
      if ($line_length < 45) {
         my $len = 80 - ($end - $start);
         $line_length = ($len > 45) ? $len : 45;
      }

      for ($i = $start; $i <= $end; $i++) {
         $str = "";
         foreach $level (@$list_ref) {
            $str .= &get_marker($i, $level);
         }
         $marked_offsets{$i} = [$line_length, $str];
      }
   }

   # do output
   foreach $line (@lines) {
      # skip empty lines and comment lines
      unless ($line =~ /^$/ or $line =~ /^#/ or $line =~ /^;/) {
         # get offset of current line
         $str = '0x' . substr($line, 0, 8);
         $cur_offset = oct($str);

         if (exists $marked_offsets{$cur_offset}) {
            $ref = $marked_offsets{$cur_offset};
            ($line_length, $str) = @$ref;
            printf STDOUT ("%-${line_length}s%s\n", $line, $str);
         }
         else {
            print STDOUT $line, "\n";
         }
      }
      else {
         print STDOUT $line, "\n";
      }
   }
}

# -----------------------------------------------------------------------------
# Given a list of jump objects, and an offset, determine which output
# marker should be used at that offset.
# Params: $offset - offset in question
#         \@list - list of jump objects
# Returns: marker to use.

sub get_marker($\@)
{
   my ($offset, $list_ref) = @_;
   my ($jmp, $marker);

   $marker = " ";
   foreach $jmp (@$list_ref) {
      if (($jmp->{start} == $offset) or ($jmp->{end} == $offset)) {
         $marker = "*";
         last;
      }
      if (($jmp->{start} < $offset) and ($offset < $jmp->{end})) {
         $marker = "|";
         last;
      }
   }

   return $marker;
}

################################ Jump Functions ###############################

# -----------------------------------------------------------------------------
# Processes the jump list, and returns a structure which can be used to
# nicely print the jump cross references.
# Params: \@list - jump list of jump objects
# Returns: a list containing chunks.
#   A chunk is a hash with keys 'min' 'max' 'list'
#     min and max are the extents of the jump objects in list
#     list is a list containing sublists
#       each sublist contains a number of non intersecting jump objects

sub process_jumps(\@)
{
   my ($ref_jumps) = @_;
   my ($min, $max, $el, $jmp_list, $chunk, $list_ref);
   my (@to_sort, @sorted, @jumps);

   # sort jump sections
   @to_sort = ();
   foreach $jmp_list (@$ref_jumps) {
      # find minimum starting offset in the jump list
      $min = 0xffffffff;
      foreach $el (@$jmp_list) {
         if ($el->{start} < $min) {
            $min = $el->{start};
         }
      }

      # find maximum ending offset in the jump list
      $max = 0;
      foreach $el (@$jmp_list) {
         if ($el->{end} > $max) {
            $max = $el->{end};
         }
      }

      # store min / max in preparation for sort
      push @to_sort, { min => $min, max => $max, list => $jmp_list };
   }

   # sort according to starting offset
   @sorted = sort { $a->{min} <=> $b->{min} } @to_sort;

   @jumps = ();

   # break sorted list into structure to return
   foreach $chunk (@sorted) {
      $list_ref = $chunk->{list};
      my @main_list = ();
      while (scalar @$list_ref > 0) {
         my @sub_list = &remove_level($list_ref);
         unshift @main_list, \@sub_list;
      }
      push(@jumps, { min => $chunk->{min}, max => $chunk->{max},
                     list => \@main_list });
   }

   return @jumps;
}

# -----------------------------------------------------------------------------
# Given a list of intersecting jump objects.  Remove and return a 'level'
# of objects.  A level is a list of non intersecting jump objects.
# Params: \@list - list of objects
# Returns: array of non intersecting jump objects.

sub remove_level(\@)
{
   my ($list_ref) = @_;
   my ($i, $done, $index, @level, @pending);

   @level = ();

   # remove largest jump object and store in @level
   $index = &index_of_largest($list_ref);
   push @level, splice(@$list_ref, $index, 1);

   # keep looping until done
   $done = 0;
   while (not $done) {

      @pending = ();

      # get indexes of all jump object which do not intersect with @level
      for ($i = 0; $i < scalar @$list_ref; $i++) {
         if (! &intersects($list_ref->[$i], \@level)) {
            push @pending, $i;
         }
      }

      # were there any non intersecting jump objects?
      if (scalar @pending > 0) {
         # add largest non intersecting jump to @level
         my @list = map { $list_ref->[$_] } @pending;
         $index = &index_of_largest(\@list);
         push @level,  splice(@$list_ref, $pending[$index], 1);
      }
      else {
         # all object intersect, so we are done
         $done = 1;
      }

   }

   return @level;
}

# -----------------------------------------------------------------------------
# For the given list of jump objects, return the index of the jump object with
# the largest range.
# Params: \@list - list of objects
# Returns: index of object with largest range.

sub index_of_largest(\@)
{
   my ($list_ref) = @_;
   my ($i, $max, $diff, $index);

   # find index of jump object with largest range
   $max = 0;
   $index = 0;
   for ($i = 0; $i < scalar @$list_ref; $i++) {
      $diff = $list_ref->[$i]->{end} - $list_ref->[$i]->{start};
      if ($diff > $max) {
         $max = $diff;
         $index = $i;
      }
   }

   # return the index of the largest
   return $index;
}

############################### Usage Functions ###############################

# -----------------------------------------------------------------------------
# Parses the command line for any specified options.  Sets the appropriate
# option flags if options are specified.  Prints usage info if invalid options
# are given.
# Returns: nothing

sub parse_command_line_for_options()
{
   my ($want_quiet)   = 0;
   my ($want_version) = 0;
   my ($want_help)    = 0;

   &GetOptions("q|quiet"   => \$want_quiet,
               "V|version" => \$want_version,
               "h|help"    => \$want_help,
              );

   if ($want_version) {
      print "$0 $VERSION\n";
      exit 0;
   }

   if ($want_help) {
      &usage();
   }

   $Verbose = !$want_quiet;
}

# -----------------------------------------------------------------------------
# Prints a nice usage message to stdout, and then exits.

sub usage()
{
   print <<"_END";

$0 v${VERSION}
A decomp filter to cross reference jumps with the destination address
Reads in the current dump from stdin and writes the modified dump to stdout.

Usage: $0 [options]

Options:
    -V, --version         outputs version information and exits
    -h, --help            displays this help and exits

_END

   exit 1;
}

