#!/usr/bin/perl -w

#
# rangelist.pl
# $Id$
#
# Copyright (C) 2016 University of Southern California.
# All rights reserved.                                            
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
#


=head1 NAME

rangelist.pl - check rangelists

=head1 SYNOPSIS

rangelist.pl [file...]

=head1 DESCRIPTION

Runs tests over tricky rangelist merging code.

=head1 OPTIONS

=over

=item B<-d>

Enable debugging output.

=item B<-v>

Enable verbose output.

=item B<--help>

Show help.

=item B<--man>

Show full manual.

=back

=cut

use strict;
use Pod::Usage;
use Getopt::Long;

use Test::More;

Getopt::Long::Configure ("bundling");
pod2usage(2) if ($#ARGV >= 0 && $ARGV[0] eq '-?');
#my(@orig_argv) = @ARGV;
my($prog) = $0;
my $debug = undef;
my $verbose = undef;
&GetOptions(
 	'help|?' => sub { pod2usage(1); },
	'man' => sub { pod2usage(-verbose => 2); },
	'd|debug+' => \$debug,   
        'v|verbose+' => \$verbose) or pod2usage(2);
pod2usage("$prog: no args allowed.\n") if ($#ARGV != -1);




#
# Take the string form of a rangelist and break it into start and end arrays.
#
sub decompose_rangelist($) {
    my($rl_str) = $_[0];
    my(@ss, @es);
    foreach (split(/,/, $rl_str)) {
	my($s, $e) = split(/-/, $_);
	die "unparsable range $_\n" if (!defined($s));
	$e //= $s;
	push(@ss, $s);
	push(@es, $e);
    };
    return (\@ss, \@es);
			}

sub min($$) {
    return $_[0] < $_[1] ? $_[0] : $_[1];
}

#
# Take two range lists (format like: 1-2,4,6-7)
# and merge them.
#
sub merge_ranges($$;$) {
    my(@rangelists) = ($_[0], $_[1]);
    my($report_overlap_as_error) = $_[2];

    #
    # fast path
    # optimize appending a simple other on a ranged one
    # merge_ranges("1-2", "3") => "1-3"
    #
    my($one, $other) = (undef, undef);
    if ($rangelists[1] =~ /^\d+$/) {
	($one, $other) = (0, 1);
    } elsif ($rangelists[0] =~ /^\d+$/) {
	($one, $other) = (1, 0);
    };
    if (defined($other) && $rangelists[$one] =~ /\-(\d+)$/) {
	# can try to fastpath
	# one:  1-2   (or more complex)
	# other:    3 (hopefully)
	my($one_e) = $1;
	my($other_s) = $rangelists[$other];
        if ($one_e == $other_s) {
            # no change needed
            if ($report_overlap_as_error) {
		print "e:overlapping-regions\t$rangelists[$one]+$other_s\n" if ($report_overlap_as_error > 1);
		return $rangelists[$one] . "/e";
	    } else {
		return $rangelists[$one];
	    };
        } elsif ($one_e + 1 == $other_s) {
            $rangelists[$one] =~ s/(\D?)(\d+)$/$1$other_s/;
            return $rangelists[$one];
    	};
	# fall through for slow path
    };

    #
    # slow path
    #
    # Decompose comma-separated list into array of ranges (start and ends).
    #    
    my(@ss, @es);
    foreach (0..1) {
        ($ss[$_], $es[$_]) = decompose_rangelist($rangelists[$_]);
    };

    #
    # Count how many lists each range occurs in.
    # If there is overlap, make more intermediate ranges.
    #
    # On exit of this loop, we have ONE rangelist in an array, plus counts.
    #
    # (And ick: this code is ALL corner cases.)
    #
    my(@count, @s, @e);
  buildcount:
    while (1) {
	#
	# Check if either side has drained.
	#
	foreach $one (0, 1) {
	    # print "checking $one for emtpy, is $#{$ss[$one]}\n";
	    $other = 1 - $one;
	    if ($#{$ss[$one]} == -1) {
		push(@count, (1) x ($#{$ss[$other]} + 1));
		push(@s, @{$ss[$other]});
		push(@e, @{$es[$other]});
		last buildcount;
	    };
	};
	#
	# assert(have stuff left in both)
	#
	# Make $one be the one the starts first
	# (so we only have a million cases, not four million.)
	#
	my($new_count) = 1;
	if ($ss[0][0] < $ss[1][0]) {
	    ($one, $other) = (0, 1);
	} elsif ($ss[0][0] > $ss[1][0]) {
	    ($one, $other) = (1, 0);
	} else {
	    # both start at same time
	    $new_count = 2;
	    # $one becomes the one that ends first
	    if ($es[0][0] <= $es[1][0]) {
		($one, $other) = (0, 1);
	    } else {
		($one, $other) = (1, 0);
	    };
	};
	#
	# assert($lists[$one] starts first (or at same time))
	#
	my($consume_one) = undef;
	push(@count, $new_count);
	push(@s, $ss[$one][0]);
	if ($ss[$one][0] < $ss[$other][0]) {
	    # one starts first
	    if ($es[$one][0] < $ss[$other][0]) {
		# and ends before other
		# +----+
		#         +----+
		# or abutts other (in which case we will merge later)
		# +----+
		#       +----+
		push(@e, $es[$one][0]);
		$consume_one = 1;
	    } elsif ($es[$one][0] >= $ss[$other][0]) {
		# and overlaps with other
		# +----+
		#      +----+
		# or
		# +----+
		#    +----+
		push(@e, $ss[$other][0]-1);
		$ss[$one][0] = $ss[$other][0];
		$consume_one = 0;
	    } else {
		die "invariant violated: one $one starts first\n";
	    };
	} elsif ($ss[$one][0] == $ss[$other][0]) {
	    # start at same time
	    push(@e, $es[$one][0]);
	    $consume_one = 1;
	    if ($es[$one][0] < $es[$other][0]) {
		# but one ends first
		# +----+
		# +--------+
		$ss[$other][0] = $es[$one][0]+1;
	    } elsif ($es[$one][0] == $es[$other][0]) {
		# complete overlap
		# +----+
		# +----+
		#
		# so also consume other here:
		shift @{$ss[$other]};
		shift @{$es[$other]};
	    } else {
		die "invariant violated: one $one and other $other start at same time and other ends first\n";
	    };
	} else {
	    die "invariant violated: one $one starts after other $other\n";
	};
	if ($consume_one) {
	    shift @{$ss[$one]};
	    shift @{$es[$one]};
	};
    };

    #
    # We now have a clean, single rangelist in an array, with counts.
    #
    # Now concatinate adjacent ranges and report overlap.
    #
    my($out, $error_out) = ("", "");
    while ($#s != -1) {
	if ($count[0] == 2) {
	    if ($report_overlap_as_error) {
		print "e:overlapping-regions\t$s[0]-$e[0]\n" if ($report_overlap_as_error > 1);
		$error_out = "/e";
	    };
	};
	# merge?
	if ($#s >= 1) {
	    if ($e[0]+1 >= $s[1]) {
		$s[1] = $s[0];
		shift @count;
		shift @s;
		shift @e;
		# no output
		next;
	    };
	};
	$out .= "," if ($out ne "");
	$out .= ($s[0] == $e[0] ? $s[0] : $s[0] . "-" . $e[0]);
	shift @count;
	shift @s;
	shift @e;
    };
    return $out . $error_out;
}


my @tests = (
    [ "1", "", "1", "", "pass" ],
    [ "1-1", "", "1", "", "accumulate range" ],
    [ "1", "2", "1-2", "", "merge adjacent, fastpath" ],
    [ "1-2", "3", "1-3", "", "merge long adjacent, fastpath" ],
    [ "1", "1", "1", "/e", "no change, fastpath" ],
    [ "1-2", "2", "1-2", "/e", "no change, long, fastpath" ],
    [ "1", "3", "1,3", "", "keep separate non-adjacent" ],
    [ "1-2", "3-4", "1-4", "", "merge adjacent long" ],
    [ "1-2", "4-5", "1-2,4-5", "", "keep separate non-adajcent" ],
    [ "1-2,4-5", "3", "1-5", "", "hole fill" ],
    [ "1-2,5-6", "3-4", "1-6", "", "hole fill long" ],
    #
    [ "1-6", "1-2", "1-6", "/e", "overlap head" ],
    [ "1-6", "3-4", "1-6", "/e", "overlap mid" ],
    [ "1-6", "5-6", "1-6", "/e", "overlap tail" ],
    #
    [ "1-6", "1-7", "1-7", "/e", "overlap plus head" ],
    [ "1-6", "3-7", "1-7", "/e", "overlap plus mid" ],
    [ "1-6", "6-7", "1-7", "/e", "abutt plus tail" ],
    );

foreach (@tests) {
    is(merge_ranges($_->[0], $_->[1]), $_->[2], $_->[4]);
    is(merge_ranges($_->[1], $_->[0]), $_->[2], $_->[4] . " (reverse)");
    is(merge_ranges($_->[0], $_->[1], 1), $_->[2] . $_->[3], $_->[4] . " (errorout)");
    is(merge_ranges($_->[1], $_->[0], 1), $_->[2]. $_->[3], $_->[4] . " (errorout, reverse)");
};
done_testing();
