#!/usr/bin/perl

#
# dbrowdiff.pm
# Copyright (C) 1991-2024 by John Heidemann <johnh@isi.edu>
#
# This program is distributed under terms of the GNU general
# public license, version 2.  See the file COPYING
# in $dblibdir for details.
#

package Fsdb::Filter::dbrowdiff;

=head1 NAME

dbrowdiff - compute row-by-row differences of some column

=head1 SYNOPSIS

dbrowdiff [-B|-I|-F] [-A AbsDiffColumnName] [-P PctDiffColumnName] column

=head1 DESCRIPTION

For a given column, compute the differences between each row
of the table.  Differences are output to two new columns,
C<absdiff> and C<pctdiff>.

Differences are either relative to the previous row
(I<incremental> mode), or relative to the first row
(I<baseline> mode), the default.
Alternatively, in I<future> mode, differences
are between the I<next> row and the current row.

If column names are given, with C<-A> or C<-P>,
then only columns with that name are produced.


=head1 OPTIONS

=over 4

=item B<-B> or B<--baseline>

Select baseline mode (the default), where differences are relative to the first row.

=item B<-I> or B<--incremental>

Select incremental mode, where differences are relative to the previous row.

=item B<-F> or B<--future>

Select future incremental mode, where differences are
incremental between the next row and the current one.

=item B<-A> COL or B<--absdiff> COL

Name the absolute difference output column COL,
and don't output percent difference unless C<-P> is given.

=item B<-P> COL or B<--pctdiff> COL

Name the percent difference output column COL,
and don't output absolute difference unless C<-A> is given.

=item B<-f FORMAT> or B<--format FORMAT>

Specify a L<printf(3)>-style format for output statistics.
Defaults to C<%.5g>.

=item B<-e> EmptyValue or B<--empty>

Specify the value for the last row when in future mode.

=back


=for comment
begin_standard_fsdb_options

This module also supports the standard fsdb options:

=over 4

=item B<-d>

Enable debugging output.

=item B<-i> or B<--input> InputSource

Read from InputSource, typically a file name, or C<-> for standard input,
or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.

=item B<-o> or B<--output> OutputDestination

Write to OutputDestination, typically a file name, or C<-> for standard output,
or (if in Perl) a IO::Handle, Fsdb::IO or Fsdb::BoundedQueue objects.

=item B<--autorun> or B<--noautorun>

By default, programs process automatically,
but Fsdb::Filter objects in Perl do not run until you invoke
the run() method.
The C<--(no)autorun> option controls that behavior within Perl.

=item B<--help>

Show help.

=item B<--man>

Show full manual.

=back

=for comment
end_standard_fsdb_options


=head1 SAMPLE USAGE

=head2 Input:

    #fsdb      event   clock:d
    _null_getpage+128       815812813.281756
    _null_getpage+128       815812813.328709
    _null_getpage+128       815812813.353830
    _null_getpage+128       815812813.357169
    _null_getpage+128       815812813.375844
    _null_getpage+128       815812813.378358
    #  | /home/johnh/BIN/DB/dbrow 
    #  | /home/johnh/BIN/DB/dbcol event clock

=head2 Command:

    cat DATA/kitrace.fsdb | dbrowdiff clock

=head2 Output:

    #fsdb      event   clock:d   absdiff:d pctdiff:d
    _null_getpage+128       815812813.281756        0       0
    _null_getpage+128       815812813.328709        0.046953        5.7554e-09
    _null_getpage+128       815812813.353830        0.072074        8.8346e-09
    _null_getpage+128       815812813.357169        0.075413        9.2439e-09
    _null_getpage+128       815812813.375844        0.094088        1.1533e-08
    _null_getpage+128       815812813.378358        0.096602        1.1841e-08
    #  | /home/johnh/BIN/DB/dbrow 
    #  | /home/johnh/BIN/DB/dbcol event clock
    #  | dbrowdiff clock


=head1 SEE ALSO

L<Fsdb>.
L<dbcolmovingstats>.
L<dbrowuniq>.
L<dbfilediff>.

L<dbrowdiff>, L<dbrowuniq>, and L<dbfilediff> are similar but different.
L<dbrowdiff> computes row-by-row differences for a column,
L<dbrowuniq> eliminates rows that have no differences,
and L<dbfilediff> compares fields of two files.


=head1 CLASS FUNCTIONS

=cut

# This filter inherits from the generic Fsdb::Filter class.
our @ISA = ('Fsdb::Filter');
# Module version number.
our $VERSION = 2.0;

use strict;
use Pod::Usage;
use Carp;

use Fsdb::Filter;
use Fsdb::IO::Reader;
use Fsdb::IO::Writer;


=head2 new

    $filter = new Fsdb::Filter::dbrowdiff(@arguments);

Create a new dbrowdiff object, taking command-line arguments.

=cut

# Constructor: build the object through the parent class, bless it into
# $class, then apply defaults, parse the arguments, and run the parent's
# post_new hook.  Returns the new object.
sub new ($@) {
    my $class = shift;
    my $self = bless($class->SUPER::new(@_), $class);
    $self->set_defaults();
    $self->parse_options(@_);
    $self->SUPER::post_new();
    return $self;
}


=head2 set_defaults

    $filter->set_defaults();

Internal: set up defaults.

=cut

# Establish default settings: parent defaults first, then this filter's own.
sub set_defaults ($) {
    my $self = shift;
    $self->SUPER::set_defaults();
    # 'B' is baseline mode; parse_options may switch it to 'I' or 'F'.
    $self->{_mode} = 'B';
    $self->{_format} = '%.5g';
    # Output column names stay undefined unless -A / -P supply them.
    $self->{_absdiff_column} = undef;
    $self->{_pctdiff_column} = undef;
}

=head2 parse_options

    $filter->parse_options(@ARGV);

Internal: parse command-line arguments.

=cut

# Parse the argument list: record option values on the object, then hand
# the remaining arguments to parse_target_column.
# Calls pod2usage(2) when get_options reports failure.
sub parse_options ($@) {
    my($self, @argv) = @_;

    # Builds a handler that records the given one-letter mode code.
    my $mode_handler = sub {
        my($mode) = @_;
        return sub { $self->{_mode} = $mode; };
    };
    # Builds a handler that passes the option on to parse_io_option,
    # tagged with the direction it belongs to.
    my $io_handler = sub {
        my($direction) = @_;
        return sub { $self->parse_io_option($direction, @_); };
    };

    my $parsed_ok = $self->get_options(
        \@argv,
        'help|?' => sub { pod2usage(1); },
        'man' => sub { pod2usage(-verbose => 2); },
        'autorun!' => \$self->{_autorun},
        'A|absdiff=s' => \$self->{_absdiff_column},
        'B|baseline' => $mode_handler->('B'),
        'close!' => \$self->{_close},
        'd|debug+' => \$self->{_debug},
        'e|empty=s' => \$self->{_empty},
        'f|format=s' => \$self->{_format},
        'F|future' => $mode_handler->('F'),
        'i|input=s' => $io_handler->('input'),
        'I|incremental' => $mode_handler->('I'),
        'log!' => \$self->{_logprog},
        'o|output=s' => $io_handler->('output'),
        'P|pctdiff=s' => \$self->{_pctdiff_column},
    );
    pod2usage(2) if (!$parsed_ok);
    $self->parse_target_column(\@argv);
}

=head2 setup

    $filter->setup();

Internal: setup, parse headers.

=cut

# Open input and output, locate the target column, and declare the new
# difference columns on the output.
# Calls pod2usage(2) if no target column was given; croaks if the target
# column is missing from the input or an output column cannot be created.
sub setup ($) {
    my($self) = @_;

    pod2usage(2) if (!defined($self->{_target_column}));

    # Future mode gets the delaying comment handler; other modes pass
    # comments straight through.
    $self->finish_io_option('input', -comment_handler =>
                            ($self->{_mode} eq "F" ?
                             $self->create_delay_comments_sub :
                             $self->create_pass_comments_sub));

    $self->{_target_coli} = $self->{_in}->col_to_i($self->{_target_column});
    croak($self->{_prog} . ": target column " . $self->{_target_column} . " is not in input stream.\n")
        if (!defined($self->{_target_coli}));

    # With neither -A nor -P, produce both columns under their default
    # names; otherwise produce only the column(s) that were named.
    if (!defined($self->{_absdiff_column}) && !defined($self->{_pctdiff_column})) {
        $self->{_absdiff_column} = 'absdiff';
        $self->{_pctdiff_column} = 'pctdiff';
    }
    my @new_colnames = grep { defined($_) }
        ($self->{_absdiff_column}, $self->{_pctdiff_column});

    $self->finish_io_option('output', -clone => $self->{_in}, -outputheader => 'delay');
    # NOTE(review): the new columns get type 'q' only when
    # col_type_is_numeric returns exactly 1, and 'd' otherwise -- confirm
    # against that method's return values.
    my $destination_type = ($self->{_in}->col_type_is_numeric($self->{_target_column}) == 1 ? 'q' : 'd');
    foreach my $colname (@new_colnames) {
        $self->{_out}->col_create("$colname:$destination_type")
            or croak($self->{_prog} . ": cannot create column $colname (maybe it already existed?)\n");
    }
}

=head2 run

    $filter->run();

Internal: run over each row.

=cut
# Read every input row, fill in the requested difference columns, and
# write the row to the output.
#
# Baseline mode compares each row with the first row, incremental mode
# with the previous row.  Future mode compares the next row with the
# current one, so each row is written only after its successor is read.
# In every mode a percent difference relative to zero is written as
# $self->{_empty} instead of being computed.
sub run ($) {
    my($self) = @_;

    my $read_fastpath_sub = $self->{_in}->fastpath_sub();
    my $write_fastpath_sub = $self->{_out}->fastpath_sub();

    my $target_coli = $self->{_target_coli};
    # An undefined index means that output column was not requested.
    my $absdiff_coli = defined($self->{_absdiff_column}) ? $self->{_out}->col_to_i($self->{_absdiff_column}) : undef;
    my $pctdiff_coli = defined($self->{_pctdiff_column}) ? $self->{_out}->col_to_i($self->{_pctdiff_column}) : undef;
    my $format = $self->{_format};
    my $empty = $self->{_empty};
    my $incremental_mode = ($self->{_mode} eq 'I');
    my $future_mode = ($self->{_mode} eq 'F');

    my $fref;

    if ($future_mode) {
        my $last_fref;
        my $input_delay_comments = $self->{_delay_comments}[0];
        while ($fref = $read_fastpath_sub->()) {
            if (defined($last_fref)) {
                # The held-back row is the reference for the row just read.
                my $last_value = $last_fref->[$target_coli];
                my $absdiff = $fref->[$target_coli] - $last_value;
                if (defined($absdiff_coli)) {
                    $last_fref->[$absdiff_coli] = sprintf($format, $absdiff);
                }
                if (defined($pctdiff_coli)) {
                    # Dividing by a zero reference is fatal in Perl, so emit
                    # the empty value, as the non-future modes do.
                    $last_fref->[$pctdiff_coli] = ($last_value == 0)
                        ? $empty
                        : sprintf($format, ($absdiff / $last_value) * 100.0);
                }
                # emit one behind
                $write_fastpath_sub->($last_fref);
                $input_delay_comments->flush($self->{_out});
            }
            $last_fref = $fref;
        }
        # The final row has no successor, so its differences are empty.
        if (defined($last_fref)) {
            if (defined($absdiff_coli)) {
                $last_fref->[$absdiff_coli] = $empty;
            }
            if (defined($pctdiff_coli)) {
                $last_fref->[$pctdiff_coli] = $empty;
            }
            $write_fastpath_sub->($last_fref);
        }
    } else {
        # non-future mode
        my $base;
        while ($fref = $read_fastpath_sub->()) {
            my $value = $fref->[$target_coli];
            my $absdiff;
            my $pctdiff;
            if (defined($base)) {
                $absdiff = $value - $base;
                $pctdiff = ($absdiff / $base) * 100.0 if ($base != 0);
            } else {
                # The first row becomes the base and differs from itself by zero.
                $base = $value;
                $absdiff = $pctdiff = 0.0;
            }
            if (defined($absdiff_coli)) {
                $fref->[$absdiff_coli] = sprintf($format, $absdiff);
            }
            if (defined($pctdiff_coli)) {
                $fref->[$pctdiff_coli] = ($base == 0) ? $empty : sprintf($format, $pctdiff);
            }
            # Incremental mode moves the base forward to the row just handled.
            $base = $value if ($incremental_mode);
            $write_fastpath_sub->($fref);
        }
    }
}

=head1 AUTHOR and COPYRIGHT

Copyright (C) 1991-2024 by John Heidemann <johnh@isi.edu>

This program is distributed under terms of the GNU general
public license, version 2.  See the file COPYING
with the distribution for details.

=cut

1;
