#!/usr/bin/env perl
use strict;
use warnings;
use feature 'say';
use autodie;

use FindBin '$Bin';
use lib "$Bin/lib";

use Vnlog::Util qw(parse_options read_and_preparse_input ensure_all_legends_equivalent reconstruct_substituted_command get_key_index);




# Getopt-style option specs accepted on the commandline. The list comes from
# the struct option long_options in sort.c in GNU coreutils, plus the
# vnlog-specific options at the end
my @specs = ( # options with no args
              "ignore-leading-blanks|b",
              "debug",
              "dictionary-order|d",
              "ignore-case|f",
              "general-numeric-sort|g",
              "ignore-nonprinting|i",
              "merge|m",
              "month-sort|M",
              "numeric-sort|n",
              "human-numeric-sort|h",
              "version-sort|V",
              "random-sort|R",
              "reverse|r",
              "stable|s",
              "unique|u",
              "zero-terminated|z",

              # options with args
              "compress-program=s",
              "files0-from=s",
              "key|k=s@",
              "random-source=s",
              "sort=s",
              "output|o=s",
              "batch-size=s",
              "buffer-size|S=s",
              "field-separator|t=s",
              "temporary-directory|T=s",
              "parallel=s",

              # '--check' and '--check=...' are valid. '--check ...' is NOT
              # valid. parse_options handles that
              "check:s",
              "c|C",

              "vnl-tool=s",

              # Deliberately no short alias: -h already means
              # --human-numeric-sort (as it does in sort itself), and a second
              # spec claiming 'h' would conflict with it
              "help");

# Options that sort itself accepts, but which this wrapper refuses. Each option
# name maps to the explanation that is reported if the user passes that option
my %options_unsupported =
  (
   'files0-from' =>
     "Support for this could be added but has not been (yet)\n",

   'output' =>
     "Can't support this. sort calls ftruncate(file), so if I write out the legend\n" .
     "before calling sort, this is clobbered. Or if I use a pipe, the ftruncate() will\n" .
     "fail and sort barfs\n",

   'field-separator' =>
     "vnlog is built on assuming a particular field separator\n",

   'zero-terminated' =>
     "vnlog is built on assuming a particular record separator\n",
  );

# Usage text handed to parse_options(). $0 is interpolated, so the message
# names this script the way it was invoked
my $usage = <<EOF;
  $0 [options] logfile logfile logfile ... < logfile

The most common options are (from the GNU sort manpage)

  -k, --key=KEYDEF
         sort via a key. This is the vnlog field we're sorting on

  -s, --stable
         stabilize sort by disabling last-resort comparison

  -u, --unique
         with -c, check for strict ordering;
         without -c, output only the first of an equal run

  -f, --ignore-case
         fold lower case to upper case characters

  -r, --reverse
         reverse the result of comparisons

These all choose the sort order:

  -d, --dictionary-order
         consider only blanks and alphanumeric characters

  -g, --general-numeric-sort
         compare according to general numerical value

  -h, --human-numeric-sort
         compare human readable numbers (e.g., 2K 1G)

  -M, --month-sort
         compare (unknown) < 'JAN' < ... < 'DEC'

  -n, --numeric-sort
         compare according to string numerical value

  -R, --random-sort
         shuffle, but group identical keys.  See shuf(1)

  -V, --version-sort
         natural sort of (version) numbers within text

  --sort=WORD
         sort according to WORD: general-numeric -g, human-numeric -h,
         month -M, numeric -n, random -R, version -V

There's also a vnlog-specific option

  --vnl-tool tool
       Specifies the path to the tool we're wrapping. By default we wrap 'sort',
       so most people can omit this
EOF

# Parse the commandline against @specs. The result is used below as the list of
# input files and a hashref of the options that were given.
# NOTE(review): the meaning of the third argument (0) is defined by
# Vnlog::Util::parse_options -- confirm there
my ($filenames,$options) = parse_options(\@ARGV, \@specs, 0, $usage);

# Wrap plain 'sort' unless the user asked for a different tool
$options->{'vnl-tool'} //= 'sort';

# Refuse to run if any option we were given appears in %options_unsupported,
# telling the user which option it was and why it can't be used
foreach my $optname (keys %$options)
{
    my $reason = $options_unsupported{$optname};
    next unless $reason;

    # Single-letter options are reported in their short form, everything else
    # in the long form
    my $dashes = (length($optname) == 1) ? '-' : '--';
    die("I don't support $dashes$optname: $reason");
}

# Always have the wrapped tool ignore leading blanks.
# NOTE(review): presumably needed because sort, without -t, counts the blanks
# preceding a field as part of that field -- confirm against the sort manual
$options->{'ignore-leading-blanks'} = 1;

# Read the inputs, insist that all of them agree on the legend, then translate
# the field names given to -k into the form that sort understands
my $inputs = read_and_preparse_input($filenames);
ensure_all_legends_equivalent($inputs);
substitute_field_keys($options, $inputs->[0]);
my $ARGV_new = reconstruct_substituted_command($inputs, $options, [], \@specs);

# The legend is written by us; the wrapped tool then produces everything that
# follows it
say '# ' . join(' ', @{$inputs->[0]{keys}});

# exec only returns if it failed. 'use autodie' does not cover exec by default,
# so check explicitly: otherwise a bad --vnl-tool would leave us exiting
# successfully after printing nothing but the legend
my $tool = $options->{'vnl-tool'};
exec $tool, @$ARGV_new
  or die "Couldn't exec '$tool': $!\n";


# Rewrites every -k option in place, from the vnl-sort form "FIELD[.OPTS]" into
# the "N,N[OPTS]" KEYDEF form that sort expects.
#
# Arguments:
#   $options: hashref of parsed options. If $options->{key} is defined, it is
#             an arrayref of key definitions, and its elements are replaced
#   $inputs:  handed to get_key_index() together with the field name to look
#             up the field's index (the caller passes the first parsed input)
#
# Dies if a key definition doesn't look like "FIELD[.OPTS]". Does nothing if no
# -k option was given
sub substitute_field_keys
{
    my ($options, $inputs) = @_;

    return unless defined $options->{key};

    # manpage of sort says that key definitions are given as
    # "F[.C][OPTS][,F[.C][OPTS]]". By default, sort works not just off the field
    # alone, but off the whole line, starting at that field, which is crazy. To
    # do the sane thing of just looking at the field you give the same field
    # again after the comma. I.e. to sort JUST on field 2 you'd say
    # 'sort -k 2,2'. vnl-sort always generates this form itself, so it doesn't
    # accept a ',' in -k. vnl-sort also doesn't support the C. I DO want to
    # support OPTS, so the vnl-sort syntax is 'vnl-sort -k field[.OPTS]' where
    # OPTS are just like in sort: one or more of [bdfgiMhnRrV]

    # The loop variable aliases the array elements, so assigning to it updates
    # $options->{key} in place
    for my $keydef (@{$options->{key}})
    {
        # Copy the captures into lexicals right away instead of handing the
        # magic $1 to another function, and name the offending keydef in the
        # error message
        my ($field, $opts) = $keydef =~ /^([^\.]+)(?:\.([bdfgiMhnRrV]+?))?$/
          or die "Couldn't parse '$keydef' as a sort KEYDEF";

        my $index = get_key_index($inputs, $field);
        $opts //= '';

        $keydef = "$index,$index$opts";
    }
}

__END__

=head1 NAME

vnl-sort - sorts a vnlog file, preserving the legend

=head1 SYNOPSIS

 $ cat a.vnl
 # a b
 AA 11
 bb 12
 CC 13
 dd 14
 dd 123

 Sort lexically by a:
 $ <a.vnl vnl-sort -k a
 # a b
 AA 11
 CC 13
 bb 12
 dd 123
 dd 14

 Sort lexically by a, ignoring case:
 $ <a.vnl vnl-sort -k a --ignore-case
 # a b
 AA 11
 bb 12
 CC 13
 dd 123
 dd 14

 Sort lexically by a, then numerically by b:
 $ <a.vnl vnl-sort -k a -k b.n
 # a b
 AA 11
 CC 13
 bb 12
 dd 14
 dd 123

 Sort lexically by a, then numerically by b in reverse:
 $ <a.vnl vnl-sort -k a -k b.nr
 # a b
 AA 11
 CC 13
 bb 12
 dd 123
 dd 14


 Sort by month and then day:
 $ cat dat.vnl
 # month day
 March 5
 Jan 2
 Feb 1
 March 30
 Jan 21

 $ <dat.vnl vnl-sort -k month.M -k day.n
 # month day
 Jan 2
 Jan 21
 Feb 1
 March 5
 March 30


=head1 DESCRIPTION

  Usage: vnl-sort [options] logfile logfile logfile ... < logfile

This tool sorts given vnlog files in various ways. C<vnl-sort> is a
wrapper around the GNU coreutils C<sort> tool. Since this is a wrapper, most
commandline options and behaviors of the C<sort> tool are present; consult the
L<sort(1)> manpage for detail. The differences from GNU coreutils C<sort> are

=over

=item *

The input and output to this tool are vnlog files, complete with a legend

=item *

The columns are referenced by name, not index. So instead of saying

  sort -k1

to sort by the first column, you say

  vnl-sort -k time

to sort by column "time".

=item *

The fancy C<KEYDEF> spec from C<sort> is only partially supported. I only allow
us to sort by full I<fields>, so the start/stop positions don't make sense. I
I<do> support the C<OPTS> to change the type of sorting in a given particular
column. For instance, to sort by month and then by day, do this (see example
above):

  vnl-sort -k month.M -k day.n

=item *

C<--files0-from> is not supported due to lack of time. If somebody really needs
it, talk to me.

=item *

C<--output> is not supported due to an uninteresting technical limitation. The
output always goes to standard out.

=item *

C<--field-separator> is not supported because vnlog assumes
whitespace-separated fields

=item *

C<--zero-terminated> is not supported because vnlog assumes
newline-separated records

=item *

By default we call the C<sort> tool to do the actual work. If the underlying
tool has a different name or lives in an odd path, this can be specified by
passing C<--vnl-tool TOOL>

=back

Past that, everything C<sort> does is supported, so see that man page for
detailed documentation. Note that all non-legend comments are stripped out,
since it's not obvious where they should end up.

=head1 COMPATIBILITY

I use GNU/Linux-based systems exclusively, but everything has been tested
functional on FreeBSD and OSX in addition to Debian, Ubuntu and CentOS. I can
imagine there's something I missed when testing on non-Linux systems, so please
let me know if you find any issues.

=head1 SEE ALSO

L<sort(1)>

=head1 REPOSITORY

https://github.com/dkogan/vnlog/

=head1 AUTHOR

Dima Kogan C<< <dima@secretsauce.net> >>

=head1 LICENSE AND COPYRIGHT

Copyright 2018 Dima Kogan C<< <dima@secretsauce.net> >>

This library is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 2.1 of the License, or (at your option) any
later version.

=cut
