#!/usr/bin/env perl

use strict;
use File::Basename qw(basename);
use File::Copy qw(move);
use Getopt::Long;

# Command-line options:
#   --out FILE         output file; its name is also the prefix of the side
#                      files written next to it ('FILE.names', 'FILE.tail', ...)
#   --read-length N    length passed to fastq-from-sequence (1..4000)
#   --mode fast|slow   selects the fastqz sub-command used for compression
#   --rna              adds the sequence-u2t stage to the splitting pipeline
#   --wrapper-only     keep the generated FASTQ as the output, skip fastqz
my ($out, $read_length, $mode, $rna, $wrapper_only);

my @option_spec = (
    'out=s'         => \$out,
    'read-length=i' => \$read_length,
    'mode=s'        => \$mode,
    'rna'           => \$rna,
    'wrapper-only'  => \$wrapper_only,
);
GetOptions(@option_spec) or die "Can't parse command line arguments\n";

# Validate the mandatory options, in the order: output, mode, read length.
defined $out or die "Output file is not specified\n";

defined $mode or die "Mode is not specified\n";
unless ($mode eq 'fast' or $mode eq 'slow') { die "Unknown mode\n"; }

defined $read_length or die "Read length is not specified\n";
unless ($read_length >= 1 and $read_length <= 4000) { die "Read length is out of range\n"; }

# Base name of the output file with every whitespace character replaced by
# an underscore; used to name the temporary files.
(my $name = basename($out)) =~ s/\s/_/g;

# Temporary files live in the directory named by the TMPDIR environment
# variable, which must be set and must be an existing directory.
my $temp_dir = $ENV{'TMPDIR'};
defined $temp_dir or die "Environment variable 'TMPDIR' is not set\n";
-d $temp_dir or die "Can't find temporary directory\n";

# All temporary files share one prefix and differ only in their suffix.
my $temp_prefix = "$temp_dir/$name.fastqz-c";
my ($temp_fasta, $temp_names, $temp_lengths, $temp_soft_mask,
    $temp_n, $temp_iupac, $temp_seq, $temp_fastq)
    = map { "${temp_prefix}-$_" } qw(fa names lengths soft-mask n iupac seq fastq);

# Number of bytes requested from standard input per read() call.
my $buf_size = 1000000;
my $buffer;

binmode STDIN;

# Copy the whole of standard input, byte for byte, into the temporary FASTA
# file so that the external commands run afterwards can read it from disk.
# Every I/O step is checked: a read error, a failed write or a failed close
# (where buffered write errors surface) would otherwise leave a silently
# truncated file behind.
open(my $FA, '>', $temp_fasta) or die "Can't create temporary file \"$temp_fasta\"\n";
binmode $FA;
while (1)
{
    my $bytes_read = read(STDIN, $buffer, $buf_size);

    # read() returns undef on error and 0 at end of file.
    if (!defined $bytes_read) { die "Can't read from standard input: $!\n"; }
    if ($bytes_read == 0) { last; }

    print {$FA} $buffer or die "Can't write to temporary file \"$temp_fasta\": $!\n";
}
close $FA or die "Can't write to temporary file \"$temp_fasta\": $!\n";

# Store the output of fasta-guess-line-length.pl for the temporary FASTA in
# '$out.line-length' (presumably the line width of the input -- confirm
# against that tool).
run("fasta-guess-line-length.pl '$temp_fasta' >'$out.line-length'");

# Shell pipeline that takes the temporary FASTA apart: each stage writes one
# side stream to its own temporary file and passes the rest on; whatever is
# left at the end is written to '$temp_seq'.
# NOTE(review): run() only sees the exit status of the last pipeline stage.
my @stages = (
    "fasta-to-names-lengths-sequence --names '$temp_names' --lengths '$temp_lengths' <'$temp_fasta'",
    "sequence-soft-mask-remove --mask '$temp_soft_mask'",
    "sequence-n-remove --n '$temp_n'",
    "sequence-iupac-remove --iupac '$temp_iupac'",
);
if ($rna) { push @stages, 'sequence-u2t'; }
run(join(' | ', @stages) . " >'$temp_seq'");

# Compress every side stream with zstd (level 1) into '$out.<suffix>'.
my @side_streams = (
    [ $temp_names,     'names'     ],
    [ $temp_lengths,   'lengths'   ],
    [ $temp_soft_mask, 'soft-mask' ],
    [ $temp_n,         'n'         ],
    [ $temp_iupac,     'iupac'     ],
);
for my $stream (@side_streams)
{
    my ($temp_file, $suffix) = @$stream;
    run("zstd -c -1 <'$temp_file' >'$out.$suffix'");
}

# Size in bytes of the sequence stream left after all side streams were
# split off.
my $seq_length = -s $temp_seq;

# Split the stream into a head whose length is a whole multiple of the read
# length and a tail holding the remaining ($seq_length % $read_length) bytes.
my ($head_length, $tail_length);
{
    use integer;
    $tail_length = $seq_length % $read_length;
    $head_length = $seq_length - $tail_length;
}

# Creates '$out' as an empty file (or truncates it), dying on failure.
# Done with open() rather than a shell redirection so that the result is
# checked and the file name never passes through the shell.
my $create_empty_out = sub
{
    open(my $EMPTY, '>', $out) or die "Can't create output file \"$out\": $!\n";
    close $EMPTY or die "Can't create output file \"$out\": $!\n";
};

# The tail is stored separately, compressed with zstd.
run("tail -c $tail_length '$temp_seq' | zstd -c -1 >'$out.tail'");

if ($head_length > 0)
{
    # Wrap the head into FASTQ records of $read_length bases with a constant
    # '#' quality.  The head is selected with a positive byte count, which
    # yields the same bytes as "all but the last $tail_length" but does not
    # depend on head accepting a negative count.
    run("head -c $head_length '$temp_seq' | fastq-from-sequence --seq-length $read_length --quality '#' >'$temp_fastq'");

    if ($wrapper_only)
    {
        # The generated FASTQ itself is the output.
        move($temp_fastq, $out) or die "Can't move \"$temp_fastq\" to \"$out\": $!\n";
    }
    else
    {
        # fastqz sub-command: 'c' for slow mode, 'e' for fast mode.
        my $m = ($mode eq 'slow') ? 'c' : 'e';
        run("fastqz $m '$temp_fastq' '$out' >/dev/null 2>&1");
        unlink $temp_fastq;

        # '$out' is left as an empty file after fastqz has run; presumably
        # fastqz stores its data in files derived from '$out' rather than in
        # '$out' itself -- TODO confirm against fastqz.
        $create_empty_out->();
    }
}
else
{
    # Nothing to compress: still leave an (empty) '$out' behind.
    $create_empty_out->();
}

# Remove the intermediate files; failures to delete are ignored.
for my $temp_file ($temp_fasta, $temp_names, $temp_lengths, $temp_soft_mask,
                   $temp_n, $temp_iupac, $temp_seq)
{
    unlink $temp_file;
}

# Run a command and abort the script if it does not succeed.
#
# Parameters:
#   $cmd - command line, passed as a single string to system().
#
# Returns nothing useful.  Dies with the original "Command failed:" message
# and the command text, followed by the reason: the command could not be
# started, it was terminated by a signal, or it exited with a non-zero code.
sub run
{
    my ($cmd) = @_;

    my $status = system($cmd);
    if ($status == 0) { return; }

    my $reason;
    if ($status == -1)
    {
        # system() returns -1 when the command could not be started at all;
        # $! then holds the cause.
        $reason = "failed to execute: $!";
    }
    elsif ($status & 127)
    {
        # The low 7 bits of the wait status hold the terminating signal.
        $reason = 'terminated by signal ' . ($status & 127);
    }
    else
    {
        # The exit code is stored in the high byte of the wait status.
        $reason = 'exit code ' . ($status >> 8);
    }

    die "Command failed:\n$cmd\nReason: $reason\n";
}
