#!/usr/bin/perl -w

# Copyright 2013, Naoki Takebayashi <ntakebayashi@alaska.edu>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Version: 20130612

# Help text.  It is printed (via die) for -h, for an unrecognised switch,
# and when fewer than two input files are named.  The synopsis lists only
# the switches that have an effect: -r is still accepted by getopts but
# nothing reads it.
my $usage="\nUsage: $0 [-hg] fastaFileName1 fastaFileName2 ...\n".
    "  -h: help\n".
    "  -g: remove gaps '-' from the sequence\n".
    "Concatenate FASTA sequences from several files.  At least two files are\n".
    "required.  The n-th sequence of each later file is appended, in\n".
    "command-line order, to the n-th sequence of the first file; sequence\n".
    "names are taken from the first file.\n";

# Switch flags filled in by Getopt::Std::getopts().  They are package
# variables because getopts() sets $opt_<letter> globals.  $opt_r is
# accepted on the command line, but its only use in this file is in
# commented-out code.
our ($opt_h, $opt_g, $opt_r);

use Bio::SeqIO;

use Getopt::Std;

# An unknown switch and an explicit -h both end the program with the
# usage text.
unless (getopts('hgr')) {
    die "$usage\n";
}
if (defined $opt_h) {
    die "$usage\n";
}

# Read every input file into memory.
#   $format   - sequence format handed to Bio::SeqIO for reading
#   @seqArr   - one array reference per input file, in command-line order;
#               each holds that file's sequence objects in file order
#   $numFiles - number of file names given on the command line
my $format = "fasta";
my @seqArr = ();

die "ERROR: give at least two fasta files\n$usage\n" unless (@ARGV >= 2);
my $numFiles = scalar(@ARGV);

# NOTE: the loop ends on the first file name that is false in Perl
# ("0" or ""), so such a name and everything after it is not read.
while (my $file = shift) {
    my $seqio_obj = Bio::SeqIO->new(-file => $file, -format => $format);
    my @arrFromThisFile = ();
    while (my $seq = $seqio_obj->next_seq()) {
        push(@arrFromThisFile, $seq);
    }

    # Optional sorting by sequence id (the -r switch), currently disabled:
#    if (defined($opt_r)) {
#        @arrFromThisFile = sort { - ($a->id() cmp $b->id()) } @arrFromThisFile;
#    } else {
#        @arrFromThisFile = sort { $a->id() cmp $b->id() } @arrFromThisFile;
#    }

    push @seqArr, \@arrFromThisFile;
}

# Per-file summary, indexed the same way as @seqArr:
#   @numSeqArr    - how many sequences were read from each file
#   @maxSeqLenArr - length of the longest sequence in each file
#                   (-1 for a file that yielded no sequences)
my @numSeqArr = ();
my @maxSeqLenArr = ();
foreach my $idx (0 .. $numFiles - 1) {
    $numSeqArr[$idx]    = scalar @{ $seqArr[$idx] };
    $maxSeqLenArr[$idx] = MaxSeqLen(@{ $seqArr[$idx] });
}

# Can do more fancy stuff around here, but this is ok for now

# Build the output records by position: the sequence objects of the first
# file are reused (so their names are kept), and the residues of the s-th
# sequence of every later file are appended to the s-th object.
my @result = @{$seqArr[0]};  # take the seq from the first file
foreach my $fileNum (1..($numFiles-1)) {
    my $numInThisFile = $numSeqArr[$fileNum];

    # A later file with extra sequences has no partner record in @result.
    # Stop with a clear message instead of failing on an undefined object.
    if ($numInThisFile > scalar(@result)) {
        die "ERROR: input file number " . ($fileNum + 1) . " has " .
            "$numInThisFile sequences, but the first file has only " .
            scalar(@result) . "\n";
    }

    # A shorter file is still accepted, but the trailing records of the
    # first file then receive nothing from it, so tell the user.
    if ($numInThisFile < scalar(@result)) {
        warn "WARNING: input file number " . ($fileNum + 1) . " has only " .
            "$numInThisFile sequences, but the first file has " .
            scalar(@result) . "; the remaining sequences are not extended\n";
    }

    foreach my $s (0..($numInThisFile - 1)) {
        my $thisSeq = $result[$s]->seq() . $seqArr[$fileNum][$s]->seq();
        $result[$s]->seq($thisSeq);
    }
}


# Write the concatenated records to standard output in $format.
# STDOUT is passed with '-fh', the Bio::SeqIO constructor argument for an
# already-open filehandle.
my $seqOut = Bio::SeqIO->new(-fh => \*STDOUT, -format => $format);
foreach my $s (@result) {
    # With -g, strip every gap character '-' before writing the record.
    if (defined($opt_g)) {
        my $tmp = $s->seq();
        $tmp =~ s/-//g;
        $s->seq($tmp);
    }
    $seqOut->write_seq($s);
}


# Debugging aid: per-file maximum sequence lengths.
#print (join " ", @maxSeqLenArr, "\n");

exit;

# MaxSeqLen(@seqs)
# Return the largest value reported by the ->length method of the given
# objects.  Returns -1 when called with an empty list.
sub MaxSeqLen {
    my @seqs = @_;
    my $longest = -1;
    for my $seq (@seqs) {
        my $n = $seq->length;
        $longest = $n if $n > $longest;
    }
    return $longest;
}





