#!/usr/bin/perl -w

# Copyright 2013, Naoki Takebayashi <ntakebayashi@alaska.edu>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Version: 20130612

my $usage = "\nUsage: $0 [-h] [-r RemoveColumnNumber] inputfile\n" .
    " Input File: tab delimited text file\n" .
    "1st column is individual name (name of directory)\n" .
    "Each column represent comma-separated cloneIDs for each allele\n";

my $consedChromatDir="chromat_dir";
my $alleleNameForRemoval="Remove";

use File::Basename;
use File::Copy;

use Getopt::Std;

getopts('hr:') || die $usage;
die $usage if (defined($opt_h));

die "$usage\nERROR: Please specify one input file describing the " .
    "cloneID -> alleleID\n"  unless (@ARGV != 1);

# purse the inputfile, and get conversion table.
my @conv = ReadInConfig($ARGV[0]);

my @updatedDir = ();  # need to run phredPhrap for these directories
# go through each individual conversion
foreach my $entry (@conv) {
    my @cloneIDs = split /\t/, $entry;

    my $dirName = shift @cloneIDs;
    next if (@cloneIDs == 0);

    # test if the directory exists
    unless ( -d $dirName) {
	warn "WARN: directory: $dirName doesn't exists, ignoring...\n";
	next;
    }
    
    # get the list of ab1 files inside
    my @ab1Files = GetAB1FileNames($dirName);
    next if (@ab1Files == 0);
    
    # assign allele destination for each filename.
    my @alleleAssignment = AssignAlleles(\@cloneIDs, \@ab1Files);
    # need error handling
    
    # prepare the destination
    my @allAlleles = ExtractUnique(@alleleAssignment);
    my $updateFlag = 0;
    foreach my $i (@allAlleles) {
	if ($i != 0 && $i != 1) {
	    SetUpDir($dirName, $i);
	    $updateFlag = 1;
	    push (@updateDir,  $dirName . "-a" . $i);  # need to be updated
	}
    }
    if ($updateFlag == 1) {
	push (@updateDir,  $dirName);  # need to update the original location
    }
    
    # actually move the files
    for my $i (0..$#ab1Files) {
	my $dest = $alleleAssignment[$i];
	my $fn = $ab1Files[$i];
	if ($dest == 0) {
	    warn "INFO: $fn didn't match with any clone name listed in " .
		"the input file.  It will not be moved\n";
	    next;
	} elsif ($dest == 1) {
	    next;
	} else { # now move the file
	    my $newFN = $dirName . "-a" . $dest . "/$consedChromatDir/" . 
		basename($fn);
	    if (-x $newFN) {
		warn "INFO: $newFN is replaced with the newer file\n";
		unlink ($newFN) || die "ERROR: Can't delete $newFN\n";
	    }
	    move($fn, $newFN) || die "ERROR: can't move $fn -> $newFN\n";
	}
    }

}

#if (@updateDir > 0) {
#    RunPhredPhrap (@updateDir);
#}

exit(0);

sub ReadInConfig {
    my $file = shift;
    
    open(IN, "<$file") || die "ERROR: Can't open $file\n";
    
    my @result = ();
    while(<IN>) {
	chomp;
	my @line = split /\t/;
	for my $i (0..$#line) {
#	    $line[$i] =~ s/^\s+//;
#	    $line[$i] =~ s/\s+$//;
	    $line[$i] =~ s/\s+//g;
	}
	my $entry = join ("\t", @line);
	push @result, $entry;
    }
    return @result;
}

sub GetAB1FileNames {
    my $dir = shift;

    $dir = $dir . "/" . $consedChromatDir;
    
    unless ( -d $dir) {
	warn "WARN: directory: $dir doesn't exists, ignoring...\n";
	return ();
    }
    
    # read in the file names
    opendir (DIR, $dir) || die "ERROR: can't open $dir\n";
    my $name;
    my @files = ();
    while (defined($name = readdir(DIR))) {
	push @files, $name ;
    }
    closedir (DIR);

    # make sure all of them have the ab1 files.
    my $numAB1files = scalar(@files);
    @files = SelectAB1Files (@files);
    $numAB1files = $numAB1files - @files;
    if ($numAB1files != 0) {
	warn "WARN: In $dir, there are $numAB1files files which are not AB1\n";
    }

    return @files;
}

# only select ab1 files
sub SelectAB1Files {
    my @result = ();
    foreach $file (@_) {
	my $id = GetIDFromFilename($file);
	if ($id eq "") {
	    warn "INFO: $file does not follow the naming convention: " .
		"templateID.type.ab1.  Ignoring ...";
	    next;
	}
	push @result, $file;
    }
    return @result;
}


sub GetIDFromFilename {
    my $name = shift;
    my $bn = basename $name;
    # "+?" is stingy match
    if ($bn =~ /^(.+?)\..*\.ab1$/) {
	return $1;
    } else {
	return "";
    }
}

sub AssignAlleles {
    my ($cloneIDArrRef, $fileArrRef) = @_;

    # First, purse the cloneID -> alleleID array
    my %alleleHash = ();
    my $alleleCnt = 1;
    foreach my $allele (@$cloneIDArrRef) {
	my @clones = split /,/, $allele;
	foreach my $cID (@clones) {
	    unless ($alleleHash{$cID}) {
		$alleleHash{$cID} = $alleleCnt;
	    } else {
		die "ERROR: check the input file. $alleleHash{$cID} is " .
		    "designeated to several alleles\n";
	    }
	}
	$alleleCnt ++;
    }

    # Go through each filename, and assign them to the allele destination
    my @result = ();
    foreach my $file (@$fileArrRef) {
	# find the potential allele destination for this file
	my @alleleID = ();
	foreach my $cID (keys(%alleleHash)) {
	    if ($file =~ /$cID/) {
		push @alleleID, $cID;
	    }
	}
	
	# make sure that there is no ambiguity in allele destination
	if (@alleleID > 1) {
	    die "ERROR: I'm not sure how to handle the filename $file.  " .
	       "It matches with several alleles: ", join(",", @alleleID), "\n";
	} elsif (@alleleID == 0) {
	    push @result, 0;
	} else {
	    push @result, $alleleID[0];
	}
    }

    my $remCol = -1;
    $remCol = $opt_r if (defined($opt_r));
    for my $i (0..$#result) {
	if ($result[$i] == $remCol) {
	    $result[$i] = $alleleNameForRemoval;
	}
    }

    return @result;
}


# take a list as the argument and extract the unique elements.
# The order of elements will not be preserved.
sub ExtractUnique {
    my %seen=();
    my @unique = ();

    foreach my $item (@_) {
        push (@unique, $item) unless $seen{$item}++;
    }
    return @unique;
}

sub MemberQ {
    my ($x, $arrRef) = @_;
    foreach my $item (@$arrRef) {
        if ($x eq $item) {
            return 1;
        }
    }
    return 0;
}

sub CntOccurrence {
    my ($x, $arrRef) = @_;
    my $cnt = 0;
    foreach my $item (@$arrRef) {
        if ($x eq $item) {
            $cnt++;
        }
    }
    return $cnt;
}

sub SetUpDir {
    my ($dirName, $alleleID) = @_;
    
    my $name = $dirName . "-a" . $alleleID;    
    if (-x $name) {
	if ( -d "$name/chromat_dir" && -d "$name/edit_dir" &&
	     -d "$name/phd_dir" ) {
	    return;  # already directory exists
	} else {
	    RenameFile($name, "old");
	}
    }
    
    MakeConsedDirStructure($name);
    return;
}

sub MakeConsedDirStructure {
    my $name = shift;
    system ("mkdir -p $name/chromat_dir");
    system ("mkdir -p $name/edit_dir");
    system ("mkdir -p $name/phd_dir");
}

sub RenameFile {
    my ($file, $toSuffix) = @_;

    my $newName = $file . "." . $toSuffix;
    if (-x $newName) {
	warn "WARN: removed $newName\n";
	system("/bin/rm -rf $newName");
    }
    move($file, $newName) || die "ERROR: move $file -> $newName failed\n";
    return
}
