#!/usr/bin/env perl

use warnings;
use strict;
use Data::Dumper;

my $version = "1.0a"; # Program version reported by the banner and usage text

############################################################################
# Default settings: file names, command-line bookkeeping flags, and the
# shared loop counters used further down in the script
############################################################################
# Every flag holds 0 for FALSE and 1 for TRUE
my $progname = $0; # Name this script was invoked as

# Input tree
my $infname   = "intree";
my $infnmread = 0; # 1 once an infile name has been taken from the command line

# Output tree
my $outfname   = "outtree";
my $outfnmread = 0; # 1 once an outfile name has been taken from the command line

# Authority (old name / new name) table
my $authfname   = "authority.txt";
my $authfnmread = 0; # 1 once an authority file name has been taken from the command line

my $verbose = 0; # 1 when verbose mode has been requested

# Loop counters, left undefined until a loop assigns them
my ($iter, $jter, $kter, $lter);

############################################################################
# read the filenames from the command line
############################################################################
# Parse the command line.
#   -i <file>  input tree file name
#   -o <file>  output tree file name
#   -a <file>  authority file name
#   -v         verbose mode
#   --help     print usage information and exit
# Option names are matched case-insensitively. With no arguments at all the
# usage text is shown and the default file names are used. Misuse of an
# option (given twice, or a file option with no value after it) prints an
# error plus the usage text and exits with a non-zero status.
if ( @ARGV == 0 ) {
	print "No command line arguments were passed to $progname.\n";
	print "Default filenames will be used.\n";
	usage($progname,$version);
} # if ( @ARGV == 0...
else {
	# Options that take a file name: reference to the variable receiving the
	# name, reference to its "already read" flag, and the wording used in the
	# duplicate-option error message.
	my %fileopt = (
		"-i" => [ \$infname,   \$infnmread,   "infile" ],
		"-o" => [ \$outfname,  \$outfnmread,  "outfile" ],
		"-a" => [ \$authfname, \$authfnmread, "authority file" ],
	);

	for (my $cmdit=0; $cmdit<@ARGV; $cmdit++) {
		my $arg = $ARGV[$cmdit];
		my $opt = lc($arg);

		if ( exists $fileopt{$opt} ) { # option followed by a file name
			my ($nameref, $seenref, $label) = @{ $fileopt{$opt} };
			# The value is the NEXT argument, so one must actually exist;
			# otherwise the file name would silently become undef.
			if ( $cmdit + 1 >= @ARGV ) {
				print "ERROR - option $arg requires a file name.\n\n";
				usage($progname,$version);
				exit 1;
			}
			if ( $$seenref == 1 ) {
				print "ERROR - multiple $label names were passed.\n\n";
				usage($progname,$version);
				exit 1;
			}
			$cmdit++; # consume the value so it is not parsed as an option
			$$nameref = $ARGV[$cmdit];
			$$seenref = 1; # set the flag indicating this file name has been read
		}
		elsif ( $opt eq "-v" ) { # verbose mode
			if ($verbose == 1) {
				print "ERROR - verbose mode switch set multiple times.\n\n";
				usage($progname,$version);
				exit 1;
			}
			$verbose = 1; # set the flag for verbose mode
		}
		elsif ( $opt eq "--help" ) { # help is a normal request: exit status 0
			usage($progname,$version);
			exit;
		}
		else { # unknown arguments are reported and skipped, not fatal
			print "Unknown command $arg was passed. This command was ignored.\n";
			usage($progname,$version);
		}
	} # end for (my $cmdit=0; $cmdit<@ARGV;...
} # end else... [if ( @ARGV == 0...]

############################################################################
# Echo the filenames to be used (verbose mode only)
############################################################################
# In verbose mode, show the program banner followed by the three file names
# that this run will use.
if ($verbose == 1) {
	print
		"\n$progname version $version\n",
		"\nRename taxa in a phylip tree:\n",
		"  input tree     = $infname\n",
		"  output tree    = $outfname\n",
		"  authority file = $authfname\n\n";
}

############################################################################
# Read the authority file (list of taxon names)
############################################################################
# Load the authority file: one taxon per line, written as
#   <original name> <whitespace> <new name>
# Fields beyond the second are ignored and blank lines are skipped.
# Results: @authname / @newname are parallel arrays, $nauthtaxa is the number
# of entries, and $maxauthlen / $maxnewlen are the longest name lengths.
# A line lacking a new name, or a name that appears twice in either column,
# is reported and ends the program with a non-zero exit status.
open (my $AUTHF, '<', $authfname) or die "Could not open file $authfname for input.\n";
my @authline = <$AUTHF>; # Read the authority file data
close($AUTHF) or die "Could not close file $authfname.\n";

my @authname = ();
my $maxauthlen = 0; # Maximum length of an authority file taxon name
my @newname = ();
my $maxnewlen = 0;  # Maximum length of a new authority file taxon name

# Convert the @authline array to the @authname and @newname arrays
for my $lineno (1 .. scalar @authline) {
	# split ' ' skips leading whitespace, so an indented line does not
	# yield an empty first field the way split(/\s+/) would
	my ($old, $new) = split(' ', $authline[$lineno - 1]);
	next unless defined $old; # blank (or whitespace-only) line
	unless (defined $new) {
		print "ERROR - line $lineno of $authfname has no new name for $old\n";
		exit 1;
	}
	push @authname, $old;
	push @newname, $new;
	if ( length($old) > $maxauthlen ) { $maxauthlen = length($old); }
	if ( length($new) > $maxnewlen ) { $maxnewlen = length($new); }
}

my($nauthtaxa) = scalar @authname;
if ($verbose == 1) { print "There are $nauthtaxa taxa in the authority file.\n"; }

# Check for duplicate names; positions in the messages are 1-based entry numbers
for my $first (0 .. $nauthtaxa - 1) {
	for my $second ($first + 1 .. $nauthtaxa - 1) {
		my ($posa, $posb) = ($first + 1, $second + 1);
		if ( $authname[$first] eq $authname[$second] ) {
			print "ERROR - $authname[$first] is a duplicate name\n";
			print "  found as names $posa and $posb\n";
			exit 1;
		}
		if ( $newname[$first] eq $newname[$second] ) {
			print "ERROR - $newname[$first] is a duplicate name\n";
			print "  found as names $posa and $posb\n";
			exit 1;
		}
	}
}

# Echo the names from the authority file if mode is "verbose"
if ($verbose == 1) {
	print "\nTaxon names in authority file (original name, new name):\n";
	for my $idx (0 .. $nauthtaxa - 1) {
		# Pad the original name to the widest one so the new names line up
		printf "  %-*s  %s\n", $maxauthlen, $authname[$idx], $newname[$idx];
	}
	print "\n";
}

############################################################################
# Read the infile and rename the taxa
############################################################################
# Read the whole input tree into $tree and rename its taxa according to the
# parallel arrays @authname (original names) and @newname (replacements).
open (my $INF, '<', $infname) or die "Could not open file $infname for input.\n";

my @treeline = <$INF>; # Read the input tree
my $tree = join ('',@treeline); # Join array elements if there were returns in the tree

close($INF) or die "Could not close file $infname.\n";

# Build an original-name => new-name lookup, ignoring unusable entries
my %rename_of;
for my $idx (0 .. $#authname) {
	my ($old, $new) = ($authname[$idx], $newname[$idx]);
	next unless defined $old && length $old && defined $new;
	$rename_of{$old} = $new;
}

# Rename in a single pass over the tree. A label is a run of characters other
# than whitespace ( ) , : ; that
#   - starts at the beginning of the tree or right after whitespace ( ) , or ;
#     so a name is never matched inside a longer label or a branch length, and
#   - is directly followed by ',' ')' or ':' as complete names are in newick.
# Labels are compared as literal strings through the hash, so characters that
# are special in regular expressions are harmless, and because every label is
# looked up exactly once a new name is never renamed again by another entry.
$tree =~ s/(?<![^\s(),;])([^\s(),:;]+)(?=[,):])/exists $rename_of{$1} ? $rename_of{$1} : $1/ge;

############################################################################
# Output the tree with the renamed taxa
############################################################################
# Write the renamed tree to the output file, replacing any existing file.
# Three-argument open keeps the file name from being interpreted as part of
# the open mode.
open (my $OUTF, '>', $outfname) or die "Could not open file $outfname for output.\n";
print {$OUTF} $tree or die "Could not write to file $outfname.\n";
# Buffered write errors only surface at close, so the check here matters
close($OUTF) or die "Could not close file $outfname.\n";

if ($verbose == 1) { print "Tree with renamed taxa written to $outfname\n\n"; }

exit;

############################################################################
# Usage subroutine - prints information about program usage
############################################################################
# usage($pname, $ver)
#   $pname - program name to show in the banner and the example command
#   $ver   - version string to show in the banner
# Prints the usage summary to STDOUT. Returns nothing useful.
sub usage {
	my($pname,$ver) = @_;
	print
		"\n$pname version $ver\n\n",
		"typical usage: $pname -i <intree> -o <outtree> -a <authority>\n",
		"  This will rename taxa using files:\n",
		"    infile         (default is 'intree')\n",
		"    outfile        (default is 'outtree')\n",
		"    authority file (default is 'authority.txt')\n",
		"  The tree should be in phylip (newick) format.\n\n",
		"Additional command line options are:\n",
		"  --help  = print usage information\n",
		"  -v      = verbose mode, echo information to screen\n",
		"\n";
	return;
}

