#!/usr/bin/env python

from pyPheWAS.pyPhewasCorev2 import *
import pandas as pd
import sys, os
from pathlib import Path
import os.path as osp
import time
import argparse
import math


def parse_args():
    """Parse the command-line options of the merging tool.

    Every option is an optional string. The four file options default to
    ``None``; ``--path`` defaults to the current directory (``'.'``).

    Returns:
        argparse.Namespace: with attributes ``phenotypefiles``,
        ``groupfiles``, ``phenotypeout``, ``groupout`` and ``path``.
    """
    parser = argparse.ArgumentParser(description="pyPheWAS group/phenotype files merging tool")

    # One row per option: (flag, default value, help text).
    option_specs = (
        ('--phenotypefiles', None, 'Names of input phenotype files separated by +'),
        ('--groupfiles', None, 'Names of input group files separated by +'),
        ('--phenotypeout', None, 'Name of the output phenotype file'),
        ('--groupout', None, 'Name of the output group file'),
        ('--path', '.', 'Path to all input files and destination of output files (default = current directory)'),
    )
    for flag, default_value, help_text in option_specs:
        parser.add_argument(flag, required=False, default=default_value, type=str, help=help_text)

    return parser.parse_args()

"""
Print Start Message
"""
start = time.time()
print_start_msg()
print('\nmergeGroups: Group/Phenotype File Merging Tool\n')


"""
Retrieve and validate all arguments.
"""
args = parse_args()

kwargs = {
	'phenotypefiles':args.phenotypefiles,
	'groupfiles':args.groupfiles,
	'phenotypeout':args.phenotypeout,
	'groupout':args.groupout,
	'path':Path(args.path),
}

# assert that valid input combination was given
assert (kwargs['phenotypefiles'] is not None) or (kwargs['groupfiles'] is not None), "No files were provided to merge"

# Print Arguments
display_kwargs(kwargs)
# Make all arguments local variables
locals().update(kwargs)

# Assert that valid files were given
n_phenf = 0 # count number of files
if phenotypefiles is not None:
	for filename in phenotypefiles.split('+'):
		assert filename.endswith('.csv'), "%s is not a valid phenotype file, must be a .csv file" % (filename)
		assert osp.exists(path/filename), "phenotype file (%s) does not exist" % (path/filename)
		n_phenf += 1
		
n_groupf = 0 # count number of group files
if groupfiles is not None:
	for filename in groupfiles.split('+'):
		assert filename.endswith('.csv'), "%s is not a valid group file, must be a .csv file" % (filename)
		assert osp.exists(path/filename), "group file (%s) does not exist" % (path/filename)
		n_groupf += 1

# Assert that the output files are valid
if phenotypefiles is not None:
	assert phenotypeout is not None, "Please provide a filename for the merged phenotype data"
	assert phenotypeout.endswith('.csv'), "%s is not a valid output file, must be a .csv file" % phenotypeout
	
if groupfiles is not None:
	assert groupout is not None, "Please provide a filename for the merged group data"
	assert groupout.endswith('.csv'), "%s is not a vailid output file, must be a .csv file" % groupout

"""
Read & Merge the group and/or phenotype files
"""
if phenotypefiles is not None:
	print('Reading %d phenotype files' %n_phenf)
	phensDF = [pd.read_csv(path/filename) for filename in phenotypefiles.split('+')]
	print('Merging phenotype files')
	phen_merged = pd.concat(phensDF)
	phen_merged.drop_duplicates(inplace=True)
	print('Writing merged phenotype data to %s' %(path / phenotypeout))
	phen_merged.to_csv(path / phenotypeout, index=False)

if groupfiles is not None:
	print('Reading %d group files' %n_groupf)
	groupsDF = [pd.read_csv(path/filename) for filename in groupfiles.split('+')]
	print('Merging group files')
	group_merged = pd.concat(groupsDF)
	group_merged.drop_duplicates('id', inplace=True)
	print('Writing merged group data to %s' % (path / groupout))
	group_merged.to_csv(path / groupout, index=False)



"""
Calculate runtime
"""
interval = time.time() - start
hour = math.floor(interval/3600.0)
minute = math.floor((interval - hour*3600)/60)
second = math.floor(interval - hour*3600 - minute*60)

if hour > 0:
    time_str = '%dh:%dm:%ds' %(hour,minute,second)
elif minute > 0:
    time_str = '%dm:%ds' % (minute, second)
else:
    time_str = '%ds' % second

print('createGenotypeFile Complete [Runtime: %s]' %time_str)