#!/usr/bin/env python

from pyPheWAS.pyPhewasCorev2 import *
import sys, os
import argparse
import time
import math
from pathlib import Path
import os.path as osp

def parse_args():
    """Build the command-line parser for this script and parse ``sys.argv``.

    Returns:
        argparse.Namespace: carries ``phenotype``, ``group`` and ``reg_type``
        (required), plus ``path`` (default ``'.'``), ``outfile`` and
        ``phewas_cov`` (both default ``None``). All values are strings when
        supplied.
    """
    # One row per option, listed in the order they should appear in --help:
    # (flag, required?, default, help text)
    option_table = (
        ('--phenotype', True, None, 'Name of the phenotype file (e.g. icd9_data.csv)'),
        ('--group', True, None, 'Name of the group file (e.g. groups.csv)'),
        ('--reg_type', True, None, 'Type of regression that you would like to use (log, lin, or dur)'),
        ('--path', False, '.', 'Path to all input files and destination of output files'),
        ('--outfile', False, None, 'Name of the output file for the feature matrix'),
        ('--phewas_cov', False, None, 'PheCode to use as covariates in pyPhewasModel regression'),
    )

    cli = argparse.ArgumentParser(description="pyPheWAS ICD-Phecode Lookup Tool")
    for flag, is_required, fallback, help_text in option_table:
        cli.add_argument(flag, required=is_required, default=fallback, type=str, help=help_text)

    return cli.parse_args()

"""
Print Start Message
"""
# Wall-clock reference; the runtime report at the end of the script subtracts from it
start = time.time()
print_start_msg()
print('\npyPhewasLookup: ICD-Phecode Lookup Tool\n')


"""
Retrieve and validate all arguments.
"""
args = parse_args()
kwargs = {
    'path': Path(args.path),
    'phenotype': args.phenotype,
    'group': args.group,
    'reg_type': args.reg_type,
    'phewas_cov': args.phewas_cov,
    'outfile': args.outfile,
}

# NOTE: validation raises explicitly instead of using `assert`. Assert
# statements are removed when Python runs with -O, which would let invalid
# arguments slip through to the data-loading calls further down.

# The regression type must be one of the keys of regression_map
if args.reg_type not in regression_map:
    raise ValueError("%s is not a valid regression type" % args.reg_type)

# Both input files must be named as .csv files
if not kwargs['phenotype'].endswith('.csv'):
    raise ValueError("%s is not a valid phenotype file, must be a .csv file" % (kwargs['phenotype']))
if not kwargs['group'].endswith('.csv'):
    raise ValueError("%s is not a valid group file, must be a .csv file" % (kwargs['group']))

# Both input files must exist underneath the chosen path
for input_name in (kwargs['phenotype'], kwargs['group']):
    input_file = kwargs['path'] / input_name
    if not osp.exists(input_file):
        raise FileNotFoundError("%s does not exist" % input_file)

# Derive the output file name from the group file name if none was given
if kwargs['outfile'] is None:
    kwargs['outfile'] = "feature_matrix_" + kwargs['group']

# The output file must be named as a .csv file too
if not kwargs['outfile'].endswith('.csv'):
    raise ValueError("%s is not a valid outputfile, must be a .csv file" % (kwargs['outfile']))

# Print Arguments
display_kwargs(kwargs)

# Bind every argument to a module-level name used by the rest of the script.
# Spelled out (rather than locals().update(kwargs)) so readers and static
# analysis tools can see where these names come from.
path = kwargs['path']
phenotype = kwargs['phenotype']
group = kwargs['group']
reg_type = kwargs['reg_type']
phewas_cov = kwargs['phewas_cov']
outfile = kwargs['outfile']

"""
Calculate feature matrix
"""
# Regression entry selected on the command line; passed to both the phenotype
# loader and the feature-matrix generator
reg_selection = regression_map[reg_type]

print("Retrieving phenotype data...")
phenotypes = get_icd_codes(path, phenotype, reg_selection)

print("Retrieving group data...")
genotypes = get_group_file(path, group)

if 'MaxAgeAtVisit' not in genotypes.columns:
    print('WARNING: MaxAgeAtVisit was not found in group file. Calculating MaxAgeAtVisit from phenotype data')
    # Largest AgeAtICD seen for each id, written onto every phenotype row of that id
    phenotypes['MaxAgeAtVisit'] = phenotypes.groupby(['id'])['AgeAtICD'].transform('max')
    # Reduce to one (id, MaxAgeAtVisit) row per id, then attach it to the group
    # table; the left join keeps every group row even if it has no phenotype rows
    max_age_by_id = phenotypes[['id', 'MaxAgeAtVisit']].drop_duplicates(subset='id')
    genotypes = genotypes.merge(max_age_by_id, on='id', how='left')

print("Generating feature matrix...")
fm, columns = generate_feature_matrix(genotypes, phenotypes, reg_selection, 'ICD', phewas_cov)

"""
Save feature matrix
"""
print("Saving feature matrices to %s" % (path / ('*_' + outfile)))
header_row = ','.join(columns)

# (file-name prefix, index into fm) for each matrix to write, in write order
matrices_to_save = [('agg_measures_', 0), ('icd_age_', 1)]
if phewas_cov is not None:
    # only save this if it actually means something
    matrices_to_save.append(('phewas_cov_', 2))

for position, (prefix, fm_index) in enumerate(matrices_to_save):
    if position > 0:
        # progress marker printed between consecutive writes
        print("...")
    np.savetxt(path / (prefix + outfile), fm[fm_index], delimiter=',', header=header_row)

"""
Calculate runtime
"""
# Seconds elapsed since `start` was recorded at the top of the script
elapsed = time.time() - start

# Break the elapsed time into whole hours, minutes and seconds
hours = math.floor(elapsed / 3600.0)
past_the_hour = elapsed - hours * 3600
minutes = math.floor(past_the_hour / 60)
seconds = math.floor(past_the_hour - minutes * 60)

# Drop leading units that are zero; seconds are always shown
runtime_units = ((hours, 'h'), (minutes, 'm'), (seconds, 's'))
if hours > 0:
    first_shown = 0
elif minutes > 0:
    first_shown = 1
else:
    first_shown = 2
time_str = ':'.join('%d%s' % unit for unit in runtime_units[first_shown:])

print('pyPhewasLookup Complete [Runtime: %s]' % time_str)

