#!/usr/bin/env python3

"""READemption - A RNA-seq Analysis Pipeline"""

import argparse
from reademptionlib.controller import Controller

__author__ = "Konrad Foerstner <konrad@foerstner.org>, Till Sauerwein <sauerwein@zbmed.de>"
__copyright__ = "2011-2022 by Konrad Foerstner <konrad@foerstner.org, , Till Sauerwein <sauerwein@zbmed.de>"
__license__ = "ISC license"
__email__ = "konrad@foerstner.org, sauerwein@zbmed.de"
__version__ = "2.0.1"


def main():
    parser = create_parser()
    args = parser.parse_args()
    if args.version is True:
        print("READemption version " + __version__)
    elif "func" in dir(args):
        controller = Controller(args)
        args.func(controller)
    else:
        parser.print_help()

def is_positive_int(value):
    integer = int(value)
    if integer <= 0:
        raise argparse.ArgumentTypeError(f"{value} is an invalid positive integer value")
    return integer

def create_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--version",
        "-v",
        default=False,
        action="store_true",
        help="show version",
    )
    subparsers = parser.add_subparsers(help="commands")

    # Arguments for project creation
    create_project_parser = subparsers.add_parser(
        "create",
        help="Create a project",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    create_project_parser.add_argument(
        "--species",
        "-s",
        nargs="+",
        metavar="SUB_FOLDER_NAME=DISPLAY_NAME",
        required=True,
        help="One key-value pair for each species.\n"
        "Key-value "
        "pairs consist of a 'folder prefix' (key) and a "
        "'display name' (value) and are separated by an '='-sign.\n"
        "The 'folder prefix' will be used to create input -and output "
        "folders for the given species.\n"
        "The 'display name' will be used for output files "
        "and figures. \n"
        "Syntax example: \n"
        "$ reademption create --species "
        'human="Homo sapiens" '
        'staphylococcus="Staphylococcus aureus" '
        "--project_path READemption_analysis",
    )
    create_project_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )
    create_project_parser.set_defaults(func=create_project)

    # Parameters for read alignment
    read_aligning_parser = subparsers.add_parser(
        "align", help="Run read alignings"
    )
    read_aligning_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )
    read_aligning_parser.add_argument(
        "--min_read_length",
        "-l",
        default=12,
        type=int,
        help="Minimal read length after clipping (default 12). Should be "
        "higher for eukaryotic species.",
    )
    read_aligning_parser.add_argument(
        "--processes",
        "-p",
        default=1,
        type=int,
        help="Number of processes that should be used (default 1).",
    )
    read_aligning_parser.add_argument(
        "--segemehl_accuracy",
        "-a",
        default=95.0,
        type=float,
        help="Segemehl's minimal accuracy (in %%) (default 95).",
    )
    read_aligning_parser.add_argument(
        "--segemehl_evalue",
        "-e",
        default=5.0,
        type=float,
        help="Segemehl's maximal e-value (default 5.0).",
    )
    read_aligning_parser.add_argument(
        "--segemehl_bin",
        "-s",
        default="segemehl.x",
        help="Segemehl's binary path (default 'segemehl.x').",
    )
    read_aligning_parser.add_argument(
        "--paired_end",
        "-P",
        default=False,
        action="store_true",
        help="Use this if reads are originating from a paired-end sequencing. "
        "The members of a pair must be marked with '_p1' and '_p2' in front "
        "of the file type suffixes (e.g. 'my_sample_p1.fa' and "
        "'my_sample_p2.fa' or 'my_sample_p1.fa.bz2' and 'my_sample_p2.fa.bz2'"
        "). This option cannot be used with polyA tail clipping.",
    )
    read_aligning_parser.add_argument(
        "--no_fragment_building",
        "-nb",
        default=False,
        action="store_true",
        help="Do not build fragments from paired end alignments. "
             "Not building the fragments saves time. If later"
             "subcommands like 'gene_quanti' or 'coverage' need the fragments,"
             "they will be build before. If you don't want to work with fragments,"
             "also turn on 'no_fragments' when using the sucommands "
             "'gene_quanti' and 'coverage' to avoid building the fragments when"
             "running these downstream subcommands",
    )
    read_aligning_parser.add_argument(
        "--max_fragment_length",
        "-mf",
        default=False,
        type=is_positive_int,
        help="The maximum fragment length for all fragments. Fragments are "
              "either built from two mates or a single read",
    )
    read_aligning_parser.add_argument(
        "--split",
        "-S",
        default=False,
        action="store_true",
        help="Run segemehl with read splitting.",
    )
    read_aligning_parser.add_argument(
        "--poly_a_clipping",
        "-c",
        default=False,
        action="store_true",
        help="Perform polyA tail clipping. This option "
        "cannot be used for paired-end reads.",
    )
    read_aligning_parser.add_argument(
        "--fastq",
        "-q",
        default=False,
        action="store_true",
        help="Input reads are in FASTQ not FASTA format.",
    )
    read_aligning_parser.add_argument(
        "--min_phred_score",
        "-Q",
        default=None,
        type=int,
        help="Minimal Phred score. Works only if read are given in FASTQ "
        "format. As soon as a based drop below this value it and all the "
        "nucleotides downstream of it will be trimmed off.",
    )
    read_aligning_parser.add_argument(
        "--adapter",
        "-A",
        default=None,
        type=str,
        help="Adapter sequence. If it is found in a read it and all the "
        "nucleotides downstream will be trimmed off.",
    )
    read_aligning_parser.add_argument(
        "--check_for_existing_files",
        "-F",
        default=False,
        action="store_true",
        help="Check for existing files (e.g. from a "
        "interrupted previous run) and do not overwrite them if they exits. "
        "Attention! You have to take care that there are no partially "
        "generated files left!",
    )
    read_aligning_parser.add_argument(
        "--reverse_complement",
        "-R",
        default=False,
        action="store_true",
        help="Map reverse complement of the input reads.",
    )
    read_aligning_parser.add_argument(
        "--progress",
        "-g",
        default=False,
        action="store_true",
        help="Show progress of the segemehl mapping.",
    )
    read_aligning_parser.add_argument(
        "--crossalign_cleaning",
        "-x",
        default=False,
        action="store_true",
        help="Remove reads from BAM files that are cross-mapped to replicons of different "
        "species.",
    )
    read_aligning_parser.set_defaults(func=align_reads)

    # Parameters for coverage file building
    coverage_creation_parser = subparsers.add_parser(
        "coverage", help="Create coverage (wiggle) files"
    )
    coverage_creation_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )
    coverage_creation_parser.add_argument(
        "--unique_only",
        "-u",
        default=False,
        action="store_true",
        help="Use uniquely aligned reads only.",
    )
    coverage_creation_parser.add_argument(
        "--normalize_by_uniquely",
        "-U",
        default=False,
        action="store_true",
        help="Normalize by the number of uniquely aligned reads. By default "
        "the normalization is done based on the total number of aligned reads "
        "even if only uniquely aligned reads are used for the coverage "
        "calculation.",
    )
    coverage_creation_parser.add_argument(
        "--count_cross_aligned_reads",
        "-x",
        default=False,
        action="store_true",
        help="Count species-cross-aligned reads for the coverage file creation,"
        " by default these reads are not counted",
    )
    coverage_creation_parser.add_argument(
        "--normalize_cross_aligned_reads_included",
        "-X",
        default=False,
        action="store_true",
        help="Add species-cross-aligned reads to normalization,"
        " by default only the species specific reads are used.",
    )
    coverage_creation_parser.add_argument(
        "--processes",
        "-p",
        default=1,
        type=int,
        help="Number of processes that should be used (default 1).",
    )
    coverage_creation_parser.add_argument(
        "--skip_read_count_splitting",
        "-s",
        default=False,
        action="store_true",
        help="Do not split the read counting between "
        "different alignings. Default is to do the splitting.",
    )
    coverage_creation_parser.add_argument(
        "--non_strand_specific",
        "-d",
        default=False,
        action="store_true",
        help="Do not distict between the coverage of the "
        "forward and reverse strand but sum them to a single value for each "
        "base.",
    )
    coverage_creation_parser.add_argument(
        "--coverage_style",
        "-b",
        choices=["global", "first_base_only", "last_base_only", "centered"],
        default="global",
        help="Select for coverage generation if only the "
        "first aligned base at the 5' end of each read ('first_base_only') or "
        "the last aligned base at the 3' end of each read ('last_base_only') "
        "is taken into account. The centered approach ('centered') clips a "
        "predefined number of nts from each alignment end and adds to the "
        "remaining genomic region a value divided by its length. By default "
        "the coverage is generated using the whole range of each alignment "
        "('global').",
    )
    coverage_creation_parser.add_argument(
        "--clip_length",
        "-cl",
        type=int,
        default=11,
        help="Number of "
        "nucleotides that are clipped from each alignment end for centered "
        "approach.",
    )
    coverage_creation_parser.add_argument(
        "--check_for_existing_files",
        "-F",
        default=False,
        action="store_true",
        help="Check for existing files (e.g. from a "
        "interrupted previous run) and do not overwrite them if they exits. "
        "Attention! You have to take care that there are no partially "
        "generated files left!",
    )
    coverage_creation_parser.add_argument(
        "--paired_end",
        "-P",
        default=False,
        action="store_true",
        help="Use this if reads are originating from a paired-end sequencing.",
    )
    coverage_creation_parser.add_argument(
        "--no_fragments",
        "-nf",
        default=False,
        action="store_true",
        help="Count single alignments instead of fragments built after alignment."
             "This option saves time if the fragments have not been build during alignment."
             "Only use this if reads are originating from a paired-end sequencing.",
    )
    coverage_creation_parser.add_argument(
        "--max_fragment_length",
        "-mf",
        default=False,
        type=is_positive_int,
        help="The maximum fragment length for all fragments. Fragments are "
             "either built from two mates or a single read",
    )
    coverage_creation_parser.add_argument(
        "--no_norm_by_fragments",
        "-nff",
        default=False,
        action="store_true",
        help="Use total no. of single alignments instead of fragments built after alignment for the normalisation."
             "This option saves time if the fragments have not been build during alignment."
             "Only use this if reads are originating from a paired-end sequencing.",
    )

    coverage_creation_parser.set_defaults(func=create_coverage_files)

    # Parameters for gene wise quantification
    gene_wise_quanti_parser = subparsers.add_parser(
        "gene_quanti", help="Quantify the expression gene wise"
    )
    gene_wise_quanti_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )
    gene_wise_quanti_parser.add_argument(
        "--min_overlap",
        "-o",
        default=1,
        type=int,
        help="Minimal read-annotation-overlap (in nt) (default 1).",
    )
    gene_wise_quanti_parser.add_argument(
        "--read_region",
        "-b",
        choices=["global", "first_base_only", "last_base_only", "centered"],
        default="global",
        help="Select for gene-wise quantification "
        "if only the first aligned base at the 5' end of each read "
        "('first_base_only') or the last aligned base at the 3' end of each "
        "read ('last_base_only') is taken into account. The centered approach "
        "('centered') clips a predefined number of nts from each alignment end "
        "and calculates the overlap based on the remaining region. By default "
        "the overlap is calculated based on the whole range of each alignment "
        "('global').",
    )
    gene_wise_quanti_parser.add_argument(
        "--clip_length",
        "-cl",
        type=int,
        default=11,
        help="Number of "
        "nucleotides that are clipped from each alignment end for centered "
        "approach.",
    )
    gene_wise_quanti_parser.add_argument(
        "--no_count_split_by_alignment_no",
        "-n",
        default=False,
        action="store_true",
        help="Do not split read countings by the number "
        "of alignments a read has. By default this count splitting is "
        "performed.",
    )
    gene_wise_quanti_parser.add_argument(
        "--no_count_splitting_by_gene_no",
        "-l",
        default=False,
        action="store_true",
        help="Do not split read countings by the number "
        "of genes it overlaps with. By default this count splitting is "
        "performed.",
    )
    gene_wise_quanti_parser.add_argument(
        "--add_antisense",
        "-a",
        default=False,
        action="store_true",
        help="Count anti-sense and sense read-gene-overlaps and report them separately. By default only sense "
        "overlaps are counted.",
    )
    gene_wise_quanti_parser.add_argument(
        "--antisense_only",
        "-ao",
        default=False,
        action="store_true",
        help="Count only anti-sense read-gene-overlaps and no sense overlaps. By default only sense "
        "overlaps are counted.",
    )
    gene_wise_quanti_parser.add_argument(
        "--non_strand_specific",
        default=False,
        action="store_true",
        help="Use countings of reads overlapping with a gene on both strands "
        "and sum them up.",
    )
    gene_wise_quanti_parser.add_argument(
        "--processes",
        "-p",
        default=1,
        type=int,
        help="Number of processes that should be used (default 1).",
    )
    gene_wise_quanti_parser.add_argument(
        "--features",
        "-t",
        dest="allowed_features",
        default=None,
        help="Comma separated list of features that should be considered "
        "(e.g. gene, cds, region, exon). Other feature will be skipped. If "
        "not specified all features will be considered.",
    )
    gene_wise_quanti_parser.add_argument(
        "--unique_only",
        "-u",
        default=False,
        action="store_true",
        help="Use uniquely aligned reads only.",
    )
    gene_wise_quanti_parser.add_argument(
        "--pseudocounts",
        "-c",
        default=False,
        action="store_true",
        help="Add a pseudocount of 1 to each gene.",
    )
    gene_wise_quanti_parser.add_argument(
        "--check_for_existing_files",
        "-F",
        default=False,
        action="store_true",
        help="Check for existing files (e.g. from a "
        "interrupted previous run) and do not overwrite them if they exits. "
        "Attention! You have to take care that there are no partially "
        "generated files left!",
    )
    gene_wise_quanti_parser.add_argument(
        "--count_cross_aligned_reads",
        "-x",
        default=False,
        action="store_true",
        help="Count species-cross-aligned reads for the gene quantification. By"
        "default these reads are not counted",
    )
    gene_wise_quanti_parser.add_argument(
        "--normalize_cross_aligned_reads_included",
        "-X",
        default=False,
        action="store_true",
        help="Add species-cross-aligned reads to normalization,"
        " by default only the species specific reads are used.",
    )

    gene_wise_quanti_parser.add_argument(
        "--paired_end",
        "-P",
        default=False,
        action="store_true",
        help="Use this if reads are originating from a paired-end sequencing.",
    )
    gene_wise_quanti_parser.add_argument(
        "--no_fragments",
        "-nf",
        default=False,
        action="store_true",
        help="Count single alignments instead of fragments built after alignment."
             "This option saves time if the fragments have not been build during alignment."
             "Only use this if reads are originating from a paired-end sequencing.",
    )
    gene_wise_quanti_parser.add_argument(
        "--max_fragment_length",
        "-mf",
        default=False,
        type=is_positive_int,
        help="The maximum fragment length for all fragments. Fragments are "
             "either built from two mates or a single read",
    )
    gene_wise_quanti_parser.add_argument(
        "--no_norm_by_fragments",
        "-nff",
        default=False,
        action="store_true",
        help="Use total no. of single alignments instead of fragments built after alignment for the normalisation."
             "This option saves time if the fragments have not been build during alignment."
             "Only use this if reads are originating from a paired-end sequencing.",
    )
    gene_wise_quanti_parser.set_defaults(func=run_gene_wise_quantification)

    # Parameters for DESeq calling
    deseq_parser = subparsers.add_parser(
        "deseq", help="Compare expression pairwise using DESeq"
    )
    deseq_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )
    deseq_parser.add_argument(
        "--libs", "-l", required=True, help="Comma separated list of libraries."
    )
    deseq_parser.add_argument(
        "--conditions",
        "-c",
        required=True,
        help="Comma separated list of condition in the same order as "
        "their corresponding libraries.",
    )
    deseq_parser.add_argument(
        "--replicates",
        "-r",
        required=True,
        help="Comma separated list of replicates in the same order as "
        "their corresponding libraries.",
    )
    deseq_parser.add_argument(
        "--libs_by_species",
        "-s",
        nargs="+",
        metavar="SUB_FOLDER_NAME=LIST_OF_LIBRARIES",
        required=True,
        help="One key-value pair for each species.\n"
        "Key-value "
        "pairs consist of the 'folder prefix' (key) and a 'list of "
        "libraries' and are separated by an '='-sign.\n"
        "The 'folder prefix' has or have to be the same that was used when "
        "creating a new project \n"
        "Syntax example: \n"
        "$ reademption deseq --libs infected_1.fq,infected_2.fq,"
        "control_human_1.fq,control_human_2.fq,"
        "control_staph_1.fq,control_staph_2.fq \n"
        "--conditions infected,infected,control_human,control_human,"
        "control_staph,control_staph \n"
        "--replicates 1,2,1,2,1,2"
        " --libs_by_species "
        'human="infected_1.fq,infected_2.fq,control_human_1.fq,'
        'control_human2.fq" '
        'staphylococcus="infected_1.fq,infected_2.fq,'
        'control_staphylococcus_1.fq,control_staphylococcus_2.fq" '
        "--project_path READemption_analysis",
    )
    deseq_parser.add_argument(
        "--size_factor",
        "-b",
        choices=["project", "species", "comparison"],
        default="species",
        help="Select the libraries for the size factor estimation of DESeq2."
        "Selecting the option 'project' will result in calculating the "
        "size factors for all comparisons based on all libraries of the"
        "whole project (It doesn't matter if the libraries have no reads "
        "of the species being inspected or if the libraries are part of"
        "the current comparison). The option 'species' will result in "
        "calculating the size factors for all comparisons of a species "
        "only based on the libraries of the given species. The option "
        "'comparison' calculates the size factors for each comparison and"
        "only based on the libraries that are being compared for the"
        "current comparison.",
    )

    deseq_parser.add_argument(
        "--cooks_cutoff_off", "-k", default=False, action="store_true"
    )
    deseq_parser.add_argument(
        "--fc_shrinkage_off",
        "-so",
        default=False,
        action="store_true",
        help="turn off log2 fold change shrinkage",
    )

    deseq_parser.set_defaults(func=run_deseq)

    # Parameters for viz_align
    viz_align_parser = subparsers.add_parser(
        "viz_align",
        help="Generate read processing and alignment visualisations.",
    )
    viz_align_parser.add_argument(
        "--paired_end",
        "-P",
        default=False,
        action="store_true",
        help="Use this if reads are originating from a paired-end sequencing.",
    )
    viz_align_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )
    viz_align_parser.set_defaults(func=viz_align)

    # Parameters for viz_gene_quanti
    viz_gene_quanti_parser = subparsers.add_parser(
        "viz_gene_quanti",
        help="Generate gene wise quantification visualisations.",
    )
    viz_gene_quanti_parser.add_argument(
        "--paired_end",
        "-P",
        default=False,
        action="store_true",
        help="Use this if reads are originating from a paired-end sequencing.",
    )
    viz_gene_quanti_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )
    viz_gene_quanti_parser.set_defaults(func=viz_gene_quanti)

    # Parameters for viz_deseq
    viz_deseq_parser = subparsers.add_parser(
        "viz_deseq", help="Generate DESeq visualisations."
    )
    viz_deseq_parser.add_argument(
        "--project_path",
        "-f",
        required=True,
        help="Path of the project folder.",
    )

    viz_deseq_parser.add_argument(
        "--max_pvalue",
        type=float,
        default="0.05",
        help="Maximum adjusted p-value for genes considered to be regulated. "
        "Genes with adjusted p-values below will be marked red. "
        "(default 0.05)",
    )
    viz_deseq_parser.set_defaults(func=viz_deseq)
    return parser


def create_project(controller):
    controller.create_project(__version__)


def align_reads(controller):
    controller.align_reads()


def create_coverage_files(controller):
    controller.create_coverage_files()


def run_gene_wise_quantification(controller):
    controller.quantify_gene_wise()


def run_deseq(controller):
    controller.compare_with_deseq()


def viz_align(controller):
    controller.viz_align()


def viz_gene_quanti(controller):
    controller.viz_gene_quanti()


def viz_deseq(controller):
    controller.viz_deseq()


if __name__ == "__main__":
    main()
