import numpy as np
import scipy.stats as stats
from sklearn.utils import check_array
from .base import BaseThresholder
from .thresh_utility import normalize, cut


class MCST(BaseThresholder):
    """MCST class for Monte Carlo Shapiro Tests thresholder.

    Use uniform random sampling and statistical testing to evaluate a
    non-parametric means to threshold scores generated by the
    decision_scores, where outliers are set to any value beyond the
    minimum value left after iterative Shapiro-Wilk tests have occurred.
    Note** accuracy decreases with array size. For good results the array
    size should be <1000. However, this threshold method may still fail
    at any array size.

    Parameters
    ----------
    random_state : int, optional (default=123)
        Seed forwarded to ``scipy.stats.uniform.rvs`` when drawing the
        random candidate values. The default reproduces the previously
        hard-coded seed, so results are deterministic unless changed.

    Attributes
    ----------
    thresh_ : float
        Threshold value selected by the most recent call to ``eval``,
        expressed on the scale of the normalized decision scores.

    """

    def __init__(self, random_state=123):

        self.random_state = random_state

    def eval(self, decision):
        """Outlier/inlier evaluation process for decision scores.

        Parameters
        ----------
        decision : np.array or list of shape (n_samples)
                   which are the decision scores from an
                   outlier detection.

        Returns
        -------
        outlier_labels : numpy array of shape (n_samples,)
            For each observation, tells whether or not
            it should be considered as an outlier according to the
            fitted model. 0 stands for inliers and 1 for outliers.
        """

        decision = check_array(decision, ensure_2d=False)

        # NOTE(review): normalize is defined in thresh_utility; presumably
        # it rescales the scores to [0, 1] -- confirm against the helper.
        decision = normalize(decision)

        # Baseline Shapiro-Wilk p-value of the scores on their own
        p_std = stats.shapiro(decision).pvalue

        # One uniform random candidate per score, passed through the same
        # normalization as the scores
        rnd = stats.uniform.rvs(loc=0, scale=1, size=len(decision),
                                random_state=self.random_state)
        rnd = normalize(rnd)

        # Append each candidate to the scores in turn and re-run the test.
        # A candidate that pushes the p-value above the best seen so far is
        # recorded as a potential outlier value, and that p-value becomes
        # the new bar for the remaining candidates.
        povr = []
        for candidate in rnd:

            p_check = stats.shapiro(np.append(decision, candidate)).pvalue

            if p_check > p_std:

                p_std = p_check
                povr.append(candidate)

        # The threshold is the smallest recorded candidate; when none was
        # recorded fall back to 1.0
        limit = np.min(povr) if povr else 1.0

        self.thresh_ = limit

        # NOTE(review): cut is defined in thresh_utility; presumably it
        # labels scores beyond the limit as outliers -- confirm.
        return cut(decision, limit)
