# Module metadata: author, version and development status of this file.
__author__ = 'lucabasa'
__version__ = '0.0.4'
__status__ = 'development'

from tubesml.base import BaseTransformer, self_columns, reset_columns
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
import pandas as pd


class DfScaler(BaseTransformer):
    '''
    Wrapper of several sklearn scalers that keeps the dataframe structure.
    Inherits from BaseTransformer

    :Attributes:
    ------------

    method : str, the method to scale the data, default "standard"
              Allowed values: "standard", 'robust', 'minmax'

    feature_range : Range to scale the data to when the method is 'minmax'

    :Fitted attributes:
    -------------------

    All of them are None until ``fit`` is called; afterwards they are
    pandas Series indexed by the columns of the training data
    (``n_samples_seen_`` is an int).

    scale_ : the ``scale_`` of the underlying sklearn scaler (all methods)
    mean_ : the ``mean_`` of the StandardScaler (method 'standard')
    center_ : the ``center_`` of the RobustScaler (method 'robust')
    min_, data_min_, data_max_, data_range_, n_samples_seen_ :
        statistics of the MinMaxScaler (method 'minmax')

    '''
    def __init__(self, method='standard', feature_range=(0,1)):
        super().__init__()
        self.method = method
        self._validate_input()
        self.feature_range = feature_range
        # Placeholders for the statistics learned by fit; which ones exist
        # depends on the chosen method.
        self.scale_ = None
        if self.method == 'standard':
            self.mean_ = None
        elif self.method == 'robust':
            self.center_ = None
        elif self.method == 'minmax':
            self.min_ = None
            self.data_min_ = None
            self.data_max_ = None
            self.data_range_ = None
            self.n_samples_seen_ = None


    def _validate_input(self):
        '''
        Check that the method attribute is one of the supported scalers.

        :Raises:
        --------

        ValueError : if method is not "standard", 'robust' or 'minmax'
        '''
        allowed_methods = ["standard", 'robust', 'minmax']
        if self.method not in allowed_methods:
            raise ValueError(f"Can only use these methods: {allowed_methods} got method={self.method}")


    @reset_columns
    def fit(self, X, y=None):
        '''
        Method to train the scaler.
        Depending on the method attribute, it calls a different sklearn scaler
        and stores its learned statistics as pandas Series indexed by the
        columns of X.
        It also resets the columns attribute (via the reset_columns decorator)

        :Parameters:
        ------------

        X : pandas DataFrame of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs), Not used
            The target values (class labels) as integers or strings.

        :Returns:
        ---------

        self : the fitted DfScaler

        :Raises:
        --------

        ValueError : if the method attribute is not an allowed value

        '''
        # The method attribute can be reassigned after construction, so it is
        # checked again here; otherwise an unsupported value would skip every
        # branch below and either crash on self.scl or reuse a stale scaler.
        self._validate_input()

        if self.method == 'standard':
            self.scl = StandardScaler()
        elif self.method == 'robust':
            self.scl = RobustScaler()
        else:  # 'minmax', guaranteed by the validation above
            self.scl = MinMaxScaler(feature_range=self.feature_range)
        self.scl.fit(X)

        if self.method == 'standard':
            self.mean_ = pd.Series(self.scl.mean_, index=X.columns)
        elif self.method == 'robust':
            self.center_ = pd.Series(self.scl.center_, index=X.columns)
        else:
            self.min_ = pd.Series(self.scl.min_, index=X.columns)
            self.data_min_ = pd.Series(self.scl.data_min_, index=X.columns)
            self.data_max_ = pd.Series(self.scl.data_max_, index=X.columns)
            self.data_range_ = self.data_max_ - self.data_min_
            self.n_samples_seen_ = X.shape[0]
        self.scale_ = pd.Series(self.scl.scale_, index=X.columns)
        return self


    @self_columns
    def transform(self, X, y=None):
        '''
        Method to transform the input data
        It populates the columns attribute with the columns of the output data
        (via the self_columns decorator)

        :Parameters:
        ------------

        X : pandas DataFrame of shape (n_samples, n_features)
            The input samples.
        y : array-like of shape (n_samples,) or (n_samples, n_outputs), Not used
            The target values (class labels) as integers or strings.

        :Returns:
        ---------

        pandas DataFrame with the scaled values and the same index and
        columns as X

        :Raises:
        --------

        NotFittedError : if transform is called before fit

        '''
        if getattr(self, 'scl', None) is None:
            # Imported here to keep the block self-contained. NotFittedError
            # subclasses both ValueError and AttributeError, so callers that
            # caught the former AttributeError keep working.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this transformer."
            )
        Xscl = self.scl.transform(X)
        Xscaled = pd.DataFrame(Xscl, index=X.index, columns=X.columns)
        return Xscaled
    