import collections
from typing import Tuple
from injecta.container.ContainerInterface import ContainerInterface
from pyspark.sql.dataframe import DataFrame
from databricksbundle.notebook.decorators import ResultDecorator

class DuplicateColumnsChecker:
    """Fails a transformation whose output DataFrame contains duplicate column names.

    Column names are compared after lower-casing, so ``ID`` and ``id`` count
    as the same column.
    """

    def __init__(
        self,
        container: ContainerInterface,
    ):
        # The container is kept (rather than the logger itself) so the logger
        # service is only resolved when duplicates are actually found.
        self.__container = container

    def check(self, df: DataFrame, resultDecorators: Tuple[ResultDecorator, ...]):
        """Raise if ``df`` has two or more columns sharing a (lower-cased) name.

        For every duplicated column an error is logged through the
        ``databricksbundle.logger`` service, listing the names of the
        decorated functions whose result DataFrames contain that column.

        :param df: output DataFrame whose schema is inspected
        :param resultDecorators: decorators whose ``result`` DataFrames were
            the inputs; used only to report where each duplicate came from
        :raises Exception: when at least one duplicated column name is found
        """
        fieldNames = [field.name.lower() for field in df.schema.fields]
        # Counter keeps first-seen order, so duplicates are reported in schema order.
        duplicateFields = [
            fieldName
            for fieldName, count in collections.Counter(fieldNames).items()
            if count > 1
        ]

        if not duplicateFields:
            return

        # Lower-cased column name -> names of the functions whose result contains it.
        fields2Tables = collections.defaultdict(list)

        for resultDecorator in resultDecorators:
            functionName = resultDecorator.function.__name__

            for field in resultDecorator.result.schema.fields:
                fields2Tables[field.name.lower()].append(functionName)

        logger = self.__container.get('databricksbundle.logger')

        for duplicateField in duplicateFields:
            # A duplicated column need not exist in any source DataFrame (it may
            # have been created inside the transformation, or there may be no
            # sources at all). Fall back to an empty list so the descriptive
            # exception below is raised instead of a KeyError.
            sourceDataFrames = fields2Tables.get(duplicateField, [])
            logger.error(f'Duplicate field {duplicateField}', extra={'source_dataframes': sourceDataFrames})

        fieldsString = ', '.join(duplicateFields)
        raise Exception(f'Duplicate output column(s): {fieldsString}. Disable by setting @transformation(checkDuplicateColumns=False)')
