import re

import numpy as np
import pandas as pd

from .types import PyStvError, RaceInfo, RaceMetadata

# Matches column headers of the form "<question> [<option>]", e.g.
# "Best fruit [Apple]" -> question="Best fruit", option="Apple".
# Both parts are greedy, so the split happens at the last " [" that is still
# followed by a "]".
QUESTION_PATTERN = re.compile(r"(?P<question>.*) \[(?P<option>.*)\]")


def parse_google_form_csv(buffer):
    """Parse a Google Forms CSV export into one ``RaceInfo`` per question.

    Columns are grouped into questions by ``parse_header``. Within a question
    each column is one option and each cell holds the rank a respondent gave
    that option (cells are normalised to integers by ``coerce``; 0 means the
    option was left unranked).

    Args:
        buffer: Anything ``pandas.read_csv`` accepts (path or file-like).

    Returns:
        A list of ``RaceInfo(metadata, ballots, votes)``. ``ballots`` is a list
        of distinct ballots, each a list of 1-based option indices in order of
        preference and padded with 0 for unranked options; ``votes`` holds the
        number of respondents who cast each distinct ballot.

    Raises:
        PyStvError: Propagated from ``coerce`` when a cell cannot be read as
            a rank.
    """
    df = pd.read_csv(buffer)
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the
    # old name emits a FutureWarning; use whichever this pandas provides.
    has_frame_map = hasattr(pd.DataFrame, "map")

    race_infos = []
    for metadata, slice_ in parse_header(df.columns):
        answers = df.iloc[:, slice_]
        elementwise = answers.map if has_frame_map else answers.applymap
        goog = elementwise(coerce).values
        # Rank 0 marks "not ranked"; mask those cells so they sort last.
        goog = np.ma.array(goog, mask=(goog == 0))

        # Sorting each row by rank yields option indices in preference order,
        # with the masked (unranked) options pushed to the end of the row.
        argsort = goog.argsort(axis=1)
        # Carry the mask along with the reordering so unranked slots stay
        # identifiable after the sort.
        mask = np.take_along_axis(goog.mask, argsort, axis=1)
        # Shift to 1-based option numbers so that 0 is free to mean "blank".
        ballots = np.ma.array(argsort, mask=mask) + 1
        ballots = ballots.filled(0)
        # Collapse identical ballots and count how often each one occurs.
        ballots, votes = np.unique(ballots, axis=0, return_counts=True)
        race_infos.append(RaceInfo(metadata, ballots.tolist(), votes.tolist()))
    return race_infos


def parse_header(header):
    """Group consecutive "<question> [<option>]" columns into races.

    A race is a maximal run of adjacent columns that match
    ``QUESTION_PATTERN`` and share the same question text. Columns that do
    not match the pattern end the current run and are otherwise ignored.

    Args:
        header: Iterable of column names.

    Returns:
        A list of ``(RaceMetadata, slice)`` tuples in column order. The
        metadata is built as ``RaceMetadata(question, 1, options)`` and the
        slice covers the column positions belonging to that race.
    """
    # One record per race: [question, options, start, end]. ``end`` is filled
    # in when the run of columns for that race is closed.
    races = []
    active = None

    for position, column in enumerate(header):
        found = QUESTION_PATTERN.match(column)
        title = None if found is None else found.group("question")

        # Close the running race when this column does not continue it.
        if active is not None and (found is None or title != active[0]):
            active[3] = position
            active = None

        if found is None:
            continue

        if active is None:
            active = [title, [], position, None]
            races.append(active)
        active[1].append(found.group("option"))

    # A race that runs up to the last column is closed just past the end.
    if active is not None:
        active[3] = position + 1

    return [
        (RaceMetadata(title, 1, choices), slice(first, last))
        for title, choices, first, last in races
    ]


def coerce(x):
    """Convert one spreadsheet cell into an integer rank.

    Args:
        x: A cell value as produced by ``pandas.read_csv``: a float (NaN for
            an empty cell), an integer, or a string such as ``"1st"`` or
            ``"3"``.

    Returns:
        The rank as an ``int``; 0 for NaN (empty cell).

    Raises:
        PyStvError: If the value is neither numeric nor a string containing
            exactly one run of digits.
    """
    if isinstance(x, (float, np.floating)):
        if np.isnan(x):
            return 0
        return int(x)
    # A column with no blanks is read as integers rather than floats; those
    # would otherwise reach the regex below and fail with a TypeError.
    # bool is an int subclass but is not a meaningful rank, so exclude it.
    if isinstance(x, (int, np.integer)) and not isinstance(x, bool):
        return int(x)
    if not isinstance(x, str):
        raise PyStvError(f"Could not determine number: {x}")
    # Optional ordinal suffix; a non-capturing group (not a character class)
    # so that only the digits are returned by findall.
    numbers = re.findall(r"(\d+)(?:st|nd|rd|th)?", x)
    if len(numbers) != 1:
        raise PyStvError(f"Could not determine number: {x}")
    return int(numbers[0])
