from io import StringIO
from itertools import chain
from typing import TypeVar

import pandas as pd
from dcp.data_copy.base import CopyRequest, create_empty_if_not_exists, datacopier
from dcp.data_copy.costs import (
    FormatConversionCost,
    MemoryToBufferCost,
    MemoryToMemoryCost,
)
from dcp.data_format.formats.memory.arrow_table import ArrowTableFormat
from dcp.data_format.formats.memory.dataframe import DataFrameFormat
from dcp.data_format.formats.memory.records import Records, RecordsFormat
from dcp.storage.base import MemoryStorageClass, StorageApi
from dcp.storage.memory.engines.python import PythonStorageApi
# from dcp.data_format.formats.memory.csv_lines_iterator import CsvLinesIteratorFormat
from dcp.utils.data import read_csv, write_csv
from dcp.utils.pandas import dataframe_to_records

# pyarrow is treated as an optional dependency so this module can still be
# imported when it is not installed.
try:
    import pyarrow as pa
except ImportError:
    # Placeholder that keeps the module-level name ``pa`` defined. It is a
    # TypeVar, not the real package, so code that actually calls into ``pa``
    # (e.g. ``pa.Table.from_pandas``) will fail at call time without pyarrow.
    pa = TypeVar("pa")


@datacopier(
    from_storage_classes=[MemoryStorageClass],
    from_data_formats=[RecordsFormat],
    to_storage_classes=[MemoryStorageClass],
    to_data_formats=[DataFrameFormat],
    cost=MemoryToMemoryCost + FormatConversionCost,
)
def copy_records_to_df(req: CopyRequest):
    """Append in-memory records to an in-memory DataFrame.

    Reads the object stored under ``req.from_name``, builds a DataFrame from
    it, calls ``create_empty_if_not_exists(req)``, and then stores the
    destination's current DataFrame with the new rows concatenated after it
    under ``req.to_name``. Finally the stored object is cast to the request's
    schema via the destination format handler.

    Args:
        req: The copy request; both of its storage APIs must be
            ``PythonStorageApi`` instances.
    """
    assert isinstance(req.from_storage_api, PythonStorageApi)
    assert isinstance(req.to_storage_api, PythonStorageApi)
    records_object = req.from_storage_api.get(req.from_name)
    df = pd.DataFrame(records_object)
    create_empty_if_not_exists(req)
    existing_df = req.to_storage_api.get(req.to_name)
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with default arguments is its documented replacement.
    final_df = pd.concat([existing_df, df])
    req.to_storage_api.put(req.to_name, final_df)
    # Pandas does not preserve types well through operations, so we need to cast here
    req.to_format_handler.cast_to_schema(
        req.to_name, req.to_storage_api.storage, req.get_schema()
    )


@datacopier(
    from_storage_classes=[MemoryStorageClass],
    from_data_formats=[DataFrameFormat],
    to_storage_classes=[MemoryStorageClass],
    to_data_formats=[RecordsFormat],
    cost=MemoryToMemoryCost + FormatConversionCost,
)
def copy_df_to_records(req: CopyRequest):
    """Append an in-memory DataFrame's rows to an in-memory records object.

    The source object is passed through ``dataframe_to_records`` and the
    result is joined with ``+`` onto whatever the destination currently holds
    (after ``create_empty_if_not_exists(req)`` has run), then stored back
    under ``req.to_name``.

    Args:
        req: The copy request; both of its storage APIs must be
            ``PythonStorageApi`` instances.
    """
    source_api = req.from_storage_api
    assert isinstance(source_api, PythonStorageApi)
    target_api = req.to_storage_api
    assert isinstance(target_api, PythonStorageApi)

    new_rows = dataframe_to_records(source_api.get(req.from_name))

    create_empty_if_not_exists(req)
    combined = target_api.get(req.to_name) + new_rows
    target_api.put(req.to_name, combined)
    # Only necessary if we think there is datatype loss when converting df->records
    # req.to_format_handler.cast_to_schema(
    #     req.to_name, req.to_storage_api.storage, req.get_schema()
    # )


# Self copy?
@datacopier(
    from_storage_classes=[MemoryStorageClass],
    from_data_formats=[RecordsFormat],
    to_storage_classes=[MemoryStorageClass],
    to_data_formats=[RecordsFormat],
    cost=MemoryToMemoryCost,
)
def copy_records_to_records(req: CopyRequest):
    """Append one in-memory records object onto another.

    Fetches the source object, calls ``create_empty_if_not_exists(req)``, and
    stores ``existing + source`` under ``req.to_name``. No format conversion
    or schema cast is performed.

    Args:
        req: The copy request; both of its storage APIs must be
            ``PythonStorageApi`` instances.
    """
    source_api = req.from_storage_api
    assert isinstance(source_api, PythonStorageApi)
    target_api = req.to_storage_api
    assert isinstance(target_api, PythonStorageApi)

    incoming = source_api.get(req.from_name)

    create_empty_if_not_exists(req)
    current = target_api.get(req.to_name)
    target_api.put(req.to_name, current + incoming)


@datacopier(
    from_storage_classes=[MemoryStorageClass],
    from_data_formats=[DataFrameFormat],
    to_storage_classes=[MemoryStorageClass],
    to_data_formats=[DataFrameFormat],
    cost=MemoryToMemoryCost,
)
def copy_df_to_df(req: CopyRequest):
    """Append one in-memory DataFrame onto another.

    Fetches the source DataFrame, calls ``create_empty_if_not_exists(req)``,
    and stores the destination's current DataFrame with the source rows
    concatenated after it under ``req.to_name``. The stored object is then
    cast to the request's schema via the destination format handler.

    Args:
        req: The copy request; both of its storage APIs must be
            ``PythonStorageApi`` instances.
    """
    assert isinstance(req.from_storage_api, PythonStorageApi)
    assert isinstance(req.to_storage_api, PythonStorageApi)
    df = req.from_storage_api.get(req.from_name)
    create_empty_if_not_exists(req)
    existing_df = req.to_storage_api.get(req.to_name)
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with default arguments is its documented replacement.
    final_df = pd.concat([existing_df, df])
    req.to_storage_api.put(req.to_name, final_df)
    req.to_format_handler.cast_to_schema(
        req.to_name, req.to_storage_api.storage, req.get_schema()
    )


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[DataFrameIteratorFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[RecordsIteratorFormat],
#     cost=BufferToBufferCost + FormatConversionCost,
# )
# def copy_df_iterator_to_records_iterator(
# req: CopyRequest
# ):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     records_object = req.from_storage_api.get(req.from_name)
#     itr = (dataframe_to_records(df, req.get_schema()) for df in records_object)
#     to_records_object = as_records(itr, data_format=RecordsIteratorFormat, schema=req.get_schema())
#     to_records_object = to_records_object.conform_to_schema()
#     req.to_storage_api.put(req.to_name, to_records_object)


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[RecordsIteratorFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[DataFrameIteratorFormat],
#     cost=BufferToBufferCost + FormatConversionCost,
# )
# def copy_records_iterator_to_df_iterator(
# req: CopyRequest
# ):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     records_object = req.from_storage_api.get(req.from_name)
#     itr = (pd.DataFrame(records) for records in records_object)
#     to_records_object = as_records(itr, data_format=DataFrameIteratorFormat, schema=req.get_schema())
#     to_records_object = to_records_object.conform_to_schema()
#     req.to_storage_api.put(req.to_name, to_records_object)


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[RecordsIteratorFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[RecordsFormat],
#     cost=MemoryToMemoryCost,
# )
# def copy_records_iterator_to_records(
# req: CopyRequest
# ):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     records_object = req.from_storage_api.get(req.from_name)
#     all_records = []
#     for records in records_object:
#         all_records.extend(records)
#     to_records_object = as_records(all_records, data_format=RecordsFormat, schema=req.get_schema())
#     to_records_object = to_records_object.conform_to_schema()
#     req.to_storage_api.put(req.to_name, to_records_object)


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[DataFrameIteratorFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[DataFrameFormat],
#     cost=MemoryToMemoryCost,
# )
# def copy_dataframe_iterator_to_dataframe(
# req: CopyRequest
# ):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     records_object = req.from_storage_api.get(req.from_name)
#     all_dfs = []
#     for df in records_object:
#         all_dfs.append(df)
#     to_records_object = as_records(pd.concat(all_dfs), data_format=DataFrameFormat, schema=req.get_schema())
#     to_records_object = to_records_object.conform_to_schema()
#     req.to_storage_api.put(req.to_name, to_records_object)


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[CsvLinesIteratorFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[RecordsFormat],
#     cost=MemoryToBufferCost + FormatConversionCost,
# )
# def copy_csv_lines_to_records(req: CopyRequest):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     csv_lines = req.from_storage_api.get(req.from_name)
#     records = list(read_csv(csv_lines))
#     create_empty_if_not_exists(req)
#     existing_records = req.to_storage_api.get(req.to_name)
#     req.to_storage_api.put(req.to_name, existing_records + records)
#     # Must cast because csv does a poor job of preserving logical types
#     req.to_format_handler.cast_to_schema(
#         req.to_name, req.to_storage_api.storage, req.get_schema()
#     )


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[RecordsFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[CsvLinesIteratorFormat],
#     cost=MemoryToBufferCost + FormatConversionCost,
# )
# def copy_records_to_csv_lines(req: CopyRequest):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     records = req.from_storage_api.get(req.from_name)
#     create_empty_if_not_exists(req)
#     csv_lines = req.to_storage_api.get(req.to_name)
#     f = StringIO()
#     write_csv(records, f, append=True)
#     f.seek(0)
#     req.to_storage_api.put(req.to_name, chain(csv_lines, (ln for ln in f)))
#     # Casting does no good for a csv (no concept of types)
#     # req.to_format_handler.cast_to_schema(
#     #     req.to_name, req.to_storage_api.storage, req.get_schema()
#     # )


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[DelimitedFileObjectFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[RecordsIteratorFormat],
#     cost=BufferToBufferCost + FormatConversionCost,
# )
# def copy_file_object_to_records_iterator(
# req: CopyRequest
# ):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     records_object = req.from_storage_api.get(req.from_name)
#     # Note: must keep header on each chunk when iterating delimited file object!
#     # TODO: ugly hard-coded 1000 here, but how could we ever make it configurable? Not a big deal I guess
#     itr = (
#         read_csv(chunk)
#         for chunk in with_header(iterate_chunks(records_object, 1000))
#     )
#     to_records_object = as_records(itr, data_format=RecordsIteratorFormat, schema=req.get_schema())
#     to_records_object = to_records_object.conform_to_schema()
#     req.to_storage_api.put(req.to_name, to_records_object)


# @datacopier(
#     from_storage_classes=[MemoryStorageClass],
#     from_data_formats=[DelimitedFileObjectIteratorFormat],
#     to_storage_classes=[MemoryStorageClass],
#     to_data_formats=[RecordsIteratorFormat],
#     cost=BufferToBufferCost + FormatConversionCost,
# )
# def copy_file_object_iterator_to_records_iterator(
# req: CopyRequest
# ):
#     assert isinstance(req.from_storage_api, PythonStorageApi)
#     assert isinstance(req.to_storage_api, PythonStorageApi)
#     records_object = req.from_storage_api.get(req.from_name)
#     itr = (read_csv(chunk) for chunk in with_header(records_object))
#     to_records_object = as_records(itr, data_format=RecordsIteratorFormat, schema=req.get_schema())
#     to_records_object = to_records_object.conform_to_schema()
#     req.to_storage_api.put(req.to_name, to_records_object)


#########
### Arrow
#########


@datacopier(
    from_storage_classes=[MemoryStorageClass],
    from_data_formats=[ArrowTableFormat],
    to_storage_classes=[MemoryStorageClass],
    to_data_formats=[DataFrameFormat],
    cost=MemoryToMemoryCost,  # Sometimes this is a zero-copy no-op (rarely for real world data tho due to lack of null support in numpy)
)
def copy_arrow_to_dataframe(req: CopyRequest):
    """Append an in-memory Arrow table's rows to an in-memory DataFrame.

    Converts the source object with its ``to_pandas()`` method, calls
    ``create_empty_if_not_exists(req)``, and stores the destination's current
    DataFrame with the converted rows concatenated after it under
    ``req.to_name``. No schema cast is applied afterwards.

    Args:
        req: The copy request; both of its storage APIs must be
            ``PythonStorageApi`` instances.
    """
    assert isinstance(req.from_storage_api, PythonStorageApi)
    assert isinstance(req.to_storage_api, PythonStorageApi)
    records_object = req.from_storage_api.get(req.from_name)
    df = records_object.to_pandas()
    create_empty_if_not_exists(req)
    existing_df = req.to_storage_api.get(req.to_name)
    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with default arguments is its documented replacement.
    # No need to cast, should be preserved
    req.to_storage_api.put(req.to_name, pd.concat([existing_df, df]))


@datacopier(
    from_storage_classes=[MemoryStorageClass],
    from_data_formats=[DataFrameFormat],
    to_storage_classes=[MemoryStorageClass],
    to_data_formats=[ArrowTableFormat],
    cost=MemoryToMemoryCost,
)
def copy_dataframe_to_arrow(req: CopyRequest):
    """Append an in-memory DataFrame's rows to an in-memory Arrow table.

    Converts the source DataFrame with ``pa.Table.from_pandas``, calls
    ``create_empty_if_not_exists(req)``, and stores a table built from the
    destination's current record batches followed by the new ones under
    ``req.to_name``. No schema cast is applied afterwards.

    Args:
        req: The copy request; both of its storage APIs must be
            ``PythonStorageApi`` instances.
    """
    assert isinstance(req.from_storage_api, PythonStorageApi)
    assert isinstance(req.to_storage_api, PythonStorageApi)
    records_object = req.from_storage_api.get(req.from_name)
    at = pa.Table.from_pandas(records_object)
    create_empty_if_not_exists(req)
    existing_table: pa.Table = req.to_storage_api.get(req.to_name)
    batches = existing_table.to_batches() + at.to_batches()
    if batches:
        # Schema is inferred from the first batch, as before.
        new_table = pa.Table.from_batches(batches)
    else:
        # Neither side yielded a record batch (e.g. both are empty).
        # from_batches cannot infer a schema from an empty list and would
        # raise ValueError, so supply the destination's schema explicitly.
        new_table = pa.Table.from_batches(batches, schema=existing_table.schema)
    # No need to cast, should be preserved (???)
    req.to_storage_api.put(req.to_name, new_table)
