import os

import pytest
import pandas as pd
from toolz import pipe

from altair.utils.data import (
    limit_rows,
    MaxRowsError,
    sample,
    to_values,
    to_json,
    to_csv,
)


def _create_dataframe(N):
    """Build an N-row DataFrame whose "x" and "y" columns both hold 0..N-1."""
    return pd.DataFrame({column: range(N) for column in ("x", "y")})


def _create_data_with_values(N):
    """Build a ``{"values": [...]}`` dict holding N records.

    Record ``i`` has ``x == i`` and ``y == i + 1``.
    """
    records = []
    for index in range(N):
        records.append({"x": index, "y": index + 1})
    return {"values": records}


def test_limit_rows():
    """Check limit_rows on both DataFrame and dict-of-values input.

    Data within the limit must come back as the very same object; data
    above the limit must raise MaxRowsError. The transformer is invoked
    both directly and through ``pipe`` with only ``max_rows`` supplied.
    """
    # DataFrame input: 10 rows against limits of 20 and 5.
    frame = _create_dataframe(10)
    assert limit_rows(frame, max_rows=20) is frame
    with pytest.raises(MaxRowsError):
        pipe(frame, limit_rows(max_rows=5))

    # Dict input: 10 records against the same two limits.
    values = _create_data_with_values(10)
    assert pipe(values, limit_rows(max_rows=20)) is values
    with pytest.raises(MaxRowsError):
        limit_rows(values, max_rows=5)


def test_sample():
    """Check that sample returns the requested number of rows.

    Covers DataFrame and dict-of-values input, selecting either by an
    absolute count (``n``) or by a fraction (``frac``) of 20 rows.
    """
    # DataFrame input, absolute count.
    frame = _create_dataframe(20)
    sampled_frame = pipe(frame, sample(n=10))
    assert len(sampled_frame) == 10
    assert isinstance(sampled_frame, pd.DataFrame)

    # Dict input, absolute count.
    values = _create_data_with_values(20)
    sampled_values = sample(values, n=10)
    assert isinstance(sampled_values, dict)
    assert "values" in sampled_values
    assert len(sampled_values["values"]) == 10

    # DataFrame input, fractional size.
    frame = _create_dataframe(20)
    sampled_frame = pipe(frame, sample(frac=0.5))
    assert len(sampled_frame) == 10
    assert isinstance(sampled_frame, pd.DataFrame)

    # Dict input, fractional size.
    values = _create_data_with_values(20)
    sampled_values = sample(values, frac=0.5)
    assert isinstance(sampled_values, dict)
    assert "values" in sampled_values
    assert len(sampled_values["values"]) == 10


def test_to_values():
    """Check that to_values wraps a DataFrame's records under "values"."""
    frame = _create_dataframe(10)
    converted = pipe(frame, to_values)
    expected = {"values": frame.to_dict(orient="records")}
    assert converted == expected


def test_type_error():
    """Check that each transformer raises TypeError for a non-dict/DataFrame."""
    transformers = (sample, limit_rows, to_values)
    for transformer in transformers:
        # An integer is neither a dict nor a DataFrame.
        with pytest.raises(TypeError):
            pipe(0, transformer)


def test_dataframe_to_json():
    """Test to_json with dataframe input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    # Bind ``filename`` before the ``try`` so that a failure inside to_json
    # surfaces as itself instead of being masked by an UnboundLocalError
    # raised from the cleanup code.
    filename = None
    try:
        result1 = pipe(data, to_json)
        # Capture the path right away so the file is cleaned up even if a
        # later step fails.
        filename = result1["url"]
        result2 = pipe(data, to_json)
        output = pd.read_json(filename)
    finally:
        if filename is not None and os.path.exists(filename):
            os.remove(filename)

    assert result1 == result2
    assert output.equals(data)


def test_dict_to_json():
    """Test to_json with dict input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    # Bind ``filename`` before the ``try`` so that a failure inside to_json
    # surfaces as itself instead of being masked by an UnboundLocalError
    # raised from the cleanup code.
    filename = None
    try:
        result1 = pipe(data, to_json)
        # Capture the path right away so the file is cleaned up even if a
        # later step fails.
        filename = result1["url"]
        result2 = pipe(data, to_json)
        output = pd.read_json(filename).to_dict(orient="records")
    finally:
        if filename is not None and os.path.exists(filename):
            os.remove(filename)

    assert result1 == result2
    assert data == {"values": output}


def test_dataframe_to_csv():
    """Test to_csv with dataframe input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    # Bind ``filename`` before the ``try`` so that a failure inside to_csv
    # surfaces as itself instead of being masked by an UnboundLocalError
    # raised from the cleanup code.
    filename = None
    try:
        result1 = pipe(data, to_csv)
        # Capture the path right away so the file is cleaned up even if a
        # later step fails.
        filename = result1["url"]
        result2 = pipe(data, to_csv)
        output = pd.read_csv(filename)
    finally:
        if filename is not None and os.path.exists(filename):
            os.remove(filename)

    assert result1 == result2
    assert output.equals(data)


def test_dict_to_csv():
    """Test to_csv with dict input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    # Bind ``filename`` before the ``try`` so that a failure inside to_csv
    # surfaces as itself instead of being masked by an UnboundLocalError
    # raised from the cleanup code.
    filename = None
    try:
        result1 = pipe(data, to_csv)
        # Capture the path right away so the file is cleaned up even if a
        # later step fails.
        filename = result1["url"]
        result2 = pipe(data, to_csv)
        output = pd.read_csv(filename).to_dict(orient="records")
    finally:
        if filename is not None and os.path.exists(filename):
            os.remove(filename)

    assert result1 == result2
    assert data == {"values": output}
