# encoding: utf-8

"""Provides the CrunchCube class.

CrunchCube is the main API class for manipulating Crunch.io JSON cube
responses.
"""

from __future__ import division

import json
import warnings

import numpy as np

from cr.cube.cube_slice import CubeSlice
from cr.cube.legacy_dimension import AllDimensions
from cr.cube.enum import DIMENSION_TYPE as DT
from cr.cube.measures.index import Index
from cr.cube.measures.scale_means import ScaleMeans
from cr.cube.util import lazyproperty

# Tell NumPy to take no action on floating-point divide-by-zero and invalid
# operations. This is a module-level side effect: it runs when this module is
# imported and changes NumPy's error handling outside this module as well.
np.seterr(divide="ignore", invalid="ignore")


class CrunchCube(object):
    """The main API object for manipulating Crunch.io cube-responses.

    This object provides the main API methods for working with cubes. The
    main API functions are:

    * `as_array()`
    * `margin()`
    * `proportions()`
    * `percentages()`

    These methods return statistical information of interest from a JSON
    cube response.

    Crunch Cubes contain richer metadata than standard Python objects and
    also conceal certain complexity in the data structures from the user.
    In particular, multiple-response variables are generally represented as
    single dimensions in result tables, but in the actual data, they may
    comprise two dimensions. These methods (API) understand the subtleties
    in the Crunch data types, and correctly compute margins and percentages
    from them.
    """

    def __init__(self, response):
        """Initializes the CrunchCube class with the cube JSON response.

        Class can be initialized with both JSON string, and dict types.
        There's no real parsing of the data at the point of initialization,
        just storing. The functionality is implemented in the
        appropriate API functions.

        @response: Represents the cube response object, as generated by the
            zz9d cube factory. Cube can come in as a JSON or as a dictionary,
            So we need to check its type, and convert it to a dictionary if
            it's JSON, if possible.
        """
        self._cube_response_arg = response

    def __repr__(self):
        """Provide text representation suitable for working at console.

        Falls back to a default repr on exception, such as might occur in
        unit tests where object need not otherwise be provided with all
        instance variable values.
        """
        try:
            dimensionality = " x ".join(dt.name for dt in self.dim_types)
            return "%s(name='%s', dim_types='%s')" % (
                type(self).__name__,
                self.name,
                dimensionality,
            )
        except Exception:
            return super(CrunchCube, self).__repr__()

    def as_array(
        self,
        include_missing=False,
        weighted=True,
        include_transforms_for_dims=None,
        prune=False,
    ):
        """Return `ndarray` representing cube values.

        Returns the tabular representation of the crunch cube. The returned
        array has the same number of dimensions as the cube. E.g. for
        a cross-tab representation of a categorical and numerical variable,
        the resulting cube will have two dimensions.

        *include_missing* (bool): Include rows/cols for missing values.

        Example 1 (Categorical x Categorical)::

            >>> cube = CrunchCube(response)
            >>> cube.as_array()
            np.array([
                [5, 2],
                [5, 3],
            ])

        Example 2 (Categorical x Categorical, include missing values)::

            >>> cube = CrunchCube(response)
            >>> cube.as_array(include_missing=True)
            np.array([
                [5, 3, 2, 0],
                [5, 2, 3, 0],
                [0, 0, 0, 0],
            ])
        """
        array = self._as_array(
            include_missing=include_missing,
            weighted=weighted,
            include_transforms_for_dims=include_transforms_for_dims,
        )

        # ---prune array if pruning was requested---
        if prune:
            array = self._prune_body(array, transforms=include_transforms_for_dims)

        return self._drop_mr_cat_dims(array)

    @lazyproperty
    def can_compare_pairwise(self):
        return all(slice_.can_compare_pairwise for slice_ in self.slices)

    def count(self, weighted=True):
        """Return numberic count of rows considered for cube response."""
        return self._measures.weighted_n if weighted else self._measures.unweighted_n

    @lazyproperty
    def description(self):
        """Return the description of the cube."""
        if not self.dimensions:
            return None
        return self.dimensions[0].description

    @lazyproperty
    def dim_types(self):
        """Tuple of DIMENSION_TYPE member for each dimension of cube."""
        return tuple(d.dimension_type for d in self.dimensions)

    @lazyproperty
    def dimensions(self):
        """_ApparentDimension object providing access to visible dimensions.

        A cube involving a multiple-response (MR) variable has two dimensions
        for that variable (subvariables and categories dimensions), but is
        "collapsed" into a single effective dimension for cube-user purposes
        (its categories dimension is supressed). This collection will contain
        a single dimension for each MR variable and therefore may have fewer
        dimensions than appear in the cube response.
        """
        return self._all_dimensions.apparent_dimensions

    @lazyproperty
    def filter_annotation(self):
        """Get cube's filter annotation."""
        return self._cube_dict.get("filter_names", [])

    def get_slices(self, ca_as_0th=False):
        """Return list of :class:`.CubeSlice` objects.

        A cube with fewer than three dimensions yields a single-item list,
        unless *ca_as_0th* is truthy. In every other case one slice is
        created for each label of the first dimension, and *ca_as_0th* is
        passed along to each of those slices.
        """
        if self.ndim >= 3 or ca_as_0th:
            slice_count = len(self.labels()[0])
            return [CubeSlice(self, idx, ca_as_0th) for idx in range(slice_count)]

        return [CubeSlice(self, 0)]

    @lazyproperty
    def has_means(self):
        """True if cube includes a means measure."""
        return self._measures.means is not None

    @lazyproperty
    def has_mr(self):
        """True if this cube has a multiple-response dimension."""
        return self.mr_dim_ind is not None

    def index(self, weighted=True, prune=False):
        """Return cube index measurement.

        This function is deprecated. Use index_table from CubeSlice.

        A `DeprecationWarning` is issued on every call. It is raised with
        `stacklevel=2` so the warning is attributed to the code calling this
        method rather than to this module, which lets the caller locate (and
        filter) the deprecated usage.

        *weighted* and *prune* are passed unchanged to `Index.data()`, whose
        result is returned.
        """
        warnings.warn(
            "CrunchCube.index() is deprecated. Use CubeSlice.index_table().",
            DeprecationWarning,
            stacklevel=2,
        )
        return Index.data(self, weighted, prune)

    def inserted_hs_indices(self, prune=False):
        """Get indices of the inserted H&S (for formatting purposes)."""
        if self.ndim == 2 and prune:
            # If pruning is applied, we need to subtract from the H&S indes
            # the number of pruned rows (cols) that come before that index.
            pruning_bases = [self._pruning_base(axis=i, hs_dims=[0, 1]) for i in [1, 0]]
            pruning_bases = [
                base if base.ndim == 1 else np.sum(base, axis=(1 - i))
                for i, base in enumerate(pruning_bases)
            ]
            # Obtain prune indices as subscripts
            prune_indices_list = [
                np.arange(len(base))[np.logical_or(base == 0, np.isnan(base))]
                for base in pruning_bases
            ]
            inserted_indices_list = [dim.inserted_hs_indices for dim in self.dimensions]
            return self._adjust_inserted_indices(
                inserted_indices_list, prune_indices_list
            )

        return [dim.inserted_hs_indices for dim in self.dimensions]

    @lazyproperty
    def is_double_mr(self):
        """Check if cube has 2 MR dimensions."""
        return True if isinstance(self.mr_dim_ind, tuple) else False

    @lazyproperty
    def is_univariate_ca(self):
        """True if cube only contains a CA dimension-pair, in either order."""
        return self.ndim == 2 and set(self.dim_types) == {DT.CA_SUBVAR, DT.CA_CAT}

    @lazyproperty
    def is_weighted(self):
        """True if cube response contains weighted data."""
        return self._measures.is_weighted

    def labels(self, include_missing=False, include_transforms_for_dims=False):
        """Gets labels for each cube's dimension.

        Args
            include_missing (bool): Include labels for missing values

        Returns
            labels (list of lists): Labels for each dimension
        """
        return [
            dim.labels(include_missing, include_transforms_for_dims)
            for dim in self.dimensions
        ]

    def margin(
        self,
        axis=None,
        weighted=True,
        include_missing=False,
        include_transforms_for_dims=None,
        prune=False,
        include_mr_cat=False,
    ):
        """Get margin for the selected axis.

        The margin is obtained by summing the cube counts across the
        selected axis. For MR variables, this is the sum of the selected
        and non-selected slices.

        Args
            axis (int): Axis across which the margin is calculated. If no
                        axis is provided the margin is calculated across all
                        axes. For Categoricals, Num, Datetime, and Text, this
                        translates to summation of all elements.
            weighted (bool): Passed to `._counts()` to select the counts the
                        margin is computed from.
            include_missing (bool): Passed to `._apply_missings()` and
                        `._as_array()`; include rows/cols for missing values.
            include_transforms_for_dims (list): Dimensions for which H&S are
                        requested. Only those not summed across are kept (see
                        `._hs_dims_for_den()`).
            prune (bool): When truthy, the array used to build the mask is
                        passed through `._prune_body()`.
            include_mr_cat (bool): Together with *include_missing*, forms the
                        `fix_valids` argument of the final
                        `._drop_mr_cat_dims()` call.
        Returns
            Calculated margin for the selected axis

        Raises
            ValueError (from `._adjust_axis()`) when *axis* is not allowed.

        Example 1:
            >>> cube = CrunchCube(fixt_cat_x_cat)
            np.array([
               [5, 2],
               [5, 3],
            ])

            >>> cube.margin(axis=0)
            np.array([10, 5])

        Example 2:
            >>> cube = CrunchCube(fixt_cat_x_num_x_datetime)
            np.array([
                [[1, 1],
                 [0, 0],
                 [0, 0],
                 [0, 0]],
                [[2, 1],
                 [1, 1],
                 [0, 0],
                 [0, 0]],
                [[0, 0],
                 [2, 3],
                 [0, 0],
                 [0, 0]],
                [[0, 0],
                 [0, 0],
                 [3, 2],
                 [0, 0]],
                [[0, 0],
                 [0, 0],
                 [1, 1],
                 [0, 1]]
            ])

            >>> cube.margin(axis=0)
            np.array([
                [3, 2],
                [3, 4],
                [4, 3],
                [0, 1],
            ])
        """

        table = self._counts(weighted).raw_cube_array
        new_axis = self._adjust_axis(axis)
        # Indexer that puts a new length-1 axis (None) at each position of the
        # raw table that appears in `new_axis`, and keeps the others whole.
        index = tuple(
            None if i in new_axis else slice(None) for i, _ in enumerate(table.shape)
        )

        # Calculate denominator. Only include those H&S dimensions, across
        # which we DON'T sum. These H&S are needed because of the shape, when
        # dividing. Those across dims which are summed across MUST NOT be
        # included, because they would change the result.
        hs_dims = self._hs_dims_for_den(include_transforms_for_dims, axis)
        den = self._apply_subtotals(
            self._apply_missings(table, include_missing=include_missing), hs_dims
        )

        # Apply correct mask (based on the as_array shape)
        arr = self._as_array(
            include_transforms_for_dims=hs_dims, include_missing=include_missing
        )

        # ---prune array if pruning was requested---
        if prune:
            arr = self._prune_body(arr, transforms=hs_dims)

        arr = self._drop_mr_cat_dims(arr, fix_valids=include_missing)

        if isinstance(arr, np.ma.core.MaskedArray):
            # Inflate the reduced version of the array, to match the
            # non-reduced version, for the purposes of creating the correct
            # mask. Create additional dimension (with no elements) where MR_CAT
            # dimensions should be. Don't inflate 0th dimension if it has only
            # a single element, because it's not being reduced
            # in self._drop_mr_cat_dims
            inflate_ind = tuple(
                (
                    None
                    if (
                        d.dimension_type == DT.MR_CAT
                        or i != 0
                        and (n <= 1 or len(d.valid_elements) <= 1)
                    )
                    else slice(None)
                )
                for i, (d, n) in enumerate(zip(self._all_dimensions, table.shape))
            )
            # OR-ing with an all-False array of the denominator's shape
            # broadcasts the inflated mask up to that shape.
            mask = np.logical_or(np.zeros(den.shape, dtype=bool), arr.mask[inflate_ind])
            den = np.ma.masked_array(den, mask)

        # The sum across `new_axis` is performed unless this is a 1D
        # (apparent) cube queried with an explicit axis; even then it is
        # performed when that axis is 0 and the response has a single raw
        # dimension.
        if (
            self.ndim != 1
            or axis is None
            or axis == 0
            and len(self._all_dimensions) == 1
        ):
            # Special case for 1D cube with MR, for "Table" direction
            den = np.sum(den, axis=new_axis)[index]

        den = self._drop_mr_cat_dims(
            den, fix_valids=(include_missing or include_mr_cat)
        )
        # Drop a leading length-1 axis for cubes of fewer than 3 dimensions.
        if den.shape[0] == 1 and len(den.shape) > 1 and self.ndim < 3:
            den = den.reshape(den.shape[1:])
        return den

    @lazyproperty
    def missing(self):
        """Get missing count of a cube."""
        return self._measures.missing_count

    @lazyproperty
    def mr_dim_ind(self):
        """Return int, tuple of int, or None, representing MR indices.

        The return value represents the index of each multiple-response (MR)
        dimension in this cube. Return value is None if there are no MR
        dimensions, and int if there is one MR dimension, and a tuple of int
        when there are more than one. The index is the (zero-based) position
        of the MR dimensions in the _ApparentDimensions sequence returned by
        the :attr"`.dimensions` property.
        """
        # TODO: rename to `mr_dim_idxs` or better yet get rid of need for
        # this as it's really a cube internal characteristic.
        # TODO: Make this return a tuple in all cases, like (), (1,), or (0, 2).
        indices = tuple(
            idx
            for idx, d in enumerate(self.dimensions)
            if d.dimension_type == DT.MR_SUBVAR
        )
        if indices == ():
            return None
        if len(indices) == 1:
            return indices[0]
        return indices

    @lazyproperty
    def name(self):
        """Return the name of the cube.

        If the cube has 2 diensions, return the name of the second one. In case
        of a different number of dimensions, default to returning the name of
        the last one. In case of no dimensions, return the empty string.
        """
        if not self.dimensions:
            return None
        return self.dimensions[0].name

    @lazyproperty
    def ndim(self):
        """int count of dimensions for this cube."""
        return len(self.dimensions)

    def percentages(self, axis=None):
        """Get the percentages for crunch cube values.

        This function calculates the percentages for crunch cube values. The
        percentages are based on the values of the 'proportions'.

        Args
            axis (int): Base axis of percentages calculation. If no axis is
                        provided, calculations are done accros entire table.

        Returns
            (nparray): Calculated array of crunch cube percentages.

        Example 1:
            >>> cube = CrunchCube(fixt_cat_x_cat)
            np.array([
               [5, 2],
               [5, 3],
            ])

            >>> cube.percentages()
            np.array([
                [33.33333, 13.33333],
                [33.33333, 20.00000],
            ])

        Example 2:
            >>> cube = CrunchCube(fixt_cat_x_cat)
            np.array([
               [5, 2],
               [5, 3],
            ])

            >>> cube.percentages(axis=0)
            np.array([
                [50., 40.],
                [50., 60.],
            ])
        """
        return self.proportions(axis) * 100

    def population_counts(
        self,
        population_size,
        weighted=True,
        include_missing=False,
        include_transforms_for_dims=None,
        prune=False,
    ):
        """Return counts scaled in proportion to overall population.

        The return value is a numpy.ndarray object. Count values are scaled
        proportionally to approximate their value if the entire population
        had been sampled. This calculation is based on the estimated size of
        the population provided as *population size*. The remaining arguments
        have the same meaning as they do for the `.proportions()` method.

        Example::

            >>> cube = CrunchCube(fixt_cat_x_cat)
            >>> cube.as_array()
            np.array([
               [5, 2],
               [5, 3],
            ])
            >>> cube.population_counts(9000)
            np.array([
                [3000, 1200],
                [3000, 1800],
            ])
        """
        population_counts = [
            slice_.population_counts(
                population_size,
                weighted=weighted,
                include_missing=include_missing,
                include_transforms_for_dims=include_transforms_for_dims,
                prune=prune,
            )
            for slice_ in self.slices
        ]

        if len(population_counts) > 1:
            return np.array(population_counts)
        return population_counts[0]

    @lazyproperty
    def population_fraction(self):
        """The filtered/unfiltered ratio for cube response.

        This value is required for properly calculating population on a cube
        where a filter has been applied. Returns 1.0 for an unfiltered cube.
        Returns `np.nan` if the unfiltered count is zero, which would
        otherwise result in a divide-by-zero error.
        """
        return self._measures.population_fraction

    def proportions(
        self,
        axis=None,
        weighted=True,
        include_transforms_for_dims=None,
        include_mr_cat=False,
        prune=False,
    ):
        """Return percentage values for cube as `numpy.ndarray`.

        This function calculates the proportions across the selected axis
        of a crunch cube. For most variable types, it means the value divided
        by the margin value. For a multiple-response variable, the value is
        divided by the sum of selected and non-selected slices.

        *axis* (int): base axis of proportions calculation. If no axis is
        provided, calculations are done across the entire table.

        *weighted* (bool): Specifies weighted or non-weighted proportions.

        *include_transforms_for_dims* (list): Also include headings and
        subtotals transformations for the provided dimensions. If the
        dimensions have the transformations, they'll be included in the
        resulting numpy array. If the dimensions don't have the
        transformations, nothing will happen (the result will be the same as
        if the argument weren't provided).

        *include_transforms_for_dims* (list): Include headers and subtotals
        (H&S) across various dimensions. The dimensions are provided as list
        elements. For example: "include_transforms_for_dims=[0, 1]" instructs
        the CrunchCube to return H&S for both rows and columns (if it's a 2D
        cube).

        *include_mr_cat* (bool): Include MR categories.

        *prune* (bool): Instructs the CrunchCube to prune empty rows/cols.
        Emptiness is determined by the state of the margin (if it's either
        0 or nan at certain index). If it is, the corresponding row/col is
        not included in the result.

        Example 1::

            >>> cube = CrunchCube(fixt_cat_x_cat)
            np.array([
               [5, 2],
               [5, 3],
            ])

            >>> cube.proportions()
            np.array([
                [0.3333333, 0.1333333],
                [0.3333333, 0.2000000],
            ])

        Example 2::

            >>> cube = CrunchCube(fixt_cat_x_cat)
            np.array([
               [5, 2],
               [5, 3],
            ])

            >>> cube.proportions(axis=0)
            np.array([
                [0.5, 0.4],
                [0.5, 0.6],
            ])
        """

        # Calculate numerator from table (include all H&S dimensions).
        table = self._measure(weighted).raw_cube_array
        num = self._apply_subtotals(
            self._apply_missings(table), include_transforms_for_dims
        )

        proportions = num / self._denominator(
            weighted, include_transforms_for_dims, axis
        )
        if not include_mr_cat:
            proportions = self._drop_mr_cat_dims(proportions)

        # Apply correct mask (based on the as_array shape)
        arr = self.as_array(
            prune=prune, include_transforms_for_dims=include_transforms_for_dims
        )
        if isinstance(arr, np.ma.core.MaskedArray):
            proportions = np.ma.masked_array(proportions, arr.mask)

        return proportions

    def _denominator(self, weighted, include_transforms_for_dims, axis):
        """Calculate denominator for percentages.

        Only include those H&S dimensions, across which we DON'T sum. These H&S
        are needed because of the shape, when dividing. Those across dims
        which are summed across MUST NOT be included, because they would
        change the result."""

        table = self._measure(weighted).raw_cube_array
        new_axis = self._adjust_axis(axis)
        index = tuple(
            None if i in new_axis else slice(None) for i, _ in enumerate(table.shape)
        )
        hs_dims = self._hs_dims_for_den(include_transforms_for_dims, axis)
        den = self._apply_subtotals(self._apply_missings(table), hs_dims)
        return np.sum(den, axis=new_axis)[index]

    def pvals(self, weighted=True, prune=False, hs_dims=None):
        """Return ndarray with calculated p-vals.

        This function calculates statistically significant results for
        categorical contingency tables. The values are calculated for 2D tables
        only. For 3D cubes, the slices' results are stacked together and
        returned as an ndarray.

        :param weighted: Use weighted counts for zscores
        :param prune: Prune based on unweighted counts
        :param hs_dims: Include headers and subtotals (as NaN values)
        :returns: 2 or 3 Dimensional ndarray, representing the p-values for each
                  cell of the table-like representation of the crunch cube.
        """
        res = [s.pvals(weighted, prune, hs_dims) for s in self.slices]
        return np.array(res) if self.ndim == 3 else res[0]

    @lazyproperty
    def row_direction_axis(self):
        # when dealing with 1D MR cubes, axis for margin should be 0
        if self.ndim == 1 and self.has_mr:
            return 0
        elif self.ndim == 3:
            return 2
        return 1

    def scale_means(self, hs_dims=None, prune=False):
        """Return list of scale-means results, one item per slice of this cube.

        Each item is the `.data` of a `ScaleMeans` object built on the slice.
        This method reads and reassigns positions 0 and 1 of each item, either
        of which may be None (presumably each item is a two-item list --
        TODO confirm against ScaleMeans).

        *hs_dims* (list): When provided and the cube has more than one
        dimension, NaN values are inserted into the scale means at the
        positions of inserted H&S.

        *prune* (bool): When truthy, scale-means entries at positions that
        are fully masked in the pruned `.as_array()` are removed.
        """
        slices_means = [ScaleMeans(slice_).data for slice_ in self.slices]

        if hs_dims and self.ndim > 1:
            # Intersperse scale means with nans if H&S specified, and 2D. No
            # need to modify 1D, as only one mean will ever be inserted.
            # Only the inserted indices of the last two dimensions are used.
            inserted_indices = self.inserted_hs_indices()[-2:]
            for scale_means in slices_means:
                # Scale means 0 corresponds to the column dimension (is
                # calculated by using its values). The result of it, however,
                # is a row. That's why we need to check the insertions on the
                # row dim (inserted columns).
                if scale_means[0] is not None and 1 in hs_dims and inserted_indices[1]:
                    for i in inserted_indices[1]:
                        scale_means[0] = np.insert(scale_means[0], i, np.nan)
                # Scale means 1 is a column, so we need to check
                # for row insertions.
                if scale_means[1] is not None and 0 in hs_dims and inserted_indices[0]:
                    for i in inserted_indices[0]:
                        scale_means[1] = np.insert(scale_means[1], i, np.nan)

        if prune:
            # Apply pruning
            arr = self.as_array(include_transforms_for_dims=hs_dims, prune=True)
            # Only a masked array carries pruning information; otherwise the
            # scale means are returned unpruned.
            if isinstance(arr, np.ma.core.MaskedArray):
                mask = arr.mask
                for i, scale_means in enumerate(slices_means):
                    if scale_means[0] is not None:
                        # For a cube with fewer than 3 dimensions the mask is
                        # reduced over axis 0; for 3D the reduction is done
                        # over axis 1 and the entry for slice `i` is selected.
                        row_mask = (
                            mask.all(axis=0) if self.ndim < 3 else mask.all(axis=1)[i]
                        )
                        scale_means[0] = scale_means[0][~row_mask]
                    if self.ndim > 1 and scale_means[1] is not None:
                        col_mask = (
                            mask.all(axis=1) if self.ndim < 3 else mask.all(axis=2)[i]
                        )
                        scale_means[1] = scale_means[1][~col_mask]
        return slices_means

    @lazyproperty
    def slices(self):
        return self.get_slices()

    @lazyproperty
    def univariate_ca_main_axis(self):
        """For univariate CA, the main axis is the categorical axis.

        This is the position of the CA_CAT member in `.dim_types`.
        """
        dim_types = self.dim_types
        return dim_types.index(DT.CA_CAT)

    def zscore(self, weighted=True, prune=False, hs_dims=None):
        """Return ndarray with cube's zscore measurements.

        Zscore is a measure of statistical significance of observed vs.
        expected counts. It's only applicable to a 2D contingency tables.
        For 3D cubes, the measures of separate slices are stacked together
        and returned as the result.

        :param weighted: Use weighted counts for zscores
        :param prune: Prune based on unweighted counts
        :param hs_dims: Include headers and subtotals (as NaN values)
        :returns zscore: ndarray representing zscore measurements
        """
        res = [s.zscore(weighted, prune, hs_dims) for s in self.slices]
        return np.array(res) if self.ndim == 3 else res[0]

    def wishart_pairwise_pvals(self, axis=0):
        """Return matrices of column-comparison p-values as list of numpy.ndarrays.

        Square, symmetric matrix along *axis* of pairwise p-values for the
        null hypothesis that col[i] = col[j] for each pair of columns.

        *axis* (int): axis along which to perform comparison. Only columns (0)
        are implemented currently.
        """
        return [slice_.wishart_pairwise_pvals(axis=axis) for slice_ in self.slices]

    def compare_to_column(self, slice_idx=0, column_idx=0):
        """Return matrices of column-comparison p-values as list of numpy.ndarrays.

        *t*-statistic and associated p-values for one column of one slice
        compared to each of the other columns in that slice.
        See scipy.stats.ttest_ind_from_stats.

        (It does not make sense to compare the same column at once across slices.)

        *slice_idx* (int): Index of slice for which we perform comparisons
        *column_idx* (int): Index of column to compare to, by default 0 (the first)
        """
        return self.slices[slice_idx].pairwise_significance_tests(column_idx)

    def _adjust_axis(self, axis):
        """Return raw axis/axes corresponding to apparent axis/axes.

        This method adjusts user provided 'axis' parameter, for some of the
        cube operations, mainly 'margin'. The user never sees the MR selections
        dimension, and treats all MRs as single dimensions. Thus we need to
        adjust the values of axis (to sum across) to what the user would've
        specified if he were aware of the existence of the MR selections
        dimension. The reason for this adjustment is that all of the operations
        performed troughout the margin calculations will be carried on an
        internal array, containing all the data (together with all selections).

        For more info on how it needs to operate, check the unit tests.
        """
        if not self._is_axis_allowed(axis):
            ca_error_msg = "Direction {} not allowed (items dimension)"
            raise ValueError(ca_error_msg.format(axis))

        if isinstance(axis, int):
            # If single axis was provided, create a list out of it, so that
            # we can do the subsequent iteration.
            axis = list([axis])
        elif axis is None:
            # If axis was None, create what user would expect in terms of
            # finding out the Total(s). In case of 2D cube, this will be the
            # axis of all the dimensions that the user can see, that is (0, 1),
            # because the selections dimension is invisible to the user. In
            # case of 3D cube, this will be the "total" across each slice, so
            # we need to drop the 0th dimension, and only take last two (1, 2).
            axis = range(self.ndim)[-2:]
        else:
            # In case of a tuple, just keep it as a list.
            axis = list(axis)
        axis = np.array(axis)

        # Create new array for storing updated values of axis. It's necessary
        # because it's hard to update the values in place.
        new_axis = np.array(axis)

        # Iterate over user-visible dimensions, and update axis when MR is
        # detected. For each detected MR, we need to increment all subsequent
        # axis (that were provided by the user). But we don't need to update
        # the axis that are "behind" the current MR.
        for i, dim in enumerate(self.dimensions):
            if dim.dimension_type == DT.MR_SUBVAR:
                # This formula updates only the axis that come "after" the
                # current MR (items) dimension.
                new_axis[axis >= i] += 1

        return tuple(new_axis)

    @staticmethod
    def _adjust_inserted_indices(inserted_indices_list, prune_indices_list):
        """Adjust inserted indices, if there are pruned elements."""
        # Created a copy, to preserve cached property
        updated_inserted = [[i for i in dim_inds] for dim_inds in inserted_indices_list]
        pruned_and_inserted = zip(prune_indices_list, updated_inserted)
        for prune_inds, inserted_inds in pruned_and_inserted:
            # Only prune indices if they're not H&S (inserted)
            prune_inds = prune_inds[~np.in1d(prune_inds, inserted_inds)]
            for i, ind in enumerate(inserted_inds):
                ind -= np.sum(prune_inds < ind)
                inserted_inds[i] = ind
        return updated_inserted

    @lazyproperty
    def _all_dimensions(self):
        """The AllDimensions object for this cube.

        It is built from the dimension dicts found at
        `["result"]["dimensions"]` in the cube dict and gives access to all
        the dimensions appearing in the cube response, not only the apparent
        ones (those that appear to a user). It also provides an
        _ApparentDimensions object containing only the user-apparent
        dimensions (basically the categories dimension of each MR
        dimension-pair is suppressed).
        """
        dimension_dicts = self._cube_dict["result"]["dimensions"]
        return AllDimensions(dimension_dicts=dimension_dicts)

    def _apply_missings(self, res, include_missing=False):
        """Return ndarray with missing and insertions as specified.

        The return value is the result of the following operations on *res*,
        which is a raw cube value array (raw meaning it has shape of original
        cube response).

        * Remove vectors (rows/cols) for missing elements if *include_missin*
          is False.


        Note that it does *not* include pruning.
        """
        # --element idxs that satisfy `include_missing` arg. Note this
        # --includes MR_CAT elements so is essentially all-or-valid-elements
        element_idxs = tuple(
            (
                d.all_elements.element_idxs
                if include_missing
                else d.valid_elements.element_idxs
            )
            for d in self._all_dimensions
        )
        return res[np.ix_(*element_idxs)] if element_idxs else res

    def _apply_subtotals(self, res, include_transforms_for_dims):
        """* Insert subtotals (and perhaps other insertions later) for
          dimensions having their apparent dimension-idx in
          *include_transforms_for_dims*.
        """
        if not include_transforms_for_dims:
            return res

        suppressed_dim_count = 0
        for (dim_idx, dim) in enumerate(self._all_dimensions):
            if dim.dimension_type == DT.MR_CAT:
                suppressed_dim_count += 1
            # ---only marginable dimensions can be subtotaled---
            if not dim.is_marginable:
                continue
            apparent_dim_idx = dim_idx - suppressed_dim_count
            transform = (
                dim.has_transforms and apparent_dim_idx in include_transforms_for_dims
            )
            if not transform:
                continue
            # ---insert subtotals into result array---
            insertions = self._insertions(res, dim, dim_idx)
            res = self._update_result(res, insertions, dim_idx)

        return res

    def _as_array(
        self,
        include_missing=False,
        get_non_selected=False,
        weighted=True,
        include_transforms_for_dims=False,
    ):
        """Get crunch cube as ndarray.

        Args
            include_missing (bool): Include rows/cols for missing values.
            get_non_selected (bool): Get non-selected slices for MR vars.
            weighted (bool): Take weighted or unweighted counts.
            include_transforms_for_dims (list): For which dims to
                include headings & subtotals (H&S) transformations.
        Returns
            res (ndarray): Tabular representation of crunch cube
        """
        return self._apply_subtotals(
            self._apply_missings(
                self._measure(weighted).raw_cube_array, include_missing=include_missing
            ),
            include_transforms_for_dims,
        )

    @classmethod
    def _calculate_constraints_sum(cls, prop_table, prop_margin, axis):
        """Calculate sum of constraints (part of the standard error equation).

        This method calculates the sum of the cell proportions multiplied by
        row (or column) marginal proportions (margins divide by the total
        count). It does this by utilizing the matrix multiplication, which
        directly translates to the mathematical definition (the sum
        across i and j indices).
        """
        if axis not in [0, 1]:
            raise ValueError("Unexpected value for `axis`: {}".format(axis))

        V = prop_table * (1 - prop_table)
        if axis == 0:
            # If axis is 0, sumation is performed across the 'i' index, which
            # requires the matrix to be multiplied from the right
            # (because of the inner matrix dimensions).
            return np.dot(V, prop_margin)
        elif axis == 1:
            # If axis is 1, sumation is performed across the 'j' index, which
            # requires the matrix to be multiplied from the left
            # (because of the inner matrix dimensions).
            return np.dot(prop_margin, V)

    @lazyproperty
    def _col_direction_axis(self):
        """int axis used for the column direction, two less than `.ndim`."""
        dimension_count = self.ndim
        return dimension_count - 2

    def _counts(self, weighted):
        """Return _BaseMeasure subclass for *weighted* counts.

        The return value is a _WeightedCountMeasure object if *weighted* is
        True and the cube response is weighted. Otherwise it is an
        _UnweightedCountMeasure object. Any means measure that may be present
        is not considered. Contrast with `._measure()` below.
        """
        return (
            self._measures.weighted_counts
            if weighted
            else self._measures.unweighted_counts
        )

    @staticmethod
    def _create_mask(res, row_prune_inds, col_prune_inds):
        mask_rows = np.repeat(row_prune_inds[:, None], len(col_prune_inds), axis=1)
        mask_cols = np.repeat(col_prune_inds[None, :], len(row_prune_inds), axis=0)
        return np.logical_or(mask_rows, mask_cols)

    @lazyproperty
    def _cube_dict(self):
        """dict of the raw cube response, parsed from JSON when necessary.

        A dict argument is used directly; anything else is handed to
        `json.loads()`. When the result has a "value" item (a shoji
        response), that item is the cube and is what gets returned.

        Raises `TypeError` when a `TypeError` occurs while parsing, e.g. the
        response argument is neither a dict nor something `json.loads()`
        accepts.
        """
        try:
            response = self._cube_response_arg
            if isinstance(response, dict):
                parsed = response
            else:
                # ---parse JSON to a dict when constructed with JSON---
                parsed = json.loads(response)
            # ---cube is 'value' item in a shoji response---
            return parsed.get("value", parsed)
        except TypeError:
            raise TypeError(
                "Unsupported type <%s> provided. Cube response must be JSON "
                "(str) or dict." % type(self._cube_response_arg).__name__
            )

    def _drop_mr_cat_dims(self, array, fix_valids=False):
        """Return ndarray reflecting *array* with MR_CAT dims dropped.

        If any (except 1st) dimension has a single element, it is
        flattened in the resulting array (which is more convenient for the
        users of the CrunchCube).

        If the original shape of the cube is needed (e.g. to calculate the
        margins with correct axis arguments), this needs to happen before the
        call to this method '_drop_mr_cat_dims'.
        """
        # TODO: We cannot arbitrarily drop any dimension simply because it
        # has a length (shape) of 1. We must target MR_CAT dimensions
        # specifically. Otherwise unexpected results can occur based on
        # accidents of cube category count etc. If "user-friendly" reshaping
        # needs be done, it should be as a very last step and much safer to
        # leave that to the cr.cube client; software being "helpful" almost
        # never is.

        if not array.shape or len(array.shape) != len(self._all_dimensions):
            # This condition covers two cases:
            # 1. In case of no dimensions, the shape of the array is empty
            # 2. If the shape was already fixed, we don't need to fix it again.
            # This might happen while constructing the masked arrays. In case
            # of MR, we will have the selections dimension included thoughout
            # the calculations, and will only remove it before returning the
            # result to the user.
            return array

        # We keep MR selections (MR_CAT) dimensions in the array, all the way
        # up to here. At this point, we need to remove the non-selected part of
        # selections dimension (and subsequently purge the dimension itself).

        display_ind = (
            tuple(
                0 if dim.dimension_type == DT.MR_CAT else slice(None)
                for dim, n in zip(self._all_dimensions, array.shape)
            )
            if not fix_valids
            else np.ix_(
                *[
                    dim.valid_elements.element_idxs if n > 1 else [0]
                    for dim, n in zip(self._all_dimensions, array.shape)
                ]
            )
        )
        array = array[display_ind]

        # If a first dimension only has one element, we don't want to
        # remove it from the shape. Hence the i == 0 part. For other dimensions
        # that have one element, it means that these are the remnants of the MR
        # selections, which we don't need as separate dimensions.
        new_shape = [
            length for (i, length) in enumerate(array.shape) if length != 1 or i == 0
        ]
        return array.reshape(new_shape)

    @classmethod
    def _fix_valid_indices(cls, valid_indices, insertion_index, dim):
        """Add indices for H&S inserted elements."""
        # TODO: make this accept an immutable sequence for valid_indices
        # (a tuple) and return an immutable sequence rather than mutating an
        # argument.
        indices = np.array(sorted(valid_indices[dim]))
        slice_index = np.sum(indices <= insertion_index)
        indices[slice_index:] += 1
        indices = np.insert(indices, slice_index, insertion_index + 1)
        valid_indices[dim] = indices.tolist()
        return valid_indices

    @staticmethod
    def _hs_dims_for_den(hs_dims, axis):
        if axis is None or hs_dims is None:
            return None
        if isinstance(axis, int):
            axis = [axis]
        return [dim for dim in hs_dims if dim not in axis]

    def _inserted_dim_inds(self, transform_dims, axis):
        dim_ind = axis if self.ndim < 3 else axis + 1
        if not transform_dims or dim_ind not in transform_dims:
            return np.array([])

        inserted_inds = self.inserted_hs_indices()
        return np.array(inserted_inds[dim_ind] if len(inserted_inds) else [])

    def _insertions(self, result, dimension, dimension_index):
        """Return list of (idx, sum) pairs representing subtotals.

        *idx* is the int offset at which to insert the ndarray subtotal
        in *sum*.
        """

        def iter_insertions():
            for anchor_idx, addend_idxs in dimension.hs_indices:
                insertion_idx = (
                    -1
                    if anchor_idx == "top"
                    else result.shape[dimension_index] - 1
                    if anchor_idx == "bottom"
                    else anchor_idx
                )
                addend_fancy_idx = tuple(
                    [slice(None) for _ in range(dimension_index)]
                    + [np.array(addend_idxs)]
                )
                yield (
                    insertion_idx,
                    np.sum(result[addend_fancy_idx], axis=dimension_index),
                )

        return [insertion for insertion in iter_insertions()]

    def _is_axis_allowed(self, axis):
        """Check if axis are allowed.

        In case the calculation is requested over CA items dimension, it is not
        valid. It's valid in all other cases.
        """
        if axis is None:
            # If table direction was requested, we must ensure that each slice
            # doesn't have the CA items dimension (thus the [-2:] part). It's
            # OK for the 0th dimension to be items, since no calculation is
            # performed over it.
            if DT.CA_SUBVAR in self.dim_types[-2:]:
                return False
            return True

        if isinstance(axis, int):
            if self.ndim == 1 and axis == 1:
                # Special allowed case of a 1D cube, where "row"
                # directions is requested.
                return True
            axis = [axis]

        # ---axis is a tuple---
        for dim_idx in axis:
            if self.dim_types[dim_idx] == DT.CA_SUBVAR:
                # If any of the directions explicitly asked for directly
                # corresponds to the CA items dimension, the requested
                # calculation is not valid.
                return False

        return True

    @staticmethod
    def _margin_pruned_indices(margin, inserted_ind, axis):
        pruned_ind = np.logical_or(margin == 0, np.isnan(margin))

        if pruned_ind.ndim == 0:
            pruned_ind = np.array([pruned_ind])

        if np.any(margin) and inserted_ind is not None and any(inserted_ind):
            ind_inserted = np.zeros(pruned_ind.shape, dtype=bool)
            if len(pruned_ind.shape) == 2 and axis == 1:
                ind_inserted[:, inserted_ind] = True
            else:
                ind_inserted[inserted_ind] = True
            pruned_ind = np.logical_and(pruned_ind, ~ind_inserted)

        return pruned_ind

    def _measure(self, weighted):
        """_BaseMeasure subclass representing primary measure for this cube.

        If the cube response includes a means measure, the return value is
        means. Otherwise it is counts, with the choice between weighted or
        unweighted determined by *weighted*.

        Note that weighted counts are provided on an "as-available" basis.
        When *weighted* is True and the cube response is not weighted,
        unweighted counts are returned.
        """
        return (
            self._measures.means
            if self._measures.means is not None
            else self._measures.weighted_counts
            if weighted
            else self._measures.unweighted_counts
        )

    @lazyproperty
    def _measures(self):
        """_Measures object built from this cube's response and dimensions.

        It provides access to unweighted counts, and to weighted counts
        and/or means when those are available.
        """
        cube_dict, all_dimensions = self._cube_dict, self._all_dimensions
        return _Measures(cube_dict, all_dimensions)

    def _prune_3d_body(self, res, transforms):
        """Return masked array where mask indicates pruned vectors.

        *res* is an ndarray (result). *transforms* is a list of ...
        """
        mask = np.zeros(res.shape)
        mr_dim_idxs = self.mr_dim_ind

        for i, prune_inds in enumerate(self.prune_indices(transforms)):
            rows_pruned = prune_inds[0]
            cols_pruned = prune_inds[1]
            rows_pruned = np.repeat(rows_pruned[:, None], len(cols_pruned), axis=1)
            cols_pruned = np.repeat(cols_pruned[None, :], len(rows_pruned), axis=0)
            slice_mask = np.logical_or(rows_pruned, cols_pruned)

            # In case of MRs we need to "inflate" mask
            if mr_dim_idxs == (1, 2):
                slice_mask = slice_mask[:, np.newaxis, :, np.newaxis]
            elif mr_dim_idxs == (0, 1):
                slice_mask = slice_mask[np.newaxis, :, np.newaxis, :]
            elif mr_dim_idxs == (0, 2):
                slice_mask = slice_mask[np.newaxis, :, :, np.newaxis]
            elif mr_dim_idxs == 1 and self.ndim == 3:
                slice_mask = slice_mask[:, np.newaxis, :]
            elif mr_dim_idxs == 2 and self.ndim == 3:
                slice_mask = slice_mask[:, :, np.newaxis]

            mask[i] = slice_mask

        res = np.ma.masked_array(res, mask=mask)
        return res

    def _prune_3d_indices(self, transforms):
        row_margin = self._pruning_base(
            hs_dims=transforms, axis=self.row_direction_axis
        )
        col_margin = self._pruning_base(
            hs_dims=transforms, axis=self._col_direction_axis
        )
        return [
            self._prune_indices_tuple(rm, cm, transforms)
            for rm, cm in zip(row_margin, col_margin)
        ]

    def _prune_body(self, res, transforms=None):
        """Return a masked version of *res* where pruned rows/cols are masked.

        Return value is an `np.ma.MaskedArray` object. Pruning is the removal
        of rows or columns whose corresponding marginal elements are either
        0 or not defined (np.nan).

        *res* is the result ndarray to be masked. *transforms* is forwarded
        as the H&S dims when computing the pruning bases and the inserted
        indices. A cube of more than two dimensions is delegated entirely to
        `._prune_3d_body()`. Otherwise MR_CAT dimensions are dropped from
        *res* before it is masked, so the returned array can have fewer
        dimensions than the *res* passed in.
        """
        if self.ndim > 2:
            return self._prune_3d_body(res, transforms)

        # ---from here on `res` has its user-facing shape---
        res = self._drop_mr_cat_dims(res)

        # ---determine which rows should be pruned---
        row_margin = self._pruning_base(
            hs_dims=transforms, axis=self.row_direction_axis
        )
        # ---adjust special-case row-margin values---
        item_types = (DT.MR, DT.CA_SUBVAR)
        if self.ndim > 1 and self.dim_types[1] in item_types and len(res.shape) > 1:
            # ---when row-dimension has only one category it gets squashed, in
            # ---which case the margin is summed over all of its axes---
            axis = 1 if res.shape[0] > 1 else None
            # ---in CAT x MR case (or if it has CA subvars) we get
            # a 2D margin (denom really)---
            row_margin = np.sum(row_margin, axis=axis)
        row_prune_inds = self._margin_pruned_indices(
            row_margin, self._inserted_dim_inds(transforms, 0), 0
        )

        # ---a 1D only has rows, so mask only with row-prune-idxs. This also
        # ---covers a result that became 1D when MR_CAT dims were dropped---
        if self.ndim == 1 or len(res.shape) == 1:
            # For 1D, margin is calculated as the row margin.
            return np.ma.masked_array(res, mask=row_prune_inds)

        # ---determine which columns should be pruned---
        col_margin = self._pruning_base(
            hs_dims=transforms, axis=self._col_direction_axis
        )
        if col_margin.ndim > 1:
            # In case of MR x CAT, we have 2D margin
            col_margin = np.sum(col_margin, axis=0)
        col_prune_inds = self._margin_pruned_indices(
            col_margin, self._inserted_dim_inds(transforms, 1), 1
        )

        # ---create rows x cols mask and mask the result array---
        mask = self._create_mask(res, row_prune_inds, col_prune_inds)
        res = np.ma.masked_array(res, mask=mask)

        # ---return the masked array---
        return res

    def prune_indices(self, transforms=None):
        """Return indices of pruned rows and columns as list.

        The return value has one of three possible forms:

        * a 1-element list of row indices (in case of 1D cube)
        * 2-element list of row and col indices (in case of 2D cube)
        * n-element list of tuples of 2 elements (if it's 3D cube).

        For each case, the 2 elements are the ROW and COL indices of the
        elements that need to be pruned. If it's a 3D cube, these indices are
        calculated "per slice", that is NOT on the 0th dimension (as the 0th
        dimension represents the slices).
        """
        if self.ndim >= 3:
            # In case of a 3D cube, return list of tuples
            # (of row and col pruned indices).
            return self._prune_3d_indices(transforms)

        def prune_non_3d_indices(transforms):
            row_margin = self._pruning_base(
                hs_dims=transforms, axis=self.row_direction_axis
            )
            row_indices = self._margin_pruned_indices(
                row_margin, self._inserted_dim_inds(transforms, 0), 0
            )

            if row_indices.ndim > 1:
                # In case of MR, we'd have 2D prune indices
                row_indices = row_indices.all(axis=1)

            if self.ndim == 1:
                return [row_indices]

            col_margin = self._pruning_base(
                hs_dims=transforms, axis=self._col_direction_axis
            )

            col_indices = self._margin_pruned_indices(
                col_margin, self._inserted_dim_inds(transforms, 1), 1
            )
            if col_indices.ndim > 1:
                # In case of MR, we'd have 2D prune indices
                col_indices = col_indices.all(axis=0)

            return [row_indices, col_indices]

        # In case of 1 or 2 D cubes, return a list of
        # row indices (or row and col indices)
        return prune_non_3d_indices(transforms)

    def _prune_indices_tuple(self, row_margin, column_margin, transforms):
        if row_margin.ndim > 1:
            row_margin = np.sum(row_margin, axis=1)
        if column_margin.ndim > 1:
            column_margin = np.sum(column_margin, axis=0)

        return (
            self._margin_pruned_indices(
                row_margin, self._inserted_dim_inds(transforms, 0), 0
            ),
            self._margin_pruned_indices(
                column_margin, self._inserted_dim_inds(transforms, 1), 1
            ),
        )

    def _pruning_base(self, axis=None, hs_dims=None):
        """Gets margin if across CAT dimension. Gets counts if across items.

        Categorical variables are pruned based on their marginal values. If the
        marginal is a 0 or a NaN, the corresponding row/column is pruned. In
        case of a subvars (items) dimension, we only prune if all the counts
        of the corresponding row/column are zero.
        """
        if not self._is_axis_allowed(axis):
            # In case we encountered axis that would go across items dimension,
            # we need to return at least some result, to prevent explicitly
            # checking for this condition, wherever self._margin is used
            return self.as_array(weighted=False, include_transforms_for_dims=hs_dims)

        # In case of allowed axis, just return the normal API margin. This call
        # would throw an exception when directly invoked with bad axis. This is
        # intended, because we want to be as explicit as possible. Margins
        # across items are not allowed.
        return self.margin(
            axis=axis, weighted=False, include_transforms_for_dims=hs_dims
        )

    def _update_result(self, result, insertions, dimension_index):
        """Insert subtotals into resulting ndarray."""
        for j, (ind_insertion, value) in enumerate(insertions):
            result = np.insert(
                result, ind_insertion + j + 1, value, axis=dimension_index
            )
        return result


class _Measures(object):
    """Gateway to the measures contained in a cube response."""

    def __init__(self, cube_dict, all_dimensions):
        self._all_dimensions = all_dimensions
        self._cube_dict = cube_dict

    @lazyproperty
    def is_weighted(self):
        """True if weights have been applied to the measure(s) for this cube.

        Unweighted counts are available for all cubes. Weighting applies to
        any other measures provided by the cube. The cube is considered
        weighted when the response names a weight ("weight" in its query,
        "weight_var" or "weight_url"), or when its count measure differs
        from its unweighted counts.
        """
        cube_dict = self._cube_dict
        weight_markers = (
            cube_dict.get("query", {}).get("weight"),
            cube_dict.get("weight_var"),
            cube_dict.get("weight_url"),
        )
        if any(marker is not None for marker in weight_markers):
            return True

        # ---no weight named, so compare the count measure to the raw counts---
        result = cube_dict["result"]
        count_data = result["measures"].get("count", {}).get("data")
        return bool(result["counts"] != count_data)

    @lazyproperty
    def means(self):
        """_MeanMeasure object providing access to means values.

        None when the cube response does not contain a mean measure.
        """
        measure_dicts = self._cube_dict.get("result", {}).get("measures", {})
        if measure_dicts.get("mean") is None:
            return None
        return _MeanMeasure(self._cube_dict, self._all_dimensions)

    @lazyproperty
    def missing_count(self):
        """numeric representing count of missing rows in cube response.

        Taken from the means measure when there is one, otherwise from the
        "missing" item of the result, defaulting to 0.
        """
        means = self.means
        if means:
            return means.missing_count
        return self._cube_dict["result"].get("missing", 0)

    @lazyproperty
    def population_fraction(self):
        """The filtered/unfiltered ratio for cube response.

        This value is required for properly calculating population on a cube
        where a filter has been applied. Returns `np.nan` if the unfiltered
        count is zero, which would otherwise result in a divide-by-zero
        error. Returns 1.0 when the ratio cannot be computed for any other
        reason, such as the counts being absent from the response (e.g. an
        unfiltered cube).
        """
        result = self._cube_dict["result"]
        filtered_n = result.get("filtered", {}).get("weighted_n")
        unfiltered_n = result.get("unfiltered", {}).get("weighted_n")
        try:
            return filtered_n / unfiltered_n
        except ZeroDivisionError:
            return np.nan
        except Exception:
            return 1.0

    @lazyproperty
    def unweighted_counts(self):
        """_UnweightedCountMeasure object for this cube.

        Unweighted counts are accessible through this object whether or not
        the cube also contains weighted counts.
        """
        cube_dict, all_dimensions = self._cube_dict, self._all_dimensions
        return _UnweightedCountMeasure(cube_dict, all_dimensions)

    @lazyproperty
    def unweighted_n(self):
        """int count of actual rows represented by query response."""
        result = self._cube_dict["result"]
        return result["n"]

    @lazyproperty
    def weighted_counts(self):
        """_WeightedCountMeasure object for this cube.

        This object provides access to weighted counts for this cube, if
        available. If the cube response is not weighted, the
        _UnweightedCountMeasure object for this cube is returned.
        """
        if self.is_weighted:
            return _WeightedCountMeasure(self._cube_dict, self._all_dimensions)
        return self.unweighted_counts

    @lazyproperty
    def weighted_n(self):
        """float count of returned rows adjusted for weighting.

        This is the sum of the count measure for a weighted cube, and the
        unweighted row count otherwise.
        """
        if self.is_weighted:
            count_data = self._cube_dict["result"]["measures"]["count"]["data"]
            return float(sum(count_data))
        return float(self.unweighted_n)


class _BaseMeasure(object):
    """Base class for measure objects."""

    def __init__(self, cube_dict, all_dimensions):
        self._all_dimensions = all_dimensions
        self._cube_dict = cube_dict

    @lazyproperty
    def raw_cube_array(self):
        """Return read-only ndarray of measure values from cube-response.

        The shape of the ndarray mirrors the shape of the (raw) cube
        response. Specifically, it includes values for missing elements, any
        MR_CAT dimensions, and any prunable rows and columns.
        """
        flat_array = np.array(self._flat_values)
        shaped_array = flat_array.reshape(self._all_dimensions.shape)
        # ---must be read-only to avoid hard-to-find bugs---
        shaped_array.setflags(write=False)
        return shaped_array

    @lazyproperty
    def _flat_values(self):
        """Return tuple of measure values as found in cube response.

        Each subclass must implement this property; this base version always
        raises `NotImplementedError`.
        """
        raise NotImplementedError("must be implemented by each subclass")


class _MeanMeasure(_BaseMeasure):
    """Statistical mean values from a cube-response."""

    @lazyproperty
    def missing_count(self):
        """numeric representing count of missing rows reflected in response.

        This is the "n_missing" item of the mean measure, 0 when absent.
        """
        mean_measure_dict = self._cube_dict["result"]["measures"]["mean"]
        return mean_measure_dict.get("n_missing", 0)

    @lazyproperty
    def _flat_values(self):
        """Return tuple of mean values as found in cube response.

        Mean data may include missing items represented by a dict like
        {'?': -1} in the cube response. These are replaced by np.nan in the
        returned value.
        """
        mean_data = self._cube_dict["result"]["measures"]["mean"]["data"]
        return tuple(np.nan if type(x) is dict else x for x in mean_data)


class _UnweightedCountMeasure(_BaseMeasure):
    """Unweighted counts for cube."""

    @lazyproperty
    def _flat_values(self):
        """tuple of counts before weighting, the "counts" item of the result."""
        unweighted_counts = self._cube_dict["result"]["counts"]
        return tuple(unweighted_counts)


class _WeightedCountMeasure(_BaseMeasure):
    """Weighted counts for cube."""

    @lazyproperty
    def _flat_values(self):
        """tuple of numeric counts after weighting, from the count measure."""
        count_measure_dict = self._cube_dict["result"]["measures"]["count"]
        return tuple(count_measure_dict["data"])
