Some groupby methods have an `axis` argument in addition to the `axis` argument of `DataFrame.groupby`. When these two axis arguments do not agree (e.g. `df.groupby(..., axis=0).skew(axis=1)`), the result is the same operation that can be achieved more efficiently without using groupby. More explicitly:
- `df.groupby(..., axis=1).op(axis=0)` will operate column by column, the same as `DataFrame.op(axis=0)`.
- `df.groupby(..., axis=0).op(axis=1)` will operate row by row, the same as `DataFrame.op(axis=1)`.
There is some reshaping that is necessary to get exact equality (see the code below) because groupby shapes the indices in a specific way.
In addition to this, various groupby ops either fail or effectively ignore the `axis` argument (it is used in the code, but in such a way that it does not influence the result).
I propose we deprecate the `axis` argument across groupby ops. For those ops where it is necessary, the future behavior will have the op's `axis` value the same as that provided to groupby. In other words, all ops either act as `df.groupby(..., axis=0).op(axis=0)` or `df.groupby(..., axis=1).op(axis=1)`.
Code:
# Compare df.groupby(..., axis=0).op(axis=1) against DataFrame.op(axis=1) for
# every groupby kernel that accepts an ``axis`` argument. Kernels that raise
# are reported as "Failed: <kernel> <error>"; kernels whose results differ
# from the plain DataFrame op are reported by name only.
import inspect

import numpy as np
import pandas as pd
import pandas._testing as tm
from pandas.core.groupby.base import (
    groupby_other_methods,
    reduction_kernels,
    transformation_kernels,
)
from pandas.tests.groupby import get_groupby_method_args

size = 10
df = pd.DataFrame(
    {
        'a': np.random.randint(0, 3, size),
        'b': np.random.randint(0, 10, size),
        'c': np.random.randint(0, 10, size),
        'd': np.random.randint(0, 10, size),
    }
)
gb = df.groupby('a', axis=0)
kernels = reduction_kernels | groupby_other_methods | transformation_kernels
for kernel in kernels:
    if kernel in ('plot',):
        continue
    try:
        op = getattr(gb, kernel)
        if not callable(op):
            continue
        # Only ops that expose an ``axis`` parameter are of interest here.
        arguments = inspect.signature(op).parameters.keys()
        if 'axis' not in arguments:
            continue
        args = get_groupby_method_args(kernel, df)
        result = op(*args, axis=1)
        if kernel in ('skew', 'idxmax', 'idxmin', 'fillna', 'take'):
            # For these kernels the comparison uses only columns b, c, d
            # (presumably because groupby drops the grouping column 'a').
            expected = getattr(df[['b', 'c', 'd']], kernel)(*args, axis=1)
        else:
            expected = getattr(df, kernel)(*args, axis=1)
    except Exception as err:
        print('Failed:', kernel, err)
    else:
        # Reshape ``expected`` so its index matches the layout groupby
        # produces; without this the values agree but the indices do not.
        if kernel in ('skew', 'idxmax', 'idxmin'):
            expected = expected.to_frame('values')
        if kernel in ('diff', 'skew', 'idxmax', 'idxmin', 'take'):
            expected = expected.set_index(df['a'], append=True)
            # swaplevel needs the two-level index created just above.
            expected = expected.swaplevel(0, 1).sort_index()
        if kernel in ('skew', 'idxmax', 'idxmin'):
            expected = expected['values'].rename(None)
        try:
            tm.assert_equal(result, expected)
        except Exception:
            # A mismatch is reported by kernel name only.
            print(kernel)
# Compare df.groupby(..., axis=1).op(axis=0) against DataFrame.op(axis=0) for
# every groupby kernel that accepts an ``axis`` argument and is not skipped
# below. Kernels that raise are reported as "Failed: <kernel> <error>";
# kernels whose results differ from the plain DataFrame op are reported by
# name only.
import inspect

import numpy as np
import pandas as pd
import pandas._testing as tm
from pandas.core.groupby.base import (
    groupby_other_methods,
    reduction_kernels,
    transformation_kernels,
)
from pandas.tests.groupby import get_groupby_method_args

size = 10
df = pd.DataFrame(
    {
        'a': np.random.randint(0, 3, size),
        'b': np.random.randint(0, 10, size),
        'c': np.random.randint(0, 10, size),
        'd': np.random.randint(0, 10, size),
    }
)
# Group the four columns by label: a, b, c -> group 0; d -> group 1.
gb = df.groupby([0, 0, 0, 1], axis=1)
kernels = reduction_kernels | groupby_other_methods | transformation_kernels
for kernel in kernels:
    if kernel in ('plot',):
        continue
    if kernel in ('rank', 'cummax', 'cummin', 'cumprod', 'cumsum', 'shift', 'diff', 'pct_change'):
        # Already ignores the axis argument
        continue
    if kernel in ('idxmin', 'idxmax', 'skew'):
        # Raises when using .groupby(..., axis=1).op(axis=0)
        continue
    try:
        op = getattr(gb, kernel)
        if not callable(op):
            continue
        # Only ops that expose an ``axis`` parameter are of interest here.
        arguments = inspect.signature(op).parameters.keys()
        if 'axis' not in arguments:
            continue
        args = get_groupby_method_args(kernel, df)
        result = op(*args, axis=0)
        expected = getattr(df, kernel)(*args, axis=0)
    except Exception as err:
        print('Failed:', kernel, err)
    else:
        try:
            tm.assert_equal(result, expected)
        except Exception:
            # A mismatch is reported by kernel name only.
            print(kernel)