The Pandas pivot_table() is used to calculate, aggregate, and summarize your data. It is defined as a powerful tool that aggregates data with calculations such as Sum, Count, Average, Max, and Min.
It also allows the user to sort and filter your data when the pivot table has been created. The levels in the pivot table will be stored in MultiIndex objects (hierarchical indexes) on the index and columns of the result DataFrame.
import pandas as pd
import numpy as np
df = pd.DataFrame({"A": ["burger", "burger", "burger", "burger", "burger",
"fries", "fries", "fries", "fries"],
"B": ["one", "one", "one", "two", "two",
"one", "one", "two", "two"],
"C": ["small", "large", "large", "small",
"small", "large", "small", "small",
"large"],
"D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
"E": [2, 4, 5, 5, 6, 6, 8, 9, 9]})
df
A | B | C | D | E | |
---|---|---|---|---|---|
0 | burger | one | small | 1 | 2 |
1 | burger | one | large | 2 | 4 |
2 | burger | one | large | 2 | 5 |
3 | burger | two | small | 3 | 5 |
4 | burger | two | small | 3 | 6 |
5 | fries | one | large | 4 | 6 |
6 | fries | one | small | 5 | 8 |
7 | fries | two | small | 6 | 9 |
8 | fries | two | large | 7 | 9 |
table = pd.pivot_table(df, index=['A', 'B'])
table
D | E | ||
---|---|---|---|
A | B | ||
burger | one | 1.666667 | 3.666667 |
two | 3.000000 | 5.500000 | |
fries | one | 4.500000 | 7.000000 |
two | 6.500000 | 9.000000 |
table = pd.pivot_table(df, index=['A', 'B'], aggfunc=np.sum)
table
D | E | ||
---|---|---|---|
A | B | ||
burger | one | 5 | 11 |
two | 6 | 11 | |
fries | one | 9 | 14 |
two | 13 | 18 |
table = pd.pivot_table(df, values='D', index=['A', 'B'],
columns=['C'], aggfunc=np.sum)
table
C | large | small | |
---|---|---|---|
A | B | ||
burger | one | 4.0 | 1.0 |
two | NaN | 6.0 | |
fries | one | 4.0 | 5.0 |
two | 7.0 | 6.0 |
table = pd.pivot_table(df, values='D', index=['A', 'B'],
columns=['C'], aggfunc=np.sum, fill_value=0)
table
C | large | small | |
---|---|---|---|
A | B | ||
burger | one | 4 | 1 |
two | 0 | 6 | |
fries | one | 4 | 5 |
two | 7 | 6 |
table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'],
aggfunc={'D': np.mean,
'E': np.mean})
table
D | E | ||
---|---|---|---|
A | C | ||
burger | large | 2.000000 | 4.500000 |
small | 2.333333 | 4.333333 | |
fries | large | 5.500000 | 7.500000 |
small | 5.500000 | 8.500000 |
table = pd.pivot_table(df, values=['D', 'E'], index=['A', 'C'],
aggfunc={'D': np.mean,
'E': [min, max, np.mean]})
table
D | E | ||||
---|---|---|---|---|---|
mean | max | mean | min | ||
A | C | ||||
burger | large | 2.000000 | 5 | 4.500000 | 4 |
small | 2.333333 | 6 | 4.333333 | 2 | |
fries | large | 5.500000 | 9 | 7.500000 | 6 |
small | 5.500000 | 9 | 8.500000 | 8 |