Create pivot table in Pandas


Import modules

import pandas as pd
 

Create some dummy data

raw_data = {'name': ['Willard Morris', 'Al Jennings', 'Omar Mullins', 'Spencer McDaniel'],
                      'age': [20, 19, 22, 21],
                      'favorite_color': ['blue', 'blue', 'yellow', "green"],
                      'grade': [88, 92, 95, 70]}
df = pd.DataFrame(raw_data)
df
age favorite_color grade name
0 20 blue 88 Willard Morris
1 19 blue 92 Al Jennings
2 22 yellow 95 Omar Mullins
3 21 green 70 Spencer McDaniel

#strip dataframe to contain just name, favorite_color
df = df[['name', 'favorite_color']]

#pivot the df (note that reset_index can be removed if we want to hold pivot levels. In most cases I reset the index)
df_pivot = df.groupby(['favorite_color']).count().reset_index()

#rename 'name' column to n-count of colors
df_pivot.columns = ['favorite_color', 'name']

df_pivot.head()
favorite_color name
0 blue 2
1 green 1
2 yellow 1

#you can pull mean, other aggregate functions by replacing 'count' with 'mean', etc
#you can select multiple levels for your pivot in the groupby function, e.g. df.groupby(['X', 'Y', 'Z']).count()



Copyright © Erik Rood 2020 | Twitter