import pandas as pd
# Build a small example frame, then count how many people share each
# favorite color.
raw_data = {
    'name': ['Willard Morris', 'Al Jennings', 'Omar Mullins', 'Spencer McDaniel'],
    'age': [20, 19, 22, 21],
    'favorite_color': ['blue', 'blue', 'yellow', 'green'],
    'grade': [88, 92, 95, 70],
}
df = pd.DataFrame(raw_data)
df  # bare expression: presumably a notebook cell display; no effect in a script

# Strip the dataframe down to just name and favorite_color.
df = df[['name', 'favorite_color']]

# Pivot the df: group by favorite_color and count the non-null values of the
# remaining column. reset_index() turns the group key back into a regular
# column; drop it if you want to keep the pivot levels as the index.
df_pivot = df.groupby(['favorite_color']).count().reset_index()

# After the count, the 'name' column holds the number of rows per color.
# NOTE(review): these labels are identical to the ones the frame already has,
# so this assignment renames nothing. If a real rename was intended, choose a
# distinct label for the count column (and update any downstream users).
df_pivot.columns = ['favorite_color', 'name']
df_pivot.head()  # bare expression: presumably a notebook cell display
# You can compute other aggregates by replacing 'count' with 'mean', etc. (numeric aggregates such as 'mean' need numeric columns).
# You can group on multiple levels by passing several keys to groupby, e.g. df.groupby(['X', 'Y', 'Z']).count()