I was given two datasets of ride-sharing data for a fictional service, similar to the data used by the well-known ride-sharing app Uber. I was asked to interpret the data and to reproduce, exactly, a bubble plot and three pie charts.
Here, I used Pandas in a Jupyter Notebook. The task required me to use Matplotlib. One quirk of Matplotlib is that calling plt.show() before plt.savefig() prevents the figure from being saved properly: once the figure has been shown, the later savefig() call no longer has the chart to write out. Since I called plt.show() first for the pie charts, those images didn't save properly (to the file location I specified), which resulted in the strange output beneath each pie chart. I decided to keep that in as a reminder. By contrast, the bubble plot saved correctly.
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Input CSV locations (adjust these paths to wherever the data lives).
city_data_to_load = "data/city_data.csv"
ride_data_to_load = "data/ride_data.csv"

# Load the city file and the ride file into DataFrames.
dfa = pd.read_csv(city_data_to_load)
dfb = pd.read_csv(ride_data_to_load)

# Left join with the ride frame on the left: every ride row is kept and the
# matching city columns are attached to it. No `on=` is given, so pandas
# joins on whichever column names the two frames have in common.
dfc = dfb.merge(dfa, how='left')

# Preview the first rows of the merged frame (rendered as cell output in a notebook).
dfc.head()
# Bucket the merged rows by city type first, then by city within each type.
dfc_grouped = dfc.groupby(['type', 'city'])

# Per-city aggregates, each indexed by (type, city).
# Number of ride_id entries in each city group.
ride_counts = dfc_grouped['ride_id'].count()
# Mean fare in each city group.
average_fares = dfc_grouped['fare'].mean()
# sum / count of driver_count inside each city group, i.e. its per-group average.
# NOTE(review): presumably driver_count is constant across a city's rows, which
# would make this the city's driver count -- confirm against the data.
grouped_driver_count = dfc_grouped['driver_count']
drivers_counts = grouped_driver_count.sum() / grouped_driver_count.count()

# Selecting on the first index level yields one city-indexed series per city type.
city_type_order = ('Urban', 'Suburban', 'Rural')

just_urban_ride_counts, just_suburban_ride_counts, just_rural_ride_counts = (
    ride_counts.loc[city_type] for city_type in city_type_order
)
just_urban_fares, just_suburban_fares, just_rural_fares = (
    average_fares.loc[city_type] for city_type in city_type_order
)
just_urban_drivers_counts, just_suburban_drivers_counts, just_rural_drivers_counts = (
    drivers_counts.loc[city_type] for city_type in city_type_order
)
# One entry per city type, in drawing order:
# (rides per city, average fare per city, driver figure per city, bubble colour).
bubble_series = [
    (just_urban_ride_counts, just_urban_fares, just_urban_drivers_counts, 'orange'),
    (just_suburban_ride_counts, just_suburban_fares, just_suburban_drivers_counts, 'blue'),
    (just_rural_ride_counts, just_rural_fares, just_rural_drivers_counts, 'gold'),
]

# x = ride count, y = average fare, bubble area = 10 x the driver figure.
for rides_per_city, fare_per_city, drivers_per_city, bubble_color in bubble_series:
    plt.scatter(
        rides_per_city,
        fare_per_city,
        s=drivers_per_city * 10,
        c=bubble_color,
        alpha=0.8,
        edgecolors='black',
    )

# Legend labels are listed in the same order the scatter series were drawn.
plt.legend(loc='upper right', labels=['Urban', 'Suburban', 'Rural'], title='City Types')

# Titles, axis labels and grid.
plt.title('Pyber Ride Sharing Data (2016)')
plt.xlabel('Total Number of Rides (Per City)')
plt.ylabel('Average Fare ($)')
plt.grid(True)

# Write the finished figure to disk.
plt.savefig("../Images/Ride.png")
# Pie chart: share of total fares contributed by each city type.

# Fare total per (type, city) group, then collapsed to one number per city type.
total_fares = dfc_grouped['fare'].sum()
just_urban_fares_sum = total_fares.loc['Urban'].sum()
just_suburban_fares_sum = total_fares.loc['Suburban'].sum()
just_rural_fares_sum = total_fares.loc['Rural'].sum()

# Wedge values, labels and colours are all listed in the same order:
# Urban, Rural, Suburban.
fares_sum_list = [just_urban_fares_sum, just_rural_fares_sum, just_suburban_fares_sum]
explode1 = [0.1, 0, 0]  # offset only the first (Urban) wedge
labels = ['Urban', 'Rural', 'Suburban']
colors = ['#f28383', '#f6cf05', '#84cbfb']

plt.axis('equal')
plt.pie(fares_sum_list, explode=explode1, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=290)
plt.title('% of Total Fares by City Type')

# Save BEFORE showing: once plt.show() has displayed the figure, a later
# plt.savefig() no longer has the chart and writes an empty image.
plt.savefig("../Images/FaresSumPie.png")
plt.show()
# Pie chart: share of total rides taken in each city type.

# Collapse the per-city ride counts to one total per city type.
just_urban_ride_counts_sum = ride_counts.loc['Urban'].sum()
just_suburban_ride_counts_sum = ride_counts.loc['Suburban'].sum()
just_rural_ride_counts_sum = ride_counts.loc['Rural'].sum()

# Wedge values, labels and colours are all listed in the same order:
# Urban, Rural, Suburban.
sum_counts_list = [just_urban_ride_counts_sum, just_rural_ride_counts_sum, just_suburban_ride_counts_sum]
explode1 = [0.1, 0, 0]  # offset only the first (Urban) wedge
labels = ['Urban', 'Rural', 'Suburban']
colors = ['#f28383', '#f6cf05', '#84cbfb']

plt.axis('equal')
plt.pie(sum_counts_list, explode=explode1, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=260)
plt.title('% of Total Rides by City Type')

# Save BEFORE showing: once plt.show() has displayed the figure, a later
# plt.savefig() no longer has the chart and writes an empty image.
plt.savefig("../Images/RideCountPie.png")
plt.show()
# Pie chart: share of total drivers in each city type.

# Add up the per-city driver figures to get one total per city type.
just_urban_drivers_counts_sum = drivers_counts.loc['Urban'].sum()
just_suburban_drivers_counts_sum = drivers_counts.loc['Suburban'].sum()
just_rural_drivers_counts_sum = drivers_counts.loc['Rural'].sum()

# Wedge values, labels and colours are all listed in the same order:
# Urban, Rural, Suburban.
drivers_sum_list = [just_urban_drivers_counts_sum, just_rural_drivers_counts_sum, just_suburban_drivers_counts_sum]
explode1 = [0.1, 0, 0]  # offset only the first (Urban) wedge
labels = ['Urban', 'Rural', 'Suburban']
colors = ['#f28383', '#f6cf05', '#84cbfb']

plt.axis('equal')
plt.pie(drivers_sum_list, explode=explode1, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=235)
plt.title('% of Total Drivers by City Type')

# Save BEFORE showing: once plt.show() has displayed the figure, a later
# plt.savefig() no longer has the chart and writes an empty image.
plt.savefig("../Images/DriverCountPie.png")
plt.show()