Beta
Pandas
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter # for custom number format on x and y axis
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Data: number of registered passenger cars in 2016
CAR_DATA_URL = 'https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/panel_data.csv'
df = pd.read_csv(CAR_DATA_URL)
df.head()

# Rows whose brand contains "audi", "bmw" or "benz"; the brand is lower-cased
# first so the match is case-insensitive.
brand_lower = df['brand'].str.lower()
df[brand_lower.str.contains('audi|bmw|benz')]
# Alternative: let the regex itself ignore case
# df[df['brand'].str.contains('(?i)audi|bmw|benz')]
df.info()

# Everything below works on the frame with the 'month' column removed.
without_month = df.drop(columns=['month'])

# Column-wise sums over the numeric columns.
without_month.sum(numeric_only=True)

# Per-brand sums, then the five brands with the largest 'total'.
brand_totals = without_month.groupby('brand').sum()
top5_brands = brand_totals.nlargest(5, 'total')
top5_brands
# Alternative: the five smallest instead
# brand_totals.nsmallest(5, 'total')

# One horizontal bar chart per column, laid out on a 2x4 grid with a shared y axis.
top5_brands.plot(
    kind='barh',
    subplots=True,
    layout=(2, 4),
    figsize=(12, 5),
    sharey=True,
)
plt.show()
DataFrame
# Load the mpg dataset; the 'cty', 'hwy' and 'trans' columns are used below.
# NOTE(review): assumes the CSV has an 'hwy' column like the standard mpg
# dataset -- confirm against the file.
df = pd.read_csv('https://raw.githubusercontent.com/prasertcbs/tutorial/master/mpg.csv')
df.head()

# 1 mile per US gallon = 0.425143707 km per litre.
# Presumably 'cty' and 'hwy' are in miles per gallon -- verify against the data.
MPG_TO_KML = .425143707

# City mileage converted to km/l, rounded to one decimal place.
df['cty_kml'] = (df['cty'] * MPG_TO_KML).round(1)

# Highway mileage converted to km/l. This column was previously computed from
# 'cty', which made it an exact duplicate of 'cty_kml'; it now reads 'hwy'.
# The column name 'cty_hwy' is kept so code that already refers to it keeps
# working, although a name like 'hwy_kml' would describe it better.
df['cty_hwy'] = (df['hwy'] * MPG_TO_KML).round(1)

# First character of the transmission label.
df['trans_am'] = df['trans'].str[0]
df.head()
df.describe()
Series
# Build a Series from a plain list. No index is given, so pandas assigns the
# default integer positions 0..3.
sales = pd.Series(data=[20, 12, 18, 10])
sales  # show the values together with their index labels
sales.index  # the automatically generated index