<Python> Data Exploratory (Demo)
  • AI Chat
  • Code
  • Report
  • Beta
    Spinner

    Pandas

    import pandas as pd
    import numpy as np
    import seaborn as sns
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter # for custom number format on x and y axis
    %matplotlib inline
    %config InlineBackend.figure_format = 'retina'
    # Data: number of registered passenger cars in 2016.
    # NOTE(review): the file is called panel_data.csv -- confirm it holds the car data.
    csv_url = 'https://raw.githubusercontent.com/prasertcbs/basic-dataset/master/panel_data.csv'
    df = pd.read_csv(csv_url)
    df.head()

    # Rows whose brand mentions audi, bmw or benz. The column is lowercased
    # first so the regex alternation matches regardless of case.
    # Alternative using an inline regex flag:
    #   df[df['brand'].str.contains('(?i)audi|bmw|benz')]
    brand_lower = df['brand'].str.lower()
    df[brand_lower.str.contains('audi|bmw|benz')]

    df.info()

    # All aggregates below are computed with the 'month' column removed.
    without_month = df.drop(columns=['month'])

    # Column-wise sum of every numeric column.
    without_month.sum(numeric_only=True)

    # Per-brand sums, then the five brands with the largest 'total'.
    # For the five smallest use: brand_totals.nsmallest(5, 'total')
    brand_totals = without_month.groupby('brand').sum()
    top5_brands = brand_totals.nlargest(5, 'total')
    top5_brands

    # One horizontal bar chart per column (subplots=True), laid out on a
    # 2x4 grid with a shared y axis.
    top5_brands.plot(
        kind='barh',
        subplots=True,
        layout=(2, 4),
        figsize=(12, 5),
        sharey=True,
    )
    plt.show()

    DataFrame

    # Load the mpg sample data and add a few derived columns.
    df = pd.read_csv('https://raw.githubusercontent.com/prasertcbs/tutorial/master/mpg.csv')
    df.head()

    # Conversion factor: 1 mile per US gallon = 0.425143707 km per litre.
    # Assumes 'cty' and 'hwy' are expressed in miles per US gallon -- TODO confirm.
    MPG_TO_KML = .425143707

    # City and highway figures converted to km/l, rounded to one decimal.
    # The highway column has to be derived from 'hwy'; deriving it from 'cty'
    # would only produce a second copy of cty_kml.
    df['cty_kml'] = (df['cty'] * MPG_TO_KML).round(1)
    df['hwy_kml'] = (df['hwy'] * MPG_TO_KML).round(1)

    # First character of the transmission text (presumably 'a' for automatic
    # and 'm' for manual -- verify against the actual values in 'trans').
    df['trans_am'] = df['trans'].str[0]
    df.head()
    df.describe()

    Series

    # Build a Series from a plain list; no index is supplied, so pandas
    # assigns its default integer index.
    values = [20, 12, 18, 10]
    sales = pd.Series(data=values)
    sales
    sales.index