Eduardo Bastos de Moraes
‌
‌
‌
‌
‌
‌
‌
‌
‌
‌
‌
‌
‌
‌
Sign up
Beta
Spinner

User Retention by Cohort

This template helps visualize user retention as the percentage of users in an acquisition cohort who are still using the product after several elapsed time periods. Retention can be visualized in two different ways:

  1. Annotated Heatmap: An annotated heatmap of retention by cohort is useful to visualize and compare rates across cohorts and time periods.

  2. Line Plot: A line plot of retention grouped by cohort is useful to visualize the dropoff in users over time periods.

# Load packages
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff

1. Load your data

Each row in the data (user-activity.csv) gives the number of users from one cohort and segment who were active in one time period. The file must contain the columns `cohort_date`, `period_date` and `nb_users`.

# Upload your data as CSV and load it as a data frame.
# cohort_date and period_date are parsed as datetimes so that the elapsed
# time between them can be computed later on.
df = pd.read_csv('data/user-activity.csv', parse_dates=["cohort_date", "period_date"])
# Preview the first rows of the loaded data.
df.head()

2. Compute Retention

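The retention rate is computed as the fraction of a cohort's users who were still active in each later period. User counts are summed across segments, and each cohort's largest per-period user count is used as its baseline.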

# Compute Retention
def compute_retention(df):
    """Compute weekly retention per acquisition cohort.

    Sums ``nb_users`` over every row sharing the same
    ``(cohort_date, period_date)`` pair, then expresses each period's count
    as a fraction of the largest count observed for that cohort.

    Args:
        df: Data frame with datetime columns ``cohort_date`` and
            ``period_date`` and a numeric ``nb_users`` column. Any other
            column (e.g. a segment label) is ignored.

    Returns:
        A new data frame with the columns ``cohort_date``, ``period_date``,
        ``period_index`` (weeks elapsed since the cohort date, as a float),
        ``nb_users`` and ``pct_users`` (a fraction, not multiplied by 100).
        The input frame is left unmodified.
    """
    # Aggregate only the user count: summing every remaining column would
    # also "sum" non-numeric ones such as segment labels.
    df_all = (
        df
        .groupby(['cohort_date', 'period_date'], as_index=False)['nb_users']
        .sum()
    )
    df_all['period_index'] = (
        (df_all['period_date'] - df_all['cohort_date']) / pd.Timedelta(weeks=1)
    )
    # The cohort's peak per-period count is the baseline for the fraction.
    # The 'max' alias avoids handing the Python builtin to transform().
    cohort_peak = df_all.groupby('cohort_date')['nb_users'].transform('max')
    df_all['pct_users'] = df_all['nb_users'] / cohort_peak
    return df_all[
        ['cohort_date', 'period_date', 'period_index', 'nb_users', 'pct_users']
    ]

# Build the retention table from the loaded activity data and preview it.
df_retention = compute_retention(df)
df_retention.head()

3. Visualize retention as heatmap

Each row in the heatmap represents a cohort and visualizes the percentage of users retained over time.

# Plot cohort retention heatmap
def plot_cohorts_heatmap(df, nb_periods=15):
    """Draw an annotated heatmap of retention with one row per cohort.

    Args:
        df: Retention table with a datetime ``cohort_date`` column plus
            ``period_index`` and ``pct_users`` columns. Each
            ``(cohort_date, period_index)`` pair must appear at most once,
            because the table is pivoted on those two columns.
        nb_periods: Highest ``period_index`` to display. Period 0 is always
            left out.

    Returns:
        The figure built by ``ff.create_annotated_heatmap``, sized 900x700.
    """
    in_window = (df['period_index'] > 0) & (df['period_index'] <= nb_periods)
    df_wide = (
        df[in_window]
        .pivot(index="cohort_date", columns='period_index', values='pct_users')
        .sort_index(ascending=False)
        .fillna(0)
    )
    # Cells with no data were filled with 0 above and are left unlabelled.
    # A plain comprehension replaces DataFrame.applymap, which recent pandas
    # versions deprecate.
    annotation_text = [
        ['{:.1%}'.format(value) if value > 0 else '' for value in row]
        for row in df_wide.values
    ]
    fig = ff.create_annotated_heatmap(
        z=df_wide.values,
        annotation_text=annotation_text,
        y=df_wide.index.strftime('%Y - W%W').values.tolist(),
        x=df_wide.columns.tolist(),
        colorscale='viridis_r',
    )
    fig.update_layout(
        width=900,
        height=700,
        xaxis={"title": "# Periods Elapsed"},
        title="User Retention by Cohort: Heatmap",
    )
    return fig

# Build the heatmap from the retention table and render it with the
# Plotly mode bar hidden.
fig = plot_cohorts_heatmap(df_retention)
fig.show(config={"displayModeBar": False})

4. Visualize retention as line plot

Each line represents a cohort and visualizes the drop-off in the percentage of users retained over time.

# Plot cohort retention lines
def plot_cohort_lines(df, nb_periods=15):
    """Draw one retention line per cohort.

    Args:
        df: Retention table with ``cohort_date``, ``period_index`` and
            ``pct_users`` columns. The frame is not modified.
        nb_periods: Highest ``period_index`` to display. Period 0 is always
            left out. The bound is inclusive, matching
            ``plot_cohorts_heatmap``.

    Returns:
        The figure built by ``px.line``.
    """
    in_window = (df['period_index'] > 0) & (df['period_index'] <= nb_periods)
    # Stringify the cohort label on a derived frame only. Overwriting the
    # column on the caller's frame would break later code that still needs
    # cohort_date as a datetime (e.g. the heatmap's strftime call).
    plot_df = df[in_window].assign(
        cohort_date=lambda frame: frame['cohort_date'].astype(str)
    )
    fig = px.line(
        plot_df,
        x='period_index',
        y='pct_users',
        line_group='cohort_date',
        color_discrete_sequence=["lightslategray"],
    )
    fig.update_layout(
        xaxis={"title": "# Periods Elapsed"},
        yaxis={"title": "% Users Retained"},
        title="User Retention by Cohorts: Line Plot",
    )
    return fig


# Build the per-cohort line plot from the retention table and render it
# with the Plotly mode bar hidden.
fig_lines = plot_cohort_lines(df_retention)
fig_lines.show(config = {"displayModeBar": False})
  • AI Chat
  • Code