Workspace
Stefan Stojkovic/

Introduction to Data Visualization with ggplot2

0
Beta
Spinner

Introduction to Data Visualization with ggplot2

Run the hidden code cell below to import the data used in this course.

Take Notes

Add notes about the concepts you've learned and code cells with code you want to keep.

Add your notes here

# Add your code snippets here
library("ggplot2")

# Plot mpg against cyl, treating cyl as a factor (discrete x-axis)
ggplot(mtcars, aes(factor(cyl), mpg)) +
  # Draw the points once, semi-transparent so overlapping points stay visible
  # (alpha controls transparency, not point size)
  geom_point(alpha = 0.4) +
  # Add a regression line. On a discrete x-axis every factor level forms its
  # own group with a single x value, which cannot be fitted, so no line would
  # be drawn. group = 1 pools all rows into one fit. Note that the fit runs
  # over the level positions (1, 2, 3), not the numeric cyl values.
  geom_smooth(aes(group = 1), method = "lm", formula = y ~ x, se = FALSE)

# Line plot of the unemployed share of the population (unemploy / pop) over
# time, stored as plt_prop_unemployed_over_time so it can be reused below.
plt_prop_unemployed_over_time <- ggplot(
  data = economics,
  mapping = aes(x = date, y = unemploy / pop)
) +
  geom_line()

# Show the stored plot with the legend positioned at the bottom.
# This plot maps no colour/fill aesthetic, so no legend is drawn and the
# setting has no visible effect here; it matters once a legend exists.
plt_prop_unemployed_over_time + theme(legend.position = "bottom")

# Proportional stacked bar plot from late_shipments: one bar per
# vendor_inco_term, filled by freight_cost_group. position = "fill" scales
# every bar to the same height so the segments read as proportions.
ggplot(
  data = late_shipments,
  mapping = aes(x = vendor_inco_term, fill = freight_cost_group)
) +
  geom_bar(position = "fill") +
  labs(y = "proportion")

# coord_flip()
# In ggplot2, the coord_flip() function is used to flip the x and y axes, effectively transposing the plot's orientation from horizontal to vertical or vice versa.

#################################################################################################
#### Plot price against size with one prediction line per model
# The models are fitted first and the reshaped predictions are piped straight
# into ggplot(), because the plot needs the `pred` and `modeltype` columns
# that only exist after the reshape.
# NOTE(review): `%>%`, mutate() and pivot_longer() need dplyr and tidyr
# attached; presumably the hidden setup cell does this -- confirm.

# houseprice is available
summary(houseprice)

# Create the formula for price as a function of squared size
(fmla_sqr <- price ~ I(size^2))

# Fit a model of price as a function of squared size (use fmla_sqr)
model_sqr <- lm(fmla_sqr, data = houseprice)

# Fit a model of price as a linear function of size
model_lin <- lm(price ~ size, data = houseprice)

# Make predictions, reshape to long form (one row per observation and model),
# then plot the observed prices with one prediction line per model
houseprice %>%
  mutate(
    pred_lin = predict(model_lin), # predictions from linear model
    pred_sqr = predict(model_sqr)  # predictions from quadratic model
  ) %>%
  pivot_longer(
    cols = c(pred_lin, pred_sqr),
    names_to = "modeltype",
    values_to = "pred"
  ) %>%
  ggplot(aes(x = size, y = price)) +
  geom_point() +
  geom_line(aes(y = pred, color = modeltype)) + # the predictions
  scale_color_brewer(palette = "Dark2")
##################################################################################################

# coord_fixed() fixes the plot's aspect ratio: with the default ratio = 1, one unit on the x-axis is drawn the same length as one unit on the y-axis


### Scatter plot of three numeric variables: two on the axes and the third
### (price_twd_msq) as point colour, using the viridis "inferno" palette
ggplot(
  data = taiwan_real_estate,
  mapping = aes(
    x = n_convenience,
    y = sqrt(dist_to_mrt_m),
    color = price_twd_msq
  )
) +
  # One point per row of the data
  geom_point() +
  # Continuous viridis colour scale; option = "plasma" is an alternative
  scale_color_viridis_c(option = "inferno")