Workspace
Felix Friedrich/

Breast cancer classification using limma (Bioconductor)

0
Beta
Spinner
# Global knitr chunk defaults: echo the code, centre figures, and render
# them at 80% of the output width.
knitr::opts_chunk$set(
  echo = TRUE,
  fig.align = "center",
  out.width = "80%"
)

Preparations

Installing packages

# Install the CRAN dependencies in one call (each package listed once).
# `quiet = TRUE` is the documented install.packages() argument for reducing
# console output.
install.packages(
  c("tidyverse", "gplots", "readxl", "BiocManager"),
  quiet = TRUE
)

# Bioconductor packages are installed through BiocManager, which forwards
# `quiet` to install.packages(). `force = TRUE` is kept from the original so
# the packages are reinstalled even when they are already present.
BiocManager::install(
  c("limma", "BiocGenerics", "Biobase"),
  quiet = TRUE,
  force = TRUE
)

Loading packages

# Attach every package used in the analysis, in the same order as before,
# suppressing the start-up messages that `quietly` controls.
invisible(lapply(
  c(
    "tidyverse",
    "parallel",
    "limma",
    "readxl",
    "BiocGenerics",
    "gplots",
    "Biobase"
  ),
  library,
  character.only = TRUE,
  quietly = TRUE
))

Get data

# Download the example workbook into the working directory and read it.
url_file <- "https://github.com/friedrfe/breast_cancer_protein_abundance/raw/main/20211212_SubtypeExample.xlsx"

# An .xlsx file is binary, so it has to be written with mode = "wb";
# the default text mode can corrupt the download on Windows and make
# read_xlsx() fail.
download.file(url_file, "file.xlsx", mode = "wb")

e <- read_xlsx("file.xlsx")

Exploratory Data Analysis

# First look at the raw table: leading rows, column types, and per-column
# summaries.
head(e)
glimpse(e)
summary(e)

# Number of missing values in each column.
sapply(e, function(column) sum(is.na(column)))

# Work on a copy whose gene-name column has the shorter header `Gene`.
df <- rename(e, Gene = `Gene name for protein`)
names(df)

Filter for the estrogen, progesterone, and HER2 receptor proteins

# Approach 1: regular-expression match on the gene symbol. The pattern is
# unanchored, so any symbol that merely contains one of these strings is
# also selected.
recept <- grepl("ESR1|PGR|ERBB2", df$Gene)
df[recept, ]

# Approach 2: exact match against the three receptor gene symbols.
rcpt <- df %>%
  filter(Gene %in% c("ESR1", "PGR", "ERBB2"))
rcpt

Limma

Preparing data for ExpressionSet class creation

# Keep every column except the two identifier columns; per the original
# note, these are the trial replicates used for the phenoData design.
samples <- select(df, -c(Gene, accession_number))

# Gene names must be unique if they are used as row indices, so repeated
# names get a ".<n>" suffix.
df[["Gene"]] <- make.unique(df[["Gene"]], sep = ".")

# Number of distinct gene labels after de-duplication.
length(unique(df[["Gene"]]))