Beta
Table of Contents
The outline of your notebook will show up here. You can include headings in any text cell by starting a line with #
, ##
, ###
, etc., depending on the desired title hierarchy.
Note that this notebook was automatically generated from an RDocumentation page. It depends on the package and the example code whether this code will run without errors. You may need to edit the code to make things work.
if(!require('openintro')) {
install.packages('openintro')
library('openintro')
}
e <- email
# ______ Variables For Logistic Regression ______#
# Variables are modified to match
# OpenIntro Statistics, Second Edition
# As Is (7): spam, to_multiple, winner, format,
# re_subj, exclaim_subj
# Omitted (6): from, sent_email, time, image,
# viagra, urgent_subj, number
# Become Indicators (5): cc, attach, dollar,
# inherit, password
e$cc <- ifelse(email$cc > 0, 1, 0)
e$attach <- ifelse(email$attach > 0, 1, 0)
e$dollar <- ifelse(email$dollar > 0, 1, 0)
e$inherit <- ifelse(email$inherit > 0, 1, 0)
e$password <- ifelse(email$password > 0, 1, 0)
# Transform (3): num_char, line_breaks, exclaim_mess
# e$num_char <- cut(email$num_char, c(0,1,5,10,20,1000))
# e$line_breaks <- cut(email$line_breaks, c(0,10,100,500,10000))
# e$exclaim_mess <- cut(email$exclaim_mess, c(-1,0,1,5,10000))
g <- glm(spam ~ to_multiple + winner + format +
re_subj + exclaim_subj +
cc + attach + dollar +
inherit + password, # +
# num_char + line_breaks + exclaim_mess,
data = e, family = binomial
)
summary(g)
# ______ Variable Selection Via AIC ______#
g. <- step(g)
plot(predict(g., type = "response"), e$spam)
# ______ Splitting num_char by html ______#
x <- log(email$num_char)
bw <- 0.004
R <- range(x) + c(-1, 1)
wt <- sum(email$format == 1) / nrow(email)
htmlAll <- density(x, bw = 0.4, from = R[1], to = R[2])
htmlNo <- density(x[email$format != 1],
bw = 0.4,
from = R[1], to = R[2]
)
htmlYes <- density(x[email$format == 1],
bw = 0.4,
from = R[1], to = R[2]
)
htmlNo$y <- htmlNo$y #* (1-wt)
htmlYes$y <- htmlYes$y #* wt + htmlNo$y
plot(htmlAll, xlim = c(-4, 6), ylim = c(0, 0.4))
lines(htmlNo, col = 4)
lines(htmlYes, lwd = 2, col = 2)