LAB_7_ANALYTICS_CALIFICADO
  • AI Chat
  • Code
  • Report
  • Beta
    Spinner

    LAB 7: PARTE CALIFICADA

    MARKO ANDRE TALLEDO HERRERA (20162271)

    # Leemos las librerias necesarias para el estudio
    library(tidyverse)
    library(dplyr)
    library(ggplot2)
    library(stats)
    library(e1071)
    # Count the missing values in each column of the dataset.
    led_data %>%
      is.na() %>%
      colSums()
    # Dimensions (rows, columns) before any rows are removed.
    dim(led_data)
    # Original author's note: roughly 18% of the rows contain missing values.
    # Keep only the rows that have no missing value in any column.
    led_data <- led_data %>%
      na.omit()
    # Derive the categorical helper columns that are later expanded into
    # dummy variables.
    # Install countrycode only when it is missing, so re-running the script
    # does not reinstall the package (and need network access) every time.
    if (!requireNamespace("countrycode", quietly = TRUE)) {
      install.packages("countrycode")
    }
    library(countrycode)
    led_data <- led_data %>%
      mutate(
        # Continent looked up from the English country name; countrycode()
        # yields NA for names it cannot match.
        Continent = countrycode(Country, "country.name", "continent"),
        # Year bucket, evaluated top to bottom so each lower bound is
        # inclusive: 2005 <= Year < 2010 is "2005-2010", Year >= 2010 is
        # "2010-2015". Years before 2000 get the empty label "" and a
        # missing Year stays NA.
        discret_year = case_when(
          is.na(Year) ~ NA_character_,
          Year >= 2010 ~ "2010-2015",
          Year >= 2005 ~ "2005-2010",
          Year >= 2000 ~ "2000-2005",
          TRUE ~ ""
        )
      )
    
    
    # Expand the categorical columns into 0/1 indicator (dummy) columns.
    # Install fastDummies only when it is missing instead of on every run.
    if (!requireNamespace("fastDummies", quietly = TRUE)) {
      install.packages("fastDummies")
    }
    library(fastDummies)
    # One call handles all three columns; indicators are appended in the
    # order the columns are listed.
    led_data <- dummy_cols(
      led_data,
      select_columns = c("Continent", "discret_year", "Status")
    )
    # Remove the identifier/source columns, the indicator for a missing
    # continent, and Status_Developing (so Status is represented by the
    # remaining Status_* indicator(s) only).
    # NOTE(review): every Continent_* (except NA) and discret_year_*
    # indicator is kept; if they sum to 1 on every row they are collinear
    # with the intercept of a later lm() fit -- consider dropping one
    # reference level of each as well.
    drop_cols <- c(
      "Country", "Year", "Status", "Continent", "discret_year",
      "Status_Developing", "Continent_NA"
    )
    # Logical selection is safe even if none of the names are present,
    # whereas -which(...) would then select zero columns.
    led_data <- led_data[, !(names(led_data) %in% drop_cols), drop = FALSE]
    head(led_data)
    # Principal component analysis of the dataset.
    # Fit once on standardized columns (scale. = TRUE) and reuse that fit for
    # the printout, the variance summary and the biplot. Previously scale.
    # was handed to summary() and biplot(), which do not rescale anything,
    # so those two outputs described a different, unscaled PCA.
    pca_fit <- prcomp(led_data, scale. = TRUE)
    # Standard deviations and loadings of the components.
    pca_fit
    # Proportion of variance explained by each component.
    summary(pca_fit)
    # Observations and variables on the first two components.
    biplot(pca_fit)
    # Train/test split: draw the training rows at random, without
    # replacement; every remaining row goes to the test set.
    # NOTE(review): no seed is set, so the split (and every result below)
    # changes between runs; call set.seed() first if the analysis must be
    # reproducible.
    train_size <- 1154
    # Fail early with a clear message instead of sampling more rows than
    # exist or silently producing an empty test set.
    if (nrow(led_data) <= train_size) {
      stop(
        "led_data needs more than ", train_size,
        " rows to leave a non-empty test set; it has ", nrow(led_data), ".",
        call. = FALSE
      )
    }
    x <- sample(seq_len(nrow(led_data)), train_size)

    led_train <- led_data[x, ]

    led_test <- led_data[-x, ]
    
    # Linear regression: Lifeexpectancy explained by every other column of
    # the training set.
    fit <- lm(Lifeexpectancy ~ ., data = led_train)
    fit

    # Coefficient table and goodness-of-fit statistics.
    summary(fit)

    # Standard diagnostic plots for the fitted model.
    plot(fit)
    
    
    
    
    # Predict with the fitted model on the held-out test rows.
    pred <- predict(fit, newdata = led_test)
    # Twelve 1s followed by four 2s; not used in the visible part of the
    # script (presumably plotting styles -- confirm against later code).
    style <- rep(c(1, 2), times = c(12, 4))