#Multiple Regression (Multiple_regress.txt) #Y, the number of people employed; #X1, the index; #X2, the percentage price deflation; #X3, the GNP in millions of dollars; #X4, the number of unemployed in thousands; #X5, the number of people employed by the military; #X6, the number of people over 14; #X7, the year. ## Load the data set and prepare for use data <- read.table("Multiple_regress.txt") dimnames(data)[[2]] <- c("X1","X2","X3","X4","X5","X6","X7","Y") data <- as.data.frame(data) ## Fit a multiple linear regression with the data fit <- lm(Y~X2+X3+X4+X5+X6+X7, data=data) ## Model selection ##Forward selection step(lm(Y~1,data), scope = list(lower = ~1, upper = ~X2+X3+X4+X5+X6+X7), direction = "forward", test="F") ##Backward selection step(lm(Y~X2+X3+X4+X5+X6+X7,data), scope = list(lower = ~1, upper = ~X2+X3+X4+X5+X6+X7), direction = "backward", test="F") ##Stepwise selection step(lm(Y~X2+X3+X4+X5+X6+X7,data), scope = list(lower = ~1, upper = ~X2+X3+X4+X5+X6+X7), direction = "both", test="F") #################### library("leaps") x <- data[,c("X2","X3","X4","X5","X6","X7")] y <- data$Y ##Select the best model by the R2 criterion outs <- leaps(x,y,method="r2") plot(outs$size, outs$r2, log = "y", xlab = "p", ylab = expression(R2)) ##Chose the best model using the R2 criterion best.r2 <- outs$r2 == max(outs$r2) model.r2 <- outs$which[best.r2, ] ##Write the formula of the winner model formulae <- "Y ~ " cont <- 0 for(i in 2:(ncol(data)-1)){ if(model.r2[i-1]){ if(cont == 0){ formulae <- paste(formulae,names(data)[i]) cont <- 1 } else formulae <- paste(formulae,"+",names(data)[i]) } } ##Print the best model formula formulae ##Fit the best linear model out.best.r2 <- lm(formulae,data=data) summary(out.best.r2) ##Select the best model by the adjusted R2 criterion outs <- leaps(x,y,method="adjr2") plot(outs$size, outs$adjr2, log = "y", xlab = "p", ylab = expression(adjR2)) ##Chose the best model using the adjusted R2 criterion best.adjr2 <- outs$adjr2 == max(outs$adjr2) model.adjr2 <- outs$which[best.adjr2, ] ##Write the formula of the winner model formulae <- "Y ~ " cont <- 0 for(i in 2:(ncol(data)-1)){ if(model.adjr2[i-1]){ if(cont == 0){ formulae <- paste(formulae,names(data)[i]) cont <- 1 } else formulae <- paste(formulae,"+",names(data)[i]) } } ##Print the best model formula formulae ##Fit the best linear model out.best.adjr2 <- lm(formulae,data=data) summary(out.best.adjr2) fit <- lm(Y~X3+X4+X5+X7, data=data) ##Select the best model by the Cp criterion outs <- leaps(x,y,method="Cp") plot(outs$size, outs$Cp, log = "y", xlab = "p", ylab = expression(C[p])) lines(outs$size, outs$size) ##Chose the best model using the Cp criterion Cp <- outs$Cp-outs$size best.cp <- which(Cp < 0) Cp <- Cp[best.cp] Cp <- Cp[which(Cp == max(Cp))] best.cp <- which(outs$Cp-outs$size == Cp) model.cp <- outs$which[best.cp, ] ##Write the formula of the winner model formulae <- "Y ~ " cont <- 0 for(i in 2:(ncol(data)-1)){ if(model.cp[i-1]){ if(cont == 0){ formulae <- paste(formulae,names(data)[i]) cont <- 1 } else formulae <- paste(formulae,"+",names(data)[i]) } } ##Print the best model formula formulae ##Fit the best linear model out.best.cp <- lm(formulae,data=data) summary(out.best.cp)