# Multiple regression (Multiple_regress.txt)
#
# Variables:
#   Y   the number of people employed
#   X1  the index
#   X2  the percentage price deflation
#   X3  the GNP in millions of dollars
#   X4  the number of unemployed in thousands
#   X5  the number of people employed by the military
#   X6  the number of people over 14
#   X7  the year
#
# The script fits the full model, inspects variance inflation factors (VIF),
# tries two remedies for collinearity (standardizing the covariates, then
# dropping non-significant covariates) and finally computes condition
# indexes. Results are wrapped in print() so they also appear when the file
# is run through source(), which does not auto-print top-level values.

library(car)      # vif()
library(perturb)  # colldiag()

## Define the directory where the data set is
## (a path variable is used instead of setwd() so the session's working
## directory is left untouched)
data_dir <- "C:/UConn/242_3115/R"

## Load the data set and prepare for use
data <- read.table(file.path(data_dir, "Multiple_regress.txt"))
col_names <- c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "Y")
# Fail early if the file does not have the expected layout.
stopifnot(ncol(data) == length(col_names))
names(data) <- col_names

## Fit a multiple linear regression with the data
fit <- lm(Y ~ X2 + X3 + X4 + X5 + X6 + X7, data = data)

## Print the summary of the output of fit
print(summary(fit))

## For VIF
print(vif(fit))

#######################################
## Solutions
#######################################

## Center and scale (standardize) the covariates.
## Note: in a model with an intercept the VIFs do not change under
## centering or rescaling of the covariates, so vif(fit2) is expected to
## match vif(fit); only the coefficient scale changes.
covariates <- c("X2", "X3", "X4", "X5", "X6", "X7")
data2 <- data.frame(Y = data$Y, scale(data[, covariates]))

## Fit a multiple linear regression with the standardized data
fit2 <- lm(Y ~ X2 + X3 + X4 + X5 + X6 + X7, data = data2)

## Print the summary of the output of fit2
print(summary(fit2))

## For VIF
print(vif(fit2))

## If there is no improvement, delete the non-significant variables in the
## data or data2 analysis.

## Delete non-significant variables to reduce the VIFs
## (this deliberately replaces the full standardized fit stored in fit2)
fit2 <- lm(Y ~ X4 + X5 + X7, data = data2)

## Print the summary of the output of the reduced fit
print(summary(fit2))

## VIF
print(vif(fit2))

#######################################
## For the Condition Index calculation
#######################################

## Generate the covariates data set: drop the first column (X1, the index)
## and the last column (Y, the response).
cov.data <- data[, -c(1, ncol(data))]

## Use the colldiag function to get the Condition Index.
## add.intercept = TRUE would add an intercept column to the covariates
## before the decomposition; with FALSE the diagnostics use the raw
## (unscaled, uncentered) covariates only.
print(colldiag(cov.data, scale = FALSE, center = FALSE, add.intercept = FALSE))