#Multiple Regression (Multiple_regress.txt)

#Y,   the number of people employed;
#X1,  the index;
#X2,  the percentage price deflation;
#X3,  the GNP in millions of dollars;
#X4,  the number of unemployed in thousands;
#X5,  the number of people employed by the military;
#X6,  the number of people over 14;
#X7,  the year.

## Location of the data set. The full path is built with file.path() rather
## than calling setwd(), so running this script does not change the session's
## working directory as a side effect.
## NOTE: this folder is machine-specific; point it at wherever the file lives.
data_dir <- "C:/UConn/242_3115/R"
data_file <- file.path(data_dir, "Multiple_regress.txt")

## Load the data set and name its columns.
## read.table() already returns a data frame, so no conversion is needed.
data <- read.table(data_file)
col_names <- c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "Y")
if (ncol(data) != length(col_names)) {
  stop(
    "Expected ", length(col_names), " columns in ", data_file,
    " but found ", ncol(data),
    call. = FALSE
  )
}
names(data) <- col_names

## Baseline model: regress Y on X2 through X7 (X1, the index, is left out)
fit <- lm(Y ~ X2 + X3 + X4 + X5 + X6 + X7, data = data)

## Show the model summary for the baseline fit
summary(fit)

## Variance inflation factors of the baseline fit (vif() comes from car)
library(car)
vif(fit)

#######################################
## Solutions
#######################################

## Standardize the covariates: scale() centers each column AND divides it by
## its standard deviation (it does more than centering).
## NOTE(review): shifting/rescaling single predictors should leave the VIFs of
## a model with an intercept unchanged -- compare vif(fit2) with vif(fit).
covariates <- c("X2", "X3", "X4", "X5", "X6", "X7")
data2 <- data.frame(Y = data$Y, scale(data[, covariates]))

## Refit the same six-covariate model on the standardized data
fit2 <- lm(Y ~ X2 + X3 + X4 + X5 + X6 + X7, data = data2)

## Show the model summary for the standardized fit
summary(fit2)

## Variance inflation factors of the standardized fit
vif(fit2)

## If there is no improvement, drop the non-significant variables
## (this can be done in either the data or the data2 analysis).

## Reduced model: keep only X4, X5 and X7 in order to reduce the VIFs.
## Note that this overwrites the six-covariate fit2.
fit2 <- lm(Y ~ X4 + X5 + X7, data = data2)

## Show the model summary for the reduced fit
summary(fit2)

## Variance inflation factors of the reduced fit
vif(fit2)

#######################################
## For the Condition Index calculation
#######################################
library(perturb)

## Build the covariates-only data set: drop the first column (X1) and the
## last column (Y) of data
drop_cols <- c(1, ncol(data))
cov.data <- data[, -drop_cols]

## Condition indexes via colldiag() on the covariates, with scaling and
## centering switched off.
## NOTE(review): add.intercept = FALSE presumably means no intercept column is
## added to the covariates, so the intercept is not accounted for; with
## add.intercept = TRUE it would be -- confirm against ?colldiag.
colldiag(cov.data, scale = FALSE, center = FALSE, add.intercept = FALSE)