# Multiple regression (Multiple_regress.txt) ----
#
# Variables, as described by the original author of this script:
#   Y,  the number of people employed;
#   X1, the index;
#   X2, the percentage price deflation;
#   X3, the GNP in millions of dollars;
#   X4, the number of unemployed in thousands;
#   X5, the number of people employed by the military;
#   X6, the number of people over 14;
#   X7, the year.

## Define the directory where the data set is.
# NOTE(review): this absolute path is machine-specific; both read.table() and
# source("SASAnova.R") below are resolved relative to it. Adjust it before
# running the script on another machine.
setwd("/run/media/marcos/OS/UFMG/Disciplinas/mlg_mestrado/slides/analise de residuos/R/")

## Load the data set and prepare it for use.
# The file is read without a header row, so the eight columns are named here.
data <- read.table("Multiple_regress.txt")
names(data) <- c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "Y")
data <- as.data.frame(data)

## Fit a multiple linear regression with the data.
# X1 (the index column) is not used as a predictor.
fit <- lm(Y ~ X2 + X3 + X4 + X5 + X6 + X7, data = data)
# Design matrix; its column count is the number of estimated coefficients.
X <- model.matrix(fit)

## Print the summary of the fitted model.
summary(fit)

# Confidence intervals for the model parameters.
confint(fit, level = 0.95)

## Normal Q-Q plot of the residuals.
qqnorm(residuals(fit))

## Plot residuals against predicted values to check the model assumptions.
plot(x = predict(fit), y = residuals(fit))

## Shapiro-Wilk normality test for the residuals.
shapiro.test(residuals(fit))

# Other useful functions ----
coefficients(fit)           # model coefficients
confint(fit, level = 0.95)  # CIs for model parameters
fitted(fit)                 # predicted values
residuals(fit)              # residuals

# Standardized residuals: raw residuals divided by the residual standard
# error, sqrt(SSE / (n - p)), where p = ncol(X). The original expression was
# missing the parenthesis that closes sqrt().
residuals(fit) /
  sqrt(sum(residuals(fit)^2) / (length(residuals(fit)) - ncol(X)))

rstandard(fit)           # internally studentized residuals
rstudent(fit)            # externally studentized residuals
anova(fit)               # anova table
vcov(fit)                # covariance matrix for model parameters
influence(fit)           # regression diagnostics
influence.measures(fit)  # table with the main influence measures
hatvalues(fit)           # hat matrix diagonals
covratio(fit)            # covariance ratios
dffits(fit)              # DFFITS
dfbetas(fit)             # DFBETAS
cooks.distance(fit)      # Cook's distance

## SAS-style anova table.
# SASanova() is expected to be defined by SASAnova.R in the working directory.
source("SASAnova.R")
SASanova(fit)

## Variance inflation factors.
library(car)
vif(fit)

## Breusch-Pagan test for heteroscedasticity.
library(lmtest)
bptest(fit)

## White test for heteroscedasticity.
library(bstats)
white.test(fit)