############################################################################################ ## Here are the contraceptive use data from page 46 of the lecture notes (and from the Stata handout), ## showing the distribution of 1607 currently married and fecund women interviewed in the Fiji ## Fertility Survey, according to age, education, desire for more children and current use of contraception. ############################################################################################ ############################################################################################ ## The dataset is also available in the format used in the Stata handout. This version has 32 ## rows corresponding to all possible covariate and response patterns, and includes a weight ## indicating the frequency of each combination. The file has 5 columns with numeric codes: ## ## age (four groups, 1=<25, 2=25-29, 3=30-39 and 4=40-49), ## education (0=none, 1=some), ## desire for more children (0=more, 1=no more), ## contraceptive use (0=no, 1=yes), and ## frequency (number of cases in this category). ############################################################################################ cuse <- read.table("http://data.princeton.edu/wws509/datasets/cuse.dat", header=TRUE) cuse$noMore <- cuse$wantsMore == "no" cuse$hiEduc <- cuse$education == "high" lrfit <- glm( cbind(using,notUsing) ~ age + hiEduc + noMore, family=binomial, data = cuse) #lrfit <- glm( cbind(using,notUsing) ~ age * noMore + hiEduc , family=binomial, data = cuse) lrfit 1-pchisq(deviance(lrfit),lrfit$df.residual) residuals(lrfit,type="pearson") 1-pchisq(sum(residuals(lrfit,type="pearson")^2),lrfit$df.residu) ## X <- model.matrix(~age*noMore+hiEduc,data=cuse) X <- model.matrix(lrfit) #w <-(cuse$notUsing+cuse$using)*fitted(lrfit)*(1-fitted(lrfit)) w = lrfit$weights W <- diag(w) H <- sqrt(W) %*% X %*% solve(t(X) %*% W %*% X) %*% t(X) %*% sqrt(W) h <- diag(H) h ##Residuos rd <- residuals(lrfit,type="deviance") fi <- deviance(lrfit)/(nrow(cuse)-ncol(X)) td <- rd/sqrt(fi*(1-h)) td rp <- residuals(lrfit,type="pearson") ts <- rp/sqrt(fi *(1-h)) ts ##Cook's LD <- h * ts^2/(1-h) LD ##Influencia B <- diag(rp) %*% H %*% diag(rp) Cmax <- eigen(B)$val[1] amax <- abs(eigen(B)$vec[,1]) plot(1:nrow(cuse),amax) ## j-esima observbacao Cj <- 2*h*rp plot(1:nrow(cuse),Cj/sum(Cj)) abline(h=2*abs(mean(Cj))) abline(h=-2*abs(mean(Cj))) ## Age i = 5 Xi <- as.matrix(X[,i]) v <- sqrt(W) %*% Xi - sqrt(W) %*% X[,-i] %*% solve(t(X[,-i]) %*% W %*% X[,-i]) %*% t(X[,-i]) %*% W %*% Xi amax <- abs(v*rp/sqrt(Cmax)) plot(1:nrow(cuse),amax)