rm(list = ls(all=TRUE)) # clean the environment setwd('C:/Users/qjhws/Desktop/Classes/512TA/hw1') # change the working directory data <- read.table("card.txt") names(data)[26] <- "wage" names(data)[4] <- "educ" names(data)[29] <- "exper" names(data)[24] <- "south" names(data)[22] <- "black" names(data)[2] <- "nearc2" names(data)[3] <- "nearc4" names(data)[6] <- "fatheduc" names(data)[7] <- "motheduc" attach(data) # The data is attached to the R search path. # so we can work directly on variables exper_2 = exper^2 # part a-------------------------------------------------------------------------------------------- x <- cbind(rep(1,nrow(data)),educ, exper, exper_2,south,black ) y <- log(wage) # OLS Estimation bols <- solve((t(x)%*%x),(t(x)%*%y)) #ols estimators n <- nrow(x) k <- ncol(x) e <- y-x%*%bols # residual s2 <- (t(e)%*%e)/(n-k) vb <- s2[1,1]*solve(t(x)%*%x) se <- sqrt(diag(vb)) tval=bols/se tt <- data.frame(col1=bols, col2=se, col3=tval) colnames(tt) <- c("estimate","s.e.","t") rownames(tt) <- c("constant", "educ", "exper","exper_2","south","black") print(tt) # part b-------------------------------------------------------------------------------------------- # define the log likelihood function ll <-function(theta,y,x){ n<-nrow(x) k<-ncol(x) beta<-theta[1:k] sigma2<-theta[k+1] e<-y-x%*%beta logl<- -.5*n*log(2*pi)-.5*n*log(sigma2)- ((t(e)%*%e)/(2*sigma2)) return(-logl) } x <- cbind(rep(1,nrow(data)),educ, exper, exper_2,south,black ) y <- log(wage) result <-optim(c(1,1,1,1,1,1,1),ll,method="BFGS",hessian=T,y=y,x=x) # The output result is a collection of the following objects: # result$par vector of parameter estimates # result$value value of the ll at convergence # result$counts number of function evaluations # result$convergence "0"= successful convergence, "1"= maximum number of iterations reached # result$hessian matrix se <- sqrt(diag(solve(result$hessian))) tval=result$par/se tt <- data.frame(col1=result$par, col2=se, col3=tval) colnames(tt) <- c("estimate","s.e.","t") rownames(tt) <- c("constant", "educ", "exper","exper_2","south","black","sigma") print(tt) # part c-------------------------------------------------------------------------------------------- # first stage, regress endogenous variable on IV and all other exogenous variables x1 <- educ x <- cbind(rep(1,nrow(data)), educ, exper, exper_2,south,black) iv <- cbind(rep(1,nrow(data)), nearc4,exper, exper_2,south,black) firstOLS <- solve((t(iv)%*%iv),(t(iv)%*%x1)) fitted <- iv%*%firstOLS # second stage, regress wage on fitted and all other exogenous variables x2 <- cbind(rep(1,nrow(data)),fitted, exper, exper_2,south,black ) y <- log(wage) bols <- solve((t(x2)%*%x2),(t(x2)%*%y)) #ols estimators n <- nrow(x) k <- ncol(x) e <- y-x%*%bols # residual s2 <- (t(e)%*%e)/(n-k) vb <- s2[1,1]*solve((t(x)%*%iv)%*%solve(t(iv)%*%iv)%*%(t(iv)%*%x)) se <- sqrt(diag(vb)) tval=bols/se tt <- data.frame(col1=bols, col2=se, col3=tval) colnames(tt) <- c("estimate","s.e.","t") rownames(tt) <- c("constant", "educ", "exper","exper_2","south","black") print(tt) # part d-------------------------------------------------------------------------------------------- # first stage, regress endogenous variable on IV and all other exogenous variables x1 <- educ x <- cbind(rep(1,nrow(data)), educ, exper, exper_2,south,black) iv <- cbind(rep(1,nrow(data)),nearc2, nearc4, fatheduc, motheduc, exper, exper_2,south,black) firstOLS <- solve((t(iv)%*%iv),(t(iv)%*%x1)) fitted <- iv%*%firstOLS # second stage, regress wage on fitted and all other exogenous variables x2 <- cbind(rep(1,nrow(data)),fitted, exper, exper_2,south,black ) y <- log(wage) bols <- solve((t(x2)%*%x2),(t(x2)%*%y)) #ols estimators n <- nrow(x) k <- ncol(x) e <- y-x%*%bols # residual s2 <- (t(e)%*%e)/(n-k) vb <- s2[1,1]*solve((t(x)%*%iv)%*%solve(t(iv)%*%iv)%*%(t(iv)%*%x)) se <- sqrt(diag(vb)) tval=bols/se tt <- data.frame(col1=bols, col2=se, col3=tval) colnames(tt) <- c("estimate","s.e.","t") rownames(tt) <- c("constant", "educ", "exper","exper_2","south","black") print(tt) # part e-------------------------------------------------------------------------------------------- # define the GMM function # Exactly identified mygmm <- function(Y, X, IV){ bgmm <- solve((t(IV)%*%X),(t(IV)%*%Y)) } x <- cbind(rep(1,nrow(data)),educ, exper, exper_2, south, black) y <- log(wage) iv <- cbind(rep(1,nrow(data)),nearc4, exper, exper_2, south, black) n <- nrow(x) k <- ncol(x) beta <- mygmm(y,x,iv) e <- y-x%*%mygmm(y,x,iv) # residual var_mom <- matrix(((t(e)%*%e)/n) , nrow = ncol(iv), ncol = ncol(iv)) * ( t(iv) %*% iv) var_beta <- solve ( (t(x) %*% iv) %*% solve(var_mom) %*% (t(iv) %*% x) ) se <- sqrt(diag(var_beta)) tval=beta/se tt <- data.frame(col1=beta, col2=se, col3=tval) colnames(tt) <- c("estimate","s.e.","t") rownames(tt) <- c("constant", "educ", "exper","exper_2","south","black") print(tt) # part e, more than one IV mygmm <- function(Y, X, IV){ first_stage <- solve((t(X)%*%IV)%*%(t(IV)%*%X))%*%((t(X)%*%IV)%*%(t(IV)%*%Y)) n <- nrow(X) k <- ncol(X) e <- y-x%*%first_stage # residual var_mom <- matrix(((t(e)%*%e)/n) , nrow = ncol(IV), ncol = ncol(IV)) * ( t(IV) %*% IV) eff_gmm <- solve( (t(X)%*%IV ) %*% solve(var_mom) %*% ( t(IV)%*%X) ) %*% (t(X)%*%IV) %*% solve(var_mom) %*% (t(IV)%*%Y) return(list(eff_gmm,var_mom)) } x <- cbind(rep(1,nrow(data)),educ, exper, exper_2, south, black) y <- log(wage) iv <- cbind(rep(1,nrow(data)),nearc2, nearc4, fatheduc, motheduc, exper, exper_2,south,black) result <- mygmm(y,x,iv) var_beta <- solve ( (t(x) %*% iv) %*% solve(result[[2]]) %*% (t(iv) %*% x) ) se <- sqrt(diag(var_beta)) tval=result[[1]]/se tt <- data.frame(col1=result[[1]], col2=se, col3=tval) colnames(tt) <- c("estimate","s.e.","t") rownames(tt) <- c("constant", "educ", "exper","exper_2","south","black") print(tt)