Neural Network IV with Simulated Data
Some simulated data, borrowed from this post.
# library for generation multivariate distributions
library(MASS)
# always use the same random numbers
set.seed(123)
# the means and errors for the multivariate distribution
MUs <- c(10,15)
SIGMAs <- matrix(c(1, 0.5,
0.5, 2 ),
nrow=2,
ncol=2 )
# the multivariate distribution
mdist <- mvrnorm(n = 1000,
mu = MUs,
Sigma = SIGMAs)
# create unobserved covariate
c <- mdist[ , 2]
# create the instrumental variable
z <- rnorm(1000)
# create observed variable
x <- mdist[ , 1] + z
# constuct the dependent variable
y <- 1 + x + c + rnorm(1000, 0, 0.5)
Check if the variables behave as expected
cor(x, c)
## [1] 0.1986307
cor(z, c)
## [1] -0.0120011
Let’s look at the true model.
lm(y ~ x + c)
##
## Call:
## lm(formula = y ~ x + c)
##
## Coefficients:
## (Intercept) x c
## 0.9079 1.0156 0.9955
Estimate using OLS.
lm(y ~ x)
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 13.787 1.226
Now using instrumental variables.
library(AER)
ivreg(y ~ x | z)
##
## Call:
## ivreg(formula = y ~ x | z)
##
## Coefficients:
## (Intercept) x
## 15.949 1.008
Now using the lm
function.
# first stage
lms1 <- lm(x ~ z)
# manually obtain fitted values
lmXhat <- lms1$coefficients[2]*z + lms1$coefficients[1]
# estimate second stage using Xhat
(lms2 <- lm(y ~ lmXhat) )
##
## Call:
## lm(formula = y ~ lmXhat)
##
## Coefficients:
## (Intercept) lmXhat
## 15.949 1.008
Now we can do the same using a neural network
library(nnet)
# first stage
nns1 <- nnet(x ~ z, size=0, skip=TRUE, linout=TRUE)
## # weights: 2
## initial value 100832.781903
## final value 924.804075
## converged
# manually obtain fitted values
nnXhat <- nns1$fitted.values
# estimate second stage using Xhat
nns2 <- nnet(y ~ nnXhat, size=0, skip=TRUE, linout=TRUE)
## # weights: 2
## initial value 528901.038261
## final value 4019.409973
## converged
summary(nns2)
## a 1-0-1 network with 2 weights
## options were - skip-layer connections linear output units
## b->o i1->o
## 15.95 1.01
Compare output.
lms2$coefficients - nns2$wts
## (Intercept) lmXhat
## -1.749729e-10 -2.814797e-09
Compare estimates.
library(ggplot2)
qplot(lms2$fitted.values - nns2$fitted.values)
Now redo using a non-linearity