####################################### # Stochastic processes ####################################### # Multiple realizations of a single underlying process plot(arima.sim(list(ar=0.8), 200)) lines(arima.sim(list(ar=0.8), 200), col='red') lines(arima.sim(list(ar=0.8), 200), col='blue') lines(arima.sim(list(ar=0.8), 200), col='green') # White noise w <- rnorm(100) plot.ts(w) # Random walk x.rw <- cumsum(w) plot.ts(x.rw) # Moving average x.ma <- rep(0, 100) x.ma[1] <- w[1] for (i in 2:100) { x.ma[i] <- 0.7 * w[i - 1] + w[i] } plot.ts(x.ma) # Autoregressive x.ar <- rep(0, 100) for (i in 2:100) { x.ar[i] <- 0.7 * x.ar[i - 1] + w[i] } plot.ts(x.ar) # Plot all four together par(mfrow=c(2,2)) plot.ts(w, main='White noise') plot.ts(x.rw, main='Random walk') plot.ts(x.ma, main='Moving Average') plot.ts(x.ar, main='Autoregressive') ############## # Stationarity ############## # x.ar from above is stationary. But if we change the # AR coefficient from 0.7 to 1.1, it isn't... x.ar.2 <- rep(0, 100) for (i in 2:100) { x.ar.2[i] <- 1.02 * x.ar.2[i - 1] + w[i] } par(mfrow=c(1,2)) plot.ts(x.ar, main=expression(phi==0.7~(stationary))) # In this case, the expected value of x depends on the time plot.ts(x.ar.2, main=expression(phi==1.01~(not~stationary))) par(mfrow=c(1,2)) plot.ts(x.ma) # In this case, the variance of x depends on t plot.ts(x.ma * 1:100) # A real dataset, with an obvious trend t <- time(co2) co2 <- as.numeric(co2) par(mfrow=c(1,1)) plot(t, co2, ty='l', main=expression('Mauna Loa CO'[2])) ############ # Detrending ############ plot(t, co2, ty='l', main=expression('Mauna Loa CO'[2])) # Estimate a linear trend trend.linear <- lm(co2 ~ t) abline(trend.linear, col='red') # Pretty decent...still, doesn't look totally appropriate....it # doesn't catch the subtle dip in the middle of the series. Let's # try a local regression (LOESS) trend.loess <- loess(co2 ~ t) lines(trend.loess$x, trend.loess$fit, col='blue') # That's better. co2.dt <- co2 - trend.loess$fit plot(t, co2.dt, ty='l') # Can also remove trend by differencing plot.ts(diff(co2)) ################# # Autocorrelation ################# par(mfrow=c(2,1)) plot(t, co2.dt, ty='l') acf(co2.dt, lag.max=40) # Look at another dataset (Annual flow of the Nile River) par(mfrow=c(2,1)) plot(Nile) acf(Nile, lag.max=40) # Compare ACF and PACF acf(Nile, lag.max=40) pacf(Nile, lag.max=40) ################ # Fitting models ################ # Use a made-up dataset, so we know the parameters. # Simulate a realization from an AR(4) process x <- arima.sim(model=list(ar=c(0.6, 0.3, -0.2, 0.2)), n=1000) par(mfrow=c(1,1)) plot(x) par(mfrow=c(2,1)) acf(x, lag.max=40) pacf(x, lag.max=40) # ACF trails off, PACF cuts off after lag 4, as expected. # Fit an AR(4) model (mod.ar.4 <- arima(x, order=c(4, 0, 0))) par(mfrow=c(1, 1)) acf(mod.ar.4$resid, lag.max=40) # Try some other AR(p) models, and plot the AIC as a function of p AICs <- rep(0, 10) for (p in 1:10) { AICs[p] <- arima(x, c(p, 0, 0))$aic } plot(AICs) # Simulate another model: This time, it's an ARMA(2, 3) with a trend x.1 <- arima.sim(list(ar=c(0.6), ma=c(0.6, 0.25, 0.1)), 1000) x.1 <- x.1 + (1:1000 / 75) par(mfrow=c(2,1)) plot(x.1) plot(diff(x.1)) acf(diff(x.1), lag=40) pacf(diff(x.1), lag=40) # Both kind of dribble off...but we know there's one difference in # the model, making it an ARIMA(p, 1, q). (mod.010 <- arima(x.1, c(0, 1, 0))) (mod.110 <- arima(x.1, c(1, 1, 0))) (mod.011 <- arima(x.1, c(0, 1, 1))) (mod.111 <- arima(x.1, c(1, 1, 1))) (mod.112 <- arima(x.1, c(1, 1, 2))) (mod.212 <- arima(x.1, c(2, 1, 2))) (mod.113 <- arima(x.1, c(1, 1, 3))) # This is the best (mod.213 <- arima(x.1, c(2, 1, 3))) (mod.312 <- arima(x.1, c(3, 1, 2))) # Look at the autocorrelation of the residuals acf(mod.113$resid, lag.max=40) pacf(mod.113$resid, lag.max=40) # Try to predict the future plot(900:1000, x.1[900:1000], ty='l', xlim=c(900, 1050)) forecast <- predict(mod.113, 50) points(forecast$pred, pch=3, col='red') lines(forecast$pred + forecast$se, lty=3, col='red') lines(forecast$pred - forecast$se, lty=3, col='red') #################### # Fourier Transforms #################### plot(sunspots) s <- as.numeric(sunspots) N <- length(s) s.ft <- fft(s) plot(s.ft, ty='l') # That looks kinda weird--note that it is plotted on the complex # plane. The Fourier transform is a complex-valued function # (it has both real and imaginary parts). Plot the absolute value: plot(abs(s.ft), ty='l') # And with a log-scale on the y-axis. plot(abs(s.ft), ty='l', log='y') # The Fourier transform is reversible plot.ts(s) lines(Re(fft(s.ft, inverse=TRUE) / N), lty=3, col='red') ###################################### # Spectral density and the periodogram ###################################### # Power Spectral Density pg.sun <- spectrum(sunspots) # Usually plotted on log-log axes. Like this, area under # the curve is proportional to variance. plot(pg.sun$freq, pg.sun$spec, ty='l', log='xy', main='Sunspot Power Spectrum', xlab='Frequency (1/year)', ylab='Spectrum') peak <- locator() # Click on the highest peak on the periodogram peak$x^-1 # The sunspot cycle has a period of about 11 years