Invert SARIMA models to AR
SARIMA
Seasonal Autoregressive Integrated Moving Average, SARIMA or Seasonal ARIMA, is an extension of ARIMA that explicitly supports univariate time series data with a seasonal component.
It adds three new hyperparameters to specify the autoregression (AR), differencing (I) and moving average (MA) for the seasonal component of the series, as well as an additional parameter for the period of the seasonality. The notation for a SARIMA model is specified as ARIMA
An ARIMA
where
Invert SARIMA to ARMA
In this part, we invert SARIAM model to ARMA model.
For autoregression, we have
, for , for .
Then, the AR polynomial of the inverted ARMA model can be written as
For moving average, we have
. for , for .
Then, the MA polynomial of the inverted ARMA model can be written as
By this way, we can get the ARMA
Three model representations for an ARMA model
In this part, we discuss three model representations for a stationary ARMA
- Using the back-shift operator.
This representation is compact and useful in parameter estimation. It is also useful in computing recursively multistep-ahead forecasts of
AR representation.
MA representation.
For the AR and MA representations, we use long division of two polynomials. Given two polynomials
AR representation
Using the result of long division in Eq (
MA representation
Again, using the result of long division in Eq (1), an ARMA(u,v) model can also be written as
Function pi_coefficients
In the function pi_coefficients( ) in the R package gratis, the steps of inverting SARIMA model to AR representation are:
Step 1. Invert SARIMA model to ARMA model.
ARIMA
ARMA(
Step 2. Invert ARMA model to AR model.
Given two polynomials
So, we can obtain the AR representation of ARMA(
step 3. Obtain the AR coefficients.
From the recursion process presented above, we get the value
Test of pi_coefficents
library(M4comp2018)
library(forecast)
library(polynom)
data(M4)
hourly_M4 <- Filter(function(l) l$period == "Hourly", M4)
n_ts <- length(hourly_M4)
nlength <- sapply(hourly_M4, function(lentry){
n <- lentry$n
n
})
ts <- hourly_M4[[300]]
forecast_fun <- function(ts, coef.AR){
# ts: a list contains of x, xx, n, h, period
x <- ts$x
h <- ts$h
tspx <- tsp(x)
coef <- coef.AR
p <- ifelse("intercept" %in% names(coef), length(coef)-1, length(coef))
c <- ifelse("intercept" %in% names(coef), coef[p+1]*(1-sum(coef[1:p])), 0)
fits <- (c + quantmod::Lag(as.vector(x), k = 1:p) %*% coef[1:p]) %>%
as.vector() %>%
ts(frequency = tspx[3], start = tspx[1])
res <- x - fits
y <- x
for (i in 1:h){
predvalue <- c + tail(quantmod::Lag(as.vector(y), k = 0:(p-1)), 1) %*%
coef[1:p]
y_extend <- c(y, predvalue) %>%
ts(frequency = tspx[3], start = tspx[1])
y <- y_extend
}
pred <- tail(y, h)
return(
list(
x = ts$x, n = ts$n, xx= ts$xx, h = ts$h,
period = ts$period, mean = pred,
fitted = fits, residuals = res
)
)
}
mase.fun <- function(x, xx, pred){
freq <- frequency(x)
scaling <- mean(abs(diff(as.vector(x), freq)))
mase <- abs(xx - pred) / scaling
mase
}
pi_coefficients <- function(ar = 0, d = 0L, ma = 0, sar = 0, D = 0L, sma = 0, m = 1L, tol = 1e-10) {
# non-seasonal AR
ar <- polynomial(c(1, -ar)) * polynomial(c(1, -1))^d
# seasonal AR
if (m > 1) {
P <- length(sar)
seasonal_poly <- numeric(m * P)
seasonal_poly[m * seq(P)] <- sar
sar <- polynomial(c(1, -seasonal_poly)) *
polynomial(c(1, rep(0, m - 1), -1))^D
}
else {
sar <- 1
}
# non-seasonal MA
ma <- polynomial(c(1, ma))
# seasonal MA
if (m > 1) {
Q <- length(sma)
seasonal_poly <- numeric(m * Q)
seasonal_poly[m * seq(Q)] <- sma
sma <- polynomial(c(1, seasonal_poly))
}
else {
sma <- 1
}
n <- 500L
theta <- -c(coef(ma * sma))[-1]
if (length(theta) == 0L) {
theta <- 0
}
phi <- -c(coef(ar * sar)[-1], numeric(n))
q <- length(theta)
pie <- c(numeric(q), 1, numeric(n))
for (j in seq(n))
pie[j + q + 1L] <- -phi[j] + sum(theta * pie[(q:1L) + j])
pie <- pie[(0L:n) + q + 1L]
# Return up to last element greater than tol
maxj <- max(which(abs(pie) > tol))
pie <- head(pie, maxj)
return(-pie[-1])
}
### 1. ARMA model (include mean)
# ARMA
p <- 2
q <- 2
fit_arma <- Arima(ts$x, order = c(p, 0, q), seasonal = c(0, 0, 0),
method = "ML")
forec_arma <- forecast(fit_arma, h = ts$h)
mase_arma <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_arma$mean)
mean(mase_arma)
# invert ARMA to AR
frequency <- frequency(ts$x)
phi <- fit_arma$coef[1:p]
theta <- fit_arma$coef[(p+1):(p+q)]
ar <- pi_coefficients(ar = phi, d = 0L, ma = theta, sar = 0, D = 0L,
sma = 0, m = frequency)
ar <- 'names<-' (c(ar, tail(fit_arma$coef, 1)),
c(paste("ar", sep = "", seq(length(ar))), "intercept"))
forec_ar <- forecast_fun(ts, coef.AR = ar)
mase_ar <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_ar$mean)
mean(mase_ar)
all.equal(forec_arma$mean, forec_ar$mean)
### 2. ARMA model (no mean)
# ARMA
p <- 2
q <- 2
fit_arma <- Arima(ts$x, order = c(p, 0, q), seasonal = c(0, 0, 0),
method = "ML", include.constant = FALSE)
forec_arma <- forecast(fit_arma, h = ts$h)
mase_arma <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_arma$mean)
mean(mase_arma)
# invert ARMA to AR
frequency <- frequency(ts$x)
phi <- fit_arma$coef[1:p]
theta <- fit_arma$coef[(p+1):(p+q)]
ar <- pi_coefficients(ar = phi, d = 0L, ma = theta, sar = 0, D = 0L,
sma = 0, m = frequency)
ar <- 'names<-' (ar, paste("ar", sep = "", seq(length(ar))))
forec_ar <- forecast_fun(ts, coef.AR = ar)
mase_ar <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_ar$mean)
mean(mase_ar)
all.equal(forec_arma$mean, forec_ar$mean)
### 3. ARIMA model (d=1, no mean and no drift)
# ARIMA
p <- 2
q <- 1
d <- 1
fit_arima <- Arima(ts$x, order = c(p, d, q), seasonal = c(0, 0, 0),
include.drift = FALSE, method = "ML")
forec_arima <- forecast(fit_arima, h = ts$h)
mase_arima <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_arima$mean)
mean(mase_arima)
# invert ARIMA to AR
frequency <- frequency(ts$x)
phi <- fit_arima$coef[1:p]
theta <- fit_arima$coef[(p+1):(p+q)]
ar <- pi_coefficients(ar = phi, d = d, ma = theta, sar = 0, D = 0L,
sma = 0, m = frequency)
ar <- 'names<-' (ar, paste("ar", sep = "", seq(length(ar))))
forec_ar <- forecast_fun(ts, coef.AR = ar)
mase_ar <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_ar$mean)
mean(mase_ar)
all.equal(forec_arima$mean, forec_ar$mean)
### 4. SARIMA model (d=D=0, include mean)
# SARIMA
p <- 2
q <- 1
d <- 0
P <- 1
Q <- 1
D <- 0
fit_sarima <- Arima(ts$x, order = c(p, d, q), seasonal = c(P, D, Q),
method = "ML")
forec_sarima <- forecast(fit_sarima, h = ts$h)
mase_sarima <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_sarima$mean)
mean(mase_sarima)
# invert SARIMA to AR
phi <- fit_sarima$coef[1:p]
theta <- fit_sarima$coef[(p+1):(p+q)]
Phi <- fit_sarima$coef[(p+q+1):(p+q+P)]
Theta <- fit_sarima$coef[(p+q+P+1):(p+q+P+Q)]
ar <- pi_coefficients(ar = phi, d = d, ma = theta, sar = Phi, D = D,
sma = Theta, m = frequency)
ar <- 'names<-' (c(ar, tail(fit_sarima$coef, 1)), c(paste("ar", sep = "", seq(length(ar))), "intercept"))
forec_ar <- forecast_fun(ts, coef.AR = ar)
mase_ar <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_ar$mean)
mean(mase_ar)
all.equal(forec_sarima$mean, forec_ar$mean)
### 5. SARIMA model (d+D>0, no mean and no drift)
# SARIMA
p <- 2
q <- 1
d <- 0
P <- 1
Q <- 1
D <- 1
fit_sarima <- Arima(ts$x, order = c(p, d, q), seasonal = c(P, D, Q), method = "ML")
forec_sarima <- forecast(fit_sarima, h = ts$h)
mase_sarima <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_sarima$mean)
mean(mase_sarima)
# invert SARIMA to AR
phi <- fit_sarima$coef[1:p]
theta <- fit_sarima$coef[(p+1):(p+q)]
Phi <- fit_sarima$coef[(p+q+1):(p+q+P)]
Theta <- fit_sarima$coef[(p+q+P+1):(p+q+P+Q)]
ar <- pi_coefficients(ar = phi, d = d, ma = theta, sar = Phi, D = D,
sma = Theta, m = 24)
ar <- 'names<-' (ar, paste("ar", sep = "", seq(length(ar))))
forec_ar <- forecast_fun(ts, coef.AR = ar)
mase_ar <- mase.fun(x = ts$x, xx = ts$xx, pred = forec_ar$mean)
mean(mase_ar)
all.equal(forec_sarima$mean, forec_ar$mean)