--- title: "Exploration during development" output: rmarkdown::html_vignette description: | Exploration of different methods during package development. vignette: > %\VignetteIndexEntry{Exploration during development} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE) library(fastcpd) ``` # Exploration of the QMLE method for ARMA models ```{r qmle} qmle <- function(data, theta, p = 1, q = 1) { if (nrow(data) < max(p, q) + 1) { return(0) } variance_term <- rep(0, nrow(data)) for (i in (max(p, q) + 1):nrow(data)) { variance_term[i] <- data[i] - theta[1:p] %*% data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)] } (log(2 * pi) + log(theta[p + q + 1])) * (nrow(data) - q) / 2 + sum(variance_term^2) / (2 * theta[p + q + 1]) } qmle_gradient <- function(data, theta, p = 1, q = 1) { if (nrow(data) < max(p, q) + 1) { return(rep(1, length(theta))) } variance_term <- rep(0, nrow(data)) for (i in (max(p, q) + 1):nrow(data)) { variance_term[i] <- data[i] - theta[1:p] %*% data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)] } phi_coefficient <- matrix(0, nrow(data), p) psi_coefficient <- matrix(0, nrow(data), q) for (i in (max(p, q) + 1):nrow(data)) { phi_coefficient[i, ] <- -data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ] } for (i in (q + 1):nrow(data)) { psi_coefficient[i, ] <- -variance_term[(i - 1):(i - q)] - theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ] } c( phi_coefficient[nrow(data), ] * variance_term[nrow(data)] / theta[p + q + 1], psi_coefficient[nrow(data), ] * variance_term[nrow(data)] / theta[p + q + 1], 1 / 2 / theta[p + q + 1] - variance_term[nrow(data)]^2 / (2 * theta[p + q + 1]^2) ) } qmle_gradient_sum <- function(data, theta, p = 1, q = 1) { if (nrow(data) < max(p, q) + 1) { return(rep(1, length(theta))) } variance_term <- rep(0, nrow(data)) for (i in (max(p, q) + 1):nrow(data)) { variance_term[i] <- data[i] - theta[1:p] %*% data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)] } phi_coefficient <- matrix(0, nrow(data), p) psi_coefficient <- matrix(0, nrow(data), q) for (i in (max(p, q) + 1):nrow(data)) { phi_coefficient[i, ] <- -data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ] } for (i in (q + 1):nrow(data)) { psi_coefficient[i, ] <- -variance_term[(i - 1):(i - q)] - theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ] } c( crossprod(phi_coefficient, variance_term) / theta[p + q + 1], crossprod(psi_coefficient, variance_term) / theta[p + q + 1], (nrow(data) - q) / 2 / theta[p + q + 1] - crossprod(variance_term) / 2 / theta[p + q + 1]^2 ) } qmle_hessian <- function(data, theta, p = 1, q = 1) { if (nrow(data) < max(p, q) + 1) { return(diag(length(theta))) } variance_term <- rep(0, nrow(data)) for (i in (max(p, q) + 1):nrow(data)) { variance_term[i] <- data[i] - theta[1:p] %*% data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)] } phi_coefficient <- matrix(0, nrow(data), p) psi_coefficient <- matrix(0, nrow(data), q) for (i in (max(p, q) + 1):nrow(data)) { phi_coefficient[i, ] <- -data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ] } for (i in (q + 1):nrow(data)) { psi_coefficient[i, ] <- -variance_term[(i - 1):(i - q)] - theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ] } phi_psi_coefficient <- array(0, c(q, p, nrow(data))) psi_psi_coefficient <- array(0, c(q, q, nrow(data))) for (i in (q + 1):nrow(data)) { phi_psi_coefficient[, , i] <- -phi_coefficient[(i - 1):(i - q), ] - rowSums( sweep( phi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE], 3, theta[(p + 1):(p + q)], `*` ), dims = 2 ) psi_psi_coefficient[, , i] <- -psi_coefficient[(i - 1):(i - q), ] - t(psi_coefficient[(i - 1):(i - q), ]) - rowSums( sweep( psi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE], 3, theta[(p + 1):(p + q)], `*` ), dims = 2 ) } hessian <- matrix(0, nrow = p + q + 1, ncol = p + q + 1) hessian[1:p, 1:p] <- crossprod(phi_coefficient[nrow(data), , drop = FALSE]) / theta[p + q + 1] hessian[1:p, (p + 1):(p + q)] <- ( t(phi_psi_coefficient[, , nrow(data)]) * variance_term[nrow(data)] + crossprod( phi_coefficient[nrow(data), , drop = FALSE], psi_coefficient[nrow(data), , drop = FALSE] ) ) / theta[p + q + 1] hessian[(p + 1):(p + q), 1:p] <- t(hessian[1:p, (p + 1):(p + q)]) hessian[1:p, p + q + 1] <- -t(phi_coefficient[nrow(data), ]) * variance_term[nrow(data)] / theta[p + q + 1]^2 hessian[p + q + 1, 1:p] <- t(hessian[1:p, p + q + 1]) hessian[(p + 1):(p + q), (p + 1):(p + q)] <- ( crossprod(psi_coefficient[nrow(data), , drop = FALSE]) + psi_psi_coefficient[, , nrow(data)] * variance_term[nrow(data)] ) / theta[p + q + 1] hessian[(p + 1):(p + q), p + q + 1] <- -t(psi_coefficient[nrow(data), ]) * variance_term[nrow(data)] / theta[p + q + 1]^2 hessian[p + q + 1, (p + 1):(p + q)] <- t(hessian[(p + 1):(p + q), p + q + 1]) hessian[p + q + 1, p + q + 1] <- variance_term[nrow(data)]^2 / theta[p + q + 1]^3 - 1 / 2 / theta[p + q + 1]^2 hessian } qmle_hessian_sum <- function(data, theta, p = 1, q = 1) { if (nrow(data) < max(p, q) + 1) { return(diag(length(theta))) } variance_term <- rep(0, nrow(data)) for (i in (max(p, q) + 1):nrow(data)) { variance_term[i] <- data[i] - theta[1:p] %*% data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)] } phi_coefficient <- matrix(0, nrow(data), p) psi_coefficient <- matrix(0, nrow(data), q) for (i in (max(p, q) + 1):nrow(data)) { phi_coefficient[i, ] <- -data[(i - 1):(i - p)] - theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ] } for (i in (q + 1):nrow(data)) { psi_coefficient[i, ] <- -variance_term[(i - 1):(i - q)] - theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ] } phi_psi_coefficient <- array(0, c(q, p, nrow(data))) psi_psi_coefficient <- array(0, c(q, q, nrow(data))) for (i in (q + 1):nrow(data)) { phi_psi_coefficient[, , i] <- -phi_coefficient[(i - 1):(i - q), ] - rowSums( sweep( phi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE], 3, theta[(p + 1):(p + q)], `*` ), dims = 2 ) psi_psi_coefficient[, , i] <- -psi_coefficient[(i - 1):(i - q), ] - t(psi_coefficient[(i - 1):(i - q), ]) - rowSums( sweep( psi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE], 3, theta[(p + 1):(p + q)], `*` ), dims = 2 ) } hessian <- matrix(0, nrow = p + q + 1, ncol = p + q + 1) hessian[1:p, 1:p] <- crossprod(phi_coefficient) / theta[p + q + 1] hessian[(p + 1):(p + q), 1:p] <- ( rowSums( sweep( phi_psi_coefficient, 3, variance_term, `*` ), dims = 2 ) + crossprod( psi_coefficient, phi_coefficient ) ) / theta[p + q + 1] hessian[1:p, (p + 1):(p + q)] <- t(hessian[(p + 1):(p + q), 1:p]) hessian[1:p, p + q + 1] <- -crossprod(phi_coefficient, variance_term) / theta[p + q + 1]^2 hessian[p + q + 1, 1:p] <- t(hessian[1:p, p + q + 1]) hessian[(p + 1):(p + q), (p + 1):(p + q)] <- ( crossprod(psi_coefficient) + rowSums( sweep( psi_psi_coefficient, 3, variance_term, `*` ), dims = 2 ) ) / theta[p + q + 1] hessian[(p + 1):(p + q), p + q + 1] <- -crossprod(psi_coefficient, variance_term) / theta[p + q + 1]^2 hessian[p + q + 1, (p + 1):(p + q)] <- t(hessian[(p + 1):(p + q), p + q + 1]) hessian[p + q + 1, p + q + 1] <- crossprod(variance_term) / theta[p + q + 1]^3 - (nrow(data) - q) / 2 / theta[p + q + 1]^2 hessian } # fastcpd arma 1 1 set.seed(1) n <- 600 w <- rnorm(n + 1, 0, 1) x <- rep(0, n + 1) for (i in 1:300) { x[i + 1] <- 0.1 * x[i] + w[i + 1] + 0.1 * w[i] } for (i in 301:n) { x[i + 1] <- 0.3 * x[i] + w[i + 1] + 0.4 * w[i] } result <- fastcpd( formula = ~ . - 1, data = data.frame(x = x[1 + seq_len(n)]), trim = 0, p = 1 + 1 + 1, beta = (1 + 1 + 1 + 1) * log(n) / 2, cost = qmle, cost_gradient = qmle_gradient, cost_hessian = qmle_hessian, cp_only = TRUE, lower = c(rep(-1, 1 + 1), 1e-10), upper = c(rep(1, 1 + 1), Inf), line_search = c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9) ) # fastcpd arma 3 2 set.seed(1) n <- 600 w <- rnorm(n + 2, 0, 1) x <- rep(0, n + 3) for (i in 1:300) { x[i + 3] <- 0.1 * x[i + 2] - 0.2 * x[i + 1] + 0.6 * x[i] + w[i + 2] + 0.1 * w[i + 1] + 0.5 * w[i] } for (i in 301:n) { x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + w[i + 2] + 0.4 * w[i + 1] + 0.1 * w[i] } result <- fastcpd( formula = ~ . - 1, data = data.frame(x = x[3 + seq_len(n)]), trim = 0, p = 3 + 2 + 1, beta = (3 + 2 + 1 + 1) * log(n) / 2, cost = function(data, theta) { qmle(data, theta, 3, 2) }, cost_gradient = function(data, theta) { qmle_gradient(data, theta, 3, 2) }, cost_hessian = function(data, theta) { qmle_hessian(data, theta, 3, 2) }, cp_only = TRUE, lower = c(rep(-1, 3 + 2), 1e-10), upper = c(rep(1, 3 + 2), Inf), line_search = c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9) ) testthat::expect_equal(result@cp_set, c(4, 22, 290)) # hessian check theta_estimate <- rep(0.1, 3 + 2 + 1) testthat::expect_equal( numDeriv::hessian( qmle, theta_estimate, data = matrix(x[3 + seq_len(n)]), p = 3, q = 2 ), qmle_hessian_sum(matrix(x[3 + seq_len(n)]), theta_estimate, 3, 2) ) optim( rep(0.1, 3 + 2 + 1), fn = function(data, theta) { qmle(data, theta, 3, 2) }, data = data, method = "L-BFGS-B", lower = c(rep(-1, 3 + 2), 1e-10), upper = c(rep(1, 3 + 2), Inf), gr = function(data, theta) { qmle_gradient_sum(data, theta, 3, 2) } ) # convergence check x <- arima.sim(list(ar = c(0.1, -0.2, 0.6), ma = c(0.1, 0.5)), n = n + 3) theta_estimate <- rep(0.1, 3 + 2 + 1) data <- matrix(x[3 + seq_len(n)]) qmle_path <- NULL prev_qmle <- 1 curr_qmle <- Inf epochs_num <- 0 while (abs(curr_qmle - prev_qmle) > 1e-5) { prev_qmle <- curr_qmle hessian <- Matrix::nearPD(qmle_hessian_sum(data, theta_estimate, 3, 2))$mat step <- solve( hessian, qmle_gradient_sum(data, theta_estimate, 3, 2) ) # line search lr_choices <- c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9) lr <- lr_choices[which.min( sapply(lr_choices, function(lr) { qmle(data, pmin( pmax(theta_estimate - lr * step, c(rep(-1, 3 + 2), 1e-10)), c(rep(1, 3 + 2), Inf) ), 3, 2) }) )] theta_estimate <- pmin( pmax(theta_estimate - lr * step, c(rep(-1, 3 + 2), 1e-10)), c(rep(1, 3 + 2), Inf) ) curr_qmle <- qmle(data, theta_estimate, 3, 2) cat(epochs_num, curr_qmle, theta_estimate, "\n") qmle_path <- c(qmle_path, curr_qmle) epochs_num <- epochs_num + 1 } ``` # Notes The evaluation of this vignette is set to be `FALSE`. # Appendix: all code snippets ```{r ref.label = knitr::all_labels(), echo = TRUE} ```