
The fastcpd (fast change point detection) package is a fast implementation of change point detection methods in R/Python.
# install.packages("pak")
pak::pak("doccstat/fastcpd")
# or install from CRAN
install.packages("fastcpd")
# python -m ensurepip --upgrade
pip install .
# or install from PyPI
pip install fastcpd
# Simulate an AR(3) series of length n whose coefficients switch after
# observation 600, then locate the change point with fastcpd.
set.seed(1)
n <- 1000
change_at <- 600
phi_before <- c(0.6, -0.2, 0.1)
phi_after <- c(0.3, 0.4, 0.2)
# The three leading zeros are the initial values of the recursion.
x <- rep(0, n + 3)
for (i in seq_len(n)) {
  phi <- if (i <= change_at) phi_before else phi_after
  x[i + 3] <- phi[1] * x[i + 2] + phi[2] * x[i + 1] + phi[3] * x[i] +
    rnorm(1, mean = 0, sd = 3)
}
# Drop the three initial values so that exactly n observations are fitted.
result <- fastcpd::fastcpd.ar(
  data = x[3 + seq_len(n)],
  order = 3,
  r.progress = FALSE
)
summary(result)
#>
#> Call:
#> fastcpd::fastcpd.ar(data = x[3 + seq_len(n)], order = 3, r.progress = FALSE)
#>
#> Change points:
#> 614
#>
#> Cost values:
#> 2754.116 2038.945
#>
#> Parameters:
#> segment 1 segment 2
#> 1 0.57120256 0.2371809
#> 2 -0.20985108 0.4031244
#> 3 0.08221978 0.2290323
plot(result)
import fastcpd.segmentation
from numpy import concatenate
from numpy.random import normal, multivariate_normal

# Three 3-dimensional Gaussian segments that share one diagonal covariance
# (100 on the diagonal, 0 elsewhere); only the mean vector differs.
covariance_mat = [[100 if row == col else 0 for col in range(3)] for row in range(3)]
segments = [
    ([0, 0, 0], 300),
    ([50, 50, 50], 400),
    ([2, 2, 2], 300),
]
# Draw the segments in order and stack them into a single array.
data = concatenate(
    [multivariate_normal(mean, covariance_mat, size) for mean, size in segments]
)
# Detect mean changes in the stacked samples.
fastcpd.segmentation.mean(data)
import fastcpd.variance_estimation
fastcpd.variance_estimation.mean(data)
library(microbenchmark)
# Benchmark mean change detection across packages on 5 million observations
# with a single mean shift (0 to 50) at the midpoint.
set.seed(1)
n <- 5e6
half <- n / 2
mean_data <- c(
  rnorm(half, mean = 0, sd = 1),
  rnorm(half, mean = 50, sd = 1)
)
# Each expression is evaluated 10 times; the names label the plot.
timings <- microbenchmark::microbenchmark(
  not = not::not(mean_data, contrast = "pcwsConstMean"),
  changepoint = changepoint::cpt.mean(mean_data, method = "PELT"),
  jointseg = jointseg::jointSeg(mean_data, K = 12),
  fpop = fpop::Fpop(mean_data, 2 * log(n)),
  mosum = mosum::mosum(c(mean_data), G = 40),
  fastcpd = fastcpd::fastcpd.mean(
    mean_data,
    r.progress = FALSE,
    cp_only = TRUE,
    variance_estimation = 1
  ),
  times = 10
)
ggplot2::autoplot(timings)
library(microbenchmark)
# Repeat the comparison on 10^8 observations with a single mean shift
# (0 to 50) at the midpoint, timing each package once with system.time().
set.seed(1)
n <- 1e8
half <- n / 2
mean_data <- c(
  rnorm(half, mean = 0, sd = 1),
  rnorm(half, mean = 50, sd = 1)
)
system.time(
  fastcpd::fastcpd.mean(
    mean_data,
    r.progress = FALSE,
    cp_only = TRUE,
    variance_estimation = 1
  )
)
#> user system elapsed
#> 9.866 4.932 14.831
system.time(
  mosum::mosum(c(mean_data), G = 40)
)
#> user system elapsed
#> 9.229 6.694 15.936
system.time(
  fpop::Fpop(mean_data, 2 * log(n))
)
#> user system elapsed
#> 44.848 2.346 47.220
system.time(
  changepoint::cpt.mean(mean_data, method = "PELT")
)
#> user system elapsed
#> 468.840 956.814 1479.480
# Distribution of timings over 10 repetitions of each expression.
timings <- microbenchmark(
  changepoint = changepoint::cpt.mean(mean_data, method = "PELT"),
  fpop = fpop::Fpop(mean_data, 2 * log(n)),
  mosum = mosum::mosum(c(mean_data), G = 40),
  fastcpd = fastcpd::fastcpd.mean(
    mean_data,
    r.progress = FALSE,
    cp_only = TRUE,
    variance_estimation = 1
  ),
  times = 10
)
ggplot2::autoplot(timings)
The suggested packages are not required for the main functionality of the package; they are only required for the vignettes. If you want to learn more about the package comparison and other vignettes, you can check out either the vignettes on CRAN or the pkgdown-generated documentation.
The package should install on Mac and any Linux
distribution without any problems if all the dependencies are installed.
However, if you encounter problems related to gfortran, it might be
because RcppArmadillo was not installed beforehand. Try the Mac OSX Stack Overflow
solution or the Linux
Stack Overflow solution if you have trouble installing
RcppArmadillo.
Fork the repo.
Create a new branch from main branch.
Make changes and commit them.
- If you are adding a new family of models, add it to
  src/fastcpd_class_cost.cc with proper example and tests in
  vignettes/gallery.Rmd and
  tests/testthat/test-gallery.R.
- Update src/fastcpd_constants.h.
- Add a new wrapper function in R/fastcpd_wrappers.R for the new family of models and move
  the examples to the new wrapper function as roxygen examples.
- Update _pkgdown.yml.
Push the changes to your fork.
Create a pull request.
Make sure the pull request does not create new warnings or errors
in devtools::check().
Python headers are required to install the Python package. If you are using Ubuntu, you can install the headers with:
sudo apt install python3-dev
Special thanks to clODE.