library(timeSeries) library(mosum) # library(readr) ### only for reproducing Figure 3 #### Required functions #### 'Function to simulate data with changes in mean' data_generation <- function(cpts,ev,n,seed_error){ # cpts - vector of change point locations # ev - vector of expected values of the different segments # n - sample size of time series # seed - seed for generating random series # append change point vector by 0 and n cpts_ex <- c(0,cpts,n) # initialize signal series signalts <- c(rep(NA,n)) # generate signal series for (i in 1:(length(cpts)+1)){ signalts[(cpts_ex[i]+1):cpts_ex[i+1]] <- c(rep(ev[i],(cpts_ex[i+1]-cpts_ex[i]))) } #noisy series set.seed(seed_error) error <-rnorm(n=n,mean=0,sd=1) X <- error+signalts Xts <- timeSeries(X) # return relevant results structure( list(Xts = Xts, #time series signalts = signalts #signal of time series )) } 'Estimating function of the classical median: sigmoid function hmed=sgn(x)' hmed <- function(x){ if (x==0) return(0) else{ if (x<0) return(-1) else return(1) } } # vectorize function hmed vhmed <- Vectorize(hmed) 'Estimating function of the median-like estimator: arcus tangens function hml=atan(x)/pi' hml <- function(x,t){2*atan(x-t)/pi} # vectorize function hml vhml <- Vectorize(hml,vectorize.args = "x") #### Reproducing Figure 1: Plots of the classical MOSUM procedure #### # Determine parameters of the simulation # vector of change points cpts <- c(100,250,700) # expected values of the segments ev <- c(0,2,1,3) # sample size n <- 1000 # parameters of MOSUM procedure # bandwidth G <- 60 # significance level alpha <- 0.05 # minimal interval length epsilon <- 0.2 # seed for generating random series seed_error <- 3841 # Simulation of the data # generate time series data <- data_generation(cpts,ev,n,seed_error) Xts <- data$Xts signalts <- data$signalts # append 0 and n to change point vector cpts_ex <- c(0,cpts,n) # Compute MOSUM statistic mo2 <- mosum(Xts, G = G, boundary.extension = FALSE, alpha = alpha, criterion = "epsilon", epsilon = epsilon) cv <- mosum.criticalValue(n = n, G.right = G, G.left = G, alpha = alpha) # Signal of the statistic msignal <- mosum(signalts, G=G, var.est.method="custom", var.custom=c(rep(1,n)), boundary.extension = FALSE, alpha = alpha, criterion = "epsilon", epsilon = epsilon) # consider results of MOSUM procedure # estimated change points cpts_est <- mo2$cpts qhat <- length(cpts_est) y <- mo2$stat[cpts_est] # Prepare plotting # determine intervals of exceedings # auxilliary logical vector of exceedings exceedings_bin <- (!is.na(mo2$stat) & mo2$stat>=cv) # auxilliary vector to color the exceeding intervals in time series exceedings <- mo2$stat exceedings <- replace(exceedings, !exceedings_bin, NA) # determine start and end of exceeding intervals exceedings_start <- c(rep(FALSE,n)) exceedings_start[G:(n-G)] <- (!exceedings_bin[(G-1):(n-G-1)]& exceedings_bin[G:(n-G)]) exceedings_start_index <- which(exceedings_start) exceedings_end <- c(rep(FALSE,n)) exceedings_end[G:(n-G)] <- (!exceedings_bin[(G+1):(n-G+1)]& exceedings_bin[G:(n-G)]) exceedings_end_index <- which(exceedings_end) # Creating the plots par(mfrow=c(4,1)) par(mar=c(2,2,1,1)) # plot of time series with expected values plot(Xts,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab=" ") for (i in 1:(length(cpts)+1)){ lines(x=c((cpts_ex[i]+1),cpts_ex[i+1]), y=rep(ev[i],2), col="blue",lwd="3") } # plot signal of MOSUM statistic plot(msignal$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab=" ") # plot of MOSUM statistic with true change points plot(mo2$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab=" ") for (i in 1:length(cpts)){ abline(v= cpts[i], lwd="2", col="red") } # plot results of MOSUM procedure plot(mo2$stat, lwd="3",type="l", cex.lab=1.5,ylab=" ", xlab=" ") lines(timeSeries(exceedings),type="l", col="blue", lwd="3") abline(h = cv, col = "green", lwd="3") if (qhat>0){ for (k in 1:qhat){ ind <- which(((exceedings_start_index<=cpts_est[k])& exceedings_end_index>=cpts_est[k]))[1] # highlight start and end of relevant exceeding intervals points(exceedings_start_index[ind],cv, lwd="3", col="red", cex=1, pch=21,bg="red") text(exceedings_start_index[ind]-20, cv+0.5, bquote(v[.(k)]), cex = 1.3, lwd="3", col="red") points(exceedings_end_index[ind],cv, lwd="3", col="red", cex=1, pch=21,bg="red") text(exceedings_end_index[ind]+20, cv+0.5, bquote(w[.(k)]), cex = 1.3, lwd="3", col="red") # draw change point estimates abline(v= cpts_est[k], lwd="2", col="red") axis(1,at=cpts_est[k],labels=bquote(hat(k)[.(k)]), col.axis="red", cex.axis=1.3) } } #### Reproducing Figure 2: Plots for illustrating the problem in detectability #### # Determine parameters of the simulation # sample size n <- 1000 # parameters of MOSUM procedure: # bandwidth G <- 50 # significance level alpha <- 0.05 # minimal interval length epsilon <- 0.2 # parameters of scenario (a) # vector of change points cpts_a <- c(200,400,600,800) # expected values of the segments ev_a <- c(0,1,4,5,0) # parameters of scenario (b) # vector of change points cpts_b <- c(100,200,600,900) # expected values of the segments ev_b <- c(1,2,5,3,4) # seed for generating random series seed_error <- 45 # Simulation of the data # generate time series of scenario (a) data_a <- data_generation(cpts_a,ev_a,n,seed_error) Xts_a <- data_a$Xts signalts_a <- data_a$signalts # generate time series of scenario (b) data_b <- data_generation(cpts_b,ev_b,n,seed_error) Xts_b <- data_b$Xts signalts_b <- data_b$signalts # Compute MOSUM-score statistic # with estimation function of classical median (sigmoid function) # and global median as inspection parameter # MOSUM of scenario (a) # transformed series of estimation function H_sig_a <- vhmed(signalts_a-median(signalts_a)) # compute MOSUM statistic of transformed series mo_sig_a <- mosum(H_sig_a, G=G, var.est.method="custom", var.custom=c(rep(1,n)), boundary.extension = FALSE, alpha = alpha, criterion = "epsilon", epsilon = epsilon) # MOSUM of scenario (b) # transformed series of estimation function H_sig_b <- vhmed(signalts_b-median(signalts_b)) # compute MOSUM statistic of transformed series mo_sig_b <- mosum(H_sig_b, G=G, var.est.method="custom", var.custom=c(rep(1,n)), boundary.extension = FALSE, alpha = alpha, criterion = "epsilon", epsilon = epsilon) # Creating the plots par(mfrow=c(2,2)) par(mar=c(2,2,1,1)) # plot of time series with change points for scenario (a) plot(Xts_a,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab = "Scenario (a)") for (i in 1:length(cpts_a)){ abline(v= cpts_a[i], lwd="2", col="red") } # plot of time series with change points for scenario (b) plot(Xts_b,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab='Scenario (b)') for (i in 1:length(cpts_b)){ abline(v= cpts_b[i], lwd="2", col="red") } # plot of MOSUM statistic with change points for scenario (a) plot(mo_sig_a$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab='Signal (a)') for (i in 1:length(cpts_a)){ abline(v= cpts_a[i], lwd="2", col="red") } # plot of MOSUM statistic with change points for scenario (b) plot(mo_sig_b$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab='Signal (b)') for (i in 1:length(cpts_b)){ abline(v= cpts_b[i], lwd="2", col="red") } library(readr) #### Reproducing Figure 3: Analysis of well-log data #### # Read well-log data # load data from Github repository data <- read_table("https://raw.githubusercontent.com/alan-turing-institute/rbocpdms/master/Data/well%20log/well.txt", col_names=FALSE) # determine length of dataset n <- length(data$X1) # Determine parameters of computation # parameters of MOSUM procedure: # bandwidth G <- 20 # significance level alpha <- 0.05 # minimal interval length epsilon <- 0.2 # inspection parameter 1 # global median is used ipar_1 <- median(data$X1) # inspection parameter 2 # median of the stretch starting at 1070 and ending at 2767 is used ipar_2 <- median(data$X1[1070:2767]) # Compute MOSUM statistics # with median-like estimation function # and medians computed on different stretches as inspection parameters # #for inspection parameter 1 (global median) # generate transformed series of median like-estimation function H_ipar_1 <- vhml(data$X1,ipar_1) # compute MOSUM statistic on transformed series mo_ipar_1 <- mosum(H_ipar_1, G=G, boundary.extension = FALSE, alpha = alpha, criterion = "epsilon", epsilon = epsilon) # for inspection parameter 2 # generate transformed series of median like-estimation function H_ipar_2 <- vhml(data$X1,ipar_2) # compute MOSUM statistic on transformed series mo_ipar_2 <- mosum(H_ipar_2, G=G, boundary.extension = FALSE, alpha = alpha, criterion = "epsilon", epsilon = epsilon) # Creating the plots in Figure 3 # determine critical value cv <- mosum.criticalValue(n=n, G.left=G, G.right=G, alpha=alpha) par(mfrow=c(3,1)) par(mar=c(2,2,1,1)) # plot of time series with change points obtained by the different inspection parameters plot(data$X1, type="l", cex.axis=1, font=2, ylab="", xlab="") for (j in 1: length(mo_ipar_2$cpts)){ abline(v= mo_ipar_2$cpts[j], lwd="1", col="blue") } for (j in 1: length(mo_ipar_1$cpts)){ abline(v= mo_ipar_1$cpts[j], lwd="1", col="red",lty=2) } # plot of MOSUM with inspection parameter 1 (global median) plot(mo_ipar_1$stat, type="l", cex.axis=1, font=2, ylab="", xlab="", ylim=c(0,10)) for (j in 1: length(mo_ipar_1$cpts)){ abline(v= mo_ipar_1$cpts[j], lwd="1", col="red",lty=2) } abline(h= cv, lwd="1", col="green2") # plot of MOSUM with inspection parameter 2 (median of stretch [1070:2767] ) plot(mo_ipar_2$stat, type="l", cex.axis=1, font=2, ylab="", xlab="", ylim=c(0,10)) for (j in 1: length(mo_ipar_2$cpts)){ abline(v= mo_ipar_2$cpts[j], lwd="1", col="blue") } abline(h= cv, lwd="1", col="green2")