library(timeSeries)
library(mosum)
# library(readr)  ### only for reproducing Figure 3

#### Required functions  ####

'Function to simulate data with changes in mean'

data_generation <- function(cpts,ev,n,seed_error){
  # cpts - vector of change point locations
  # ev - vector of expected values of the different segments
  # n - sample size of time series
  # seed - seed for generating random series
  
  # append change point vector by 0 and n
  cpts_ex <- c(0,cpts,n)
  
  # initialize signal series
  signalts <- c(rep(NA,n))
  # generate signal series
  for (i in 1:(length(cpts)+1)){
    signalts[(cpts_ex[i]+1):cpts_ex[i+1]] <- c(rep(ev[i],(cpts_ex[i+1]-cpts_ex[i])))
  }
  
  #noisy series
  set.seed(seed_error)
  error <-rnorm(n=n,mean=0,sd=1)
  X <- error+signalts
  Xts <- timeSeries(X)
  
  # return relevant results
  structure(
    list(Xts = Xts, #time series
         signalts = signalts #signal of time series
         ))
  
}


'Estimating function of the classical median: 
sigmoid function hmed=sgn(x)'

hmed <- function(x){
  if (x==0) return(0)
  else{
    if (x<0) return(-1)
    else return(1)
  }
}

# vectorize function hmed
vhmed <- Vectorize(hmed)

'Estimating function of the median-like estimator: 
 arcus tangens function hml=atan(x)/pi'

hml <- function(x,t){2*atan(x-t)/pi}

# vectorize function hml
vhml <- Vectorize(hml,vectorize.args = "x")



#### Reproducing Figure 1: Plots of the classical MOSUM procedure ####

# Determine parameters of the simulation

# vector of change points
cpts <- c(100,250,700)

# expected values of the segments
ev <- c(0,2,1,3)

# sample size
n <- 1000

# parameters of MOSUM procedure
# bandwidth
G <- 60
# significance level
alpha <- 0.05
# minimal interval length
epsilon <- 0.2

# seed for generating random series
seed_error <- 3841


# Simulation of the data

# generate time series
data <- data_generation(cpts,ev,n,seed_error)
Xts <- data$Xts
signalts <- data$signalts

# append 0 and n to change point vector
cpts_ex <- c(0,cpts,n)


# Compute MOSUM statistic

mo2 <- mosum(Xts,
             G = G, 
             boundary.extension = FALSE, 
             alpha = alpha, 
             criterion = "epsilon",
             epsilon = epsilon)
cv <- mosum.criticalValue(n = n,
                          G.right = G,
                          G.left = G, 
                          alpha = alpha)

# Signal of the statistic
msignal <- mosum(signalts,
                 G=G,
                 var.est.method="custom", 
                 var.custom=c(rep(1,n)), 
                 boundary.extension = FALSE,
                 alpha = alpha, 
                 criterion = "epsilon",
                 epsilon = epsilon)

# consider results of MOSUM procedure
# estimated change points
cpts_est <- mo2$cpts
qhat <- length(cpts_est)
y <- mo2$stat[cpts_est]


# Prepare plotting

# determine intervals of exceedings
# auxilliary logical vector of exceedings
exceedings_bin <- (!is.na(mo2$stat) & mo2$stat>=cv)

# auxilliary vector to color the exceeding intervals in time series
exceedings <- mo2$stat
exceedings <- replace(exceedings, !exceedings_bin, NA)


# determine start and end of exceeding intervals
exceedings_start <- c(rep(FALSE,n))
exceedings_start[G:(n-G)] <- (!exceedings_bin[(G-1):(n-G-1)]& 
                                exceedings_bin[G:(n-G)])

exceedings_start_index <- which(exceedings_start)                    

exceedings_end <- c(rep(FALSE,n))
exceedings_end[G:(n-G)] <- (!exceedings_bin[(G+1):(n-G+1)]& 
                              exceedings_bin[G:(n-G)])
exceedings_end_index <- which(exceedings_end)



# Creating the plots


par(mfrow=c(4,1))
par(mar=c(2,2,1,1))


# plot of time series with expected values
plot(Xts,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab=" ")
for (i in 1:(length(cpts)+1)){
  lines(x=c((cpts_ex[i]+1),cpts_ex[i+1]), y=rep(ev[i],2), col="blue",lwd="3")
}


# plot signal of MOSUM statistic
plot(msignal$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab=" ")

# plot of MOSUM statistic with true change points
plot(mo2$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab=" ")
for (i in 1:length(cpts)){
  abline(v= cpts[i], lwd="2", col="red")
}


# plot results of MOSUM procedure
plot(mo2$stat, lwd="3",type="l", cex.lab=1.5,ylab=" ", xlab=" ")
lines(timeSeries(exceedings),type="l", col="blue", lwd="3")
abline(h = cv, col = "green", lwd="3")

if (qhat>0){
  
  for (k in 1:qhat){
    ind <- which(((exceedings_start_index<=cpts_est[k])& 
                    exceedings_end_index>=cpts_est[k]))[1]
    
    # highlight start and end of relevant exceeding intervals
    points(exceedings_start_index[ind],cv, lwd="3", col="red", cex=1, pch=21,bg="red")
    text(exceedings_start_index[ind]-20, cv+0.5, bquote(v[.(k)]), cex = 1.3, lwd="3", col="red")
    points(exceedings_end_index[ind],cv, lwd="3", col="red", cex=1, pch=21,bg="red")
    text(exceedings_end_index[ind]+20, cv+0.5, bquote(w[.(k)]), cex = 1.3, lwd="3", col="red")
    
    # draw change point estimates
    abline(v= cpts_est[k], lwd="2", col="red")
    axis(1,at=cpts_est[k],labels=bquote(hat(k)[.(k)]), col.axis="red", cex.axis=1.3)
    
    
  }
  
}

#### Reproducing Figure 2: Plots for illustrating the problem in detectability ####
# Determine parameters of the simulation
#  sample size
n <- 1000

# parameters of MOSUM procedure:
# bandwidth
G <- 50
# significance level
alpha <- 0.05
# minimal interval length
epsilon <- 0.2

# parameters of scenario (a)
# vector of change points
cpts_a <- c(200,400,600,800)
# expected values of the segments
ev_a <- c(0,1,4,5,0)

# parameters of scenario (b)
# vector of change points
cpts_b <- c(100,200,600,900)
# expected values of the segments
ev_b <- c(1,2,5,3,4)

# seed for generating random series
seed_error <- 45

# Simulation of the data
# generate time series of scenario (a)
data_a <- data_generation(cpts_a,ev_a,n,seed_error)
Xts_a <- data_a$Xts
signalts_a <- data_a$signalts

# generate time series of scenario (b)
data_b <- data_generation(cpts_b,ev_b,n,seed_error)
Xts_b <- data_b$Xts
signalts_b <- data_b$signalts


# Compute MOSUM-score statistic
# with estimation function of classical median (sigmoid function)
# and global median as inspection parameter 


# MOSUM of scenario (a)
# transformed series of estimation function
H_sig_a <- vhmed(signalts_a-median(signalts_a))
# compute MOSUM statistic of transformed series
mo_sig_a <- mosum(H_sig_a,
                  G=G,
                  var.est.method="custom", 
                  var.custom=c(rep(1,n)), 
                  boundary.extension = FALSE,
                  alpha = alpha,
                  criterion = "epsilon",
                  epsilon = epsilon)


# MOSUM of scenario (b)
# transformed series of estimation function
H_sig_b <- vhmed(signalts_b-median(signalts_b))
# compute MOSUM statistic of transformed series
mo_sig_b <- mosum(H_sig_b,
                  G=G,
                  var.est.method="custom", 
                  var.custom=c(rep(1,n)), 
                  boundary.extension = FALSE,
                  alpha = alpha, 
                  criterion = "epsilon",
                  epsilon = epsilon)


# Creating the plots

par(mfrow=c(2,2))
par(mar=c(2,2,1,1))

# plot of time series with change points for scenario (a)
plot(Xts_a,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab = "Scenario (a)")
for (i in 1:length(cpts_a)){
  abline(v= cpts_a[i], lwd="2", col="red")
}

# plot of time series with change points for scenario (b)
plot(Xts_b,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab='Scenario (b)')
for (i in 1:length(cpts_b)){
  abline(v= cpts_b[i], lwd="2", col="red")
}

# plot of MOSUM statistic with change points for scenario (a)
plot(mo_sig_a$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab='Signal (a)')
for (i in 1:length(cpts_a)){
  abline(v= cpts_a[i], lwd="2", col="red")
}

# plot of MOSUM statistic with change points for scenario (b)
plot(mo_sig_b$stat,lwd="3",cex.lab=1.5, type="l",ylab=" ",xlab='Signal (b)')
for (i in 1:length(cpts_b)){
  abline(v= cpts_b[i], lwd="2", col="red")
}


library(readr)

#### Reproducing Figure 3: Analysis of well-log data ####

# Read well-log data 
# load data from Github repository
data <- read_table("https://raw.githubusercontent.com/alan-turing-institute/rbocpdms/master/Data/well%20log/well.txt", col_names=FALSE)

# determine length of dataset
n <- length(data$X1)

# Determine parameters of computation
# parameters of MOSUM procedure:
# bandwidth
G <- 20
# significance level
alpha <- 0.05
# minimal interval length
epsilon <- 0.2

# inspection parameter 1
# global median is used
ipar_1 <- median(data$X1)

# inspection parameter 2
#  median of the stretch starting at 1070 and ending at 2767 is used
ipar_2 <- median(data$X1[1070:2767])

# Compute MOSUM statistics 
# with median-like estimation function
# and medians computed on different stretches as inspection parameters
#
#for inspection parameter 1 (global median)
# generate transformed series of median like-estimation function
H_ipar_1 <- vhml(data$X1,ipar_1)

# compute MOSUM statistic on transformed series
mo_ipar_1 <- mosum(H_ipar_1, 
                   G=G,
                   boundary.extension = FALSE,
                   alpha = alpha,
                   criterion = "epsilon",
                   epsilon = epsilon)

# for inspection parameter 2
# generate transformed series of median like-estimation function
H_ipar_2 <- vhml(data$X1,ipar_2)

# compute MOSUM statistic on transformed series
mo_ipar_2 <- mosum(H_ipar_2, 
                   G=G,
                   boundary.extension = FALSE,
                   alpha = alpha,
                   criterion = "epsilon",
                   epsilon = epsilon)

# Creating the plots in Figure 3
# determine critical value
cv <- mosum.criticalValue(n=n, G.left=G, G.right=G, alpha=alpha)

par(mfrow=c(3,1))
par(mar=c(2,2,1,1))

# plot of time series with change points obtained by the different inspection parameters
plot(data$X1, type="l", cex.axis=1, font=2, ylab="", xlab="")
for (j in 1: length(mo_ipar_2$cpts)){
  abline(v= mo_ipar_2$cpts[j], lwd="1", col="blue")
}
for (j in 1: length(mo_ipar_1$cpts)){
  abline(v= mo_ipar_1$cpts[j], lwd="1", col="red",lty=2)
}


# plot of MOSUM with inspection parameter 1 (global median)
plot(mo_ipar_1$stat, type="l", cex.axis=1, 
     font=2, ylab="", xlab="", ylim=c(0,10))
for (j in 1: length(mo_ipar_1$cpts)){
  abline(v= mo_ipar_1$cpts[j], lwd="1", col="red",lty=2)
}
abline(h= cv, lwd="1", col="green2")


# plot of MOSUM with inspection parameter 2 (median of stretch [1070:2767] )
plot(mo_ipar_2$stat, type="l", cex.axis=1, 
     font=2, ylab="", xlab="", ylim=c(0,10))
for (j in 1: length(mo_ipar_2$cpts)){
  abline(v= mo_ipar_2$cpts[j], lwd="1", col="blue")
}
abline(h= cv, lwd="1", col="green2")