#==================================================================
# R CODE FOR
# "ROBUST ESTIMATION OF LOCATION AND CONCENTRATION
#  PARAMETERS FOR THE VON MISES-FISHER DISTRIBUTION"
#
# BY SHOGO KATO AND SHINTO EGUCHI
#==================================================================

#------------------------------------------------------------------
# FUNCTIONS FOR PARAMETER ESTIMATION
#------------------------------------------------------------------

# MAXIMUM LIKELIHOOD ESTIMATOR: mle_vmf
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#
# OUTPUT:
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa

mle_vmf=function(x){
  n=dim(x)[1]; p=dim(x)[2]
  # RATIO OF MODIFIED BESSEL FUNCTIONS A_p(x)=I_{p/2}(x)/I_{(p-2)/2}(x)
  A=function(x,p) besselI(x,p/2,expon.scaled=T)/besselI(x,(p-2)/2,expon.scaled=T)
  # MEAN RESULTANT LENGTH AND MEAN DIRECTION
  temp=sqrt(sum(apply(x/n,2,sum)^2))
  mu_mle=apply(x,2,sum)/sqrt(sum(apply(x,2,sum)^2))
  # SOLVE A_p(kappa)=MEAN RESULTANT LENGTH FOR kappa
  kappa_mle=uniroot(function(x) A(x,p)-temp,interval=c(0,1e+3),f.upper=1-temp,f.lower=-temp,tol=1e-7)[[1]]
  xi_mle=kappa_mle*mu_mle
  return(list(xi=xi_mle,mu=mu_mle,kappa=kappa_mle))
}

# TYPE 1 ESTIMATOR: type1_vmf
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   beta: tuning parameter
#
# OUTPUT:
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa
#   iterations: number of iterations

type1_vmf=function(x,beta){
  n=dim(x)[1]; p=dim(x)[2]
  func_part=function(x,mu,kappa) exp(kappa*mu%*%x)   # exp(kappa*mu'x) FOR EACH COLUMN OF x
  A=function(x,p) besselI(x,p/2,expon.scaled=T)/besselI(x,(p-2)/2,expon.scaled=T)
  B=function(x,beta,p) besselI(x*(beta+1),(p-2)/2)/besselI(x,(p-2)/2)
  norm=function(x) sqrt(sum(x^2))
  mu_t=matrix(1,3,p)
  kappa_t=NULL
  mu_t[1,]=10
  kappa_t[1]=1e+10
  mu_t[2,]=t(mle_vmf(x)$mu)    # MLE AS INITIAL VALUE
  kappa_t[2]=mle_vmf(x)$kappa
  t=1
  # ITERATE UNTIL BOTH mu AND kappa CONVERGE (TOLERANCE 1e-8) OR t REACHES 5000
  while((abs(kappa_t[2]-kappa_t[1])>1e-8) || (norm(mu_t[2,]-mu_t[1,])>1e-8)) {
    mu_t[3,]=func_part(t(x),mu_t[2,],beta*kappa_t[2])%*%x/norm(func_part(t(x),mu_t[2,],beta*kappa_t[2])%*%x)
    con=n*B(kappa_t[2],beta,p)*(A(kappa_t[2]*(1+beta),p)-A(kappa_t[2],p))/(1+beta)^((p-2)/2)
    temp=norm(func_part(t(x),mu_t[2,],beta*kappa_t[2])%*%x-con*mu_t[2,])/sum(func_part(t(x),mu_t[2,],beta*kappa_t[2]))
    kappa_t[3]=uniroot(function(x) A(x,p)-temp,interval=c(0,1e+3),f.upper=1-temp,f.lower=-temp,tol=1e-7)[[1]]
    kappa_t[1:2]=kappa_t[2:3]
    mu_t[1:2,]=mu_t[2:3,]
    t=t+1
    if (t==5000) break
  }
  mu_beta=t(mu_t[3,])
  kappa_beta=kappa_t[3]
  xi_beta=kappa_beta*mu_beta
  return(list(xi=xi_beta,mu=mu_beta,kappa=kappa_beta,iterations=t))
}

# TYPE 0 ESTIMATOR: type0_vmf
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   gamma: tuning parameter
#
# OUTPUT:
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa
#   iterations: number of iterations

type0_vmf=function(x,gamma){
  n=dim(x)[1]; p=dim(x)[2]
  func_part=function(x,mu,kappa) exp(kappa*mu%*%x)   # exp(kappa*mu'x) FOR EACH COLUMN OF x
  A=function(x,p) besselI(x,p/2,expon.scaled=T)/besselI(x,(p-2)/2,expon.scaled=T)
  B=function(x,gamma,p) besselI(x*(gamma+1),(p-2)/2)/besselI(x,(p-2)/2)
  norm=function(x) sqrt(sum(x^2))
  mu_t=matrix(1,3,p)
  kappa_t=NULL
  mu_t[1,]=10
  kappa_t[1]=1e+10
  mu_t[2,]=t(mle_vmf(x)$mu)    # MLE AS INITIAL VALUE
  kappa_t[2]=mle_vmf(x)$kappa
  t=1
  # ITERATE UNTIL BOTH mu AND kappa CONVERGE (TOLERANCE 1e-8) OR t REACHES 5000
  while((abs(kappa_t[2]-kappa_t[1])>1e-8) || (norm(mu_t[2,]-mu_t[1,])>1e-8)) {
    mu_t[3,]=(func_part(t(x),mu_t[2,],gamma*kappa_t[2])%*%x)/norm(func_part(t(x),mu_t[2,],gamma*kappa_t[2])%*%x)
    temp=norm(func_part(t(x),mu_t[2,],gamma*kappa_t[2])%*%x)/sum(func_part(t(x),mu_t[2,],gamma*kappa_t[2]))
    kappa_t[3]=uniroot(function(x) A(x,p)-temp,interval=c(0,1e+3),f.upper=1-temp,f.lower=-temp,tol=1e-7)[[1]]/(1+gamma)
    kappa_t[1:2]=kappa_t[2:3]
    mu_t[1:2,]=mu_t[2:3,]
    t=t+1
    if (t==5000) break
  }
  mu_gamma=t(mu_t[3,])
  kappa_gamma=kappa_t[3]
  xi_gamma=kappa_gamma*mu_gamma
  return(list(xi=xi_gamma,mu=mu_gamma,kappa=kappa_gamma,iterations=t))
}

#------------------------------------------------------------------
# ESTIMATION OF TUNING PARAMETER USING CROSS-VALIDATION
#------------------------------------------------------------------

# ESTIMATION OF TUNING PARAMETER FOR TYPE 1 ESTIMATOR: type1_cv
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   K: number of folds
#   beta_loss: tuning parameter for loss function
#
# OUTPUT:
#   beta: estimate of beta
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa

type1_cv=function(x,K=3,beta_loss=0.6){
  n=dim(x)[1]; p=dim(x)[2]
  id=sample(1:n,n)%%K      # RANDOM ASSIGNMENT OF OBSERVATIONS TO FOLDS 0,...,K-1
  loss=rep(0,100)
  func_part=function(x,xi) exp(xi%*%x)
  norm=function(x) sqrt(sum(x^2))
  # GRID SEARCH OVER beta=0.01,0.02,...,1.00
  for (k in 1:100){
    beta_t=0.01*k
    for (j in 0:(K-1)){
      test=x[id==j,]
      train=x[id!=j,]
      xi_beta=type1_vmf(train,beta_t)$xi
      loss[k]=loss[k]-
        (norm(xi_beta)^((p-2)/2)/((2*pi)^(p/2)*besselI(norm(xi_beta),(p-2)/2)))^beta_loss*
        sum(func_part(t(test),beta_loss*xi_beta))/(dim(test)[1]*beta_loss)+
        norm(xi_beta)^((p-2)*beta_loss/2)*besselI((1+beta_loss)*norm(xi_beta),(p-2)/2)/
        ((2*pi)^(p*beta_loss/2)*(1+beta_loss)^(p/2)*besselI(norm(xi_beta),(p-2)/2)^(1+beta_loss))
    }
  }
  scale_x=(1:100)/100
  loss=loss/n
  plot(scale_x,loss,type="o",pch=20,xlab="beta",ylab="Loss")
  beta=(1:length(loss))[loss==min(loss)]/100   # GRID VALUE MINIMIZING THE CV LOSS
  type1_est=type1_vmf(x,beta)                  # REFIT ON THE FULL DATASET
  return(list(beta=beta,xi=type1_est$xi,mu=type1_est$mu,kappa=type1_est$kappa))
}

# ESTIMATION OF TUNING PARAMETER FOR TYPE 0 ESTIMATOR: type0_cv
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   K: number of folds
#   gamma_loss: tuning parameter for loss function
#
# OUTPUT:
#   gamma: estimate of gamma
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa

type0_cv=function(x,K=3,gamma_loss=0.6){
  n=dim(x)[1]; p=dim(x)[2]
  id=sample(1:n,n)%%K      # RANDOM ASSIGNMENT OF OBSERVATIONS TO FOLDS 0,...,K-1
  loss=rep(0,100)
  func_part=function(x,xi) exp(xi%*%x)
  norm=function(x) sqrt(sum(x^2))
  # GRID SEARCH OVER gamma=0.01,0.02,...,1.00
  for (k in 1:100){
    gamma_t=0.01*k
    for (j in 0:(K-1)){
      test=x[id==j,]
      train=x[id!=j,]
      xi_gamma=type0_vmf(train,gamma_t)$xi
      loss[k]=loss[k]-
        log(sum(func_part(t(test),gamma_loss*xi_gamma))/dim(test)[1])/gamma_loss-
        log(((1+gamma_loss)*norm(xi_gamma))^((p-2)/2)/
            ((2*pi)^(p/2)*besselI((1+gamma_loss)*norm(xi_gamma),(p-2)/2)))/(1+gamma_loss)
    }
  }
  scale_x=(1:100)/100
  loss=loss/n
  plot(scale_x,loss,type="o",pch=20,xlab="gamma",ylab="Loss")
  gamma=(1:length(loss))[loss==min(loss)]/100   # GRID VALUE MINIMIZING THE CV LOSS
  type0_est=type0_vmf(x,gamma)                  # REFIT ON THE FULL DATASET
  return(list(gamma=gamma,xi=type0_est$xi,mu=type0_est$mu,kappa=type0_est$kappa))
}

#------------------------------------------------------------------
# EXAMPLES
#------------------------------------------------------------------

# EXAMPLE 1: CIRCULAR DATA (p=2), ABOUT 95% VON MISES(0,15) AND 5% UNIFORM CONTAMINATION
library(circular)
n=200; r=runif(n); theta=NULL
theta[r<0.95]=rvonmises(sum(r<0.95),0,15)
theta[r>=0.95]=runif(sum(r>=0.95),0,2*pi)
x=matrix(c(cos(theta),sin(theta)),n,2)
mle_vmf(x)
type1_vmf(x,beta=0.5)
type0_vmf(x,gamma=0.4)
type1_cv(x,K=3,beta_loss=0.6)
type0_cv(x,K=3,gamma_loss=0.6)

# EXAMPLE 2: SPHERICAL DATA (p=5) FROM NORMALIZED GAUSSIAN VECTORS SHIFTED ALONG THE FIRST AXIS
n=300
p=5
x=matrix(1,n,p)
for(j in 1:n){r=rnorm(p); r[1]=r[1]+3; x[j,]=r/sqrt(sum(r^2))}
mle_vmf(x)
type1_vmf(x,beta=0.2)
type0_vmf(x,gamma=0.3)
type1_cv(x,K=3,beta_loss=0.6)
type0_cv(x,K=3,gamma_loss=0.6)
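
#------------------------------------------------------------------
# ADDITIONAL USAGE SKETCH (NOT PART OF THE ORIGINAL CODE)
#------------------------------------------------------------------

# All estimators above assume that every row of x has unit Euclidean
# norm. The sketch below factors the normalization step of Example 2
# into a small helper and applies the estimators to data with a few
# explicit outliers. The name normalize_rows (and this whole block)
# is an illustrative addition, not part of the authors' code.

normalize_rows=function(x) x/sqrt(rowSums(x^2))   # project each row onto the unit sphere

set.seed(1)
n=100; p=3
raw=matrix(rnorm(n*p),n,p)
raw[,1]=raw[,1]+4                       # shift along the first axis to create a preferred direction
raw[1:10,]=matrix(rnorm(10*p),10,p)     # replace 10 rows by unshifted noise (outliers after normalization)
x3=normalize_rows(raw)
range(rowSums(x3^2))                    # check: all row norms numerically equal to 1
mle_vmf(x3)
type1_vmf(x3,beta=0.3)
type0_vmf(x3,gamma=0.3)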