#==================================================================
# R CODE FOR
# "ROBUST ESTIMATION OF LOCATION AND CONCENTRATION
#  PARAMETERS FOR THE VON MISES-FISHER DISTRIBUTION"
#
# BY SHOGO KATO AND SHINTO EGUCHI
#==================================================================

#------------------------------------------------------------------
# FUNCTIONS FOR PARAMETER ESTIMATION
#------------------------------------------------------------------

# MAXIMUM LIKELIHOOD ESTIMATOR: mle_vmf
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#
# OUTPUT:
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa

mle_vmf=function(x){
  n=dim(x)[1]; p=dim(x)[2]
  # RATIO OF MODIFIED BESSEL FUNCTIONS A_p(x)=I_{p/2}(x)/I_{(p-2)/2}(x)
  A=function(x,p) besselI(x,p/2,expon.scaled=T)/besselI(x,(p-2)/2,expon.scaled=T)
  # MEAN RESULTANT LENGTH AND MEAN DIRECTION
  temp=sqrt(sum(apply(x/n,2,sum)^2))
  mu_mle=apply(x,2,sum)/sqrt(sum(apply(x,2,sum)^2))
  # SOLVE A_p(kappa)=MEAN RESULTANT LENGTH FOR kappa
  kappa_mle=uniroot(function(x) A(x,p)-temp,interval=c(0,1e+3),f.upper=1-temp,f.lower=-temp,tol=1e-7)[[1]]
  xi_mle=kappa_mle*mu_mle
  return(list(xi=xi_mle,mu=mu_mle,kappa=kappa_mle))
}

# TYPE 1 ESTIMATOR: type1_vmf
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   beta: tuning parameter
#
# OUTPUT:
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa
#   iterations: number of iterations

type1_vmf=function(x,beta){
  n=dim(x)[1]; p=dim(x)[2]
  func_part=function(x,mu,kappa) exp(kappa*mu%*%x)   # exp(kappa*mu'x) FOR EACH COLUMN OF x
  A=function(x,p) besselI(x,p/2,expon.scaled=T)/besselI(x,(p-2)/2,expon.scaled=T)
  B=function(x,beta,p) besselI(x*(beta+1),(p-2)/2)/besselI(x,(p-2)/2)
  norm=function(x) sqrt(sum(x^2))
  mu_t=matrix(1,3,p)
  kappa_t=NULL
  mu_t[1,]=10
  kappa_t[1]=1e+10
  mu_t[2,]=t(mle_vmf(x)$mu)    # MLE AS INITIAL VALUE
  kappa_t[2]=mle_vmf(x)$kappa
  t=1
  # ITERATE UNTIL BOTH mu AND kappa CONVERGE (TOLERANCE 1e-8) OR t REACHES 5000
  while((abs(kappa_t[2]-kappa_t[1])>1e-8) || (norm(mu_t[2,]-mu_t[1,])>1e-8)) {
    mu_t[3,]=func_part(t(x),mu_t[2,],beta*kappa_t[2])%*%x/norm(func_part(t(x),mu_t[2,],beta*kappa_t[2])%*%x)
    con=n*B(kappa_t[2],beta,p)*(A(kappa_t[2]*(1+beta),p)-A(kappa_t[2],p))/(1+beta)^((p-2)/2)
    temp=norm(func_part(t(x),mu_t[2,],beta*kappa_t[2])%*%x-con*mu_t[2,])/sum(func_part(t(x),mu_t[2,],beta*kappa_t[2]))
    kappa_t[3]=uniroot(function(x) A(x,p)-temp,interval=c(0,1e+3),f.upper=1-temp,f.lower=-temp,tol=1e-7)[[1]]
    kappa_t[1:2]=kappa_t[2:3]
    mu_t[1:2,]=mu_t[2:3,]
    t=t+1
    if (t==5000) break
  }
  mu_beta=t(mu_t[3,])
  kappa_beta=kappa_t[3]
  xi_beta=kappa_beta*mu_beta
  return(list(xi=xi_beta,mu=mu_beta,kappa=kappa_beta,iterations=t))
}

# TYPE 0 ESTIMATOR: type0_vmf
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   gamma: tuning parameter
#
# OUTPUT:
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa
#   iterations: number of iterations

type0_vmf=function(x,gamma){
  n=dim(x)[1]; p=dim(x)[2]
  func_part=function(x,mu,kappa) exp(kappa*mu%*%x)   # exp(kappa*mu'x) FOR EACH COLUMN OF x
  A=function(x,p) besselI(x,p/2,expon.scaled=T)/besselI(x,(p-2)/2,expon.scaled=T)
  B=function(x,gamma,p) besselI(x*(gamma+1),(p-2)/2)/besselI(x,(p-2)/2)
  norm=function(x) sqrt(sum(x^2))
  mu_t=matrix(1,3,p)
  kappa_t=NULL
  mu_t[1,]=10
  kappa_t[1]=1e+10
  mu_t[2,]=t(mle_vmf(x)$mu)    # MLE AS INITIAL VALUE
  kappa_t[2]=mle_vmf(x)$kappa
  t=1
  # ITERATE UNTIL BOTH mu AND kappa CONVERGE (TOLERANCE 1e-8) OR t REACHES 5000
  while((abs(kappa_t[2]-kappa_t[1])>1e-8) || (norm(mu_t[2,]-mu_t[1,])>1e-8)) {
    mu_t[3,]=(func_part(t(x),mu_t[2,],gamma*kappa_t[2])%*%x)/norm(func_part(t(x),mu_t[2,],gamma*kappa_t[2])%*%x)
    temp=norm(func_part(t(x),mu_t[2,],gamma*kappa_t[2])%*%x)/sum(func_part(t(x),mu_t[2,],gamma*kappa_t[2]))
    kappa_t[3]=uniroot(function(x) A(x,p)-temp,interval=c(0,1e+3),f.upper=1-temp,f.lower=-temp,tol=1e-7)[[1]]/(1+gamma)
    kappa_t[1:2]=kappa_t[2:3]
    mu_t[1:2,]=mu_t[2:3,]
    t=t+1
    if (t==5000) break
  }
  mu_gamma=t(mu_t[3,])
  kappa_gamma=kappa_t[3]
  xi_gamma=kappa_gamma*mu_gamma
  return(list(xi=xi_gamma,mu=mu_gamma,kappa=kappa_gamma,iterations=t))
}

#------------------------------------------------------------------
# ESTIMATION OF TUNING PARAMETER USING CROSS-VALIDATION
#------------------------------------------------------------------

# ESTIMATION OF TUNING PARAMETER FOR TYPE 1 ESTIMATOR: type1_cv
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   K: number of folds
#   beta_loss: tuning parameter for loss function
#
# OUTPUT:
#   beta: estimate of beta
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa

type1_cv=function(x,K=3,beta_loss=0.6){
  n=dim(x)[1]; p=dim(x)[2]
  id=sample(1:n,n)%%K      # RANDOM ASSIGNMENT OF OBSERVATIONS TO FOLDS 0,...,K-1
  loss=rep(0,100)
  func_part=function(x,xi) exp(xi%*%x)
  norm=function(x) sqrt(sum(x^2))
  # GRID SEARCH OVER beta=0.01,0.02,...,1.00
  for (k in 1:100){
    beta_t=0.01*k
    for (j in 0:(K-1)){
      test=x[id==j,]
      train=x[id!=j,]
      xi_beta=type1_vmf(train,beta_t)$xi
      loss[k]=loss[k]-
        (norm(xi_beta)^((p-2)/2)/((2*pi)^(p/2)*besselI(norm(xi_beta),(p-2)/2)))^beta_loss*
        sum(func_part(t(test),beta_loss*xi_beta))/(dim(test)[1]*beta_loss)+
        norm(xi_beta)^((p-2)*beta_loss/2)*besselI((1+beta_loss)*norm(xi_beta),(p-2)/2)/
        ((2*pi)^(p*beta_loss/2)*(1+beta_loss)^(p/2)*besselI(norm(xi_beta),(p-2)/2)^(1+beta_loss))
    }
  }
  scale_x=(1:100)/100
  loss=loss/n
  plot(scale_x,loss,type="o",pch=20,xlab="beta",ylab="Loss")
  beta=(1:length(loss))[loss==min(loss)]/100   # GRID VALUE MINIMIZING THE CV LOSS
  type1_est=type1_vmf(x,beta)                  # REFIT ON THE FULL DATASET
  return(list(beta=beta,xi=type1_est$xi,mu=type1_est$mu,kappa=type1_est$kappa))
}

# ESTIMATION OF TUNING PARAMETER FOR TYPE 0 ESTIMATOR: type0_cv
#
# INPUT:
#   x: dataset -- (n,p) matrix & norm of each row vector should be 1
#   K: number of folds
#   gamma_loss: tuning parameter for loss function
#
# OUTPUT:
#   gamma: estimate of gamma
#   xi: estimate of xi
#   mu: estimate of mu
#   kappa: estimate of kappa

type0_cv=function(x,K=3,gamma_loss=0.6){
  n=dim(x)[1]; p=dim(x)[2]
  id=sample(1:n,n)%%K      # RANDOM ASSIGNMENT OF OBSERVATIONS TO FOLDS 0,...,K-1
  loss=rep(0,100)
  func_part=function(x,xi) exp(xi%*%x)
  norm=function(x) sqrt(sum(x^2))
  # GRID SEARCH OVER gamma=0.01,0.02,...,1.00
  for (k in 1:100){
    gamma_t=0.01*k
    for (j in 0:(K-1)){
      test=x[id==j,]
      train=x[id!=j,]
      xi_gamma=type0_vmf(train,gamma_t)$xi
      loss[k]=loss[k]-
        log(sum(func_part(t(test),gamma_loss*xi_gamma))/dim(test)[1])/gamma_loss-
        log(((1+gamma_loss)*norm(xi_gamma))^((p-2)/2)/
            ((2*pi)^(p/2)*besselI((1+gamma_loss)*norm(xi_gamma),(p-2)/2)))/(1+gamma_loss)
    }
  }
  scale_x=(1:100)/100
  loss=loss/n
  plot(scale_x,loss,type="o",pch=20,xlab="gamma",ylab="Loss")
  gamma=(1:length(loss))[loss==min(loss)]/100   # GRID VALUE MINIMIZING THE CV LOSS
  type0_est=type0_vmf(x,gamma)                  # REFIT ON THE FULL DATASET
  return(list(gamma=gamma,xi=type0_est$xi,mu=type0_est$mu,kappa=type0_est$kappa))
}

#------------------------------------------------------------------
# EXAMPLES
#------------------------------------------------------------------

# EXAMPLE 1: CIRCULAR DATA (p=2), ABOUT 95% VON MISES(0,15) AND 5% UNIFORM CONTAMINATION
library(circular)
n=200; r=runif(n); theta=NULL
theta[r<0.95]=rvonmises(sum(r<0.95),0,15)
theta[r>=0.95]=runif(sum(r>=0.95),0,2*pi)
x=matrix(c(cos(theta),sin(theta)),n,2)
mle_vmf(x)
type1_vmf(x,beta=0.5)
type0_vmf(x,gamma=0.4)
type1_cv(x,K=3,beta_loss=0.6)
type0_cv(x,K=3,gamma_loss=0.6)

# EXAMPLE 2: SPHERICAL DATA (p=5) FROM NORMALIZED GAUSSIAN VECTORS SHIFTED ALONG THE FIRST AXIS
n=300
p=5
x=matrix(1,n,p)
for(j in 1:n){r=rnorm(p); r[1]=r[1]+3; x[j,]=r/sqrt(sum(r^2))}
mle_vmf(x)
type1_vmf(x,beta=0.2)
type0_vmf(x,gamma=0.3)
type1_cv(x,K=3,beta_loss=0.6)
type0_cv(x,K=3,gamma_loss=0.6)
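
#------------------------------------------------------------------
# ADDITIONAL USAGE SKETCH (NOT PART OF THE ORIGINAL CODE)
#------------------------------------------------------------------

# All estimators above assume that every row of x has unit Euclidean
# norm. The sketch below factors the normalization step of Example 2
# into a small helper and applies the estimators to data with a few
# explicit outliers. The name normalize_rows (and this whole block)
# is an illustrative addition, not part of the authors' code.

normalize_rows=function(x) x/sqrt(rowSums(x^2))   # project each row onto the unit sphere

set.seed(1)
n=100; p=3
raw=matrix(rnorm(n*p),n,p)
raw[,1]=raw[,1]+4                       # shift along the first axis to create a preferred direction
raw[1:10,]=matrix(rnorm(10*p),10,p)     # replace 10 rows by unshifted noise (outliers after normalization)
x3=normalize_rows(raw)
range(rowSums(x3^2))                    # check: all row norms numerically equal to 1
mle_vmf(x3)
type1_vmf(x3,beta=0.3)
type0_vmf(x3,gamma=0.3)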