# Implementation of APF algorithm and kernel smoothing for parameter estimation
# By Winston Chong and Nikita Reymer

library(mvtnorm)
T <- 150 # Length of time series to consider (note: this masks base R's T, the alias for TRUE)
y <- as.vector( as.matrix( read.table('returns.txt') ) )[1:T] # returns time series
hmp <- as.vector( as.matrix( read.table('hmp.txt') ) )[1:T] # Hidden Markov process
y <- (y-mean(y)) # Demean the returns

# Number of particles
N <- 1500
# Store some of the parameters for tracking as the algorithm progresses (states, transition probabilities, weights)
s_stored <- p11_stored <- p22_stored <- K_store <- weight_stored <- rep(0,T)
sigma2_t <- matrix(0,nrow=N,ncol=T) # Matrix of conditional variances (row = particle, column = time)

# Initialize the model parameters by drawing N particles from their priors
w1 <- rgamma(N,1,0.5)
a1 <- rbeta(N,4,1)
b1 <- (1-a1)*rbeta(N,1,8)

w2 <- rgamma(N,1,0.5)
a2 <- rbeta(N,4,1)
b2 <- (1-a2)*rbeta(N,1,8)
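
# Sanity check (our addition): the priors enforce covariance stationarity in each
# regime by construction, since b = (1-a) times a Beta draw implies a + b < 1.
stopifnot(all(a1 + b1 < 1), all(a2 + b2 < 1))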

p11_temp <- rnorm(N,2,1)
p11 <- exp(p11_temp)/(1+exp(p11_temp))
p22_temp <- rnorm(N,2,1)
p22 <- exp(p22_temp)/(1+exp(p22_temp))
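
# Note (our addition): exp(x)/(1+exp(x)) is the inverse-logit map; base R's plogis()
# computes the same thing and is numerically stabler for large |x|:
stopifnot(isTRUE(all.equal(p11, plogis(p11_temp))), isTRUE(all.equal(p22, plogis(p22_temp))))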

sigma2_t[,1] <- w1/(1-a1-b1) # initialize at the unconditional variance of regime 1
weight_t <- rep(1/N,N) # uniform initial weights
s_t <- rep(1,N) # start every particle in state 1


### DIAGNOSTICS ###
dnorm1 <- dnorm2 <- rep(0,T)
weight_D <- count_1 <- rep(0,T)
s_stored2 <- rep(0,T)
sigma_store <- rep(0,T)
param_store <- matrix(0,nrow = 8,ncol = T)
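
# (Our addition) effective sample size of the particle weights at each step
ess_store <- rep(0,T)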


##### kernel parameters
delta <- 0.75 # kernel shrinkage tuning parameter (try 0.25, 0.5, 0.75, 0.99)
b <- 1-((3*delta-1)/(2*delta))^2
a <- sqrt(1-b)
P <- 9 # P-1 = 8 static parameters per particle
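
# Note (our addition): these are the Liu & West (2001) shrinkage constants, so
# a^2 + b = 1 and the shrunk kernel mixture preserves the first two moments of
# the particle cloud.
stopifnot(abs(a^2 + b - 1) < 1e-12)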

##### initialize the particle set
theta.particles_t <- cbind(log(w1), log(a1/(1-a1)), log(b1/(1-a1-b1)), log(w2), log(a2/(1-a2)), log(b2/(1-a2-b2)), p11_temp, p22_temp ) # N by 8
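
# Round-trip check (our addition): the back-transform in step 2 of the loop should
# invert this reparameterization exactly, e.g. for w1 and b1:
stopifnot(isTRUE(all.equal(w1, exp(theta.particles_t[,1]))),
          isTRUE(all.equal(b1, (1-a1)*plogis(theta.particles_t[,3]))))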



for (t in 1:(T-1) ) {
	
	################# step 1 - calculate predictive state modes and kernel moments

	# transition probabilities from time t:
	p11_t <- exp(theta.particles_t[,7])/(1+exp(theta.particles_t[,7]))
	p22_t <- exp(theta.particles_t[,8])/(1+exp(theta.particles_t[,8]))
	
	#estimate kernel mean and variance based on the current Monte Carlo sample
	mu_t1 <- (s_t==1)*( 1+((1-p11_t) > p11_t) ) + (s_t==2)*( 2-(p22_t < (1-p22_t)) ) # mode of predicted states, N sized vector
	theta.bar_t <- t(weight_t)%*%theta.particles_t # theta.bar is P-1 sized vector
	theta.m_t <- a*theta.particles_t + (1-a)*matrix(theta.bar_t,nrow=N,ncol=P-1,byrow=TRUE) # kernel means, N by P-1
	theta.v_t <- b*cov.wt(theta.particles_t, wt=weight_t)$cov # kernel covariance, P-1 by P-1 matrix
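
	# (Our addition) With a = sqrt(1-b), the location-scale mixture
	# N(theta.m_t, theta.v_t) leaves the weighted mean theta.bar_t and the
	# weighted covariance of the particle cloud unchanged, avoiding the
	# variance inflation of naive kernel jittering.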
	

	
	
	################# step 2 - calculate auxiliary weights (to choose which particles to update)

	# reparameterizations : a* = log(a1/(1-a1)), w* = log(w1), b* = log(b1/(1-a1-b1))
	# so a1 = exp(a*)/(1+exp(a*)), w1 = exp(w*), b1 = (1-a1)*exp(b*)/(1+exp(b*))

	# parameters at the kernel means (convert back to the original scale to perform the computation)
	w1 <- exp(theta.m_t[,1])
	a1 <- exp(theta.m_t[,2])/(1+exp(theta.m_t[,2]))
	b1 <- (1-a1)*exp(theta.m_t[,3])/(1+exp(theta.m_t[,3]))

	w2 <- exp(theta.m_t[,4])
	a2 <- exp(theta.m_t[,5])/(1+exp(theta.m_t[,5]))
	b2 <- (1-a2)*exp(theta.m_t[,6])/(1+exp(theta.m_t[,6]))

	p11 <- exp(theta.m_t[,7])/(1+exp(theta.m_t[,7]))
	p22 <- exp(theta.m_t[,8])/(1+exp(theta.m_t[,8]))


	# predictive standard deviation based on kernel means of the parameters and modal state estimates
	sd.mu_t1 <- sqrt( (mu_t1==1)*(w1 + y[t]^2*a1 + sigma2_t[,t]*b1) + (mu_t1==2)*(w2 + y[t]^2*a2 + sigma2_t[,t]*b2) )
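
	# (Our addition) This is the one-step-ahead MS-GARCH(1,1) recursion,
	# sigma^2_{t+1} = w_s + a_s*y_t^2 + b_s*sigma^2_t, evaluated at the modal
	# predicted state mu_t1 for each particle.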
	
	# calculate aux weights for drawing values
	aux.weights <- weight_t*dnorm(y[t+1],sd=sd.mu_t1)
	aux.weights <- aux.weights/sum(aux.weights)
	
	#### This section stores certain parameters (densities for state 1 and 2, auxiliary and particle weights) for later diagnostics
	dnorm1[t+1] <- weight_t%*%dnorm(y[t+1],sd=sqrt(w1 + y[t]^2*a1 + sigma2_t[,t]*b1) )
	dnorm2[t+1] <- weight_t%*%dnorm(y[t+1],sd=sqrt(w2 + y[t]^2*a2 + sigma2_t[,t]*b2) )
	weight_stored[t] <-sum(aux.weights*(s_t == 1))
	####

	# stratified resampling
	cdf.weights <- cumsum(aux.weights) # cumulative weights (equivalent to aux.weights %*% upper.tri, but O(N))
	strata.unif <- seq(0,(N-1)/N,by=1/N)+runif(N,0,1/N) # one uniform draw per stratum of width 1/N
	strata.sample <- function(x) { sum(x>cdf.weights) + 1 }
	k.sample <- sapply(strata.unif,strata.sample) # sampled set of auxiliary indices
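
	# Note (our addition): findInterval(strata.unif, cdf.weights) + 1 would return
	# the same indices (ties have probability zero) without the sapply loop.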
	

	
	################# step 3 - generate new parameter values
	
	# Jitter the selected particles by drawing from the kernel around their shrunk means
	theta_t1 <- t( apply(theta.m_t[k.sample,], 1, function(m) rmvnorm(1, mean=m, sigma=theta.v_t)) ) # new parameter values, t+1, N by P-1 matrix

	# new generated parameter values
	w1_t1 <- exp(theta_t1[,1])
	a1_t1 <- exp(theta_t1[,2])/(1+exp(theta_t1[,2]))
	b1_t1 <- (1-a1_t1)*exp(theta_t1[,3])/(1+exp(theta_t1[,3]))

	w2_t1 <- exp(theta_t1[,4])
	a2_t1 <- exp(theta_t1[,5])/(1+exp(theta_t1[,5]))
	b2_t1 <- (1-a2_t1)*exp(theta_t1[,6])/(1+exp(theta_t1[,6]))

	p11_t1 <- exp(theta_t1[,7])/(1+exp(theta_t1[,7]))
	p22_t1 <- exp(theta_t1[,8])/(1+exp(theta_t1[,8]))



	
	################# step 4 - generate new state values

	test_uni <- runif(N) # one uniform draw per resampled particle
	s_t1 <- s_t[k.sample] # the selected states we need to update
	s_t1 <- ( 2-(test_uni<p11_t1) )*(s_t1==1) + ( (test_uni<p22_t1)+1 )*(s_t1==2) # new state values, t+1, vector of size N (p11_t1 and p22_t1 are already aligned with the resampled particles, so no further indexing by k.sample)

	K_store[t+1] <- sum(s_t1 == 1) # Store the number of particles in state 1 for later diagnostics

	
	
	
	################# step 5 - generate new weight values using the new particles (s_t1, theta_t1)

	sd_t1 <- sqrt( (s_t1==1)*(w1_t1 + y[t]^2*a1_t1 + sigma2_t[k.sample,t]*b1_t1) + (s_t1==2)*(w2_t1 + y[t]^2*a2_t1 + sigma2_t[k.sample,t]*b2_t1) ) # propagate the variance history of the resampled ancestors
	sigma2_t[,t+1] <- sd_t1^2 # update the conditional-variance matrix

	# evaluate new weights - ratio of likelihoods (likelihood using new particles / likelihood based on predicted particles)
	weight_t1 <- dnorm(y[t+1],sd=sd_t1)/dnorm(y[t+1],sd=sd.mu_t1[k.sample]) # N by 1 vector of new weights
	weight_t1 <- weight_t1/sum(weight_t1)
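
	# Degeneracy diagnostic (our addition): effective sample size of the new
	# weights; values far below N signal weight collapse.
	ess_store[t+1] <- 1/sum(weight_t1^2)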


	##### store the parameters		
	param_store[,t+1] <- t(weight_t1)%*%theta_t1
	weight_D[t] <- sum(weight_t1*(s_t1==1)) # use the updated weights, which are aligned with the resampled states s_t1
	s_stored[t] <- sum(weight_t1*s_t1)
	s_stored2[t] <- which.max( c( sum(weight_t1*(s_t1==1)), sum(weight_t1*(s_t1==2)) ) )
	p11_stored[t+1] <- sum(weight_t1*p11_t1)
	p22_stored[t+1] <- sum(weight_t1*p22_t1)
	#####
	
	# carry the particle set forward to time t+1
	theta.particles_t <- theta_t1
	s_t <- s_t1
	weight_t <- weight_t1
	sigma_store[t+1] <- sigma2_t[,t+1]%*%weight_t1 # weighted average conditional variance


	}

trans <- function(x) { exp(x)/(1+exp(x)) } # Define a function that transforms the parameters back to the original (0,1) scale
w <- t(exp(param_store[c(1,4),]))
a <- t(trans(param_store[c(2,5),]))
b <- (1-a)*t(trans(param_store[c(3,6),]))
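
# (Our addition) Implied unconditional variance per regime, w/(1-a-b), the same
# formula used to initialize sigma2_t; only meaningful at times where the
# weighted parameter means satisfy a + b < 1.
uncond_var <- w/(1-a-b)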

# generate the plots for parameter evolution through time
jpeg('parameter_evolution.jpeg',height=600, width=900)
par(mfrow=c(2,3))
plot(w[,1],type='l',xlab='time',ylab='w1')
abline(h=0.07,lty=2)
abline(v=63,lty=3)
abline(v=94,lty=3)
plot(a[,1],type='l',xlab='time',ylab='a1',ylim=c(0,0.9))
abline(h=0.04,lty=2)
abline(v=63,lty=3)
abline(v=94,lty=3)
plot(b[,1],type='l',xlab='time',ylab='b1')
abline(h=0.03,lty=2)
abline(v=63,lty=3)
abline(v=94,lty=3)
plot(w[,2],type='l',xlab='time',ylab='w2')
abline(h=7.75,lty=2)
abline(v=63,lty=3)
abline(v=94,lty=3)
plot(a[,2],type='l',xlab='time',ylab='a2',ylim=c(0,1))
abline(h=0.21,lty=2)
abline(v=63,lty=3)
abline(v=94,lty=3)
plot(b[,2],type='l',xlab='time',ylab='b2',ylim=c(0,0.8))
abline(h=0.77,lty=2)
abline(v=63,lty=3)
abline(v=94,lty=3)
dev.off()

# generate plots for weight evolution through time
jpeg('weights.jpeg',height= 600, width= 600)
par(mfrow=c(2,1))
matplot(cbind(1-weight_stored,hmp[1:T]),type='s',xlab='time', main='Auxiliary Weights for State 2',ylab='Weight and Observed State')
matplot(cbind(1-weight_D,hmp[1:T]),type='s',main='Total Weight for State 2',ylab='Weight and Observed State')
dev.off()

# Generate plots for states evolution through time
jpeg('states.jpeg',height=600,width=600)
par(mfrow=c(2,1))
matplot(cbind(s_stored,hmp[1:T]),type='s',main='Mean of States',ylab='State',xlab='time')
matplot(cbind(s_stored2,hmp[1:T]),type='s',main='Mode of States',ylab='State',xlab='time')
dev.off()

# plot marginal likelihood densities through time 
jpeg('densities.jpeg',height=400,width=800)
matplot(cbind(dnorm1,dnorm2),type='l',main='Likelihoods',ylab='Density',xlab='time')
dev.off()
