
# 1. DPMM Implementation matching 
dpm <- function(y, prior, mcmc, standardise = FALSE) {
  #helper function multinom() for efficient multinomial sampling
  multinom <- function(probs) {
    probs <- t(apply(probs, 1, cumsum))
    res <- rowSums(probs - runif(nrow(probs)) < 0) + 1
    return(res)
  }
  
  n <- length(y)
  yt <- y
  if(standardise) {
    yt <- (y-mean(y))/sd(y)
  }
  #Prior Specification
  m0 <- prior$m0       # Prior mean for cluster means
  S0 <- prior$S0       # Prior variance for cluster means
  a <- prior$a         # Gamma shape for precision
  b <- prior$b         # Gamma rate for precision
  alpha <- prior$alpha # DP concentration parameter
  L <- prior$L         # Maximum number of clusters
  #MCMC Setup
  nburn <- mcmc$nburn  # Burn-in iterations
  nsave <- mcmc$nsave  # Saved iterations
  nskip <- mcmc$nskip  # Thinning interval
  nsim <- nsave*nskip + nburn  # Total iterations
  #Initialization
  p <- ns <- rep(0, L) # Cluster weights and counts
  v <- rep(1/L, L)     # Stick-breaking weights
  v[L] <- 1            # Last weight is 1 
  
  prop <- matrix(NA, nrow = n, ncol = L)
  z <- matrix(NA, nrow = nsim, ncol = n) # Cluster assignments
  z_tmp <- vector(length = n)
  
  z[1,] <- rep(1, n)   # Start with all in 1st cluster
  P <- Mu <- Sigma2 <- matrix(NA, nrow = nsim, ncol = L)
  
  Mu[1,] <- rep(mean(yt), L)     # Initial cluster means
  Sigma2[1,] <- rep(var(yt), L)  # Initial cluster variances
  #MCMC Main Loop
  for(i in 2:nsim) {
    Sigma2_tmp <- Sigma2[i-1,]
    #Stick-Breaking Weights Update
    cumv <- cumprod(1-v)        # Cumulative products for stick-breaking
    p[1] <- v[1]                # First weight
    p[2:L] <- v[2:L]*cumv[1:(L-1)] # Remaining weights
    #Cluster Assignment Probabilities
    for(l in 1:L) {
      prop[,l] <- p[l]*dnorm(yt, mean = Mu[i-1,l], sd = sqrt(Sigma2_tmp[l]))
    }
    prob <- prop/rowSums(prop) # Normalize probabilities
    #Cluster Assignment 
    z_tmp <- multinom(prob)     # Sample cluster assignments
    ns <- sapply(1:L, function(x) sum(z_tmp == x)) # Cluster sizes
    yt_z_l <- sapply(1:L, function(x) sum(yt[z_tmp == x])) # Cluster sums
    #Stick-Breaking Variables Update
    v[1:(L-1)] <- rbeta(L-1, 1 + ns[1:(L-1)], alpha + rev(cumsum(rev(ns[-1]))))
    #Cluster Means Update
    varmu <- 1/((1/S0) + (ns/Sigma2_tmp))
    meanmu <- ((yt_z_l/Sigma2_tmp) + (m0/S0))/((1/S0) + (ns/Sigma2_tmp))
    Mu_tmp <- rnorm(L, mean = meanmu, sd = sqrt(varmu))
    #Cluster Variances Update
    yt_z_l_mu <- sapply(1:L, function(l) sum((yt[z_tmp == l] - Mu_tmp[l])^2))
    Sigma2_tmp <- 1/rgamma(L, a + ns/2, b + 0.5*yt_z_l_mu)
    #Store Current State
    P[i,] <- p         # Store weights
    z[i,] <- z_tmp     # Store assignments
    Mu[i,] <- Mu_tmp   # Store means
    Sigma2[i,] <- Sigma2_tmp # Store variances
  }
  #Post-Processing
  if(standardise) {
    Mu <- sd(y)*Mu + mean(y)      # Back-transform means
    Sigma2 <- var(y)*Sigma2       # Back-transform variances
  }
  
  keep <- seq(nburn+1, nsim, by = nskip) # Indices to keep
  list(
    P = P[keep,],     # Posterior weights
    Mu = Mu[keep,],   # Posterior means
    Sigma2 = Sigma2[keep,], # Posterior variances
    z = z[keep,]      # Posterior assignments
  )
}

# 2. ROC Calculation 
compute_roc_dpm <- function(fit_x, fit_y, p = seq(0, 1, length.out = 101)) {
  n_samples <- nrow(fit_x$Mu)  # Number of posterior samples
  L_x <- ncol(fit_x$Mu)       # Number of components in healthy population
  L_y <- ncol(fit_y$Mu)       # Number of components in diseased population
  roc <- matrix(NA, nrow = n_samples, ncol = length(p))  # Storage for ROC curves
  
  for(s in 1:n_samples) {
    #2. Four Cases of Mixture Combinations
    #Case 1: Both populations are single-component (L_x = 1 & L_y = 1)
    if(L_x == 1 & L_y == 1) {
      a <- (fit_x$Mu[s] - fit_y$Mu[s])/sqrt(fit_y$Sigma2[s])
      b <- sqrt(fit_x$Sigma2[s])/sqrt(fit_y$Sigma2[s])
      roc[s,] <- 1 - pnorm(a + b*qnorm(1-p))
    } 
    #Case 2: Healthy is single-component, diseased is mixture (L_x = 1 & L_y > 1)
    else if(L_x == 1 & L_y > 1) {
      aux1 <- norMix(mu = fit_y$Mu[s,], sigma = sqrt(fit_y$Sigma2[s,]), w = fit_y$P[s,])
      q0 <- qnorm(1-p, mean = fit_x$Mu[s], sd = sqrt(fit_x$Sigma2[s]))
      roc[s,] <- 1 - pnorMix(q0, aux1)
    }
    #Case 3: Healthy is mixture, diseased is single-component (L_x > 1 & L_y = 1)
    else if(L_x > 1 & L_y == 1) {
      aux0 <- norMix(mu = fit_x$Mu[s,], sigma = sqrt(fit_x$Sigma2[s,]), w = fit_x$P[s,])
      q0 <- qnorMix(1-p, aux0)
      roc[s,] <- 1 - pnorm(q0, mean = fit_y$Mu[s], sd = sqrt(fit_y$Sigma2[s]))
    }
    #Case 4: Both populations are mixtures (L_x > 1 & L_y > 1)
    else {
      aux0 <- norMix(mu = fit_x$Mu[s,], sigma = sqrt(fit_x$Sigma2[s,]), w = fit_x$P[s,])
      aux1 <- norMix(mu = fit_y$Mu[s,], sigma = sqrt(fit_y$Sigma2[s,]), w = fit_y$P[s,])
      q0 <- qnorMix(1-p, aux0)
      roc[s,] <- 1 - pnorMix(q0, aux1)
    }
  }
  roc
}

# 3. AUC Calculation 
compute_auc_dpm <- function(fit_x, fit_y) {
  n_samples <- nrow(fit_x$Mu)  # Number of posterior samples
  L_x <- ncol(fit_x$Mu)       # Number of healthy components
  L_y <- ncol(fit_y$Mu)       # Number of diseased components
  auc <- numeric(n_samples)   # Storage for AUC values
  
  for(s in 1:n_samples) {
    #Four Calculation Cases:
    #Case 1: Both Single Components (L_x=1, L_y=1)
    if(L_x == 1 & L_y == 1) {
      a <- (fit_x$Mu[s] - fit_y$Mu[s])/sqrt(fit_y$Sigma2[s])
      b <- sqrt(fit_x$Sigma2[s])/sqrt(fit_y$Sigma2[s])
      auc[s] <- 1 - pnorm(a/sqrt(1 + b^2))
    }
    #Case 2: Healthy Single, Diseased Mixture (L_x=1, L_y>1)
    else if(L_x == 1 & L_y > 1) {
      a <- outer(fit_y$Mu[s,], fit_x$Mu[s], "-")/sqrt(fit_y$Sigma2[s,])
      b <- outer(1/sqrt(fit_y$Sigma2[s,]), sqrt(fit_x$Sigma2[s]), "*")
      auc_aux <- pnorm(a/sqrt(1 + b^2))
      auc[s] <- sum(auc_aux * fit_y$P[s,])
    }
    #Case 3: Healthy Mixture, Diseased Single (L_x>1, L_y=1)
    else if(L_x > 1 & L_y == 1) {
      a <- outer(fit_y$Mu[s], fit_x$Mu[s,], "-")/sqrt(fit_y$Sigma2[s])
      b <- outer(1/sqrt(fit_y$Sigma2[s]), sqrt(fit_x$Sigma2[s,]), "*")
      auc_aux <- pnorm(a/sqrt(1 + b^2))
      auc[s] <- sum(auc_aux * fit_x$P[s,])
    }
    #Case 4: Both Mixtures (L_x>1, L_y>1)
    else {
      a <- outer(fit_y$Mu[s,], fit_x$Mu[s,], "-")/sqrt(fit_y$Sigma2[s,])
      b <- outer(1/sqrt(fit_y$Sigma2[s,]), sqrt(fit_x$Sigma2[s,]), "*")
      auc_aux <- pnorm(a/sqrt(1 + b^2))
      weights <- outer(fit_y$P[s,], fit_x$P[s,], "*")
      auc[s] <- sum(auc_aux * weights)
    }
  }
  auc
}

credibleble_interval_dpm <- function(AUC_k, alpha=0.05){
  lower_bound <- quantile(AUC_k, probs = alpha / 2)
  upper_bound <- quantile(AUC_k, probs = 1 - alpha / 2)
  return(c(lower_bound, upper_bound))
}

