# Cost and gradients for the constant
JOrdLogBiplotd <- function(par, C) {# Cost to estimate d
  I=dim(C)[1]
  L=dim(C)[2]
  A=matrix(1,nrow=I, ncol=1)
  H=sigmoide(A%*%t(par))
  J=sum(-1*C*log(H)-(1-C)*log(1-H), na.rm = TRUE)/2
  return(J)
}

grOrdLogBiplotd <- function(par, C) { ## Gradient to estimate B
  I=dim(C)[1]
  L=dim(C)[2]
  E=matrix(1,nrow=I, ncol=1)
  H=sigmoide(E%*%t(par))
  grad=t(H-C)%*%E
  return(grad)
}

# Coste general
JOrdLogBiplot <- function(C, d, A, B , lambda) {
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  cats=L/J
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  H2=A %*% t(B)
  H=sigmoide(H0+kronecker(H2,matrix(rep(1,cats), nrow=1)))
  J=sum(-1*C*log(H)-(1-C)*log(1-H), na.rm = TRUE)  + lambda*sum(B^2, na.rm = TRUE)+lambda*sum(A^2, na.rm = TRUE)
  return(J)
}

ExpectedOrdinalBiplot <- function(X, C, d, A, B) {
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  ncats=apply(X,2,max)
  
  
  
  d=matrix(d, ncol = 1)
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  P0=sigmoide(H0)
  H2=A %*% t(B)
  H1=NULL
  for (j in 1:J)
    H1=cbind(H1, kronecker(matrix(H2[,j]),matrix(rep(1,ncats[j]-1), nrow=1)))
  H=sigmoide(H0+H1)
  
  PredC=(H>=0.5)==C
  PercentC=100*apply(PredC, 2, sum)/I
  TotPercent=PercentC=100*sum(PredC)/(I*L)
  
  Z=matrix(0, L, J) 
  ncatscum=cumsum(ncats-1)
  Z[1:ncatscum[1], 1]=1
  for (j in 2:J){
    Z[(ncatscum[j-1]+1):ncatscum[j], j]=1
  }
  ACIER=PredC %*% Z
  
  PCCCumi=100*apply(ACIER, 2, sum)/(ncats*I)
  PCCCum=100*sum(ACIER)/(I*L)
  
  PEsp=NULL
  sumc=cumsum(ncats-1)
  Exp=matrix(0,I,J)
  HH=cbind(H[,1:sumc[1]], rep(1,I))
  HH2=matrix(0, nrow=I, ncol=ncats[1])
  HH2[,1]=HH[,1]
  
  for (k in 2:(ncats[1]))
    HH2[,k]=HH[,k]-HH[,(k-1)]
  
  PEsp=cbind(PEsp, HH2)
  Exp[,1]=apply(HH2,1,which.max)
  
  for (j in 2:J){
    HH=cbind(H[,(sumc[j-1]+1):sumc[j]], rep(1,I))
    HH2=matrix(0, nrow=I, ncol=ncats[1])
    HH2[,1]=HH[,1]
    
    for (k in 2:(ncats[j]))
      HH2[,k]=HH[,k]-HH[,(k-1)]
    PEsp=cbind(PEsp, HH2)
    Exp[,j]=apply(HH2,1,which.max)
  }
  
  PCC=100*sum(X==Exp)/(I*J)
  PCCi=100*apply(X==Exp, 2, sum)/I
  
  
  par = list()
  par$Expected=Exp
  par$Ncats=ncats
  par$PCCCum=PCCCum
  par$PCC=PCC
  
  
  Kappas=rep(0, J)
  for (j in 1:J){
    tabla=table(X[,j], Exp[,j])
    if (dim(tabla)[1]==dim(tabla)[2])
    Kappas[j]=Kappa(tabla)$Weighted[1]
    else
      Kappas[j]= NA
  }
  

  KappaGlobal=NA
  
  par$fit = array(0, c(J, 11))
  dimnames(par$fit)[[1]] = dimnames(X)[[2]]
  dimnames(par$fit)[[2]] = c("logLik", "Deviance", "df", "p-value", 
                             "PCC(Cum)", "CoxSnell", "Macfaden", "Nagelkerke", "NullDeviance", "PCC", "Kappa")
  par$coefficients = B
  
  # Z=kronecker(diag(J), matrix(rep(1,cats),ncol=1))
  d2=-2*apply(C*log(H)+(1-C)*log(1-H), 2, sum, na.rm = TRUE)%*%Z
  d1 = -2 * apply(C * log(P0) + (1 - C) * log(1 - P0),2,sum)%*%Z
  dev = d1 - d2

  par$fit[, 1] = d2
  par$fit[, 2] = dev
  par$fit[, 3] = rep(r,J)
  par$fit[, 4] = 1 - pchisq(dev, df = J)
  par$fit[, 5] = PCCCumi
  par$fit[, 6] = 1 - exp(-1 * dev/I)
  par$fit[, 7] = 1 - (d2/d1)
  par$fit[, 8] =  (1 - exp(-1 * dev/I))/(1 - exp((d1/(-2)))^(2/I))
  par$fit[, 9] = d1
  par$fit[, 10] = PCCi
  par$fit[, 11] = Kappas
  
  
  logLik=sum(d2)
  DevianceNull=sum(par$fit[,9])
  Deviance=sum(par$fit[,1])
  df=sum(par$fit[,3])
  Dif = DevianceNull - Deviance
  pval = 1 - pchisq(Dif, df = df)
  CoxSnell = 1 - exp(-1 * Dif/(I))
  Nagelkerke = CoxSnell/(1 - exp((DevianceNull/(-2)))^(2/(I)))
  MacFaden = 1 - (Deviance/DevianceNull)
  
  par$LogLikelihood = par$logLik
  
  par$AIC=-2*logLik + 2 * df
  par$BIC=-2*logLik + df * log(I*L)
  
  global=c(logLik, Dif, df, pval, PCCCum, NA, NA, NA, DevianceNull, PCC, KappaGlobal)
  
  par$fit=rbind(par$fit, global)
  return(par)
}




# Cost and gradients for the alternated algoritms
# Cost to estimate B
JOrdLogBiplotB <- function(par, C, d, A, B , lambda) {
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  cats=L/J
  B[, r]=par
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  H2=A %*% t(B)
  H=sigmoide(H0+kronecker(H2,matrix(rep(1,cats), nrow=1)))
  J=sum(-1*C*log(H)-(1-C)*log(1-H), na.rm = TRUE)  + lambda*sum(B^2, na.rm = TRUE)+lambda*sum(A^2, na.rm = TRUE)
  return(J)
}

# Cost and gradients for the alternated algoritms
# Cost to estimate A




JOrdLogBiplotA <- function(par, C, d, A, B , lambda) {
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  cats=L/J
  A[, r]=par
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  H2=A %*% t(B)
  H=sigmoide(H0+kronecker(H2,matrix(rep(1,cats), nrow=1)))
  J=sum(-1*C*log(H)-(1-C)*log(1-H), na.rm = TRUE)  + lambda*sum(B^2, na.rm = TRUE)+lambda*sum(A^2, na.rm = TRUE)
  return(J)
}


JOrdLogBiplotAdifcats <- function(par, C, d, A, B , ncats, lambda) {
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  A[, r]=par
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  H2=A %*% t(B)
  H1=NULL
  for (j in 1:J)
    H1=cbind(H1, kronecker(matrix(H2[,j]),matrix(rep(1,ncats[j]-1), nrow=1)))
  H=sigmoide(H0+H1)
  J=sum(-1*C*log(H)-(1-C)*log(1-H), na.rm = TRUE) +lambda*sum(A^2, na.rm = TRUE)
  return(J)
}


grOrdLogBiplotA <- function(par, C, d, A, B, lambda ) { ## Gradient to estimate A
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  cats=L/J
  A[,r]=par
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  H2=A %*% t(B)
  H=sigmoide(H0+kronecker(H2,matrix(rep(1,cats), nrow=1)))
  Z=kronecker(diag(J), matrix(rep(1,cats),ncol=1))
  R = (H-C)%*%Z 
  grad=R %*%  matrix(B[,r], ncol=1) + lambda*par
  return(grad)
}

grOrdLogBiplotAdifcats <- function(par, C, d, A, B, ncats, lambda ) { ## Gradient to estimate A
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  A[,r]=par
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  H2=A %*% t(B)
  H1=NULL
  for (j in 1:J)
    H1=cbind(H1, kronecker(matrix(H2[,j]),matrix(rep(1,ncats[j]-1), nrow=1)))
  H=sigmoide(H0+H1)
  Z=matrix(0,sum(ncats-1),J)
  sumc=cumsum(ncats-1)
  Z[1:sumc[1], 1]=1
  for (j in 2:J)
    Z[(sumc[j-1]+1):sumc[j], j]=1
  #Z=kronecker(diag(J), matrix(rep(1,ncats[j]),ncol=1))
  R = (H-C)%*%Z 
  grad=R %*%  matrix(B[,r], ncol=1) + lambda*par
  return(grad)
}




grOrdLogBiplotB <- function(par, C, d, A, B, lambda) { ## Gradient to estimate B
  I=dim(C)[1]
  L=dim(C)[2]
  J=dim(B)[1]
  r=dim(B)[2]
  cats=L/J
  B[, r]=par
  E=matrix(1,nrow=I, ncol=1)
  H0=E%*%t(d)
  H2=A %*% t(B)
  H=sigmoide(H0+kronecker(H2,matrix(rep(1,cats), nrow=1)))
  Z=kronecker(diag(J), matrix(rep(1,cats),ncol=1))
  R = (H-C)%*%Z 
  grad=t(R) %*%  matrix(A[,r], ncol=1) +lambda*par
  return(grad)
}

