# contains the functions: stat_raw, stat_paired, stat_mcr, stat_tcs, stat_ecs.

#'
#' Unmatched callback rates
#'
#'@description
#' Number and proportion of callbacks for all the candidates.
#'
#' @param x A \code{callback} object.
#' @param method estimation method, "cp" for Clopper-Pearson, "wilson" for Wilson
#' and "student" for Student (the default).
#' @param level A number, containing the level of the confidence intervals (0.95
#' by default).
#'
#' @return
#' A \code{callback_stat} object with 2 components: specif and props.
#' 
#' \code{specif}: A list containing 
#' \itemize{
#'   \item\bold{convention:} string "raw callback rates". 
#'   \item\bold{cid:} convention on 3 characters.
#'   \item\bold{method:} the estimation method, "cp" for Clopper-Pearson, 
#'   "wilson" for Wilson and "student" for "Student" (the default).
#'   \item\bold{level:} the level of the confidence intervals (0.95 by default).
#'   \item\bold{source:} the R stats function used to compute the statistics 
#'   (binom.test, prop.test or t.test).
#'   \item\bold{name:} the name of the statistic (Fisher, Pearson or Student).
#' }
#' \code{props}: a data frame containing the following variables.
#' \itemize{
#' \item\bold{tests: }number of tests
#' \item\bold{callback: }number of callbacks
#' \item\bold{inf_p_callback: }callback rate lower bound
#' \item\bold{p_callback: }callback rate
#' \item\bold{sup_p_callback: }callback rate upper bound
#' }
#'
#' @author Emmanuel Duguet
#'
#' @references
#' Clopper, C. J. & Pearson, E. S. (1934). The use of confidence or fiducial
#' limits illustrated in the case of the binomial. Biometrika, 26, 404–413.
#' doi:10.2307/2331986.
#'
#' Wilson, E.B. (1927). Probable inference, the law of succession, and
#' statistical inference. Journal of the American Statistical Association, 22,
#' 209–212. doi:10.2307/2276774.
#'
#' @examples
#' data(labour2)
#' x <- callback(data=labour2,cluster="offer",candid="hist",callback="callback")
#' str(stat_raw(x))
#'
#' @importFrom stats aggregate binom.test prop.test t.test
#'
#' @export

stat_raw <- function(x, method="student", level = 0.95) {
  method <- tolower(method)
  candid <- NULL
  if (level <= 0 | level >= 1) {
    level = 0.95
  }
  if (!(method %in% c("cp","wilson"))) method <- "student"
  
  specif <- data.frame(convention="raw callback rates",method=method,level=level,cid="raw")
  
  tests <- NULL
  alpha <- 1 - level
  m1 <- aggregate(data = x$fds, callback ~ candid, FUN = sum)
  m2 <- aggregate(data = x$fds, callback ~ candid, FUN = length)
  m <- merge(m1, m2, by = "candid")
  colnames(m)[2:3] <- c("callback", "tests")
  rownames(m) <- m$candid
  m <- m[, 2:3]
  m$p_callback <- m$callback / m$tests
  
  l_cand <- rownames(m)
  n_cand <- length(l_cand)
  s0 <- st <- NULL
  
  # Clopper-Pearson
  if (method=="cp"){
    specif$source <- "binom.test"
    specif$name <-"Fisher exact test"
    specif$confid <- "Clopper-Pearson"
  for (cand in l_cand) {
    s0 <- binom.test(m[cand, "callback"], m[cand, "tests"], conf.level = level)["conf.int"]
    st[[cand]] <- s0[[1]]
  }
  stats <- st[[1]]
  if (n_cand >= 2) {
    for (i in 2:n_cand) {
      stats <- rbind(stats, st[[i]])
    }
    stats <- as.data.frame(stats)
  } else {
    stats <- as.data.frame(t(stats))
  }
  rownames(stats) <- l_cand
  }# end of "cp"
  
  #Wilson
  if (method=="wilson"){
    specif$source <- "prop.test"
    specif$name <- "Pearson test"
    specif$confid <- "Wilson"
  for (cand in l_cand) {
    s0 <- prop.test(m[cand, "callback"], m[cand, "tests"], conf.level = level)["conf.int"]
    st[[cand]] <- s0[[1]]
  }
  stats <- st[[1]]
  if (n_cand >= 2) {
    for (i in 2:n_cand) {
      stats <- rbind(stats, st[[i]])
    }
    stats <- as.data.frame(stats)
  } else {
    stats <- as.data.frame(t(stats))
  }
  rownames(stats) <- l_cand
  }# end of "wilson"
  
  #Student
  if (method=="student"){
    specif$source <- "t.test"
    specif$name <-"Student test"
    specif$confid <- "Student"
  for (cand in l_cand) {
    s0 <- t.test(subset(x$fds, candid == cand)[["callback"]], conf.level = level)["conf.int"]
    st[[cand]] <- s0[[1]]
  }
  stats <- st[[1]]
  if (n_cand >= 2) {
    for (i in 2:n_cand) {
      stats <- rbind(stats, st[[i]])
    }
    stats <- as.data.frame(stats)
  } else {
    stats <- as.data.frame(t(stats))
  }
  rownames(stats) <- l_cand
  }#end of "student"
  
  props <- data.frame(tests=m$tests,
                      callback=m$callback,
                      inf_p_callback=pmax(0,stats[,1]),
                      p_callback=m$p_callback,
                      sup_p_callback=pmin(1,stats[,2]),
                      row.names = l_cand)
  z <- list(
    specif=specif,
    props=props
  )
  class(z) <- "callback_stat"
  return(z)
}

#'
#' Callback counts on paired data
#'
#'@description
#' Computes the callback count statistics from the paired data sets.
#'
#' @param x A \code{callback} object.
#'
#' @return
#' A list with class \code{"stat_paired"} containing two data frames: counts and
#' props.
#'
#' \code{counts}: a data frame with the callback counts.
#' \itemize{
#' \item\bold{tests:} number of tests.
#' \item\bold{callback:} number of tests with at least one callback for either
#' candidate.
#' \item\bold{callback1:} number of callbacks for candidate 1.
#' \item\bold{callback2:} number of callbacks for candidate 2.
#' \item\bold{calldif:} difference in callback numbers.
#' \item\bold{disc:} number of discrimination cases.
#' \item\bold{c00:} number of test without a callback.
#' \item\bold{c10:} number of tests with callbacks for candidate 1 only.
#' \item\bold{c01:} number of tests with callbacks for candidate 2 only.
#' \item\bold{c11:} number of tests with callbacks for both candidates.
#'  }
#'
#' \code{props}: a data frame with the following variables.
#' \itemize{
#' \item\bold{p_callback:} callback/tests.
#' \item\bold{p_cand1:} callback1/tests.
#' \item\bold{p_cand2:} callback2/tests.
#' \item\bold{p_c00:} c00/tests.
#' \item\bold{p_c10:} c10/tests.
#' \item\bold{p_c01:} c01/tests.
#' \item\bold{p_c11:} c11/tests.
#' \item\bold{p_cand_dif:} calldif/tests.
#' }
#'
#' @author Emmanuel Duguet
#'
#' @examples
#' data(labour2)
#' x <- callback(data=labour2,cluster="offer",candid="hist",callback="callback")
#' stat_paired(x)
#'
#'@export

stat_paired <- function(x) {
  c00 <- c10 <- c01 <- c11 <- NULL
  z <- t(as.data.frame(lapply(x$pfds, stat_colsums)))
  z <-
    z[, c("callback1",
          "callback2",
          "c00",
          "c10",
          "c01",
          "c11",
          "callback",
          "calldif")]
  rownames(z) <- names(x$pfds)
  z <- transform(z, tests = c00 + c10 + c01 + c11, disc=c10+c01)
  counts <-
    z[, c("tests",
          "callback",
          "disc",
          "callback1",
          "callback2",
          "c00",
          "c10",
          "c01",
          "c11",
          "calldif")]
  rownames(counts) <- names(x$pfds)
  
  props <- with(
    z,
    data.frame(
      p_callback = callback / tests,
      p_cand1 = callback1 / tests,
      p_cand2 = callback2 / tests,
      p_c00 = c00 / tests,
      p_c10 = c10 / tests,
      p_c01 = c01 / tests,
      p_c11 = c11 / tests,
      p_cand_dif = calldif / tests,
      #row.names = rownames(z)
      row.names=names(x$pfds)
    )
  )
  
  z <- list(counts = counts, props = props)
  
  class(z) <- "stat_paired"
  return (z)
}

#'
#' Matched callback rates
#'
#' @description
#' Computes the matched callback rates, their confidence intervals and performs
#' the equality tests between the candidates.
#'
#' @param x a \code{callback} object.
#' @param level the level of the confidence intervals (0.95 by default).
#' @param method estimation method, "cp" for Clopper-Pearson, "wilson" for Wilson
#' and "student" for Student (the default).
#'
#' @return
#' 
#' A list with class \code{"callback_stat"} containing 4 components: specif, 
#' counts, props and stats 
#'
#' \code{specif}: A list containing 
#' \itemize{
#'   \item\bold{convention:} string "matched callback rates".
#'   \item\bold{cid:} convention on 3 characters.
#'   \item\bold{method:} the estimation method, "cp" for Clopper-Pearson, 
#'   "wilson" for Wilson and "student" for "Student" (the default).
#'   \item\bold{level:} the level of the confidence intervals (0.95 by default).
#'   \item\bold{source:} the R stats function used to compute the statistics 
#'   (binom.test, prop.test or t.test).
#'   \item\bold{name:} the name of the statistic (Fisher, Pearson or Student).
#' }
#' 
#' \code{counts}: a data frame with the callback counts.
#' \itemize{
#' \item\bold{tests:} number of tests.
#' \item\bold{callback:} number of tests with at least one callback for either
#' candidate.
#' \item\bold{disc:} number of discrimination cases.
#' \item\bold{c00:} number of test without a callback.
#' \item\bold{c10:} number of tests with callbacks for candidate 1 only.
#' \item\bold{c01:} number of tests with callbacks for candidate 2 only.
#' \item\bold{c11:} number of tests with callbacks for both candidates.
#'  }
#'
#' \code{props}: A data frame containing the following proportions and their 
#' confidence intervals (when relevant)
#'  \itemize{
#'   \item\bold{inf_p_callback:} overall callback rate, lower bound.
#'   \item\bold{p_callback:} overall callback rate.
#'   \item\bold{sup_p_callback:} overall callback rate, upper bound.
#'   \item\bold{inf_p_cand1:} 1st candidate callback rate, lower bound.
#'   \item\bold{p_cand1:} 1st candidate callback rate.
#'   \item\bold{sup_p_cand1:} 1st candidate callback rate, upper bound.
#'   \item\bold{inf_p_cand2:} 2nd candidate callback rate, lower bound.
#'   \item\bold{p_cand2:} 2nd candidate callback rate.
#'   \item\bold{sup_p_cand2:} 2nd candidate callback rate, upper bound.
#'   \item\bold{inf_cand_dif:} p_cand1-p_cand2, lower bound.
#'   \item\bold{p_cand_dif:} callback proportion difference between the candidates.
#'   \item\bold{sup_cand_dif:} p_cand1-p_cand2, upper bound.
#'  }
#'
#' \code{stats}: a data frame containing the statistics for testing the equality 
#' of proportions.
#'  \itemize{
#'   \item\bold{statistic:} the value of the test statistic.
#'   \item\bold{p_stat:} the p-value of the test statistic.
#'   \item\bold{c_stat:} the significance code of the test statistic.
#'    }
#'
#' @author Emmanuel Duguet
#'
#' @references
#' Clopper, C. J. & Pearson, E. S. (1934). The use of confidence or fiducial
#' limits illustrated in the case of the binomial. Biometrika, 26, 404–413.
#' doi:10.2307/2331986.
#'
#' Student. (1908). The Probable Error of a Mean. Biometrika, 6(1), 1–25. 
#' doi:10.2307/2331554.
#'
#' Wilson, E.B. (1927). Probable inference, the law of succession, and
#' statistical inference. Journal of the American Statistical Association, 22,
#' 209–212. doi:10.2307/2276774.
#'
#' @examples
#' data(labour1)
#' x <- callback(data=labour1,cluster="offer",candid="hist",callback="callback")
#' str(stat_mcr(x))
#'
#' @importFrom stats binom.test prop.test t.test fisher.test
#'
#' @export

stat_mcr <- function(x, method="student", level = 0.95) {
  method <- tolower(method)
  inf_p_callback <- inf_p_cand1 <- inf_p_cand2 <- NULL
  sup_p_callback <- sup_p_cand1 <- sup_p_cand2 <- NULL
  p_cand_dif <- statistic <- p_stat <- n_stat <- c_stat <- NULL
  # level check
  if (level <= 0 | level >= 1) {
    level = 0.95
  }
  alpha <- 1 - level
  # common statistics
  counts <- stat_paired(x)$counts
  out_prop <- with(
    counts,
    data.frame(
      tests = tests,
      p_callback = callback / tests,
      p_cand1 = callback1 / tests,
      p_cand2 = callback2 / tests,
      p_cand_dif = (callback1 - callback2) / tests,
      row.names = rownames(counts)
    )
  )
  #method check
  if (!(method %in% c("cp","wilson"))) method <- "student"
  
  specif <- data.frame(convention="matched callback rates",method=method,level=level,cid="mcr")
  
  # Clopper-Pearson/Fisher exact test
  if (method == "cp"){
    specif$source <- "binom.test"
    specif$name <-"Fisher exact test"
    specif$confid <- "Clopper-Pearson"
    l_comp <- rownames(counts)
    n_comp <- length(l_comp)
    ct <- c0 <- c1 <- c2 <- c3 <- list()
    for (comp in l_comp) {
      c0 <- binom.test(counts[comp, "callback"], counts[comp, "tests"], conf.level =
                         level)["conf.int"]
      c1 <- binom.test(counts[comp, "callback1"], counts[comp, "tests"], conf.level =
                         level)["conf.int"]
      c2 <- binom.test(counts[comp, "callback2"], counts[comp, "tests"], conf.level =
                         level)["conf.int"]
      #contingency table in matrix form
      m <- matrix(
        c(counts[comp, "tests"] - counts[comp, "callback1"], counts[comp, "callback1"], counts[comp, "tests"] -
            counts[comp, "callback2"], counts[comp, "callback2"]),
        nrow = 2,
        ncol = 2,
        byrow = TRUE
      )
      c3 <- fisher.test(m)["p.value"]
      
      ct[[comp]] <- c(c0[[1]], c1[[1]], c2[[1]], c3[[1]])
    }
    
    cp <- ct[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        cp <- rbind(cp, ct[[i]])
      }
      cp <- as.data.frame(cp)
    } else {
      cp <- as.data.frame(t(cp))
    }
    rownames(cp) <- l_comp
    colnames(cp) <- c(
      "inf_p_callback",
      "sup_p_callback",
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "statistic"
    )
    
    props <- data.frame(tests=out_prop$tests,
                        inf_p_callback=cp$inf_p_callback,
                        p_callback=out_prop$p_callback,
                        sup_p_callback=cp$sup_p_callback,
                        inf_p_cand1=cp$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=cp$sup_p_cand1,
                        inf_p_cand2=cp$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=cp$sup_p_cand2,
                        p_cand_dif=out_prop$p_cand_dif
    )
    rownames(props) <- l_comp
    stats <- data.frame(statistic=cp$statistic,
                        p_stat=cp$statistic,
                        c_stat=stat_signif(cp$statistic)
    )
    rownames(stats) <- l_comp
  }#end of method="cp" 
  
  # Student
  if (method == "student"){
    specif$source <- "t.test"
    specif$name <-"Student test"
    specif$confid <- "Student"
    pfds <- x$pfds
    l_comp <- names(pfds)
    n_comp <- length(l_comp)
    st <- s0 <- s1 <- s2 <- s3 <- list()
    for (comp in l_comp) {
      s0 <- (t.test(pfds[[comp]][, "callback"], conf.level = level)["conf.int"])
      s1 <- (t.test(pfds[[comp]][, "callback1"], conf.level = level)["conf.int"])
      s2 <- (t.test(pfds[[comp]][, "callback2"], conf.level = level)["conf.int"])
      s3 <- (t.test(pfds[[comp]][, "calldif"], conf.level = level)["conf.int"])
      s4 <- (t.test(pfds[[comp]][, "calldif"], conf.level = level)["statistic"])
      s5 <- (t.test(pfds[[comp]][, "calldif"], conf.level = level)["p.value"])
      st[[comp]] <- c(s0[[1]], s1[[1]], s2[[1]], s3[[1]], s4[[1]], s5[[1]])
    }
    student <- st[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        student <- rbind(student, st[[i]])
      }
      student <- as.data.frame(student)
    } else {
      student <- as.data.frame(t(student))
    }
    rownames(student) <- l_comp
    colnames(student) <- c(
      "inf_p_callback",
      "sup_p_callback",
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "inf_cand_dif",
      "sup_cand_dif",
      "statistic",
      "p_stat"
    )
    student <- transform(
      student,
      inf_p_callback = pmax(0, inf_p_callback),
      inf_p_cand1 = pmax(0, inf_p_cand1),
      inf_p_cand2 = pmax(0, inf_p_cand2),
      sup_p_callback = pmin(1, sup_p_callback),
      sup_p_cand1 = pmin(1, sup_p_cand1),
      sup_p_cand2 = pmin(1, sup_p_cand2)
    )
    
    props <- data.frame(tests=out_prop$tests,
                        inf_p_callback=student$inf_p_callback,
                        p_callback=out_prop$p_callback,
                        sup_p_callback=student$sup_p_callback,
                        inf_p_cand1=student$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=student$sup_p_cand1,
                        inf_p_cand2=student$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=student$sup_p_cand2,
                        inf_cand_dif=student$inf_cand_dif,
                        p_cand_dif=out_prop$p_cand_dif,
                        sup_cand_dif=student$sup_cand_dif
    ) 
    rownames(props) <- l_comp
    
    stats <- data.frame(statistic=student$statistic,
                        p_stat=student$p_stat,
                        c_stat=stat_signif(student$p_stat)
    )
    rownames(stats) <- l_comp
  }# end of method="student" 
  
  # Wilson/Pearson
  if (method == "wilson"){
    specif$source <- "prop.test"
    specif$name <-"Pearson test"
    specif$confid <- "Wilson"
    l_comp <- rownames(counts)
    n_comp <- length(l_comp)
    wt <- w0 <- w1 <- w2 <- w3 <- list()
    for (comp in l_comp) {
      w0 <- prop.test(counts[comp, "callback"], counts[comp, "tests"], conf.level =
                        level)["conf.int"]
      w1 <- prop.test(counts[comp, "callback1"], counts[comp, "tests"], conf.level =
                        level)["conf.int"]
      w2 <- prop.test(counts[comp, "callback2"], counts[comp, "tests"], conf.level =
                        level)["conf.int"]
      w3 <- prop.test(c(counts[comp, "callback1"], counts[comp, "callback2"]), c(counts[comp, "tests"], counts[comp, "tests"]))["p.value"]
      w4 <- prop.test(c(counts[comp, "callback1"], counts[comp, "callback2"]), c(counts[comp, "tests"], counts[comp, "tests"]))["statistic"]
      w5 <- prop.test(c(counts[comp, "callback1"], counts[comp, "callback2"]), c(counts[comp, "tests"], counts[comp, "tests"]))["conf.int"]
      
      wt[[comp]] <- c(w0[[1]], w1[[1]], w2[[1]], w3[[1]], w4[[1]], w5[[1]])
    }
    
    wilson <- wt[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        wilson <- rbind(wilson, wt[[i]])
      }
      wilson <- as.data.frame(wilson)
    } else {
      wilson <- as.data.frame(t(wilson))
    }
    rownames(wilson) <- l_comp
    colnames(wilson) <- c(
      "inf_p_callback",
      "sup_p_callback",
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "p_stat",
      "statistic",
      "inf_cand_dif",
      "sup_cand_dif"
    )
    
    props <- data.frame(tests=out_prop$tests,
                        inf_p_callback=wilson$inf_p_callback,
                        p_callback=out_prop$p_callback,
                        sup_p_callback=wilson$sup_p_callback,
                        inf_p_cand1=wilson$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=wilson$sup_p_cand1,
                        inf_p_cand2=wilson$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=wilson$sup_p_cand2,
                        inf_cand_dif=wilson$inf_cand_dif,
                        p_cand_dif=out_prop$p_cand_dif,
                        sup_cand_dif=wilson$sup_cand_dif
    ) 
    rownames(props) <- l_comp
    
    stats <- data.frame(statistic=wilson$statistic,
                        p_stat=wilson$p_stat,
                        c_stat=stat_signif(wilson$p_stat)
    )
    rownames(stats) <- l_comp
  }# end of method="wilson"
  
  #output
  
  z <- list(
    specif = specif,
    props = props,
    stats = stats,
    counts = counts[,c("tests","callback","disc","c00","c10","c01","c11")]
  )
  class(z) <- "callback_stat"
  return (z)
}

#'
#' Total callback shares
#'
#' @description
#' Computes the callback shares and their confidence intervals. The analysis is
#' restricted to the tests with at least one callback. It is the definition
#' used in Riach and Rich (2006).
#'
#' @param x a \code{callback} object.
#' @param level the level of the confidence intervals (0.95 by default).
#' @param method estimation method, "cp" for Clopper-Pearson, "wilson" for Wilson
#' and "student" for Student (the default).
#' @return
#' 
#' A list with class \code{"callback_stat"} containing 4 components: specif, 
#' counts, props and stats 
#'
#' \code{specif}: A list containing 
#' \itemize{
#'   \item\bold{convention:} string "total callback shares".
#'   \item\bold{cid:} convention on 3 characters.
#'   \item\bold{method:} the estimation method, "cp" for Clopper-Pearson, 
#'   "wilson" for Wilson and "student" for "Student" (the default).
#'   \item\bold{level:} the level of the confidence intervals (0.95 by default).
#'   \item\bold{source:} the R stats function used to compute the statistics 
#'   (binom.test, prop.test or t.test).
#'   \item\bold{name:} the name of the statistic (Fisher, Pearson or Student).
#' }
#' 
#' \code{counts}: a data frame with the callback counts.
#' \itemize{
#' \item\bold{tests:} number of tests.
#' \item\bold{callback:} number of tests with at least one callback for either
#' candidate.
#' \item\bold{disc:} number of discrimination cases.
#' \item\bold{c00:} number of test without a callback.
#' \item\bold{c10:} number of tests with callbacks for candidate 1 only.
#' \item\bold{c01:} number of tests with callbacks for candidate 2 only.
#' \item\bold{c11:} number of tests with callbacks for both candidates.
#'  }
#'
#' \code{props}: A data frame containing the following proportions and their 
#' confidence intervals (when relevant)
#'  \itemize{
#'   \item\bold{inf_p_cand1:} 1st candidate total callback share, lower bound.
#'   \item\bold{p_cand1:} 1st candidate total callback share.
#'   \item\bold{sup_p_cand1:} 1st candidate total callback share, upper bound.
#'   \item\bold{inf_p_cand2:} 2nd candidate total callback share, lower bound.
#'   \item\bold{p_cand2:} 2nd candidate total callback share.
#'   \item\bold{sup_p_cand2:} 2nd candidate total callback share, upper bound.
#'   \item\bold{inf_p_equal:} equal treatment total callback share, lower bound.
#'   \item\bold{p_equal:} equal treatment total callback share.
#'   \item\bold{sup_p_equal:} equal treatment total callback share, upper bound.
#'   \item\bold{inf_cand_dif:} p_cand1-p_cand2, lower bound.
#'   \item\bold{p_cand_dif:} total callback share difference between the candidates.
#'   \item\bold{sup_cand_dif:} p_cand1-p_cand2, upper bound.
#'  }
#'
#' \code{stats}: a data frame containing the statistics for testing the equality 
#' of proportions.
#'  \itemize{
#'   \item\bold{statistic:} the value of the test statistic.
#'   \item\bold{p_stat:} the p-value of the test statistic.
#'   \item\bold{c_stat:} the significance code of the test statistic.
#'    }
#'
#' @author Emmanuel Duguet
#'
#' @references
#' Clopper, C. J. & Pearson, E. S. (1934). The use of confidence or fiducial
#' limits illustrated in the case of the binomial. Biometrika, 26, 404–413.
#' doi:10.2307/2331986.
#'
#' Riach, P. A., & Rich, J. (2006). An experimental investigation of sexual
#' discrimination in hiring in the English labor market. The BE Journal of
#' Economic Analysis & Policy, 6(2),
#'
#' Student. (1908). The Probable Error of a Mean. Biometrika, 6(1), 1–25. 
#' doi:10.2307/2331554.
#' 
#' Wilson, E.B. (1927). Probable inference, the law of succession, and
#' statistical inference. Journal of the American Statistical Association, 22,
#' 209–212. doi:10.2307/2276774.
#'
#' @examples
#' data(labour1)
#' x <- callback(data=labour1,cluster="offer",candid="hist",callback="callback")
#' str(stat_tcs(x))
#'
#' @importFrom stats binom.test prop.test fisher.test
#'
#' @export

stat_tcs <- function(x, method="student", level = 0.95) {
  method <- tolower(method)
  c10 <- c11 <- c01 <- c00 <- callback <- disc <- NULL  
  inf_p_callback <- p_callback <- sup_p_callback <- NULL
  inf_p_cand1 <- p_cand1 <- sup_p_cand1 <- NULL
  inf_p_cand2 <- p_cand2 <- sup_p_cand2 <- NULL
  inf_p_equal <- p_equal <- sup_p_equal <- NULL
  p_cand_dif <- statistic <- p_stat <- n_stat <- c_stat <- NULL
  # level check
  if (level <= 0 | level >= 1) {
    level = 0.95
  }
  alpha <- 1 - level
  # common statistics
  counts <- stat_paired(x)$counts
  
  out_prop <- with(
    counts,
    data.frame(
      callback=callback,
      c10 = c10,
      c01 = c01,
      c11 = c11,
      row.names = rownames(counts)
    )
  )
  
  out_prop <- transform(
    out_prop,
    p_cand1 = c10 / pmax(callback, 1),
    p_cand2 = c01 / pmax(callback, 1),
    p_equal = c11 / pmax(callback, 1)
  )
  
  out_prop <- transform(out_prop, 
                        p_cand_dif = ifelse(callback == 0, 0, p_cand1 - p_cand2))
  
  #method check
  if (!(method %in% c("cp","wilson"))) method <- "student"
  
  specif <- data.frame(convention="total callback shares",method=method,level=level,cid="tcs")
  
  # Clopper-Pearson/Fisher exact test
  if (method == "cp"){
    specif$source <- "binom.test"
    specif$name <-"Fisher exact test"
    specif$confid <- "Clopper-Pearson"
    l_comp <- rownames(out_prop)
    n_comp <- length(l_comp)
    ct <- c1 <- c2 <- c3 <- c4 <- list()
    for (comp in l_comp) {
      z <- subset(out_prop, callback > 0)
      c1 <- binom.test(z[comp, "c10"], z[comp, "callback"], conf.level =
                         level)["conf.int"]
      c2 <- binom.test(z[comp, "c01"], z[comp, "callback"], conf.level =
                         level)["conf.int"]
      c3 <- binom.test(z[comp, "c11"], z[comp, "callback"], conf.level =
                         level)["conf.int"]
      #contingency table in matrix form
      m <- matrix(
        c(z[comp, "callback"] - z[comp, "c10"], z[comp, "c10"], z[comp, "callback"] - z[comp, "c01"], z[comp, "c01"]),
        nrow = 2,
        ncol = 2,
        byrow = TRUE
      )
      c4 <- fisher.test(m)["p.value"]
      
      ct[[comp]] <- c(c1[[1]], c2[[1]], c3[[1]], c4[[1]])
    }
    
    cp <- ct[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        cp <- rbind(cp, ct[[i]])
      }
      cp <- as.data.frame(cp)
    } else {
      cp <- as.data.frame(t(cp))
    }
    rownames(cp) <- l_comp
    colnames(cp) <- c(
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "inf_p_equal",
      "sup_p_equal",
      "statistic"
    )
    props <- data.frame(callback=out_prop$callback,
                        inf_p_cand1=cp$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=cp$sup_p_cand1,
                        inf_p_cand2=cp$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=cp$sup_p_cand2,
                        inf_p_equal=cp$inf_p_equal,
                        p_equal=out_prop$p_equal,
                        sup_p_equal=cp$sup_p_equal,
                        p_cand_dif=out_prop$p_cand_dif
    )
    
    rownames(props) <- l_comp
    stats <- data.frame(statistic=cp$statistic,
                        p_stat=cp$statistic,
                        c_stat=stat_signif(cp$statistic)
    )
    rownames(stats) <- l_comp
  }#end of method="cp"
  
  # Student
  if (method == "student"){
    specif$source <- "t.test"
    specif$name <-"Student test"
    specif$confid <- "Student"
    pfds <- x$pfds
    l_comp <- names(pfds)
    n_comp <- length(l_comp)
    st <- s1 <- s2 <- s3 <- s4 <- list()
    for (comp in l_comp) {
      z <- subset(pfds[[comp]], c11 + c10 + c01 > 0)
      s1 <- t.test(z[, "c10"], conf.level = level)["conf.int"]
      s2 <- t.test(z[, "c01"], conf.level = level)["conf.int"]
      s3 <- t.test(z[, "c11"], conf.level = level)["conf.int"]
      s4 <-
        t.test(z[, "c10"] - z[, "c01"], conf.level = level)["p.value"]
      s5 <-
        t.test(z[, "c10"] - z[, "c01"], conf.level = level)["statistic"]
      s6 <-
        t.test(z[, "c10"] - z[, "c01"], conf.level = level)["conf.int"]
      st[[comp]] <- c(s1[[1]], s2[[1]], s3[[1]], s4[[1]], s5[[1]], s6[[1]])
    }
    student <- st[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        student <- rbind(student, st[[i]])
      }
    }
    rownames(student) <- l_comp
    colnames(student) <- c(
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "inf_p_equal",
      "sup_p_equal",
      "p_stat",
      "statistic",
      "inf_cand_dif",
      "sup_cand_dif"
    )
    student <- transform(
      student,
      inf_p_cand1 = pmax(0, inf_p_cand1),
      inf_p_cand2 = pmax(0, inf_p_cand2),
      inf_p_equal = pmax(0, inf_p_equal),
      sup_p_cand1 = pmin(1, sup_p_cand1),
      sup_p_cand2 = pmin(1, sup_p_cand2),
      sup_p_equal = pmin(1, sup_p_equal)
    )
    props <- data.frame(callback=out_prop$callback,
                        inf_p_cand1=student$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=student$sup_p_cand1,
                        inf_p_cand2=student$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=student$sup_p_cand2,
                        inf_p_equal=student$inf_p_equal,
                        p_equal=out_prop$p_equal,
                        sup_p_equal=student$sup_p_equal,
                        inf_cand_dif=student$inf_cand_dif,
                        p_cand_dif=out_prop$p_cand_dif,
                        sup_cand_dif=student$sup_cand_dif
    )
    rownames(props) <- l_comp
    
    stats <- data.frame(statistic=student$statistic,
                        p_stat=student$p_stat,
                        c_stat=stat_signif(student$p_stat)
    )
    rownames(stats) <- l_comp
  }
  
  # Wilson
  if (method == "wilson"){
    specif$source <- "prop.test"
    specif$name <-"Pearson test"
    specif$confid <- "Wilson"
    l_comp <- rownames(out_prop)
    n_comp <- length(l_comp)
    wt <- w1 <- w2 <- w3 <- w4 <- w5 <- list()
    for (comp in l_comp) {
      z <- subset(out_prop, callback > 0)
      w1 <- prop.test(z[comp, "c10"], z[comp, "callback"], conf.level =
                        level)["conf.int"]
      w2 <- prop.test(z[comp, "c01"], z[comp, "callback"], conf.level =
                        level)["conf.int"]
      w3 <- prop.test(z[comp, "c11"], z[comp, "callback"], conf.level =
                        level)["conf.int"]
      w4 <- prop.test(c(z[comp, "c10"], z[comp, "c01"], z[comp, "c11"]), c(z[comp, "callback"], z[comp, "callback"], z[comp, "callback"]))["p.value"]
      w5 <-
        prop.test(c(z[comp, "c10"], z[comp, "c01"], z[comp, "c11"]), c(z[comp, "callback"], z[comp, "callback"], z[comp, "callback"]))["statistic"]
      w6 <-
        prop.test(c(z[comp, "c10"], z[comp, "c01"]), c(z[comp, "callback"], z[comp, "callback"]))["conf.int"]
      
      wt[[comp]] <- c(w1[[1]], w2[[1]], w3[[1]], w4[[1]], w5[[1]], w6[[1]])
    }
    
    wilson <- wt[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        wilson <- rbind(wilson, wt[[i]])
      }
      wilson <- as.data.frame(wilson)
    } else {
      wilson <- as.data.frame(t(wilson))
    }
    rownames(wilson) <- l_comp
    colnames(wilson) <- c(
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "inf_p_equal",
      "sup_p_equal",
      "p_stat",
      "statistic",
      "inf_cand_dif",
      "sup_cand_dif"
    )
    
    props <- data.frame(callback=out_prop$callback,
                        inf_p_cand1=wilson$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=wilson$sup_p_cand1,
                        inf_p_cand2=wilson$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=wilson$sup_p_cand2,
                        inf_p_equal=wilson$inf_p_equal,
                        p_equal=out_prop$p_equal,
                        sup_p_equal=wilson$sup_p_equal,
                        inf_cand_dif=wilson$inf_cand_dif,
                        p_cand_dif=out_prop$p_cand_dif,
                        sup_cand_dif=wilson$sup_cand_dif
    )
    rownames(props) <- l_comp
    
    stats <- data.frame(statistic=wilson$statistic,
                        p_stat=wilson$p_stat,
                        c_stat=stat_signif(wilson$p_stat)
    )
    rownames(stats) <- l_comp
  }
  #output
  z <- list(
    specif = specif,
    props = props,
    stats = stats,
    counts = counts[,c("tests","callback","disc","c00","c10","c01","c11")]
  )
  class(z) <- "callback_stat"
  return (z)
}

#'
#' Exclusive callback shares
#'
#' @description
#' Computes the exclusive callback shares and their confidence intervals. The
#' analysis is restricted to the tests with discrimination cases.
#'
#' @param x a \code{callback} object.
#' @param level the level of the confidence intervals (0.95 by default).
#' @param method estimation method, "cp" for Clopper-Pearson, "wilson" for Wilson
#' and "student" for Student (the default).
#'
#' @return
#' 
#' A list with class \code{"callback_stat"} containing 4 components: specif, 
#' counts, props and stats 
#'
#' \code{specif}: A list containing 
#' \itemize{
#'   \item\bold{convention:} string "exclusive callback shares". 
#'   \item\bold{cid:} convention on 3 characters.
#'   \item\bold{method:} the estimation method, "cp" for Clopper-Pearson, 
#'   "wilson" for Wilson and "student" for "Student" (the default).
#'   \item\bold{level:} the level of the confidence intervals (0.95 by default).
#'   \item\bold{source:} the R stats function used to compute the statistics 
#'   (binom.test, prop.test or t.test).
#'   \item\bold{name:} the name of the statistic (Fisher, Pearson or Student).
#' }
#' 
#' \code{counts}: a data frame with the callback counts.
#' \itemize{
#' \item\bold{tests:} number of tests.
#' \item\bold{callback:} number of tests with at least one callback for either
#' candidate.
#' \item\bold{disc:} number of discrimination cases.
#' \item\bold{c00:} number of test without a callback.
#' \item\bold{c10:} number of tests with callbacks for candidate 1 only.
#' \item\bold{c01:} number of tests with callbacks for candidate 2 only.
#' \item\bold{c11:} number of tests with callbacks for both candidates.
#'  }
#'
#' \code{props:} A data frame containing the following proportions and their 
#' confidence intervals (when relevant)
#'  \itemize{
#'   \item\bold{inf_p_cand1:} 1st candidate exclusive callback share, lower bound.
#'   \item\bold{p_cand1:} 1st candidate exclusive callback share.
#'   \item\bold{sup_p_cand1:} 1st candidate exclusive callback share, upper bound.
#'   \item\bold{inf_p_cand2:} 2nd candidate exclusive callback share, lower bound.
#'   \item\bold{p_cand2:} 2nd candidate exclusive callback share.
#'   \item\bold{sup_p_cand2:} 2nd candidate exclusive callback share, upper bound.
#'   \item\bold{inf_cand_dif:} p_cand1-p_cand2, lower bound.
#'   \item\bold{p_cand_dif:} exclusive callback share difference between the candidates.
#'   \item\bold{sup_cand_dif:} p_cand1-p_cand2, upper bound.
#'  }
#'
#' \code{stats}: a data frame containing the statistics for testing the equality 
#' of proportions.
#'  \itemize{
#'   \item\bold{statistic:} the value of the test statistic.
#'   \item\bold{p_stat:} the p-value of the test statistic.
#'   \item\bold{c_stat:} the significance code of the test statistic.
#'    }
#'
#' @author Emmanuel Duguet
#'
#' @references
#' Clopper, C. J. & Pearson, E. S. (1934). The use of confidence or fiducial
#' limits illustrated in the case of the binomial. Biometrika, 26, 404–413.
#' doi:10.2307/2331986.
#'
#' Student. (1908). The Probable Error of a Mean. Biometrika, 6(1), 1–25. 
#' doi:10.2307/2331554.
#' 
#' Wilson, E.B. (1927). Probable inference, the law of succession, and
#' statistical inference. Journal of the American Statistical Association, 22,
#' 209–212. doi:10.2307/2276774.
#'
#' @examples
#' data(labour1)
#' x <- callback(data=labour1,cluster="offer",candid="hist",callback="callback")
#' str(stat_ecs(x))
#'
#' @importFrom stats binom.test prop.test fisher.test
#'
#' @export

stat_ecs <- function(x, method="student", level = 0.95) {
  method <- tolower(method)
  c10 <- c11 <- c01 <- c00 <- callback <- disc <- NULL  
  inf_p_callback <- p_callback <- sup_p_callback <- NULL
  inf_p_cand1 <- p_cand1 <- sup_p_cand1 <- NULL
  inf_p_cand2 <- p_cand2 <- sup_p_cand2 <- NULL
  inf_p_equal <- p_equal <- sup_p_equal <- NULL
  p_cand_dif <- statistic <- p_stat <- n_stat <- c_stat <- NULL
  # level check
  if (level <= 0 | level >= 1) {
    level = 0.95
  }
  alpha <- 1 - level
  # common statistics
  counts <- stat_paired(x)$counts
  
  out_prop <- with(
    counts,
    data.frame(
      disc=disc,
      c10 = c10,
      c01 = c01,
      c11 = c11,
      row.names = rownames(counts)
    )
  )
  
  out_prop <- transform(
    out_prop,
    p_cand1 = c10 / pmax(disc, 1),
    p_cand2 = c01 / pmax(disc, 1)
  )
  
  out_prop <- transform(out_prop, 
                        p_cand_dif = ifelse(disc == 0, 0, p_cand1 - p_cand2))
  
  #method check
  if (!(method %in% c("cp","wilson"))) method <- "student"
  
  specif <- data.frame(convention="exclusive callback shares",method=method,level=level,cid="ecs")
  
  # Clopper-Pearson/Fisher exact test
  if (method == "cp"){
    specif$source <- "binom.test"
    specif$name <-"Fisher exact test"
    specif$confid <- "Clopper-Pearson"
    l_comp <- rownames(out_prop)
    n_comp <- length(l_comp)
    ct <- c1 <- c2 <- c3 <- list()
    for (comp in l_comp) {
      z <- subset(out_prop, c10 + c01 > 0)
      c1 <- binom.test(z[comp, "c10"], z[comp, "disc"], conf.level =
                         level)["conf.int"]
      c2 <- binom.test(z[comp, "c01"], z[comp, "disc"], conf.level =
                         level)["conf.int"]
      #contingency table in matrix form
      m <- matrix(
        c(z[comp, "disc"] - z[comp, "c10"], z[comp, "c10"], z[comp, "disc"] - z[comp, "c01"], z[comp, "c01"]),
        nrow = 2,
        ncol = 2,
        byrow = TRUE
      )
      c3 <- fisher.test(m)["p.value"]
      
      ct[[comp]] <- c(c1[[1]], c2[[1]], c3[[1]])
    }
    
    cp <- ct[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        cp <- rbind(cp, ct[[i]])
      }
      cp <- as.data.frame(cp)
    } else {
      cp <- as.data.frame(t(cp))
    }
    rownames(cp) <- l_comp
    colnames(cp) <- c("inf_p_cand1",
                      "sup_p_cand1",
                      "inf_p_cand2",
                      "sup_p_cand2",
                      "statistic")
    
    props <- data.frame(disc=out_prop$disc,
                        inf_p_cand1=cp$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=cp$sup_p_cand1,
                        inf_p_cand2=cp$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=cp$sup_p_cand2,
                        p_cand_dif=out_prop$p_cand_dif
    )
    rownames(props) <- l_comp
    
    stats <- data.frame(statistic=cp$statistic,
                        p_stat=cp$statistic,
                        c_stat=stat_signif(cp$statistic)
    )
    rownames(stats) <- l_comp
  }# end of method="cp"
  
  # Student
  if (method == "student"){
    specif$source <- "t.test"
    specif$name <-"Student test"
    specif$confid <- "Student"
    pfds <- x$pfds
    l_comp <- names(pfds)
    n_comp <- length(l_comp)
    st <- s1 <- s2 <- s3 <- list()
    for (comp in l_comp) {
      z <- subset(pfds[[comp]], c10 + c01 > 0)
      s1 <- t.test(z[, "c10"], conf.level = level)["conf.int"]
      s2 <- t.test(z[, "c01"], conf.level = level)["conf.int"]
      s3 <-
        t.test(z[, "c10"] - z[, "c01"], conf.level = level)["p.value"]
      s4 <-
        t.test(z[, "c10"] - z[, "c01"], conf.level = level)["statistic"]
      s5 <-
        t.test(z[, "c10"] - z[, "c01"], conf.level = level)["conf.int"]
      st[[comp]] <- c(s1[[1]], s2[[1]], s3[[1]], s4[[1]], s5[[1]])
    }
    student <- st[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        student <- rbind(student, st[[i]])
      }
    }
    rownames(student) <- l_comp
    colnames(student) <- c(
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "p_stat",
      "statistic",
      "inf_cand_dif",
      "sup_cand_dif"
    )
    student <- transform(
      student,
      inf_p_cand1 = pmax(0, inf_p_cand1),
      inf_p_cand2 = pmax(0, inf_p_cand2),
      sup_p_cand1 = pmin(1, sup_p_cand1),
      sup_p_cand2 = pmin(1, sup_p_cand2)
    )
    props <- data.frame(disc=out_prop$disc,
                        inf_p_cand1=student$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=student$sup_p_cand1,
                        inf_p_cand2=student$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=student$sup_p_cand2,
                        inf_cand_dif=student$inf_cand_dif,
                        p_cand_dif=out_prop$p_cand_dif,
                        sup_cand_dif=student$sup_cand_dif
    )
    
    rownames(props) <- l_comp
    stats <- data.frame(statistic=student$statistic,
                        p_stat=student$p_stat,
                        c_stat=stat_signif(student$p_stat)
    )
    rownames(stats) <- l_comp
  }# end of method="student"
  
  # Wilson
  if (method == "wilson"){
    specif$source <- "prop.test"
    specif$name <-"Pearson test"
    specif$confid <- "Wilson"
    l_comp <- rownames(out_prop)
    n_comp <- length(l_comp)
    wt <- w1 <- w2 <- w3 <- list()
    for (comp in l_comp) {
      z <- subset(out_prop, c10 + c01 > 0)
      w1 <- prop.test(z[comp, "c10"], z[comp, "disc"], conf.level =
                        level)["conf.int"]
      w2 <- prop.test(z[comp, "c01"], z[comp, "disc"], conf.level =
                        level)["conf.int"]
      w3 <- prop.test(c(z[comp, "c10"], z[comp, "c01"]), c(z[comp, "disc"], z[comp, "disc"]))["p.value"]
      w4 <- prop.test(c(z[comp, "c10"], z[comp, "c01"]), c(z[comp, "disc"], z[comp, "disc"]))["statistic"]
      w5 <-
        prop.test(c(z[comp, "c10"], z[comp, "c01"]), c(z[comp, "disc"], z[comp, "disc"]))["conf.int"]
      
      wt[[comp]] <- c(w1[[1]], w2[[1]], w3[[1]], w4[[1]], w5[[1]])
    }
    
    wilson <- wt[[1]]
    if (n_comp >= 2) {
      for (i in 2:n_comp) {
        wilson <- rbind(wilson, wt[[i]])
      }
      wilson <- as.data.frame(wilson)
    } else {
      wilson <- as.data.frame(t(wilson))
    }
    rownames(wilson) <- l_comp
    colnames(wilson) <- c(
      "inf_p_cand1",
      "sup_p_cand1",
      "inf_p_cand2",
      "sup_p_cand2",
      "p_stat",
      "statistic",
      "inf_cand_dif",
      "sup_cand_dif"
    )
    props <- data.frame(disc=out_prop$disc,
                        inf_p_cand1=wilson$inf_p_cand1,
                        p_cand1=out_prop$p_cand1,
                        sup_p_cand1=wilson$sup_p_cand1,
                        inf_p_cand2=wilson$inf_p_cand2,
                        p_cand2=out_prop$p_cand2,
                        sup_p_cand2=wilson$sup_p_cand2,
                        inf_cand_dif=wilson$inf_cand_dif,
                        p_cand_dif=out_prop$p_cand_dif,
                        sup_cand_dif=wilson$sup_cand_dif
    )
    
    rownames(props) <- l_comp
    stats <- data.frame(statistic=wilson$statistic,
                        p_stat=wilson$p_stat,
                        c_stat=stat_signif(wilson$p_stat)
    )
    rownames(stats) <- l_comp
  }#end of method="wilson"
  
  #output
  z <- list(
    specif = specif,
    props = props,
    stats = stats,
    counts = counts[,c("tests","callback","disc","c00","c10","c01","c11")]
  )
  class(z) <- "callback_stat"
  return (z)
}
