\name{readData}
\alias{readData}
\alias{print.readData_passed}
\title{
  Read the original datasets
}
\description{
  Read the original input datasets to be learned for synthetic data generation. The package allows the input data to contain missing values and imputes them using the posterior predictive distribution, so the synthetic data output contains no missing values.
}
\usage{
readData(Y_input, X_input, RandomSeed = 99)

\method{print}{readData_passed}(x, \dots)
}
\arguments{
  \item{Y_input}{
  data.frame consisting of continuous variables of the original data.
  It must contain only variables of class \code{numeric}. Non-numeric columns will cause an error.
}
  \item{X_input}{
  data.frame consisting of categorical variables of the original data. 
  It must contain only variables of class \code{factor} (ordered factors are allowed).
    Character or numeric variables are \emph{not} converted automatically;
    non-factor columns will cause an error. Convert them to factors in advance, e.g.,
    \code{X_input[] <- lapply(X_input, factor)}.
}
  \item{RandomSeed}{
  random seed number. Defaults to \code{99}.
}
\item{x}{
object of class \code{readData_passed}; a result of a call to \code{readData()}.
}
\item{\dots}{
further arguments passed to or from other methods.
}
}
\value{
  \code{readData} returns an object of class "\code{readData_passed}".
  
  An object of class "\code{readData_passed}" is a list containing the following components:
  \item{n_sample}{number of records in the input dataset.}
  \item{p_Y}{number of continuous variables.}
  \item{Y_mat_std}{matrix with standardized values of \code{Y_input}, with mean 0 and standard deviation 1.}
  \item{mean_Y_input}{mean vector of the original \code{Y_input}.}
  \item{sd_Y_input}{standard deviation vector of the original \code{Y_input}.}
  \item{NA_Y_mat}{matrix indicating missing values in \code{Y_input}.}
  \item{p_X}{number of categorical variables.}
  \item{D_l_vec}{number of levels of each categorical variable.}
  \item{X_mat_std}{matrix with the numeric-transformed values of \code{X_input}.}
  \item{levels_X_input}{list of levels of each categorical variable.}
  \item{NA_X_mat}{matrix indicating missing values in \code{X_input}.}
  \item{var_names}{list containing variable names of \code{X_input} and \code{Y_input}.}
  \item{orig_data}{original dataset.}
}

\examples{
## Example data: split into continuous (Y_input) and categorical (X_input)

### Continuous variables (numeric only)
Y_demo <- data.frame(
  Sepal.Length = iris$Sepal.Length,
  Sepal.Width  = iris$Sepal.Width,
  Petal.Length = iris$Petal.Length,
  Petal.Width  = iris$Petal.Width
)

### Categorical variable stored as character (deliberately NOT a factor yet).
### stringsAsFactors = FALSE keeps the column as character on R < 4.0.0 as
### well, where data.frame() would otherwise convert it to a factor.
X_demo_char <- data.frame(
  Species = as.character(iris$Species),
  stringsAsFactors = FALSE
)

\dontrun{
## This will produce an error because X_input is not a factor:
readData(Y_input = Y_demo, X_input = X_demo_char)
}

## Proper conversion of X_input to factor:
X_demo <- data.frame(
  Species = factor(iris$Species)
)

## Read the data and display a summary of the resulting object
dat_obj <- readData(Y_input = Y_demo, X_input = X_demo)
print(dat_obj)
}
\seealso{
  \code{\link{multipleSyn}}, \code{\link{createModel}}
} 

