---
title: "Getting Started with cloudosR"
author: "Leila Mansouri"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Getting Started with cloudosR}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

## Introduction

The `cloudosR` package provides an R client for interacting with the Lifebit Platform. It enables you to:

- Configure and manage Lifebit Platform profiles with credentials
- Execute read-only SQL queries on cohort data
- Retrieve results as R data frames for analysis

## Installation

```{r eval=FALSE}
# Install from CRAN (recommended)
install.packages("cloudosR")

# Or install development version from GitHub
devtools::install_github("lifebit-ai/cloudosR")

# Or install from local source
install.packages("path/to/cloudosR", repos = NULL, type = "source")
```

## Configuration

Before using any API functions, you need to configure a profile with your Lifebit Platform credentials.

### Setting up a Profile

```{r eval=FALSE}
library(cloudosR)

# Configure a profile with your credentials
cloudos.configure(
  profilename = "production",
  apikey = "your-api-key-here",
  workspace_id = "your-workspace-id",
  base_url = "https://cloudos.lifebit.ai",  # Optional, this is the default
  set_default = TRUE                        # Set as default profile
)
```

The configuration is stored securely in your R user config directory with restricted permissions (0600). You can find the location with `tools::R_user_dir("cloudosR", "config")`.

### Using Default Profiles

When you set `set_default = TRUE`, you don't need to specify the `profilename` parameter in subsequent function calls:

```{r eval=FALSE}
# With default profile set, you can omit profilename
results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM table LIMIT 10"
)
```

### Managing Multiple Profiles

You can configure multiple profiles for different environments:

```{r eval=FALSE}
# Production profile
cloudos.configure(
  profilename = "production",
  apikey = "prod-api-key",
  workspace_id = "prod-workspace-id",
  set_default = TRUE
)

# Staging profile
cloudos.configure(
  profilename = "staging",
  apikey = "staging-api-key",
  workspace_id = "staging-workspace-id"
)

# List all configured profiles
profiles <- cloudos.profile_list()
print(profiles)
```

## Querying Data

The package provides both high-level and low-level functions for executing SQL queries.

### High-Level Query Execution (Recommended)

The `cloudos.query()` function handles the entire query lifecycle automatically:

```{r eval=FALSE}
# Simple query (uses default profile)
results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT person_id, gender_concept_id, birth_datetime FROM person LIMIT 10"
)

# View results
head(results)
str(results)

# Access metadata
attr(results, "total_rows")
attr(results, "total_pages")
```

### Low-Level Query Control

For more control over the query process, you can use the individual steps:

```{r eval=FALSE}
# Step 1: Submit query with pagination
task <- cloudos.query_submit_async(
  profilename = "production",
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM table LIMIT 10",
  pagination = list(pageNumber = 0, pageSize = 100)  # Optional
)

task_id <- task$task_id
print(task_id)

# Step 2: Check status
status <- cloudos.query_status(
  profilename = "production",
  task_id = task_id
)

print(status$status)  # "pending", "running", "completed", or "failed"
print(status$count_of_results)

# Step 3: Fetch results when
# completed
results <- cloudos.query_results(
  profilename = "production",
  task_id = task_id
)
```

### Working with Large Result Sets

The package automatically handles pagination by submitting multiple async tasks:

```{r eval=FALSE}
# Fetch all pages automatically (default)
# Note: This submits separate tasks for each page
all_results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM person",
  page_size = 1000,  # Rows per page (default: 1000)
  all_pages = TRUE   # Fetch all pages (default: TRUE)
)

# This will:
# 1. Submit query for page 0
# 2. Wait for completion and fetch results
# 3. Calculate total pages from response metadata
# 4. Submit separate tasks for pages 1, 2, 3...
# 5. Wait for all tasks to complete
# 6. Combine and return all results

# Fetch only first page
first_page <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT * FROM person",
  page_size = 100,
  all_pages = FALSE  # Only fetch first page
)
```

## Advanced Usage

### Custom Polling Parameters

You can customize how the package polls for query completion:

```{r eval=FALSE}
results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = "SELECT COUNT(*) FROM person",
  poll_interval = 5,  # Check every 5 seconds (default: 2)
  max_wait = 900      # Wait up to 15 minutes (default: 600)
)
```

### Complex SQL Queries

The package supports any valid SQL query that your cohort allows:

```{r eval=FALSE}
# Join multiple tables
sql_query <- "
  SELECT
    p.person_id,
    p.gender_concept_id,
    c.condition_concept_id,
    c.condition_start_date
  FROM person p
  JOIN condition_occurrence c ON p.person_id = c.person_id
  WHERE p.birth_datetime > '1980-01-01'
  LIMIT 1000
"

results <- cloudos.query(
  cohort_id = "1a2b3c4d5e6f7g8h9i10j11k",
  sql = sql_query
)
```

## Security Best Practices

1. **Never commit credentials**: Add `.cloudos_config.json` to your `.gitignore` file
2. **Use environment-specific profiles**: Separate production and development credentials
3.
   **Rotate API keys regularly**: Update profiles when keys change
4. **Restrict file permissions**: The package automatically sets config file permissions to 0600

## Error Handling

The package provides informative error messages, which you can capture with `tryCatch()`:

```{r eval=FALSE}
# Authentication errors
tryCatch({
  cloudos.query(
    profilename = "invalid_profile",
    cohort_id = "123",
    sql = "SELECT 1"
  )
}, error = function(e) {
  message("Error caught: ", e$message)
})

# Query timeout
tryCatch({
  cloudos.query(
    cohort_id = "123",
    sql = "SELECT * FROM large_table",
    max_wait = 5  # Very short timeout
  )
}, error = function(e) {
  message("Query timed out: ", e$message)
})
```