Genentech · Jan 9, 2024
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎DESCRIPTION
Lines changed: 24 additions & 0 deletions b/‎DESCRIPTION
Lines changed: 24 additions & 0 deletions
diff --git a/‎LICENSE.md
Lines changed: 4 additions & 0 deletions b/‎LICENSE.md
Lines changed: 4 additions & 0 deletions
diff --git a/‎NAMESPACE
Lines changed: 5 additions & 0 deletions b/‎NAMESPACE
Lines changed: 5 additions & 0 deletions
diff --git a/‎NEWS.md
Lines changed: 3 additions & 0 deletions b/‎NEWS.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎R/cast_clones_to_long.R
Lines changed: 166 additions & 0 deletions b/‎R/cast_clones_to_long.R
Lines changed: 166 additions & 0 deletions
diff --git a/‎R/create_clones.R
Lines changed: 142 additions & 0 deletions b/‎R/create_clones.R
Lines changed: 142 additions & 0 deletions
diff --git a/‎R/create_clones_check_inputs.R
Lines changed: 93 additions & 0 deletions b/‎R/create_clones_check_inputs.R
Lines changed: 93 additions & 0 deletions
diff --git a/‎R/data.R
Lines changed: 77 additions & 0 deletions b/‎R/data.R
Lines changed: 77 additions & 0 deletions
diff --git a/‎R/generate_ccw.R
Lines changed: 85 additions & 0 deletions b/‎R/generate_ccw.R
Lines changed: 85 additions & 0 deletions
diff --git a/‎R/generate_ccw_calc_weights.R
Lines changed: 52 additions & 0 deletions b/‎R/generate_ccw_calc_weights.R
Lines changed: 52 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 3 additions & 0 deletions b/‎README.md
Lines changed: 3 additions & 0 deletions
diff --git a/‎cran-comments.md
Lines changed: 5 additions & 0 deletions b/‎cran-comments.md
Lines changed: 5 additions & 0 deletions
diff --git a/‎data/data_final.rda
92.8 KB b/‎data/data_final.rda
92.8 KB
diff --git a/‎data/data_long.rda
172 KB b/‎data/data_long.rda
172 KB
diff --git a/‎data/data_long_2.rda
194 KB b/‎data/data_long_2.rda
194 KB
diff --git a/‎data/data_long_cox.rda
398 KB b/‎data/data_long_cox.rda
398 KB
diff --git a/‎data/tab.rda
1.55 KB b/‎data/tab.rda
1.55 KB
diff --git a/‎data/toy_df.rda
3.52 KB b/‎data/toy_df.rda
3.52 KB
diff --git a/‎man/cast_clones_to_long.Rd
Lines changed: 37 additions & 0 deletions b/‎man/cast_clones_to_long.Rd
Lines changed: 37 additions & 0 deletions
diff --git a/‎man/create_clones.Rd
Lines changed: 55 additions & 0 deletions b/‎man/create_clones.Rd
Lines changed: 55 additions & 0 deletions
diff --git a/‎man/create_clones_check_inputs.Rd
Lines changed: 38 additions & 0 deletions b/‎man/create_clones_check_inputs.Rd
Lines changed: 38 additions & 0 deletions
diff --git a/‎man/data_final.Rd
Lines changed: 32 additions & 0 deletions b/‎man/data_final.Rd
Lines changed: 32 additions & 0 deletions
diff --git a/‎man/data_long.Rd
Lines changed: 22 additions & 0 deletions b/‎man/data_long.Rd
Lines changed: 22 additions & 0 deletions
diff --git a/‎man/data_long_2.Rd
Lines changed: 22 additions & 0 deletions b/‎man/data_long_2.Rd
Lines changed: 22 additions & 0 deletions
diff --git a/‎man/data_long_cox.Rd
Lines changed: 22 additions & 0 deletions b/‎man/data_long_cox.Rd
Lines changed: 22 additions & 0 deletions
diff --git a/‎man/generate_ccw.Rd
Lines changed: 40 additions & 0 deletions b/‎man/generate_ccw.Rd
Lines changed: 40 additions & 0 deletions
diff --git a/‎man/generate_ccw_calc_weights.Rd
Lines changed: 21 additions & 0 deletions b/‎man/generate_ccw_calc_weights.Rd
Lines changed: 21 additions & 0 deletions
diff --git a/‎man/tab.Rd
Lines changed: 22 additions & 0 deletions b/‎man/tab.Rd
Lines changed: 22 additions & 0 deletions
diff --git a/‎man/toy_df.Rd
Lines changed: 38 additions & 0 deletions b/‎man/toy_df.Rd
Lines changed: 38 additions & 0 deletions
diff --git a/‎survivalCCW.Rproj
Lines changed: 22 additions & 0 deletions b/‎survivalCCW.Rproj
Lines changed: 22 additions & 0 deletions
diff --git a/‎tests/testthat.R
Lines changed: 12 additions & 0 deletions b/‎tests/testthat.R
Lines changed: 12 additions & 0 deletions
diff --git a/‎tests/testthat/test-cast_clones_to_long.R
Lines changed: 61 additions & 0 deletions b/‎tests/testthat/test-cast_clones_to_long.R
Lines changed: 61 additions & 0 deletions
diff --git a/‎tests/testthat/test-check_inputs_create_clones.R
Lines changed: 248 additions & 0 deletions b/‎tests/testthat/test-check_inputs_create_clones.R
Lines changed: 248 additions & 0 deletions
diff --git a/‎tests/testthat/test-create_clones.R
Lines changed: 257 additions & 0 deletions b/‎tests/testthat/test-create_clones.R
Lines changed: 257 additions & 0 deletions
diff --git a/‎tests/testthat/test-generate_ccw.R
Lines changed: 32 additions & 0 deletions b/‎tests/testthat/test-generate_ccw.R
Lines changed: 32 additions & 0 deletions
diff --git a/‎tests/testthat/test-generate_ccw_on_long_df.R
Lines changed: 128 additions & 0 deletions b/‎tests/testthat/test-generate_ccw_on_long_df.R
Lines changed: 128 additions & 0 deletions
diff --git a/‎vignettes/.gitignore
Lines changed: 2 additions & 0 deletions b/‎vignettes/.gitignore
Lines changed: 2 additions & 0 deletions
diff --git a/‎vignettes/conduct-ccw-analysis.Rmd
Lines changed: 222 additions & 0 deletions b/‎vignettes/conduct-ccw-analysis.Rmd
Lines changed: 222 additions & 0 deletions
@@ -0,0 +1 @@
+.DS_Store
@@ -0,0 +1,24 @@
+Package: survivalCCW
+Title: An R Package for Clone Censor Weighting (CCW) Survival Analyses
+Version: 0.0.1
+Authors@R: 
+    person("Matthew", "Secrest", , "secrmatt@gmail.com", role = c("aut", "cre"),
+           comment = c(ORCID = "0000-0002-0939-4902"))
+Description: This is a work-in-progress package that conducts clone censor weight analyses in R. Please use at your own risk. Consider filing a bug report or reaching out to [Matt](mailto:secrmatt@gmail.com) for questions/comments/suggestions.
+License: CC BY 4.0
+Encoding: UTF-8
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.2.3
+Suggests: 
+    knitr,
+    rmarkdown,
+    boot,
+    testthat (>= 3.0.0)
+VignetteBuilder: knitr
+Depends: 
+    R (>= 4.1.0)
+LazyData: true
+Config/testthat/edition: 3
+Imports: 
+    checkmate,
+    survival
@@ -0,0 +1,4 @@
+This work is licensed under the Creative Commons 
+Attribution-NonCommercial 4.0 International.
+
+https://creativecommons.org/licenses/by-nc/4.0/
@@ -0,0 +1,5 @@
+# Generated by roxygen2: do not edit by hand
+
+export(cast_clones_to_long)
+export(create_clones)
+export(generate_ccw)
@@ -0,0 +1,3 @@
+# survivalCCW 0.0.1
+
+* Initial functionality and CRAN submission
@@ -0,0 +1,166 @@
+#' Cast one-row-per-clone data to long format
+#' 
+#' Splits the follow-up of every clone at each distinct event time, exposure 
+#' time and the CED window, so that each clone contributes one row per time interval.
+#' 
+#' @param df A data.frame with one row per clone as returned by [create_clones()]
+#' 
+#' @return A data.frame of class `ccw_clones_long` with one row per patient per time period per clone.
+#' In addition to the input columns it contains `t_start`, `t_stop`, `time_id` and `t_event`, 
+#' and it carries the attributes of the input plus `event_times_df`.
+#' 
+#' @export
+#' 
+#' @examples 
+#' 
+#' # Load the toy dataset
+#' data(toy_df)
+#' 
+#' # Create clones
+#' clones <- create_clones(toy_df, 
+#'                         id = "id", 
+#'                         event = "death", 
+#'                         time_to_event = "fup_obs", 
+#'                         exposure = "surgery", 
+#'                         time_to_exposure = "timetosurgery", 
+#'                         ced_window = 365.25/2)
+#' 
+#' clones_long <- cast_clones_to_long(clones)
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+cast_clones_to_long <- function(df) {
+
+   # Check inputs
+   checkmate::assert_class(df, "ccw_clones")
+   required_cols <- c("outcome", "fup_outcome", "censor", "fup_censor", "clone")
+   if (!all(required_cols %in% names(df))) {
+      stop("The input data.frame is missing at least one of the required columns: outcome, fup_outcome, censor, fup_censor, clone. Did you remove this?")
+   }
+   required_attrs <- c("id", "event", "time_to_event", "exposure", "time_to_exposure", "ced_window")
+   if (!all(required_attrs %in% names(attributes(df)))) {
+      stop("The input data.frame is missing at least one attribute: id, event, time_to_event, exposure, time_to_exposure, ced_window. Did you remove these or try to make a custom data.frame?")
+   }
+
+   # Column names and CED window recorded by create_clones()
+   id <- attr(df, "id", exact = TRUE)
+   event <- attr(df, "event", exact = TRUE)
+   exposure <- attr(df, "exposure", exact = TRUE)
+   time_to_event <- attr(df, "time_to_event", exact = TRUE)
+   time_to_exposure <- attr(df, "time_to_exposure", exact = TRUE)
+   ced_window <- attr(df, "ced_window", exact = TRUE)
+
+   # Every distinct time at which follow-up is cut (sort() drops the NA 
+   # exposure times of unexposed patients)
+   event_times <- sort(unique(c(df[, time_to_event], df[, time_to_exposure], ced_window)))
+   event_times_df <- data.frame(
+      t_event = event_times,
+      time_id = seq_along(event_times)
+   )
+
+   # Expand a single arm (all clones with the same `clone` value) to long format.
+   # Both arms go through exactly the same steps.
+   cast_arm_to_long <- function(arm) {
+
+      arm$t_start <- 0.
+
+      # Split follow-up at the cut points for a given status column, sorted by
+      # patient and interval end so the two expansions line up row by row
+      split_follow_up <- function(status_col) {
+         arm_long <- survival::survSplit(
+            arm, 
+            cut = event_times, 
+            end = "fup_outcome", # Always expand at the outcome time, also for the censoring status
+            start = "t_start",
+            event = status_col
+         )
+         arm_long[order(arm_long[, id], arm_long[, "fup_outcome"]), ]
+      }
+
+      arm_long <- split_follow_up("outcome")
+
+      # Replace the censoring variable with its interval-specific version
+      arm_long$censor <- split_follow_up("censor")$censor
+
+      arm_long$t_stop <- arm_long$fup_outcome
+
+      # Label each interval with the id of the event time at which it starts
+      arm_long <- merge(
+         x = arm_long,
+         y = event_times_df, 
+         by.x = "t_start",
+         by.y = "t_event",
+         all.x = TRUE)
+
+      arm_long <- arm_long[order(arm_long[, id], arm_long[, "fup_outcome"]), ]
+
+      # Intervals whose start is not an event time (i.e. those starting at 0) get time_id 0
+      arm_long$time_id[is.na(arm_long$time_id)] <- 0
+      rownames(arm_long) <- NULL
+
+      arm_long
+   }
+
+   # Exposed arm first, then unexposed arm
+   df_long <- rbind(
+      cast_arm_to_long(df[df[, "clone"] == 1L, ]),
+      cast_arm_to_long(df[df[, "clone"] == 0L, ])
+   )
+
+   # Add the event time (t_event) that corresponds to each time_id
+   df_long <- merge( 
+      x = df_long,
+      y = event_times_df,
+      by = "time_id",
+      all.x = TRUE
+   )
+   
+   df_long <- df_long[order(df_long[, id], df_long[, "clone"], df_long[, "fup_outcome"]), ]
+
+   # Add class and attributes and return
+   class(df_long) <- c("ccw_clones_long", class(df_long))
+   attr(df_long, "id") <- id
+   attr(df_long, "event") <- event
+   attr(df_long, "time_to_event") <- time_to_event
+   attr(df_long, "exposure") <- exposure
+   attr(df_long, "time_to_exposure") <- time_to_exposure
+   attr(df_long, "ced_window") <- ced_window
+   attr(df_long, "event_times_df") <- event_times_df
+
+   # Remove rownames
+   rownames(df_long) <- NULL
+
+   return(df_long)
+}
@@ -0,0 +1,142 @@
+#' Create clones for CCW analysis
+#' 
+#' Pass a one-row-per-patient data.frame and get back a data.frame with one row per clone 
+#' (two clones per patient: one assigned to the exposed arm, one to the unexposed arm).
+#' 
+#' @param df A data.frame with one row per patient.
+#' @param id The name of the column in `df` that contains the patient identifier.
+#' @param event The name of the column in `df` that contains the event of interest.
+#' @param time_to_event The name of the column in `df` that contains the time to event.
+#' @param exposure The name of the column in `df` that contains the exposure.
+#' @param time_to_exposure The name of the column in `df` that contains the time to exposure.
+#' @param ced_window The time at which the clinical eligibility window closes, on the same 
+#' scale as `time_to_event` and `time_to_exposure`. Although the argument defaults to `NULL`, 
+#' the input checks currently require a numeric value, so it has to be supplied.
+#' 
+#' @return A data.frame of class `ccw_clones` with one row per clone. The columns `outcome`, 
+#' `fup_outcome`, `censor`, `fup_censor` and `clone` are added, and the column names and 
+#' `ced_window` passed to this function are stored as attributes.
+#' 
+#' @export 
+#' 
+#' @examples 
+#' 
+#' # Load the toy dataset
+#' data(toy_df)
+#' 
+#' # Create clones
+#' clones <- create_clones(toy_df, 
+#'                         id = "id", 
+#'                         event = "death", 
+#'                         time_to_event = "fup_obs", 
+#'                         exposure = "surgery", 
+#'                         time_to_exposure = "timetosurgery", 
+#'                         ced_window = 365.25/2)
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+create_clones <- function(
+   df, 
+   id,
+   event,
+   time_to_event,
+   exposure,
+   time_to_exposure,
+   ced_window = NULL
+) {
+
+   # Validate inputs (stops with an informative error on the first problem)
+   inputs_ok <- create_clones_check_inputs(df = df, 
+                                           id = id,
+                                           event = event,
+                                           time_to_event = time_to_event,
+                                           exposure = exposure,
+                                           time_to_exposure = time_to_exposure,
+                                           ced_window = ced_window)
+
+   if (!inputs_ok) stop("something went wrong")
+
+   # Patients exposed only after the CED window closes are treated as unexposed
+   exposed_too_late <- !is.na(df[, time_to_exposure]) & df[, time_to_exposure] > ced_window
+   n_too_late <- sum(exposed_too_late)
+   if (n_too_late > 0) {
+      message(paste0("Updating ", n_too_late, " patients' exposure and time-to-exposure based on CED window"))
+      missing_time <- if (is.integer(ced_window)) NA_integer_ else NA_real_
+      df[, exposure] <- ifelse(exposed_too_late, 0L, df[, exposure])
+      df[, time_to_exposure] <- ifelse(exposed_too_late, missing_time, df[, time_to_exposure])
+   }
+
+   # Start both clones from the patient data plus empty outcome/censoring columns
+   n_pts <- NROW(df)
+   clone_template <- df
+   clone_template$outcome <- rep(NA_integer_, n_pts)
+   clone_template$fup_outcome <- rep(NA_real_, n_pts)
+   clone_template$censor <- rep(NA_integer_, n_pts)
+   clone_template$fup_censor <- rep(NA_real_, n_pts)
+
+   clone_unexposed <- clone_template
+   clone_unexposed$clone <- 0L
+   clone_exposed <- clone_template
+   clone_exposed$clone <- 1L
+
+   # Patient groups, computed once and shared by both clones
+   truly_exposed <- df[, exposure] == 1L
+   truly_unexposed <- df[, exposure] == 0L
+   unexposed_ends_by_ced <- truly_unexposed & df[, time_to_event] <= ced_window
+   unexposed_ends_after_ced <- truly_unexposed & df[, time_to_event] > ced_window
+
+   # EXPOSED CLONE
+   ## Truly exposed --> keep outcomes; not censored, risk of censoring ends at exposure
+   clone_exposed[truly_exposed, "outcome"] <- df[truly_exposed, event]
+   clone_exposed[truly_exposed, "fup_outcome"] <- df[truly_exposed, time_to_event]
+   clone_exposed[truly_exposed, "censor"] <- 0L
+   clone_exposed[truly_exposed, "fup_censor"] <- df[truly_exposed, time_to_exposure]
+
+   ## Truly unexposed, follow-up ends before/on CED --> keep outcomes; not censored, 
+   ## risk of censoring ends at the event time
+   clone_exposed[unexposed_ends_by_ced, "outcome"] <- df[unexposed_ends_by_ced, event]
+   clone_exposed[unexposed_ends_by_ced, "fup_outcome"] <- df[unexposed_ends_by_ced, time_to_event]
+   clone_exposed[unexposed_ends_by_ced, "censor"] <- 0L
+   clone_exposed[unexposed_ends_by_ced, "fup_censor"] <- df[unexposed_ends_by_ced, time_to_event]
+
+   ## Truly unexposed, follow-up ends after CED --> no outcome, censored at CED
+   clone_exposed[unexposed_ends_after_ced, "outcome"] <- 0L
+   clone_exposed[unexposed_ends_after_ced, "fup_outcome"] <- ced_window
+   clone_exposed[unexposed_ends_after_ced, "censor"] <- 1L
+   clone_exposed[unexposed_ends_after_ced, "fup_censor"] <- ced_window
+
+   # UNEXPOSED CLONE
+   ## Truly unexposed --> keep outcomes
+   clone_unexposed[truly_unexposed, "outcome"] <- df[truly_unexposed, event]
+   clone_unexposed[truly_unexposed, "fup_outcome"] <- df[truly_unexposed, time_to_event]
+
+   ## Truly exposed --> no outcome, censored at time of exposure
+   clone_unexposed[truly_exposed, "outcome"] <- 0L
+   clone_unexposed[truly_exposed, "fup_outcome"] <- df[truly_exposed, time_to_exposure]
+   clone_unexposed[truly_exposed, "censor"] <- 1L
+   clone_unexposed[truly_exposed, "fup_censor"] <- df[truly_exposed, time_to_exposure]
+
+   ## Truly unexposed, follow-up ends before/on CED --> not censored, risk of censoring ends at the event time
+   clone_unexposed[unexposed_ends_by_ced, "censor"] <- 0L
+   clone_unexposed[unexposed_ends_by_ced, "fup_censor"] <- df[unexposed_ends_by_ced, time_to_event]
+
+   ## Truly unexposed, follow-up ends after CED --> not censored, risk of censoring ends at CED
+   clone_unexposed[unexposed_ends_after_ced, "censor"] <- 0L
+   clone_unexposed[unexposed_ends_after_ced, "fup_censor"] <- ced_window
+
+   # Stack the two clones and sort by patient, then clone
+   df_clones <- rbind(clone_exposed, clone_unexposed)
+   df_clones <- df_clones[order(df_clones[, id], df_clones[, "clone"]), ]
+
+   # Mark the result so downstream functions can recognise it
+   class(df_clones) <- c("ccw_clones", class(df_clones))
+
+   # Record the column names and CED window for downstream functions
+   attr(df_clones, "id") <- id
+   attr(df_clones, "event") <- event
+   attr(df_clones, "time_to_event") <- time_to_event
+   attr(df_clones, "exposure") <- exposure
+   attr(df_clones, "time_to_exposure") <- time_to_exposure
+   attr(df_clones, "ced_window") <- ced_window
+
+   # Remove rownames
+   rownames(df_clones) <- NULL
+
+   return(df_clones)
+   
+}
@@ -0,0 +1,93 @@
+#' Check inputs to create clones
+#' 
+#' Validates the arguments passed to [create_clones()] and stops with an error 
+#' at the first problem found.
+#' 
+#' @param df A data.frame with one row per patient.
+#' @param id The name of the column in `df` that contains the patient identifier.
+#' @param event The name of the column in `df` that contains the event of interest.
+#' @param time_to_event The name of the column in `df` that contains the time to event.
+#' @param exposure The name of the column in `df` that contains the exposure.
+#' @param time_to_exposure The name of the column in `df` that contains the time to exposure.
+#' @param ced_window The time at which the clinical eligibility window closes. Must be numeric.
+#' 
+#' @return `TRUE` if all inputs are valid; otherwise an error is raised.
+create_clones_check_inputs <- function(
+   df, 
+   id,
+   event,
+   time_to_event,
+   exposure,
+   time_to_exposure,
+   ced_window
+) {
+
+   # Check all input types 
+   checkmate::assert_class(df, "data.frame")
+   checkmate::assert_class(id, "character")
+   checkmate::assert_class(event, "character")
+   checkmate::assert_class(time_to_event, "character")
+   checkmate::assert_class(exposure, "character")
+   checkmate::assert_class(time_to_exposure, "character")
+   checkmate::assert_class(ced_window, "numeric")
+
+   # Check that all columns are in data
+   study_cols <- c(id, event, time_to_event, exposure, time_to_exposure)
+   checkmate::assert_subset(study_cols, names(df))
+
+   # Check that there are no missing data in the study columns (except time to exposure)
+   if (!all(stats::complete.cases(df[, c(id, event, time_to_event, exposure)]))) {
+      stop("There are missing data in the study columns")
+   }
+
+   # Check time to exposure is missing for, and only for, patients with exposure 0/FALSE
+   if (any(!is.na(df[df[, exposure] == 0L, time_to_exposure]))) {
+      stop("Time to exposure should only be for patients who received the exposure at some time")
+   }
+
+   if (any(is.na(df[df[, exposure] == 1L, time_to_exposure]))) {
+      stop("Time to exposure should be complete for patients who have exposure = 1")
+   }
+
+   # Check exposure and event are just 0/1 or T/F
+   if (any(df[, exposure] != 0L & df[, exposure] != 1L)) {
+      stop("Exposure should be 0/1 or T/F")
+   }
+
+   if (any(df[, event] != 0L & df[, event] != 1L)) {
+      stop("Event should be 0/1 or T/F")
+   }
+
+   # Make sure the user did not pass the same column name twice
+   if (anyDuplicated(study_cols) > 0) {
+      stop("You passed the same column name twice")
+   }
+
+   # Check that the respective columns have the expected types. Type predicates
+   # are used instead of `class(x) %in% ...` because class() can return more
+   # than one element, which would make the assertion fail for the wrong reason.
+   checkmate::assert_numeric(df[, time_to_event])
+   checkmate::assert_numeric(df[, time_to_exposure])
+   checkmate::assert_true(is.integer(df[, event]) || is.logical(df[, event]))
+   checkmate::assert_true(is.integer(df[, exposure]) || is.logical(df[, exposure]))
+   checkmate::assert_true(is.numeric(df[, id]) || is.character(df[, id]))
+
+   # Column names that this package creates itself and that must not exist yet
+   protected_names <- c("clone", "outcome", "fup_outcome", "censor", "fup_censor", 
+                        "t_start", "t_stop", "time_id", "t_event", "weight_cox",
+                        "p_uncens", "hazard", "lp")
+   clashing_names <- intersect(protected_names, names(df))
+   if (length(clashing_names) > 0) {
+      stop("'", clashing_names[1], "' is a protected column name and will be used by the function. Please rename this column")
+   }
+
+   # Check no outcomes are before exposure dates
+   has_exposure_time <- !is.na(df[, time_to_exposure])
+   if (any(df[has_exposure_time, time_to_event] < df[has_exposure_time, time_to_exposure])) {
+      stop("There are outcomes before exposure dates")
+   }
+
+   return(TRUE)
+
+}
+
@@ -0,0 +1,77 @@
+#' Toy dataset from Maringe et al. (2020)
+#'
+#' A toy dataset provided in Maringe et al. (2020) to demonstrate the clone-censor weight approach.
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+#'
+#' @format ## `toy_df`
+#' A data frame with 200 rows and 12 columns:
+#' \describe{
+#'   \item{id}{patient identifier}
+#'   \item{fup_obs}{observed follow-up time (time to death or 1 year if censored alive)}
+#'   \item{death}{observed event of interest (all-cause death) 1: dead, 0:alive}
+#'   \item{timetosurgery}{time to surgery (NA if no surgery)}
+#'   \item{surgery}{binary indicator of whether the patient received surgery (used as the exposure in the package examples)}
+#'   \item{age}{age at diagnosis}
+#'   \item{sex}{patient's sex}
+#'   \item{perf}{performance status at diagnosis}
+#'   \item{stage}{stage at diagnosis}
+#'   \item{deprivation}{deprivation score}
+#'   \item{charlson}{Charlson's comorbidity index}
+#'   \item{emergency}{route to diagnosis}
+#' }
+#' @source <https://doi.org/10.1093/ije/dyaa057>
+"toy_df"
+
+#' Testing data for `create_clones`
+#'
+#' The clones dataset from Maringe et al. (2020) named `tab`
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+
+#' @source <https://doi.org/10.1093/ije/dyaa057>
+"tab"
+
+#' Testing data for `cast_clones_to_long`
+#'
+#' The long clones dataset from Maringe et al. (2020) named `data_final`
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+
+#' @source <https://doi.org/10.1093/ije/dyaa057>
+"data_final"
+
+# NOTE(review): this roxygen block repeats, word for word, the `data_final`
+# documentation that appears earlier in this file -- confirm whether it is a
+# copy-paste leftover that should be removed or pointed at another dataset.
+#' Testing data for `cast_clones_to_long`
+#'
+#' The long clones dataset from Maringe et al. (2020) named `data_final`
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+
+#' @source <https://doi.org/10.1093/ije/dyaa057>
+"data_final"
+
+#' Testing data for `generate_ccw_calc_weights`, surgery recipients
+#'
+#' The long clones dataset with weights for surgery recipients from Maringe et al. (2020) named `data_long`
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+
+#' @source <https://doi.org/10.1093/ije/dyaa057>
+"data_long"
+
+#' Testing data for `generate_ccw_calc_weights`, no surgery recipients
+#'
+#' The long clones dataset with weights for no surgery recipients from Maringe et al. (2020) named `data_long_2`
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+
+#' @source <https://doi.org/10.1093/ije/dyaa057>
+"data_long_2"
+
+#' Testing data for `generate_ccw`
+#'
+#' The long clones dataset with weights for all recipients from Maringe et al. (2020) named `data_long_cox`
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+
+#' @source <https://doi.org/10.1093/ije/dyaa057>
+"data_long_cox"
@@ -0,0 +1,85 @@
+#' Generate clone censor weights (CCW) on the long data.frame
+#' 
+#' Currently, the only way to generate weights is via multivariable Cox, as described in Maringe et al. 2020.
+#' A censoring model is fitted separately within each arm (`clone == 1` and `clone == 0`).
+#' 
+#' @param df A data.frame with one row per clone per observation period as returned by [cast_clones_to_long()]
+#' @param predvars The variables that will be used to derive weights (subset of those in your data.frame originally). 
+#' At least one covariate must be used, and character/factor columns are not supported.
+#' 
+#' @return The input data with the columns `lp`, `hazard`, `p_uncens` and `weight_cox` added, 
+#' returned as a data.frame of class `ccw_clones_long_weights`. Rows of the unexposed arm come first.
+#' 
+#' @export
+#' 
+#' @examples 
+#' 
+#' # Load the toy dataset
+#' data(toy_df)
+#' 
+#' # Create clones
+#' clones <- create_clones(toy_df, 
+#'                         id = "id", 
+#'                         event = "death", 
+#'                         time_to_event = "fup_obs", 
+#'                         exposure = "surgery", 
+#'                         time_to_exposure = "timetosurgery", 
+#'                         ced_window = 365.25/2)
+#' 
+#' clones_long <- cast_clones_to_long(clones)
+#' clones_long_w <- generate_ccw(clones_long, predvars = c("age"))
+#' 
+#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
+generate_ccw <- function(df, predvars) {
+   
+   # Check inputs
+   checkmate::assert_class(df, "ccw_clones_long")
+   required_cols <- c("outcome", "fup_outcome", "censor", "fup_censor", "clone", "t_start", "t_stop", "time_id", "t_event")
+   if (!all(required_cols %in% names(df))) {
+      stop("The input data.frame is missing at least one of the required columns: outcome, fup_outcome, censor, fup_censor, clone, t_start, t_stop, time_id, t_event. Did you remove this?")
+   }
+   required_attrs <- c("id", "event", "time_to_event", "exposure", "time_to_exposure", "ced_window")
+   if (!all(required_attrs %in% names(attributes(df)))) {
+      stop("The input data.frame is missing at least one attribute: id, event, time_to_event, exposure, time_to_exposure, ced_window. Did you remove these or try to make a custom data.frame?")
+   }
+
+   # Event times recorded by cast_clones_to_long()
+   event_times_df <- attr(df, "event_times_df", exact = TRUE)
+
+   # Make sure at least one predvar was supplied. This is checked first because
+   # an empty predvars would otherwise slip through the column check below.
+   if (is.null(predvars)) {
+      stop("predvars cannot be NULL. Please specify at least one variable to use for weights.")
+   }
+   if (length(predvars) == 0) {
+      stop("predvars cannot be empty. Please specify at least one variable to use for weights.")
+   }
+
+   # Check predvars to make sure the columns are there
+   if (!all(predvars %in% names(df))) {
+      stop("At least one of these predvars columns is not on the data.frame: ", paste(predvars, collapse = ", "), ".")
+   }
+
+   # Make sure no predvars are character/factor. `drop = FALSE` keeps a single
+   # predvar as a one-column data.frame so the check always runs once per column.
+   is_char_or_factor <- vapply(
+      df[, predvars, drop = FALSE], 
+      function(column) is.character(column) || is.factor(column), 
+      logical(1)
+   )
+   if (any(is_char_or_factor)) {
+      stop("At least one of the predvars columns is character/factor. In this early version of `survivalCCW`, only numeric variables are considered. Please make dummy vars on your own! :)")
+   }
+
+   # Create weights separately in each arm
+   df_1 <- generate_ccw_calc_weights(df[df$clone == 1L, ], event_times_df, predvars)
+
+   df_0 <- generate_ccw_calc_weights(df[df$clone == 0L, ], event_times_df, predvars)
+
+   # Combine 
+   df <- rbind(df_0, df_1)
+
+   # Check that all clones have weights
+   if (any(is.na(df$weight_cox))) {
+      stop("At least one clone is missing a weight. Please file a bug fix.")
+   }
+
+   # Update class
+   class(df) <- c("ccw_clones_long_weights", class(df))
+
+   # Remove rownames
+   rownames(df) <- NULL
+
+   return(df)
+
+}
@@ -0,0 +1,52 @@
+#' Calculate weights for each arm of a long data.frame
+#' 
+#' Fits a Cox model for the `censor` indicator on `predvars` and converts the 
+#' resulting cumulative baseline hazard and linear predictor into the probability 
+#' of remaining uncensored (`p_uncens`) and its inverse (`weight_cox`).
+#' 
+#' @param df the data.frame for a single arm. If it carries an `id` attribute naming 
+#' the patient identifier column (as set by [cast_clones_to_long()]), that column is 
+#' used for sorting; otherwise a column called `id` is used.
+#' @param event_times_df the event times data.frame
+#' @param predvars the baseline variables for adjustment
+#' 
+#' @return a data.frame with weight columns included (`lp`, `hazard`, `p_uncens`, `weight_cox`), 
+#' sorted by patient identifier and `fup_outcome`
+generate_ccw_calc_weights <- function(df,  event_times_df, predvars) {
+
+   # Look up the patient identifier column now: merge() below drops custom 
+   # attributes. Fall back to "id" when the attribute is absent or unusable.
+   id_col <- attr(df, "id", exact = TRUE)
+   if (!is.character(id_col) || length(id_col) != 1L || !(id_col %in% names(df))) {
+      id_col <- "id"
+   }
+
+   model_fmla <- stats::as.formula(
+      paste0(
+         "survival::Surv(t_start, t_stop, censor) ~ ",
+         paste(predvars, collapse = " + ")
+      )
+   )
+
+   cens_model <- survival::coxph(model_fmla, data = df, ties = "efron")
+
+   #@TODO allow factors and carry forward through previous functions
+   # ref_df <- setNames(data.frame(matrix(0, nrow = 1, ncol = length(predvars))), predvars)
+
+   # Linear predictor on the uncentered scale, matching `centered = FALSE` below
+   df$lp <- as.matrix(df[, predvars]) %*% stats::coef(cens_model)
+   baseline_hazard <- data.frame(
+      survival::basehaz(cens_model, centered = FALSE)
+   )
+   names(baseline_hazard) <- c("hazard", "t")
+   
+   # Attach the time_id of each event time to the cumulative baseline hazard
+   dat_base_times <- unique(
+      merge(
+         x = baseline_hazard,
+         y = event_times_df,
+         by.x = "t",
+         by.y = "t_event",
+         all.x = TRUE
+      )
+   )
+
+   df <- merge(
+      x = df, 
+      y = dat_base_times,
+      by = "time_id",
+      all.x = TRUE
+   )
+
+   df <- df[order(df[[id_col]], df$fup_outcome), ]
+
+   # Intervals without a matching baseline hazard are given a hazard of 0
+   df$hazard[is.na(df$hazard)] <- 0
+   df$p_uncens <- exp(-(df$hazard) * exp(df$lp))
+   df$weight_cox  <- 1 / df$p_uncens
+
+   # Intervals with time_id 0 always get a weight of 1
+   df$weight_cox[df$time_id == 0] <- 1
+
+   row.names(df) <- NULL   
+   return(df)
+}
@@ -0,0 +1,3 @@
+# survivalCCW
+
+This is a work-in-progress package that conducts clone censor weight analyses in R. Please use at your own risk. Consider filing a bug report or reaching out to [Matt](mailto:secrmatt@gmail.com) for questions/comments/suggestions.
@@ -0,0 +1,5 @@
+## R CMD check results
+
+0 errors | 0 warnings | 1 note
+
+* This is a new release.
@@ -0,0 +1,22 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 4
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+LineEndingConversion: Posix
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(survivalCCW)
+
+test_check("survivalCCW")
@@ -0,0 +1,61 @@
+test_that("casting clones requires ccw_clones class", {
+
+  # A plain data.frame that has not been through create_clones()
+  raw_df <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  )
+  expect_error(cast_clones_to_long(raw_df))
+
+  cloned <- create_clones(
+    raw_df,
+    id = "id",
+    event = "event",
+    time_to_event = "time_to_event",
+    exposure = "exposure",
+    time_to_exposure = "time_to_exposure",
+    ced_window = 200
+  )
+
+  # Cloned object with its `id` attribute stripped
+  attributes(cloned)$id <- NULL
+  expect_error(cast_clones_to_long(cloned))
+
+  # NOTE(review): `cloned` still lacks the `id` attribute at this point, so
+  # this check cannot distinguish a missing `outcome` column from the missing
+  # attribute above -- consider starting from a fresh clone.
+  cloned$outcome <- NULL
+  expect_error(cast_clones_to_long(cloned))
+})
+
+test_that("long format was created correctly", {
+
+  df_long <- toy_df |>
+    create_clones(
+      id = "id",
+      event = "death",
+      time_to_event = "fup_obs",
+      exposure = "surgery",
+      time_to_exposure = "timetosurgery",
+      ced_window = 182.62
+    ) |>
+    cast_clones_to_long()
+
+  # Structural checks: the long object is a non-empty data.frame carrying the
+  # columns that the weighting step and the other tests index by name.
+  expect_s3_class(df_long, "data.frame")
+  expect_gt(nrow(df_long), 0)
+  expect_true(
+    all(c("id", "clone", "time_id", "t_start", "t_stop") %in% names(df_long))
+  )
+  #@TODO more test cases
+})
+
+
+test_that("Compare results to Maringe", {
+
+  # Package output, put into a canonical row order
+  ours <- cast_clones_to_long(
+    create_clones(
+      toy_df,
+      id = "id",
+      event = "death",
+      time_to_event = "fup_obs",
+      exposure = "surgery",
+      time_to_exposure = "timetosurgery",
+      ced_window = 182.62
+    )
+  )
+  ours <- ours[order(ours$id, ours$clone, ours$time_id), ]
+  row.names(ours) <- NULL
+
+  # Packaged reference data, sorted the same way
+  ref <- data_final[order(data_final$id, data_final$clone, data_final$time_id), ]
+  row.names(ref) <- NULL
+
+  # Every column we produce must match the reference column of the same name
+  for (nm in names(ours)) {
+    expect_equal(ours[[nm]], ref[[nm]], tolerance = 1e-6)
+  }
+
+})
@@ -0,0 +1,248 @@
+test_that("input types correct", {
+
+  dat <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  )
+
+  # `id` passed as a bare symbol rather than a string
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = id,
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+  # `event` passed as the column's values rather than its name
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = dat$event,
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+  # `time_to_event` passed as a number
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = "event",
+      time_to_event = 2.0,
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+  # `ced_window` passed as a string
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = "ced_window"
+    )
+  )
+
+})
+
+test_that("columns are in data", {
+
+  dat <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  )
+
+  # Unknown id column
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "hamburger",
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+  # Unknown event column
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = "sausage",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+  # Unknown exposure column
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "pumpkin",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+})
+
+test_that("no missing data exists in these columns (other than time to exposure)", {
+
+  # `event` has a missing value for the third patient
+  dat <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, NA_integer_, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  )
+
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+})
+
+test_that("the same column name is not passed >1 time", {
+
+  dat <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  )
+
+  # "event" is supplied for both `event` and `time_to_event`
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = "event",
+      time_to_event = "event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+})
+
+test_that("the respective columns have the right classes", {
+
+  # Every case goes through the same call; only the data changes
+  expect_rejected <- function(dat) {
+    expect_error(
+      create_clones_check_inputs(
+        dat,
+        id = "id",
+        event = "event",
+        time_to_event = "time_to_event",
+        exposure = "exposure",
+        time_to_exposure = "time_to_exposure",
+        ced_window = 200
+      )
+    )
+  }
+
+  # Character event
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c("ham", "is", "good", "food"),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  ))
+
+  # Double (non-integer typed) event
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(1.0, 1.0, 0.0, 1.0),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  ))
+
+  # Double (non-integer typed) exposure
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(1.0, 1.0, 0.0, 1.0),
+    time_to_exposure = c(2.2, 2.3, NA_real_, 3.3)
+  ))
+
+  # Character time to event
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c("ham", "is", "great", "food"),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  ))
+
+  # Factor time to exposure
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = factor(c("ham", "is", "great", "food"))
+  ))
+
+})
+
+test_that("for all pts with an exposure, time to exposure is complete", {
+
+  # Both cases go through the same call; only the data changes.
+  # Logical literals are spelled TRUE/FALSE because T and F are ordinary,
+  # reassignable variables.
+  expect_rejected <- function(dat) {
+    expect_error(
+      create_clones_check_inputs(
+        dat,
+        id = "id",
+        event = "event",
+        time_to_event = "time_to_event",
+        exposure = "exposure",
+        time_to_exposure = "time_to_exposure",
+        ced_window = 200
+      )
+    )
+  }
+
+  # Exposed patient (id 2) with a missing time to exposure
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(TRUE, TRUE, FALSE, TRUE),
+    time_to_exposure = c(1.1, NA_real_, NA_real_, 3.3)
+  ))
+
+  # Unexposed patient (id 3) with a time to exposure filled in
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(TRUE, TRUE, FALSE, TRUE),
+    time_to_exposure = c(1.1, 3.3, 9.4, 3.3)
+  ))
+
+})
+
+test_that("protected column names are blocked", {
+
+  # Logical literals are spelled TRUE/FALSE because T and F are ordinary,
+  # reassignable variables.
+
+  # Event column is named `clone`
+  dat_clone <- data.frame(
+    id = c(1, 2, 3, 4),
+    clone = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(TRUE, TRUE, FALSE, TRUE),
+    time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
+  )
+
+  expect_error(
+    create_clones_check_inputs(
+      dat_clone,
+      id = "id",
+      event = "clone",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+  # Exposure column is named `censor`
+  dat_censor <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    censor = c(TRUE, TRUE, FALSE, TRUE),
+    time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
+  )
+
+  expect_error(
+    create_clones_check_inputs(
+      dat_censor,
+      id = "id",
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "censor",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 200
+    )
+  )
+
+})
+
+test_that("exposure and event are just 0/1 or T/F", {
+
+  # Both cases go through the same call; only the data changes
+  expect_rejected <- function(dat) {
+    expect_error(
+      create_clones_check_inputs(
+        dat,
+        id = "id",
+        event = "event",
+        time_to_event = "time_to_event",
+        exposure = "exposure",
+        time_to_exposure = "time_to_exposure",
+        ced_window = 200
+      )
+    )
+  }
+
+  # Event takes values beyond 0/1
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 2L, 3L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(1L, 1L, 0L, 1L),
+    time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
+  ))
+
+  # Exposure takes values beyond 0/1
+  expect_rejected(data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 0L, 0L, 0L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 2L, 3L),
+    time_to_exposure = c(NA_real_, 2.2, 3.3, 3.3)
+  ))
+
+})
+
+test_that("No outcomes before exposure dates", {
+
+  # Patients 1, 2 and 4 have a time to event earlier than their time to exposure
+  dat <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(1L, 1L, 0L, 0L),
+    time_to_event = c(1, 2, 1, 2),
+    exposure = c(1L, 1L, 0L, 1L),
+    time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
+  )
+
+  expect_error(
+    create_clones_check_inputs(
+      dat,
+      id = "id",
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 1.0
+    )
+  )
+
+})
@@ -0,0 +1,257 @@
+test_that("CED trims time to exposure appropriately", {
+
+  # Patient 4 is exposed at 3.3, after the 2.5 window used below
+  dat <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(200L, 250L, 9900L, 100L),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  )
+
+  expect_message(
+    clones <- create_clones(
+      dat,
+      id = "id",
+      event = "event",
+      time_to_event = "time_to_event",
+      exposure = "exposure",
+      time_to_exposure = "time_to_exposure",
+      ced_window = 2.5
+    ),
+    "Updating 1 patients' exposure and time-to-exposure based on CED window"
+  )
+
+  # Both clones of patient 4 must now be recorded as unexposed
+  exposure_pt4 <- unique(clones[clones$id == 4, "exposure"])
+  expect_equal(0L, exposure_pt4)
+
+})
+
+
+test_that("Spot check that outcomes are correctly assigned", {
+
+  dat <- data.frame(
+    id = 1:6,
+    event = c(1L, 1L, 1L, 1L, 1L, 1L),
+    time_to_event = c(10, 100, 100, 10, 100, 100),
+    exposure = c(rep(0L, 3), rep(1L, 3)),
+    time_to_exposure = c(rep(NA_real_, 3), 2, 8, 12)
+  )
+
+  clones <- create_clones(
+    dat,
+    id = "id",
+    event = "event",
+    time_to_event = "time_to_event",
+    exposure = "exposure",
+    time_to_exposure = "time_to_exposure",
+    ced_window = 20
+  )
+
+  # Value of `col` for one patient in one arm
+  pick <- function(pid, arm, col) {
+    clones[clones$id == pid & clones$clone == arm, col]
+  }
+
+  # Exposed clones (clone == 1)
+  expect_equal(pick(4, 1, "outcome"), 1L)
+  expect_equal(pick(4, 1, "fup_outcome"), 10)
+  expect_equal(pick(1, 1, "outcome"), 1L)
+  expect_equal(pick(1, 1, "fup_outcome"), 10)
+  expect_equal(pick(2, 1, "outcome"), 0L)
+  expect_equal(pick(2, 1, "fup_outcome"), 20)
+
+  # Unexposed clones (clone == 0)
+  expect_equal(pick(4, 0, "outcome"), 0L)
+  expect_equal(pick(4, 0, "fup_outcome"), 2)
+  expect_equal(pick(1, 0, "outcome"), 1L)
+  expect_equal(pick(1, 0, "fup_outcome"), 10)
+  expect_equal(pick(2, 0, "outcome"), 1L)
+  expect_equal(pick(2, 0, "fup_outcome"), 100)
+
+})
+
+
+test_that("Spot check that censoring statuses are correctly assigned",{
+
+  dat <- data.frame(
+    id = 1:6,
+    event = c(1L, 1L, 1L, 1L, 1L, 1L),
+    time_to_event = c(10, 100, 100, 10, 100, 100),
+    exposure = c(rep(0L, 3), rep(1L, 3)),
+    time_to_exposure = c(rep(NA_real_, 3), 2, 8, 12)
+  )
+
+  clones <- create_clones(
+    dat,
+    id = "id",
+    event = "event",
+    time_to_event = "time_to_event",
+    exposure = "exposure",
+    time_to_exposure = "time_to_exposure",
+    ced_window = 20
+  )
+
+  # Value of `col` for one patient in one arm
+  pick <- function(pid, arm, col) {
+    clones[clones$id == pid & clones$clone == arm, col]
+  }
+
+  # Exposed clones (clone == 1)
+  expect_equal(pick(4, 1, "censor"), 0L)
+  expect_equal(pick(4, 1, "fup_censor"), 2)
+  expect_equal(pick(1, 1, "censor"), 0L)
+  expect_equal(pick(1, 1, "fup_censor"), 10)
+  expect_equal(pick(2, 1, "censor"), 1L)
+  expect_equal(pick(2, 1, "fup_censor"), 20)
+
+  # Unexposed clones (clone == 0)
+  expect_equal(pick(4, 0, "censor"), 1L)
+  expect_equal(pick(4, 0, "fup_censor"), 2)
+  expect_equal(pick(1, 0, "censor"), 0L)
+  expect_equal(pick(1, 0, "fup_censor"), 10)
+  expect_equal(pick(2, 0, "censor"), 0L)
+  expect_equal(pick(2, 0, "fup_censor"), 20)
+
+})
+
+test_that("Compare results to Maringe", {
+
+  # Package output, sorted by patient and arm
+  ours <- create_clones(
+    toy_df,
+    id = "id",
+    event = "death",
+    time_to_event = "fup_obs",
+    exposure = "surgery",
+    time_to_exposure = "timetosurgery",
+    ced_window = 182.62
+  )
+  ours <- ours[order(ours$id, ours$clone), ]
+  row.names(ours) <- NULL
+
+  # Reference table `tab`, sorted the same way
+  ref <- tab[order(tab$id, tab$clone), ]
+  row.names(ref) <- NULL
+
+  # Compare each derived column
+  for (nm in c("outcome", "fup_outcome", "censor", "fup_censor")) {
+    expect_equal(ours[[nm]], ref[[nm]], tolerance = 1e-6)
+  }
+
+})
+
+test_that("attribute are passed correctly", {
+
+  dat <- data.frame(
+    id = 1:6,
+    event = c(1L, 1L, 1L, 1L, 1L, 1L),
+    time_to_event = c(10, 100, 100, 10, 100, 100),
+    exposure = c(rep(0L, 3), rep(1L, 3)),
+    time_to_exposure = c(rep(NA_real_, 3), 2, 8, 12)
+  )
+
+  clones <- create_clones(
+    dat,
+    id = "id",
+    event = "event",
+    time_to_event = "time_to_event",
+    exposure = "exposure",
+    time_to_exposure = "time_to_exposure",
+    ced_window = 20
+  )
+
+  # Each argument given to create_clones() should be stored as an attribute
+  stored <- attributes(clones)
+
+  expect_equal(stored$id, "id")
+  expect_equal(stored$event, "event")
+  expect_equal(stored$time_to_event, "time_to_event")
+  expect_equal(stored$exposure, "exposure")
+  expect_equal(stored$time_to_exposure, "time_to_exposure")
+  expect_equal(stored$ced_window, 20)
+
+})
@@ -0,0 +1,32 @@
+
+test_that("weights are adequately calculated compared to Maringe", {
+
+  adj_vars <- c("age", "sex", "perf", "stage", "deprivation", "charlson", "emergency")
+
+  # Full pipeline: clone, cast to long, weight
+  ours <- create_clones(
+    toy_df,
+    id = "id",
+    event = "death",
+    time_to_event = "fup_obs",
+    exposure = "surgery",
+    time_to_exposure = "timetosurgery",
+    ced_window = 182.62
+  )
+  ours <- cast_clones_to_long(ours)
+  ours <- generate_ccw(ours, predvars = adj_vars)
+
+  ours <- ours[order(ours$id, ours$clone, ours$time_id), ]
+  row.names(ours) <- NULL
+
+  # Reference data, sorted the same way
+  ref <- data_long_cox[order(data_long_cox$id, data_long_cox$clone, data_long_cox$time_id), ]
+  row.names(ref) <- NULL
+
+  # Compare every reference column that we also produce
+  shared_cols <- colnames(ref)[colnames(ref) %in% colnames(ours)]
+  for (nm in shared_cols) {
+    # Strip row names carried by the column itself before comparing
+    row.names(ours[[nm]]) <- NULL
+    row.names(ref[[nm]]) <- NULL
+    expect_equal(ours[[nm]], ref[[nm]], tolerance = 1e-6)
+  }
+
+  # The weighted outcome models must agree as well
+  fit_ours <- survival::coxph(survival::Surv(t_start, t_stop, outcome) ~ clone, data = ours, weights = weight_cox)
+  fit_ref <- survival::coxph(survival::Surv(t_start, t_stop, outcome) ~ clone, data = ref, weights = weight_cox)
+
+  expect_equal(fit_ours$coefficients, fit_ref$coefficients, tolerance = 1e-6)
+
+})
@@ -0,0 +1,128 @@
+test_that("casting clones requires ccw_clones_long class", {
+
+  # NOTE(review): these checks previously called `generate_ccw_on_long_df()`,
+  # a name used nowhere else in this change. If no such function exists,
+  # every `expect_error()` passed only because the call could not be resolved.
+  # They now target `generate_ccw()`, the weighting function the vignette and
+  # the other tests call -- confirm this is the intended function.
+
+  df <- data.frame(
+    id = c(1, 2, 3, 4),
+    event = c(0L, 1L, 0L, 1L),
+    time_to_event = c(100, 200, 100, 200),
+    exposure = c(0L, 1L, 0L, 1L),
+    time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
+  )
+
+  # Plain data.frame that has not been cloned or cast to long
+  expect_error(
+    generate_ccw(df)
+  )
+
+  df_long <- toy_df |>
+    create_clones(
+      id = "id",
+      event = "death",
+      time_to_event = "fup_obs",
+      exposure = "surgery",
+      time_to_exposure = "timetosurgery",
+      ced_window = 365.25/2
+    ) |>
+    cast_clones_to_long()
+
+  # Long object with its `id` attribute stripped
+  attributes(df_long)$id <- NULL
+
+  expect_error(
+    generate_ccw(df_long)
+  )
+
+  # Long object additionally missing the `t_start` column
+  df_long$t_start <- NULL
+
+  expect_error(
+    generate_ccw(df_long)
+  )
+})
+
+test_that("when predvars columns are missing, an error is thrown", {
+
+  # NOTE(review): these checks previously called `generate_ccw_on_long_df()`,
+  # a name used nowhere else in this change. If no such function exists,
+  # every `expect_error()` passed only because the call could not be resolved.
+  # They now target `generate_ccw()` -- confirm this is the intended function.
+
+  df_long <- toy_df |>
+    create_clones(
+      id = "id",
+      event = "death",
+      time_to_event = "fup_obs",
+      exposure = "surgery",
+      time_to_exposure = "timetosurgery",
+      ced_window = 365.25/2
+    ) |>
+    cast_clones_to_long()
+
+  # Adjustment variable that is not a column of the data
+  expect_error(
+    generate_ccw(df_long, predvars = "hamburger")
+  )
+
+  # No adjustment variables at all
+  expect_error(
+    generate_ccw(df_long, predvars = NULL)
+  )
+
+})
+
+test_that("categorical vars are dealt with", {
+
+  # NOTE(review): these checks previously called `generate_ccw_on_long_df()`,
+  # a name used nowhere else in this change. If no such function exists,
+  # every `expect_error()` passed only because the call could not be resolved.
+  # They now target `generate_ccw()` -- confirm this is the intended function.
+
+  toy_df_s <- toy_df
+  toy_df_s$sandwich <- rep(c("ham", "turkey", "cheese"), length.out = NROW(toy_df_s))
+
+  df_long <- toy_df_s |>
+    create_clones(
+      id = "id",
+      event = "death",
+      time_to_event = "fup_obs",
+      exposure = "surgery",
+      time_to_exposure = "timetosurgery",
+      ced_window = 365.25/2
+    ) |>
+    cast_clones_to_long()
+
+  # Character adjustment variable
+  expect_error(
+    generate_ccw(df_long, predvars = "sandwich")
+  )
+
+  # Factor adjustment variable
+  df_long$sandwich_f <- factor(df_long$sandwich)
+
+  expect_error(
+    generate_ccw(df_long, predvars = "sandwich_f")
+  )
+
+  # No adjustment variables at all
+  expect_error(
+    generate_ccw(df_long, predvars = NULL)
+  )
+
+})
+
+test_that("weights are adequately calculated compared to Maringe", {
+
+  df <- toy_df |>
+    create_clones(
+      id = "id",
+      event = "death",
+      time_to_event = "fup_obs",
+      exposure = "surgery",
+      time_to_exposure = "timetosurgery",
+      ced_window = 182.62
+    ) |>
+    cast_clones_to_long()
+
+  # Only the event-times table is needed from the long object's attributes
+  event_times_df <- attributes(df)$event_times_df
+
+  predvars <- c("age", "sex", "perf", "stage", "deprivation", "charlson", "emergency")
+
+  # Weight one arm and return it sorted by patient and time interval
+  weights_for_arm <- function(arm) {
+    out <- generate_ccw_calc_weights(df[df$clone == arm, ], event_times_df, predvars)
+    out <- out[order(out$id, out$time_id), ]
+    row.names(out) <- NULL
+    out
+  }
+
+  # Compare the weight-related columns of `ours` with the reference `ref`
+  expect_weights_match <- function(ours, ref) {
+    ref <- ref[order(ref$id, ref$time_id), ]
+    row.names(ref) <- NULL
+    for (col in c("time_id", "lp", "t", "hazard", "p_uncens", "weight_cox")) {
+      # Strip row names carried by the column itself before comparing
+      row.names(ours[[col]]) <- NULL
+      row.names(ref[[col]]) <- NULL
+      expect_equal(
+        ours[[col]],
+        ref[[col]],
+        tolerance = 1e-6
+      )
+    }
+  }
+
+  # clone == 1 is checked against data_long, clone == 0 against data_long_2
+  expect_weights_match(weights_for_arm(1L), data_long)
+  expect_weights_match(weights_for_arm(0L), data_long_2)
+
+})
+
@@ -0,0 +1,2 @@
+*.html
+*.R
@@ -0,0 +1,222 @@
+---
+title: "How to Conduct a Clone Censor-Weight Survival Analysis using survivalCCW"
+author: Matthew Secrest
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{How to Conduct a Clone Censor-Weight Survival Analysis using survivalCCW}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+
+# Clone Censor Weighting
+
+This lightweight package describes how to conduct clone-censor weighting (CCW) to address the problem of immortal time bias in survival analysis. This vignette will walk through the applied tutorial published by [Maringe et al 2020](https://academic.oup.com/ije/article/49/5/1719/5835351). Refer to [Hernan and Robins 2016](https://doi.org/10.1093/aje/kwv254) and [Hernan et al 2016](https://doi.org/10.1016/j.jclinepi.2016.04.014) for more technical details.
+
+# Context
+
+CCW is useful in the presence of immortal person-time bias in observational studies. For instance, when comparing surgery recipients vs non-recipients in non-small cell lung cancer (NSCLC), the surgery group will have a longer survival time than the non-surgery group because the non-surgery group includes patients who died before they could receive surgery. This is a form of immortal time bias.
+
+The CCW toy dataset published by Maringe uses this exact setting as the motivating example. Let's explore the dataset, which comes with `survivalCCW`.
+
+```{r}
+library(survivalCCW)
+head(toy_df)
+```
+
+Column descriptions can be found with `?toy_df`:
+
+- `id`: patient identifier
+- `fup_obs`: observed follow-up time (time to death or 1 year if censored alive)
+- `death`: observed event of interest (all-cause death) 1: dead, 0: alive
+- `timetosurgery`: time to surgery (NA if no surgery)
+- `age`: age at diagnosis
+- `sex`: patient's sex
+- `perf`: performance status at diagnosis
+- `stage`: stage at diagnosis
+- `deprivation`: deprivation score
+- `charlson`: Charlson's comorbidity index
+- `emergency`: route to diagnosis
+
+Note that this package addresses situations in which the covariates are all defined at baseline.
+
+# Create clones
+
+The first step is to create the clones. This can be done for any time-to-event outcome using the `survivalCCW` function `create_clones`. For `create_clones` to work, we need to pass a one-row-per-patient `data.frame` with the following columns:
+
+- The id variable (in this case, `id`)
+- The traditional outcome variable which denotes censorship (0) or event (1) (in this case, `death`). Note that additional values are not yet permitted.
+- The time to the first event (in this case, `fup_obs`)
+- The exposure variable, with exposure defined **at any time prior to censorship/event** (in this case, `surgery`). Must be (0) or (1).
+- The time to exposure variable (in this case, `timetosurgery`)
+- The clinical eligibility window (in this case, we'll do 6 months)
+
+All other columns will be propagated for each patient. Let's see what this looks like in practice.
+
+```{r}
+
+# Create clones
+clones <- create_clones(
+  df = toy_df,
+  id = 'id',
+  event = 'death',
+  time_to_event = 'fup_obs',
+  exposure = 'surgery',
+  time_to_exposure = 'timetosurgery',
+  ced_window = 182.62
+)
+
+head(clones)
+```
+
+
+Note that this object is just a `data.frame` with an additional custom class which future functions will evaluate:
+```{r}
+class(clones)
+```
+
+
+# Cast to long format
+Now we simply need to cast the data to long format. The `survivalCCW` function `cast_clones_to_long()` will do this for us. 
+No additional arguments are needed (the `clones` object is an artifact that allows you to better see and understand the method):
+
+```{r}
+clones_long <- cast_clones_to_long(clones)
+
+head(clones_long, row.names = FALSE)
+```
+
+Let's pick out a single patient and look at their data:
+
+```{r}
+print(clones_long[clones_long$id == "P5913", ], row.names = FALSE)
+```
+
+# Generate weights
+
+Now we simply need to generate the weights. The `survivalCCW` function `generate_ccw()` will do this for us.
+
+```{r}
+clones_long_weights <- generate_ccw(clones_long, predvars = c("age", "sex", "perf", "stage", "deprivation", "charlson", "emergency"))
+
+head(clones_long_weights, row.names = FALSE)
+```
+
+Let's pick out a single patient and look at their data:
+
+```{r}
+print(clones_long_weights[clones_long_weights$id == "P5913", ], row.names = FALSE)
+```
+
+# Evaluate outcomes 
+
+We now have everything we need to conduct a CCW analysis. For instance, we can pipe things together to evaluate the hazard ratio for surgery vs no surgery:
+
+```{r}
+library(survival)
+df <- toy_df |>
+  create_clones(
+    id = 'id',
+    event = 'death',
+    time_to_event = 'fup_obs',
+    exposure = 'surgery',
+    time_to_exposure = 'timetosurgery',
+    ced_window = 365.25/2
+  ) |>
+  cast_clones_to_long() |>
+  generate_ccw(c('age', 'sex', 'perf', 'stage', 'deprivation', 'charlson', 'emergency'))
+
+coxph(Surv(t_start, t_stop, outcome) ~ clone, data = df, weights = weight_cox)
+
+```
+
+Note that we used `outcome` and not `death` in the `coxph()` model. Still, there is of course a problem with this analysis, as the cloning process renders the variance invalid. The simplest approach to addressing this is 
+to bootstrap the variance. I have not made a function to do this yet, but leave the below as an example of how to do this.
+
+```{r}
+library(boot)
+
+# Bootstrap statistic for boot::boot(): rebuilds the weighted long data from
+# the resampled rows of `data` and returns a numeric vector holding, in order,
+# the hazard ratio, the RMST difference and the 1-year survival difference
+# (clone 1 minus clone 0).
+boot_cox <- function(data, indices) {
+
+  # Make long data.frame with weights
+  # NOTE(review): resampling with replacement can repeat an `id`; confirm that
+  # the cloning and weighting steps treat repeated ids as intended.
+  ccw_df <- data[indices, ] |>
+    create_clones(
+      id = 'id',
+      event = 'death',
+      time_to_event = 'fup_obs',
+      exposure = 'surgery',
+      time_to_exposure = 'timetosurgery',
+      ced_window = 182.62
+    ) |>
+    cast_clones_to_long() |>
+    generate_ccw(c('age', 'sex', 'perf', 'stage', 'deprivation', 'charlson', 'emergency'))
+
+  # Extract HR from CoxPH
+  cox_ccw <- coxph(Surv(t_start, t_stop, outcome) ~ clone, data = ccw_df, weights = weight_cox)
+  hr <- exp(coef(cox_ccw))
+
+  # Weighted survfit object for one arm
+  fit_arm <- function(arm) {
+    arm_df <- ccw_df[ccw_df$clone == arm, ]
+    survfit(Surv(t_start, t_stop, outcome) ~ 1L, data = arm_df, weights = weight_cox)
+  }
+  surv_1 <- fit_arm(1)
+  surv_0 <- fit_arm(0)
+
+  # RMST difference, restricted to 365 days
+  rmst <- function(fit) summary(fit, rmean = 365)$table["rmean"]
+  rmst_diff <- rmst(surv_1) - rmst(surv_0)
+
+  # 1-year survival difference, read at the time point closest to 365 days
+  surv_at_1yr <- function(fit) fit$surv[which.min(abs(fit$time - 365))]
+  surv_diff_1yr <- surv_at_1yr(surv_1) - surv_at_1yr(surv_0)
+
+  # Return the statistics explicitly rather than relying on the invisible
+  # value of a trailing assignment
+  c("hr" = hr, "rmst_diff" = rmst_diff, "surv_diff_1yr" = surv_diff_1yr)
+
+}
+
+boot_out <- boot(data = toy_df, statistic = boot_cox, R = 10)
+```
+
+## Hazard ratios
+```{r}
+boot.ci(boot_out, type = "norm", index = 1)
+```
+
+## RMST
+```{r}
+boot.ci(boot_out, type = "norm", index = 2)
+```
+
+## 1-year survival
+```{r}
+boot.ci(boot_out, type = "norm", index = 3)
+```
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# survivalCCW 0.0.1`
	`2`	`+`
	`3`	`+* Initial functionality and CRAN submission`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# survivalCCW`
	`2`	`+`
	`3`	`+This is a work-in-progress package that conducts clone censor weight analyses in R. Please use at your own risk. Consider filing a bug report or reaching out to [Matt](mailto:secrmatt@gmail.com) for questions/comments/suggestions.`