Skip to content

Commit cf4e756

Browse files
authoredJan 9, 2024
Merge pull request #1 from mattsecrest/v0.0.1
v0.0.1
2 parents 6c0aa68 + b9f9847 commit cf4e756

39 files changed

+1993
-0
lines changed
 

‎.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.DS_Store

‎DESCRIPTION

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
Package: survivalCCW
2+
Title: An R Package for Clone Censor Weighting (CCW) Survival Analyses
3+
Version: 0.0.1
4+
Authors@R:
5+
person("Matthew", "Secrest", , "secrmatt@gmail.com", role = c("aut", "cre"),
6+
comment = c(ORCID = "0000-0002-0939-4902"))
7+
Description: This is a work-in-progress package that conducts clone censor weight analyses in R. Please use at your own risk. Consider filing a bug report or reaching out to [Matt](mailto:secrmatt@gmail.com) for questions/comments/suggestions.
8+
License: CC BY 4.0
9+
Encoding: UTF-8
10+
Roxygen: list(markdown = TRUE)
11+
RoxygenNote: 7.2.3
12+
Suggests:
13+
knitr,
14+
rmarkdown,
15+
boot,
16+
testthat (>= 3.0.0)
17+
VignetteBuilder: knitr
18+
Depends:
19+
R (>= 4.1.0)
20+
LazyData: true
21+
Config/testthat/edition: 3
22+
Imports:
23+
checkmate,
24+
survival

‎LICENSE.md

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
This work is licensed under the Creative Commons
2+
Attribution-NonCommercial 4.0 International.
3+
4+
https://creativecommons.org/licenses/by-nc/4.0/

‎NAMESPACE

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(cast_clones_to_long)
4+
export(create_clones)
5+
export(generate_ccw)

‎NEWS.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# survivalCCW 0.0.1
2+
3+
* Initial functionality and CRAN submission

‎R/cast_clones_to_long.R

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
#' Cast one-row-per-clone data to long format
2+
#'
3+
#' @param df A data.frame with one row per clone as returned by [create_clones()]
4+
#'
5+
#' @return A data.frame with one row per patient per time period per clone.
6+
#'
7+
#' @export
8+
#'
9+
#' @examples
10+
#'
11+
#' # Load the toy dataset
12+
#' data(toy_df)
13+
#'
14+
#' # Create clones
15+
#' clones <- create_clones(toy_df,
16+
#' id = "id",
17+
#' event = "death",
18+
#' time_to_event = "fup_obs",
19+
#' exposure = "surgery",
20+
#' time_to_exposure = "timetosurgery",
21+
#' ced_window = 365.25/2)
22+
#'
23+
#' clones_long <- cast_clones_to_long(clones)
24+
#'
25+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
26+
cast_clones_to_long <- function(df) {
27+
28+
# Check inputs
29+
checkmate::assert_class(df, "ccw_clones")
30+
if (!all(c("outcome", "fup_outcome", "censor", "fup_censor", "clone") %in% names(df))) {
31+
stop("The input data.frame is missing at least one of the required columns: outcome, fup_outcome, censor, fup_censor, clone. Did you remove this?")
32+
}
33+
if (!all(c("id", "event", "time_to_event", "exposure", "time_to_exposure", "ced_window") %in% names(attributes(df)))) {
34+
stop("The input data.frame is missing at least one attribute: id, event, time_to_event, exposure, time_to_exposure, ced_window. Did you remove these or try to make a custom data.frame?")
35+
}
36+
37+
id <- attributes(df)$id
38+
event <- attributes(df)$event
39+
exposure <- attributes(df)$exposure
40+
time_to_event <- attributes(df)$time_to_event
41+
time_to_exposure <- attributes(df)$time_to_exposure
42+
ced_window <- attributes(df)$ced_window
43+
44+
# Now convert to long
45+
event_times <- sort(unique(c(df[,time_to_event], df[,time_to_exposure], ced_window)))
46+
event_times_df <- data.frame(
47+
t_event = event_times,
48+
time_id = 1:NROW(event_times)
49+
)
50+
51+
# Exposed
52+
## Outcome
53+
df_1 <- df[df[, "clone"] == 1L, ]
54+
55+
df_1$t_start <- 0.
56+
57+
df_1_long_outcome <- survival::survSplit(
58+
df_1,
59+
cut = event_times,
60+
end = "fup_outcome",
61+
start = "t_start",
62+
event = "outcome"
63+
)
64+
65+
df_1_long_outcome <- df_1_long_outcome[order(df_1_long_outcome[, id], df_1_long_outcome[, "fup_outcome"]), ]
66+
67+
## Censor
68+
df_1_long_censor <- survival::survSplit(
69+
df_1,
70+
cut = event_times,
71+
end = "fup_outcome", # Not a typo, we want to expand at the outcome time still
72+
start = "t_start",
73+
event = "censor"
74+
)
75+
76+
df_1_long_censor <- df_1_long_censor[order(df_1_long_censor[, id], df_1_long_censor[, "fup_outcome"]), ]
77+
78+
## Replace censoring variable in df_1_long_outcome
79+
df_1_long_outcome$censor <- df_1_long_censor$censor
80+
81+
df_1_long_outcome$t_stop <- df_1_long_outcome$fup_outcome
82+
83+
df_1_long <- merge(
84+
x = df_1_long_outcome,
85+
y = event_times_df,
86+
by.x = "t_start",
87+
by.y = "t_event",
88+
all.x = TRUE)
89+
90+
df_1_long <- df_1_long[order(df_1_long[,id], df_1_long[, "fup_outcome"]), ]
91+
df_1_long$time_id[is.na(df_1_long$time_id)] <- 0
92+
rownames(df_1_long) <- NULL
93+
94+
# Unexposed
95+
## Outcome
96+
df_0 <- df[df[, "clone"] == 0L, ]
97+
98+
df_0$t_start <- 0.
99+
100+
df_0_long_outcome <- survival::survSplit(
101+
df_0,
102+
cut = event_times,
103+
end = "fup_outcome",
104+
start = "t_start",
105+
event = "outcome"
106+
)
107+
108+
df_0_long_outcome <- df_0_long_outcome[order(df_0_long_outcome[, id], df_0_long_outcome[, "fup_outcome"]), ]
109+
110+
## Censor
111+
df_0_long_censor <- survival::survSplit(
112+
df_0,
113+
cut = event_times,
114+
end = "fup_outcome", # Not a typo, we want to expand at the outcome time still
115+
start = "t_start",
116+
event = "censor"
117+
)
118+
119+
df_0_long_censor <- df_0_long_censor[order(df_0_long_censor[, id], df_0_long_censor[, "fup_outcome"]), ]
120+
121+
## Replace censoring variable in df_1_long_outcome
122+
df_0_long_outcome$censor <- df_0_long_censor$censor
123+
124+
df_0_long_outcome$t_stop <- df_0_long_outcome$fup_outcome
125+
126+
df_0_long <- merge(
127+
x = df_0_long_outcome,
128+
y = event_times_df,
129+
by.x = "t_start",
130+
by.y = "t_event",
131+
all.x = TRUE)
132+
133+
df_0_long <- df_0_long[order(df_0_long[,id], df_0_long[, "fup_outcome"]), ]
134+
df_0_long$time_id[is.na(df_0_long$time_id)] <- 0
135+
rownames(df_0_long) <- NULL
136+
137+
# Combine
138+
df_long <- rbind(
139+
df_1_long,
140+
df_0_long
141+
)
142+
143+
df_long <- merge(
144+
x = df_long,
145+
y = event_times_df,
146+
by = "time_id",
147+
all.x = TRUE
148+
)
149+
150+
df_long <- df_long[order(df_long[,id], df_long[, "clone"], df_long[, "fup_outcome"]), ]
151+
152+
# Add attributes and return
153+
class(df_long) <- c("ccw_clones_long", class(df_long))
154+
attributes(df_long)$id <- id
155+
attributes(df_long)$event <- event
156+
attributes(df_long)$time_to_event <- time_to_event
157+
attributes(df_long)$exposure <- exposure
158+
attributes(df_long)$time_to_exposure <- time_to_exposure
159+
attributes(df_long)$ced_window <- ced_window
160+
attributes(df_long)$event_times_df <- event_times_df
161+
162+
# Remove rownames
163+
rownames(df_long) <- NULL
164+
165+
return(df_long)
166+
}

‎R/create_clones.R

+142
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#' Create clones for CCW analysis
2+
#'
3+
#' Pass a one-row-per-patient data.frame and get back a data.frame with one row per clone.
4+
#'
5+
#' @param df A data.frame with one row per patient.
6+
#' @param id The name of the column in `df` that contains the patient identifier.
7+
#' @param event The name of the column in `df` that contains the event of interest.
8+
#' @param time_to_event The name of the column in `df` that contains the time to event.
9+
#' @param exposure The name of the column in `df` that contains the exposure.
10+
#' @param time_to_exposure The name of the column in `df` that contains the time to exposure.
11+
#' @param ced_window The date at which the clinical eligibility window closes. Can be left empty, in which case the clinical eligibility window is assumed to be part of
12+
#' `exposure` and `time_to_exposure`
13+
#'
14+
#' @return A data.frame with one row per clone.
15+
#'
16+
#' @export
17+
#'
18+
#' @examples
19+
#'
20+
#' # Load the toy dataset
21+
#' data(toy_df)
22+
#'
23+
#' # Create clones
24+
#' clones <- create_clones(toy_df,
25+
#' id = "id",
26+
#' event = "death",
27+
#' time_to_event = "fup_obs",
28+
#' exposure = "surgery",
29+
#' time_to_exposure = "timetosurgery",
30+
#' ced_window = 365.25/2)
31+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
32+
create_clones <- function(
33+
df,
34+
id,
35+
event,
36+
time_to_event,
37+
exposure,
38+
time_to_exposure,
39+
ced_window = NULL
40+
) {
41+
42+
# Check inputs
43+
valid_inputs <- create_clones_check_inputs(df = df,
44+
id = id,
45+
event = event,
46+
time_to_event = time_to_event,
47+
exposure = exposure,
48+
time_to_exposure = time_to_exposure,
49+
ced_window = ced_window)
50+
51+
if (!valid_inputs) stop("something went wrong")
52+
53+
# Update exposure and time-to-exposure based on CED window
54+
n_pts_to_update <- sum(!is.na(df[, time_to_exposure]) & df[, time_to_exposure] > ced_window)
55+
if (n_pts_to_update > 0) {
56+
message(paste0("Updating ", n_pts_to_update, " patients' exposure and time-to-exposure based on CED window"))
57+
ced_window_na_type <- ifelse(is.integer(ced_window), NA_integer_, NA_real_)
58+
df[, exposure] <- ifelse(!is.na(df[, time_to_exposure]) & df[, time_to_exposure] > ced_window, 0L, df[, exposure])
59+
df[, time_to_exposure] <- ifelse(!is.na(df[, time_to_exposure]) & df[, time_to_exposure] > ced_window, ced_window_na_type, df[, time_to_exposure])
60+
}
61+
62+
# Create clones
63+
df_0 <- df_1 <- df
64+
df_0$outcome <- df_1$outcome <- rep(NA_integer_, NROW(df))
65+
df_0$fup_outcome <- df_1$fup_outcome <- rep(NA_real_, NROW(df))
66+
df_0$censor <- df_1$censor <- rep(NA_integer_, NROW(df))
67+
df_0$fup_censor <- df_1$fup_censor <- rep(NA_real_, NROW(df))
68+
df_0$clone <- 0L
69+
df_1$clone <- 1L
70+
71+
# OUTCOMES
72+
## EXPOSED
73+
### Truly exposed --> keep outcomes
74+
df_1[df_1[, exposure] == 1L, "outcome"] <- df_1[df_1[, exposure] == 1L, event]
75+
df_1[df_1[, exposure] == 1L, "fup_outcome"] <- df_1[df_1[, exposure] == 1L, time_to_event]
76+
77+
### Truly not exposed, follow-up ends before CED --> keep outcomes
78+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] <= ced_window, "outcome"] <- df_1[df_1[, exposure] == 0L & df_1[, time_to_event] <= ced_window, event]
79+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] <= ced_window, "fup_outcome"] <- df_1[df_1[, exposure] == 0L & df_1[, time_to_event] <= ced_window, time_to_event]
80+
81+
### Truly not exposed, follow-up ends after CED --> censor
82+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] > ced_window, "outcome"] <- 0L
83+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] > ced_window, "fup_outcome"] <- ced_window
84+
85+
## UNEXPOSED
86+
### Truly unexposed --> keep outcomes
87+
df_0[df_0[, exposure] == 0L, "outcome"] <- df_0[df_0[, exposure] == 0L, event]
88+
df_0[df_0[, exposure] == 0L, "fup_outcome"] <- df_0[df_0[, exposure] == 0L, time_to_event]
89+
90+
### Truly not exposed --> censor at exposure
91+
df_0[df_0[, exposure] == 1L, "outcome"] <- 0L
92+
df_0[df_0[, exposure] == 1L, "fup_outcome"] <- df_0[df_0[, exposure] == 1L, time_to_exposure]
93+
94+
# CENSORING
95+
## EXPOSED
96+
### Truly exposed --> Do not censor. Risk of censoring ends at exposure date
97+
df_1[df_1[, exposure] == 1L, "censor"] <- 0L
98+
df_1[df_1[, exposure] == 1L, "fup_censor"] <- df_1[df_1[, exposure] == 1L, time_to_exposure]
99+
100+
### Truly not exposed, true censorship before/on CED --> Do not censor. Risk of censoring ends at event date
101+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] <= ced_window, "censor"] <- 0L
102+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] <= ced_window, "fup_censor"] <- df_1[df_1[, exposure] == 0L & df_1[, time_to_event] <= ced_window, time_to_event]
103+
104+
### Truly not exposed, true censorship on/after CED --> Censor at CED.
105+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] > ced_window, "censor"] <- 1L
106+
df_1[df_1[, exposure] == 0L & df_1[, time_to_event] > ced_window, "fup_censor"] <- ced_window
107+
108+
## UNEXPOSED
109+
### Truly exposed --> Censored at time of exposure.
110+
df_0[df_0[, exposure] == 1L, "censor"] <- 1L
111+
df_0[df_0[, exposure] == 1L, "fup_censor"] <- df_0[df_0[, exposure] == 1L, time_to_exposure]
112+
113+
### Truly not exposed, true censorship before/on CED --> Do not censor. Risk of censoring ends at event date
114+
df_0[df_0[, exposure] == 0L & df_0[, time_to_event] <= ced_window, "censor"] <- 0L
115+
df_0[df_0[, exposure] == 0L & df_0[, time_to_event] <= ced_window, "fup_censor"] <- df_0[df_0[, exposure] == 0L & df_0[, time_to_event] <= ced_window, time_to_event]
116+
117+
### Truly not exposed, true censorship on/after CED --> Do not censor. Risk of censoring ends at CED.
118+
df_0[df_0[, exposure] == 0L & df_0[, time_to_event] > ced_window, "censor"] <- 0L
119+
df_0[df_0[, exposure] == 0L & df_0[, time_to_event] > ced_window, "fup_censor"] <- ced_window
120+
121+
# Combine and return
122+
df_clones <- rbind(df_1, df_0)
123+
df_clones <- df_clones[order(df_clones[, id], df_clones[, "clone"]), ]
124+
125+
# Add class
126+
class(df_clones) <- c("ccw_clones", class(df_clones))
127+
128+
# Pass attributes
129+
attributes(df_clones)$id <- id
130+
attributes(df_clones)$event <- event
131+
attributes(df_clones)$time_to_event <- time_to_event
132+
attributes(df_clones)$exposure <- exposure
133+
attributes(df_clones)$time_to_exposure <- time_to_exposure
134+
attributes(df_clones)$ced_window <- ced_window
135+
136+
# Remove rownames
137+
rownames(df_clones) <- NULL
138+
139+
# Return
140+
return(df_clones)
141+
142+
}

‎R/create_clones_check_inputs.R

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#' Check inputs to create clones
2+
#'
3+
#' @param df A data.frame with one row per patient.
4+
#' @param id The name of the column in `df` that contains the patient identifier.
5+
#' @param event The name of the column in `df` that contains the event of interest.
6+
#' @param time_to_event The name of the column in `df` that contains the time to event.
7+
#' @param exposure The name of the column in `df` that contains the exposure.
8+
#' @param time_to_exposure The name of the column in `df` that contains the time to exposure.
9+
#' @param ced_window The date at which the clinical eligibility window closes.
10+
#' `exposure` and `time_to_exposure`
11+
#'
12+
#' @return TRUE if inputs are valid else false
13+
create_clones_check_inputs <- function(
14+
df,
15+
id,
16+
event,
17+
time_to_event,
18+
exposure,
19+
time_to_exposure,
20+
ced_window
21+
) {
22+
23+
inputs_good <- FALSE
24+
25+
# Check all input types
26+
checkmate::assert_class(df, "data.frame")
27+
checkmate::assert_class(id, "character")
28+
checkmate::assert_class(event, "character")
29+
checkmate::assert_class(time_to_event, "character")
30+
checkmate::assert_class(exposure, "character")
31+
checkmate::assert_class(time_to_exposure, "character")
32+
checkmate::assert_class(ced_window, "numeric")
33+
34+
# Check that all columns are in data
35+
checkmate::assert_subset(c(id, event, time_to_event, exposure, time_to_exposure), names(df))
36+
37+
# Check that there are no missing data in the study columns (except time to exposure)
38+
cc_sum <- sum(stats::complete.cases(df[, c(id, event, time_to_event, exposure)]))
39+
if (cc_sum != NROW(df)) {
40+
stop("There are missing data in the study columns")
41+
}
42+
43+
# Check time to exposure is missing just for when exposure is 0/F
44+
if (any(!is.na(df[df[, exposure] == 0L, time_to_exposure]))) {
45+
stop("Time to exposure should only be for patients who received the exposure at some time")
46+
}
47+
48+
if (any(is.na(df[df[, exposure] == 1L, time_to_exposure]))) {
49+
stop("Time to exposure should be complete for patients who have exposure = 1")
50+
}
51+
52+
# Check exposure and event are just 0/1 or T/F
53+
if (any(df[, exposure] != 0L & df[, exposure] != 1L)) {
54+
stop("Exposure should be 0/1 or T/F")
55+
}
56+
57+
if (any(df[, event] != 0L & df[, event] != 1L)) {
58+
stop("Event should be 0/1 or T/F")
59+
}
60+
61+
# Make sure the user did not pass the same column name twice
62+
if (NROW(unique(c(id, event, time_to_event, exposure, time_to_exposure))) != NROW(c(id, event, time_to_event, exposure, time_to_exposure))) {
63+
stop("You passed the same column name twice")
64+
}
65+
66+
# Check that the respective columns are numeric
67+
checkmate::assert_numeric(df[, time_to_event])
68+
checkmate::assert_numeric(df[, time_to_exposure])
69+
checkmate::assert_true(class(df[,event]) %in% c("integer", "logical"))
70+
checkmate::assert_true(class(df[,exposure]) %in% c("integer", "logical"))
71+
checkmate::assert_true(class(df[,id]) %in% c("integer", "numeric", "character"))
72+
73+
# Some protected names
74+
protected_names <- c("clone", "outcome", "fup_outcome", "censor", "fup_censor",
75+
"t_start", "t_stop", "time_id", "t_event", "weight_cox",
76+
"p_uncens", "hazard", "lp")
77+
for (name in protected_names) {
78+
if (name %in% names(df)) {
79+
stop("'", name, "' is a protected collumn name and will be used by the function. Please rename this column")
80+
}
81+
}
82+
83+
# Check no outcomes are before exposure dates
84+
if (any(df[!is.na(df[,time_to_exposure]), time_to_event] < df[!is.na(df[,time_to_exposure]), time_to_exposure])) {
85+
stop("There are outcomes before exposure dates")
86+
}
87+
88+
inputs_good <- TRUE
89+
90+
return(inputs_good)
91+
92+
}
93+

‎R/data.R

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#' Toy dataset from Maringe et al. (2020)
2+
#'
3+
#' A toy dataset provided in Maringe et al. (2020) to demonstrate the clone-censor weight approach.
4+
#'
5+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
6+
#'
7+
#' @format ## `toy_df`
8+
#' A data frame with 200 rows and 12 columns:
9+
#' \describe{
10+
#' \item{id}{patient identifier}
11+
#' \item{fup_obs}{observed follow-up time (time to death or 1 year if censored alive)}
12+
#' \item{death}{observed event of interest (all-cause death) 1: dead, 0:alive}
13+
#' \item{timetosurgery}{time to surgery (NA if no surgery)}
14+
#' \item{age}{age at diagnosis}
15+
#' \item{sex}{patient's sex}
16+
#' \item{perf}{performance status at diagnosis}
17+
#' \item{stage}{stage at diagnosis}
18+
#' \item{deprivation}{deprivation score}
19+
#' \item{charlson}{Charlson's comorbidity index}
20+
#' \item{emergency}{route to diagnosis}
21+
#' }
22+
#' @source <https://doi.org/10.1093/ije/dyaa057>
23+
"toy_df"
24+
25+
#' Testing data for `create_clones`
26+
#'
27+
#' The clones dataset from Maringe et al. (2020) named `tab`
28+
#'
29+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
30+
31+
#' @source <https://doi.org/10.1093/ije/dyaa057>
32+
"tab"
33+
34+
#' Testing data for `cast_clones_to_long`
35+
#'
36+
#' The long clones dataset from Maringe et al. (2020) named `data_final`
37+
#'
38+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
39+
40+
#' @source <https://doi.org/10.1093/ije/dyaa057>
41+
"data_final"
42+
43+
#' Testing data for `cast_clones_to_long`
44+
#'
45+
#' The long clones dataset from Maringe et al. (2020) named `data_final`
46+
#'
47+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
48+
49+
#' @source <https://doi.org/10.1093/ije/dyaa057>
50+
"data_final"
51+
52+
#' Testing data for `generate_ccw_calc_weights`, surgery recipients
53+
#'
54+
#' The long clones dataset with weights for surgery recipients from Maringe et al. (2020) named `data_long`
55+
#'
56+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
57+
58+
#' @source <https://doi.org/10.1093/ije/dyaa057>
59+
"data_long"
60+
61+
#' Testing data for `generate_ccw_calc_weights`, no surgery recipients
62+
#'
63+
#' The long clones dataset with weights for no surgery recipients from Maringe et al. (2020) named `data_long_2`
64+
#'
65+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
66+
67+
#' @source <https://doi.org/10.1093/ije/dyaa057>
68+
"data_long_2"
69+
70+
#' Testing data for `generate_ccw`
71+
#'
72+
#' The long clones dataset with weights for all recipients from Maringe et al. (2020) named `data_long_cox`
73+
#'
74+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
75+
76+
#' @source <https://doi.org/10.1093/ije/dyaa057>
77+
"data_long_cox"

‎R/generate_ccw.R

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
#' Generate clone censor weights (CCW) on the long data.frame
2+
#'
3+
#' Currently, the only way to generate weights is via multivariable Cox, as described in Maringe et al. 2020
4+
#'
5+
#' @param df A data.frame with one row per clone per observation period as returned by [cast_clones_to_long()]
6+
#' @param predvars The variables that will be used to derive weights (subset of those in your data.frame originally). At least one covariate must be used.
7+
#'
8+
#' @return The same data.frame with weights added.
9+
#'
10+
#' @export
11+
#'
12+
#' @examples
13+
#'
14+
#' # Load the toy dataset
15+
#' data(toy_df)
16+
#'
17+
#' # Create clones
18+
#' clones <- create_clones(toy_df,
19+
#' id = "id",
20+
#' event = "death",
21+
#' time_to_event = "fup_obs",
22+
#' exposure = "surgery",
23+
#' time_to_exposure = "timetosurgery",
24+
#' ced_window = 365.25/2)
25+
#'
26+
#' clones_long <- cast_clones_to_long(clones)
27+
#' clones_long_w <- generate_ccw(clones_long, predvars = c("age"))
28+
#'
29+
#' @references Maringe, Camille, et al. "Reflection on modern methods: trial emulation in the presence of immortal-time bias. Assessing the benefit of major surgery for elderly lung cancer patients using observational data." International journal of epidemiology 49.5 (2020): 1719-1729.
30+
generate_ccw <- function(df, predvars) {
31+
32+
# Check inputs
33+
checkmate::assert_class(df, "ccw_clones_long")
34+
if (!all(c("outcome", "fup_outcome", "censor", "fup_censor", "clone", "t_start", "t_stop", "time_id", "t_event") %in% names(df))) {
35+
stop("The input data.frame is missing at least one of the required columns: outcome, fup_outcome, censor, fup_censor, clone, t_start, t_stop, time_id, t_event. Did you remove this?")
36+
}
37+
if (!all(c("id", "event", "time_to_event", "exposure", "time_to_exposure", "ced_window") %in% names(attributes(df)))) {
38+
stop("The input data.frame is missing at least one attribute: id, event, time_to_event, exposure, time_to_exposure, ced_window. Did you remove these or try to make a custom data.frame?")
39+
}
40+
41+
id <- attributes(df)$id
42+
event <- attributes(df)$event
43+
exposure <- attributes(df)$exposure
44+
time_to_event <- attributes(df)$time_to_event
45+
time_to_exposure <- attributes(df)$time_to_exposure
46+
ced_window <- attributes(df)$ced_window
47+
event_times_df <- attributes(df)$event_times_df
48+
49+
# Check predvars to make sure the columns are there
50+
if (!all(predvars %in% names(df))) {
51+
stop("At least one of these predvars columns is not on the data.frame: ", paste(predvars, collapse = ", "), ".")
52+
}
53+
54+
# Make sure predvars is not NULL
55+
if (is.null(predvars)) {
56+
stop("predvars cannot be NULL. Please specify at least one variable to use for weights.")
57+
}
58+
59+
# Make sure no predvars are character/factor
60+
if (any(sapply(df[, predvars], is.character) | sapply(df[, predvars], is.factor))) {
61+
stop("At least one of the predvars columns is character/factor. In this early version of `survivalCCW`, only numeric variables are considered. Please make dummy vars on your own! :)")
62+
}
63+
64+
# Create weights
65+
df_1 <- generate_ccw_calc_weights(df[df$clone == 1L, ], event_times_df, predvars)
66+
67+
df_0 <- generate_ccw_calc_weights(df[df$clone == 0L, ], event_times_df, predvars)
68+
69+
# Combine
70+
df <- rbind(df_0, df_1)
71+
72+
# Check that all clones have weights
73+
if (any(is.na(df$weight_cox))) {
74+
stop("At least one clone is missing a weight. Please file a bug fix.")
75+
}
76+
77+
# Update class
78+
class(df) <- c("ccw_clones_long_weights", class(df))
79+
80+
# Remove rownames
81+
rownames(df) <- NULL
82+
83+
return(df)
84+
85+
}

‎R/generate_ccw_calc_weights.R

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#' Calculate weights for each arm of a long data.frame
2+
#'
3+
#' @param df the data.frame for a single arm
4+
#' @param event_times_df the event times data.frame
5+
#' @param predvars the baseline variables for adjustment
6+
#'
7+
#' @return a data.frame with weight columns included
8+
generate_ccw_calc_weights <- function(df, event_times_df, predvars) {
9+
10+
model_fmla <- stats::as.formula(
11+
paste0(
12+
"survival::Surv(t_start, t_stop, censor) ~ ",
13+
paste(predvars, collapse = " + ")
14+
)
15+
)
16+
17+
cens_model <- survival::coxph(model_fmla, data = df, ties = "efron")
18+
19+
#@TODO allow factors and carry forward through previous functions
20+
# ref_df <- setNames(data.frame(matrix(0, nrow = 1, ncol = length(predvars))), predvars)
21+
df$lp <- as.matrix(df[, predvars]) %*% stats::coef(cens_model)
22+
baseline_hazard <- data.frame(
23+
survival::basehaz(cens_model, centered = FALSE)
24+
)
25+
names(baseline_hazard) <- c("hazard", "t")
26+
27+
dat_base_times <- unique(
28+
merge(
29+
x = baseline_hazard,
30+
y = event_times_df,
31+
by.x = "t",
32+
by.y = "t_event",
33+
all.x = TRUE
34+
)
35+
)
36+
37+
df <- merge(
38+
x = df,
39+
y = dat_base_times,
40+
by = "time_id",
41+
all.x = TRUE
42+
)
43+
44+
df <- df[order(df$id,df$fup_outcome),]
45+
df$hazard <- ifelse(is.na(df$hazard), 0, df$hazard)
46+
df$p_uncens <- exp(-(df$hazard) * exp(df$lp))
47+
df$weight_cox <- 1 / df$p_uncens
48+
df$weight_cox[df$time_id == 0] <- 1
49+
50+
row.names(df) <- NULL
51+
return(df)
52+
}

‎README.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# survivalCCW
2+
3+
This is a work-in-progress package that conducts clone censor weight analyses in R. Please use at your own risk. Consider filing a bug report or reaching out to [Matt](mailto:secrmatt@gmail.com) for questions/comments/suggestions.

‎cran-comments.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
## R CMD check results
2+
3+
0 errors | 0 warnings | 1 note
4+
5+
* This is a new release.

‎data/data_final.rda

92.8 KB
Binary file not shown.

‎data/data_long.rda

172 KB
Binary file not shown.

‎data/data_long_2.rda

194 KB
Binary file not shown.

‎data/data_long_cox.rda

398 KB
Binary file not shown.

‎data/tab.rda

1.55 KB
Binary file not shown.

‎data/toy_df.rda

3.52 KB
Binary file not shown.

‎man/cast_clones_to_long.Rd

+37
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/create_clones.Rd

+55
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/create_clones_check_inputs.Rd

+38
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/data_final.Rd

+32
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/data_long.Rd

+22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/data_long_2.Rd

+22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/data_long_cox.Rd

+22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/generate_ccw.Rd

+40
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/generate_ccw_calc_weights.Rd

+21
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/tab.Rd

+22
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎man/toy_df.Rd

+38
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎survivalCCW.Rproj

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
Version: 1.0
2+
3+
RestoreWorkspace: No
4+
SaveWorkspace: No
5+
AlwaysSaveHistory: Default
6+
7+
EnableCodeIndexing: Yes
8+
UseSpacesForTab: Yes
9+
NumSpacesForTab: 4
10+
Encoding: UTF-8
11+
12+
RnwWeave: Sweave
13+
LaTeX: pdfLaTeX
14+
15+
AutoAppendNewline: Yes
16+
StripTrailingWhitespace: Yes
17+
LineEndingConversion: Posix
18+
19+
BuildType: Package
20+
PackageUseDevtools: Yes
21+
PackageInstallArgs: --no-multiarch --with-keep.source
22+
PackageRoxygenize: rd,collate,namespace

‎tests/testthat.R

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# This file is part of the standard setup for testthat.
2+
# It is recommended that you do not modify it.
3+
#
4+
# Where should you do additional test configuration?
5+
# Learn more about the roles of various files in:
6+
# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
7+
# * https://testthat.r-lib.org/articles/special-files.html
8+
9+
library(testthat)
10+
library(survivalCCW)
11+
12+
test_check("survivalCCW")
+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
test_that("casting clones requires ccw_clones class", {
2+
3+
df <- data.frame(
4+
id = c(1, 2, 3, 4),
5+
event = c(0L, 1L, 0L, 1L),
6+
time_to_event = c(100, 200, 100, 200),
7+
exposure = c(0L, 1L, 0L, 1L),
8+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
9+
)
10+
11+
expect_error(
12+
cast_clones_to_long(df)
13+
)
14+
15+
ccw_df <- create_clones(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
16+
17+
attributes(ccw_df)$id <- NULL
18+
19+
expect_error(
20+
cast_clones_to_long(ccw_df)
21+
)
22+
23+
ccw_df$outcome <- NULL
24+
25+
expect_error(
26+
cast_clones_to_long(ccw_df)
27+
)
28+
})
29+
30+
test_that("long format was created correctly", {
31+
32+
df_long <- toy_df |>
33+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 182.62) |>
34+
cast_clones_to_long()
35+
36+
expect_true(TRUE)
37+
#@TODO more test cases
38+
})
39+
40+
41+
test_that("Compare results to Maringe", {
42+
43+
df <- toy_df |>
44+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 182.62) |>
45+
cast_clones_to_long()
46+
47+
df <- df[order(df$id, df$clone, df$time_id),]
48+
row.names(df) <- NULL
49+
50+
data_final <- data_final[order(data_final$id, data_final$clone, data_final$time_id),]
51+
row.names(data_final) <- NULL
52+
53+
for (col in names(df)) {
54+
expect_equal(
55+
df[[col]],
56+
data_final[[col]],
57+
tolerance = 1e-6
58+
)
59+
}
60+
61+
})
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
test_that("input types correct", {
2+
3+
df <- data.frame(
4+
id = c(1, 2, 3, 4),
5+
event = c(0L, 1L, 0L, 1L),
6+
time_to_event = c(100, 200, 100, 200),
7+
exposure = c(0L, 1L, 0L, 1L),
8+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
9+
)
10+
11+
expect_error(
12+
create_clones_check_inputs(df, id = id, event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
13+
)
14+
15+
expect_error(
16+
create_clones_check_inputs(df, id = "id", event = df$event, time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
17+
)
18+
19+
expect_error(
20+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = 2.0, exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
21+
)
22+
23+
expect_error(
24+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = "ced_window")
25+
)
26+
27+
})
28+
29+
test_that("columns are in data", {
30+
31+
df <- data.frame(
32+
id = c(1, 2, 3, 4),
33+
event = c(0L, 1L, 0L, 1L),
34+
time_to_event = c(100, 200, 100, 200),
35+
exposure = c(0L, 1L, 0L, 1L),
36+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
37+
)
38+
39+
expect_error(
40+
create_clones_check_inputs(df, id = "hamburger", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
41+
)
42+
43+
expect_error(
44+
create_clones_check_inputs(df, id = "id", event = "sausage", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
45+
)
46+
47+
expect_error(
48+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "pumpkin", time_to_exposure = "time_to_exposure", ced_window = 200)
49+
)
50+
51+
})
52+
53+
test_that("no missing data exists in these columns (other than time to exposure)", {
54+
55+
df <- data.frame(
56+
id = c(1, 2, 3, 4),
57+
event = c(0L, 1L, NA_integer_, 1L),
58+
time_to_event = c(100, 200, 100, 200),
59+
exposure = c(0L, 1L, 0L, 1L),
60+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
61+
)
62+
63+
expect_error(
64+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
65+
)
66+
67+
})
68+
69+
test_that("the same column name is not passed >1 time", {
70+
71+
df <- data.frame(
72+
id = c(1, 2, 3, 4),
73+
event = c(0L, 1L, 0L, 1L),
74+
time_to_event = c(100, 200, 100, 200),
75+
exposure = c(0L, 1L, 0L, 1L),
76+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
77+
)
78+
79+
expect_error(
80+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
81+
)
82+
83+
})
84+
85+
test_that("the respective columns have the right classes", {
86+
87+
df <- data.frame(
88+
id = c(1, 2, 3, 4),
89+
event = c('ham', 'is', 'good', 'food'),
90+
time_to_event = c(100, 200, 100, 200),
91+
exposure = c(0L, 1L, 0L, 1L),
92+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
93+
)
94+
95+
expect_error(
96+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
97+
)
98+
99+
df <- data.frame(
100+
id = c(1, 2, 3, 4),
101+
event = c(1.0, 1.0, 0.0, 1.0),
102+
time_to_event = c(100, 200, 100, 200),
103+
exposure = c(0L, 1L, 0L, 1L),
104+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
105+
)
106+
107+
expect_error(
108+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
109+
)
110+
111+
df <- data.frame(
112+
id = c(1, 2, 3, 4),
113+
event = c(0L, 1L, 0L, 1L),
114+
time_to_event = c(100, 200, 100, 200),
115+
exposure = c(1.0, 1.0, 0.0, 1.0),
116+
time_to_exposure = c(2.2, 2.3, NA_real_, 3.3)
117+
)
118+
119+
expect_error(
120+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
121+
)
122+
123+
124+
df <- data.frame(
125+
id = c(1, 2, 3, 4),
126+
event = c(0L, 1L, 0L, 1L),
127+
time_to_event = c('ham', 'is', 'great', 'food'),
128+
exposure = c(0L, 1L, 0L, 1L),
129+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
130+
)
131+
132+
expect_error(
133+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
134+
)
135+
136+
df <- data.frame(
137+
id = c(1, 2, 3, 4),
138+
event = c(0L, 1L, 0L, 1L),
139+
time_to_event = c(100, 200, 100, 200),
140+
exposure = c(0L, 1L, 0L, 1L),
141+
time_to_exposure = factor(c('ham', 'is', 'great', 'food'))
142+
)
143+
144+
expect_error(
145+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
146+
)
147+
148+
})
149+
150+
test_that("for all pts with an exposure, time to exposure is complete", {
151+
152+
df <- data.frame(
153+
id = c(1, 2, 3, 4),
154+
event = c(0L, 1L, 0L, 1L),
155+
time_to_event = c(100, 200, 100, 200),
156+
exposure = c(T, T, F, T),
157+
time_to_exposure = c(1.1, NA_real_, NA_real_, 3.3)
158+
)
159+
160+
expect_error(
161+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
162+
)
163+
164+
df <- data.frame(
165+
id = c(1, 2, 3, 4),
166+
event = c(0L, 1L, 0L, 1L),
167+
time_to_event = c(100, 200, 100, 200),
168+
exposure = c(T, T, F, T),
169+
time_to_exposure = c(1.1, 3.3, 9.4, 3.3)
170+
)
171+
172+
expect_error(
173+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
174+
)
175+
176+
})
177+
178+
test_that("protected column names are blocked", {
179+
180+
df <- data.frame(
181+
id = c(1, 2, 3, 4),
182+
clone = c(0L, 1L, 0L, 1L),
183+
time_to_event = c(100, 200, 100, 200),
184+
exposure = c(T, T, F, T),
185+
time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
186+
)
187+
188+
expect_error(
189+
create_clones_check_inputs(df, id = "id", event = "clone", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
190+
)
191+
192+
df <- data.frame(
193+
id = c(1, 2, 3, 4),
194+
event = c(0L, 1L, 0L, 1L),
195+
time_to_event = c(100, 200, 100, 200),
196+
censor = c(T, T, F, T),
197+
time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
198+
)
199+
200+
expect_error(
201+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "censor", time_to_exposure = "time_to_exposure", ced_window = 200)
202+
)
203+
204+
})
205+
206+
test_that("exposure and event are just 0/1 or T/F", {
207+
208+
df <- data.frame(
209+
id = c(1, 2, 3, 4),
210+
event = c(0L, 1L, 2L, 3L),
211+
time_to_event = c(100, 200, 100, 200),
212+
exposure = c(1L, 1L, 0L, 1L),
213+
time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
214+
)
215+
216+
expect_error(
217+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
218+
)
219+
220+
df <- data.frame(
221+
id = c(1, 2, 3, 4),
222+
event = c(0L, 0L, 0L, 0L),
223+
time_to_event = c(100, 200, 100, 200),
224+
exposure = c(0L, 1L, 2L, 3L),
225+
time_to_exposure = c(NA_real_, 2.2, 3.3, 3.3)
226+
)
227+
228+
expect_error(
229+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 200)
230+
)
231+
232+
})
233+
234+
test_that("No outcomes before exposure dates", {
235+
236+
df <- data.frame(
237+
id = c(1, 2, 3, 4),
238+
event = c(1L, 1L, 0L, 0L),
239+
time_to_event = c(1, 2, 1, 2),
240+
exposure = c(1L, 1L, 0L, 1L),
241+
time_to_exposure = c(1.1, 3.3, NA_real_, 3.3)
242+
)
243+
244+
expect_error(
245+
create_clones_check_inputs(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 1.0)
246+
)
247+
248+
})

‎tests/testthat/test-create_clones.R

+257
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
test_that("CED trims time to exposure appropriately", {
2+
3+
df <- data.frame(
4+
id = c(1, 2, 3, 4),
5+
event = c(0L, 1L, 0L, 1L),
6+
time_to_event = c(200L, 250L, 9900L, 100L),
7+
exposure = c(0L, 1L, 0L, 1L),
8+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
9+
)
10+
11+
expect_message(
12+
ccw_df <- create_clones(df, id = "id", event = "event", time_to_event = "time_to_event", exposure = "exposure", time_to_exposure = "time_to_exposure", ced_window = 2.5),
13+
"Updating 1 patients' exposure and time-to-exposure based on CED window"
14+
)
15+
16+
expect_equal(0L, unique(ccw_df[ccw_df$id==4, "exposure"]))
17+
18+
})
19+
20+
21+
test_that("Spot check that outcomes are correctly assigned", {
22+
23+
df <- data.frame(
24+
id = 1:6,
25+
event = c(1L, 1L, 1L, 1L, 1L, 1L),
26+
time_to_event = c(10, 100, 100, 10, 100, 100),
27+
exposure = c(rep(0L, 3), rep(1L, 3)),
28+
time_to_exposure = c(rep(NA_real_, 3), 2, 8, 12)
29+
)
30+
31+
ccw_df <- create_clones(df,
32+
id = "id",
33+
event = "event",
34+
time_to_event = "time_to_event",
35+
exposure = "exposure",
36+
time_to_exposure = "time_to_exposure",
37+
ced_window = 20)
38+
39+
# Exposed clones
40+
expect_equal(
41+
ccw_df[ccw_df$id==4 & ccw_df$clone == 1, "outcome"],
42+
1L
43+
)
44+
45+
expect_equal(
46+
ccw_df[ccw_df$id==4 & ccw_df$clone == 1, "fup_outcome"],
47+
10
48+
)
49+
50+
expect_equal(
51+
ccw_df[ccw_df$id==1 & ccw_df$clone == 1, "outcome"],
52+
1L
53+
)
54+
55+
expect_equal(
56+
ccw_df[ccw_df$id==1 & ccw_df$clone == 1, "fup_outcome"],
57+
10
58+
)
59+
60+
expect_equal(
61+
ccw_df[ccw_df$id==2 & ccw_df$clone == 1, "outcome"],
62+
0L
63+
)
64+
65+
expect_equal(
66+
ccw_df[ccw_df$id==2 & ccw_df$clone == 1, "fup_outcome"],
67+
20
68+
)
69+
70+
# Unexposed clones
71+
# Exposed clones
72+
expect_equal(
73+
ccw_df[ccw_df$id==4 & ccw_df$clone == 0, "outcome"],
74+
0L
75+
)
76+
77+
expect_equal(
78+
ccw_df[ccw_df$id==4 & ccw_df$clone == 0, "fup_outcome"],
79+
2
80+
)
81+
82+
expect_equal(
83+
ccw_df[ccw_df$id==1 & ccw_df$clone == 0, "outcome"],
84+
1L
85+
)
86+
87+
expect_equal(
88+
ccw_df[ccw_df$id==1 & ccw_df$clone == 0, "fup_outcome"],
89+
10
90+
)
91+
92+
expect_equal(
93+
ccw_df[ccw_df$id==2 & ccw_df$clone == 0, "outcome"],
94+
1L
95+
)
96+
97+
expect_equal(
98+
ccw_df[ccw_df$id==2 & ccw_df$clone == 0, "fup_outcome"],
99+
100
100+
)
101+
102+
})
103+
104+
105+
test_that("Spot check that censoring statuses are correctly assigned",{
106+
107+
df <- data.frame(
108+
id = 1:6,
109+
event = c(1L, 1L, 1L, 1L, 1L, 1L),
110+
time_to_event = c(10, 100, 100, 10, 100, 100),
111+
exposure = c(rep(0L, 3), rep(1L, 3)),
112+
time_to_exposure = c(rep(NA_real_, 3), 2, 8, 12)
113+
)
114+
115+
ccw_df <- create_clones(df,
116+
id = "id",
117+
event = "event",
118+
time_to_event = "time_to_event",
119+
exposure = "exposure",
120+
time_to_exposure = "time_to_exposure",
121+
ced_window = 20)
122+
123+
# Exposed clones
124+
expect_equal(
125+
ccw_df[ccw_df$id==4 & ccw_df$clone == 1, "censor"],
126+
0L
127+
)
128+
129+
expect_equal(
130+
ccw_df[ccw_df$id==4 & ccw_df$clone == 1, "fup_censor"],
131+
2
132+
)
133+
134+
expect_equal(
135+
ccw_df[ccw_df$id==1 & ccw_df$clone == 1, "censor"],
136+
0L
137+
)
138+
139+
expect_equal(
140+
ccw_df[ccw_df$id==1 & ccw_df$clone == 1, "fup_censor"],
141+
10
142+
)
143+
144+
expect_equal(
145+
ccw_df[ccw_df$id==2 & ccw_df$clone == 1, "censor"],
146+
1L
147+
)
148+
149+
expect_equal(
150+
ccw_df[ccw_df$id==2 & ccw_df$clone == 1, "fup_censor"],
151+
20
152+
)
153+
154+
# Unexposed clones
155+
# Exposed clones
156+
expect_equal(
157+
ccw_df[ccw_df$id==4 & ccw_df$clone == 0, "censor"],
158+
1L
159+
)
160+
161+
expect_equal(
162+
ccw_df[ccw_df$id==4 & ccw_df$clone == 0, "fup_censor"],
163+
2
164+
)
165+
166+
expect_equal(
167+
ccw_df[ccw_df$id==1 & ccw_df$clone == 0, "censor"],
168+
0L
169+
)
170+
171+
expect_equal(
172+
ccw_df[ccw_df$id==1 & ccw_df$clone == 0, "fup_censor"],
173+
10
174+
)
175+
176+
expect_equal(
177+
ccw_df[ccw_df$id==2 & ccw_df$clone == 0, "censor"],
178+
0L
179+
)
180+
181+
expect_equal(
182+
ccw_df[ccw_df$id==2 & ccw_df$clone == 0, "fup_censor"],
183+
20
184+
)
185+
186+
})
187+
188+
test_that("Compare results to Maringe", {
189+
190+
df <- toy_df |>
191+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 182.62)
192+
df <- df[order(df$id, df$clone),]
193+
194+
tab <- tab[order(tab$id, tab$clone),]
195+
196+
# Compare each
197+
for (col in c("outcome", "fup_outcome", "censor", "fup_censor")) {
198+
row.names(df) <- NULL
199+
row.names(tab) <- NULL
200+
expect_equal(
201+
df[[col]],
202+
tab[[col]],
203+
tolerance = 1e-6
204+
)
205+
}
206+
207+
})
208+
209+
test_that("attribute are passed correctly", {
210+
211+
df <- data.frame(
212+
id = 1:6,
213+
event = c(1L, 1L, 1L, 1L, 1L, 1L),
214+
time_to_event = c(10, 100, 100, 10, 100, 100),
215+
exposure = c(rep(0L, 3), rep(1L, 3)),
216+
time_to_exposure = c(rep(NA_real_, 3), 2, 8, 12)
217+
)
218+
219+
ccw_df <- create_clones(df,
220+
id = "id",
221+
event = "event",
222+
time_to_event = "time_to_event",
223+
exposure = "exposure",
224+
time_to_exposure = "time_to_exposure",
225+
ced_window = 20)
226+
227+
expect_equal(
228+
attributes(ccw_df)$id,
229+
"id"
230+
)
231+
232+
expect_equal(
233+
attributes(ccw_df)$event,
234+
"event"
235+
)
236+
237+
expect_equal(
238+
attributes(ccw_df)$time_to_event,
239+
"time_to_event"
240+
)
241+
242+
expect_equal(
243+
attributes(ccw_df)$exposure,
244+
"exposure"
245+
)
246+
247+
expect_equal(
248+
attributes(ccw_df)$time_to_exposure,
249+
"time_to_exposure"
250+
)
251+
252+
expect_equal(
253+
attributes(ccw_df)$ced_window,
254+
20
255+
)
256+
257+
})

‎tests/testthat/test-generate_ccw.R

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
2+
test_that("weights are adequately calculated compared to Maringe", {
3+
4+
df <- toy_df |>
5+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 182.62) |>
6+
cast_clones_to_long() |>
7+
generate_ccw(predvars = c("age", "sex", "perf", "stage", "deprivation", "charlson", "emergency"))
8+
9+
df <- df[order(df$id, df$clone, df$time_id),]
10+
row.names(df) <- NULL
11+
12+
# Compare exposed
13+
data_long_cox <- data_long_cox[order(data_long_cox$id, data_long_cox$clone, data_long_cox$time_id),]
14+
row.names(data_long_cox) <- NULL
15+
16+
# Compare all columns
17+
for (col in colnames(data_long_cox)[colnames(data_long_cox) %in% colnames(df)]) {
18+
row.names(df[[col]]) <- NULL
19+
row.names(data_long_cox[[col]]) <- NULL
20+
expect_equal(
21+
df[[col]],
22+
data_long_cox[[col]],
23+
tolerance = 1e-6
24+
)
25+
}
26+
27+
cox_df <- survival::coxph(survival::Surv(t_start, t_stop, outcome) ~ clone, data = df, weights = weight_cox)
28+
cox_data_long_cox <- survival::coxph(survival::Surv(t_start, t_stop, outcome) ~ clone, data = data_long_cox, weights = weight_cox)
29+
30+
expect_equal(cox_df$coefficients, cox_data_long_cox$coefficients, tolerance = 1e-6)
31+
32+
})
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
test_that("casting clones requires ccw_clones_long class", {
2+
3+
df <- data.frame(
4+
id = c(1, 2, 3, 4),
5+
event = c(0L, 1L, 0L, 1L),
6+
time_to_event = c(100, 200, 100, 200),
7+
exposure = c(0L, 1L, 0L, 1L),
8+
time_to_exposure = c(NA_real_, 2.3, NA_real_, 3.3)
9+
)
10+
11+
expect_error(
12+
generate_ccw_on_long_df(df)
13+
)
14+
15+
df_long <- toy_df |>
16+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 365.25/2) |>
17+
cast_clones_to_long()
18+
19+
attributes(df_long)$id <- NULL
20+
21+
expect_error(
22+
generate_ccw_on_long_df(df_long)
23+
)
24+
25+
df_long$t_start <- NULL
26+
27+
expect_error(
28+
generate_ccw_on_long_df(df_long)
29+
)
30+
})
31+
32+
test_that("when predvars columns are missing, an error is thrown", {
33+
34+
df_long <- toy_df |>
35+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 365.25/2) |>
36+
cast_clones_to_long()
37+
38+
expect_error(
39+
generate_ccw_on_long_df(df_long, predvars = "hamburger")
40+
)
41+
42+
expect_error(
43+
generate_ccw_on_long_df(df_long, predvars = NULL)
44+
)
45+
46+
})
47+
48+
test_that("categorical vars are dealt with", {
49+
50+
toy_df_s <- toy_df
51+
toy_df_s$sandwich <- rep(c("ham", "turkey", "cheese"), length.out = NROW(toy_df_s))
52+
53+
df_long <- toy_df_s |>
54+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 365.25/2) |>
55+
cast_clones_to_long()
56+
57+
expect_error(
58+
generate_ccw_on_long_df(df_long, predvars = "sandwich")
59+
)
60+
61+
df_long$sandwich_f <- factor(df_long$sandwich)
62+
63+
expect_error(
64+
generate_ccw_on_long_df(df_long, predvars = "sandwich_f")
65+
)
66+
67+
expect_error(
68+
generate_ccw_on_long_df(df_long, predvars = NULL)
69+
)
70+
71+
})
72+
73+
test_that("weights are adequately calculated compared to Maringe", {
74+
75+
df <- toy_df |>
76+
create_clones(id = "id", event = "death", time_to_event = "fup_obs", exposure = "surgery", time_to_exposure = "timetosurgery", ced_window = 182.62) |>
77+
cast_clones_to_long()
78+
79+
id <- attributes(df)$id
80+
event <- attributes(df)$event
81+
exposure <- attributes(df)$exposure
82+
time_to_event <- attributes(df)$time_to_event
83+
time_to_exposure <- attributes(df)$time_to_exposure
84+
ced_window <- attributes(df)$ced_window
85+
event_times_df <- attributes(df)$event_times_df
86+
87+
# Create weights
88+
predvars <- c("age", "sex", "perf", "stage", "deprivation", "charlson", "emergency")
89+
90+
df_1 <- generate_ccw_calc_weights(df[df$clone == 1L, ], event_times_df, predvars)
91+
df_1 <- df_1[order(df_1$id, df_1$time_id),]
92+
row.names(df_1) <- NULL
93+
94+
df_0 <- generate_ccw_calc_weights(df[df$clone == 0L, ], event_times_df, predvars)
95+
df_0 <- df_0[order(df_0$id, df_0$time_id),]
96+
row.names(df_0) <- NULL
97+
98+
# Compare exposed
99+
data_long <- data_long[order(data_long$id, data_long$time_id),]
100+
row.names(data_long) <- NULL
101+
102+
data_long_2 <- data_long_2[order(data_long_2$id, data_long_2$time_id),]
103+
row.names(data_long_2) <- NULL
104+
105+
# Compare all columns
106+
for (col in c("time_id", "lp", "t", "hazard", "p_uncens", "weight_cox")) {
107+
row.names(df_1[[col]]) <- NULL
108+
row.names(data_long[[col]]) <- NULL
109+
expect_equal(
110+
df_1[[col]],
111+
data_long[[col]],
112+
tolerance = 1e-6
113+
)
114+
}
115+
116+
# Compare all columns
117+
for (col in c("time_id", "lp", "t", "hazard", "p_uncens", "weight_cox")) {
118+
row.names(df_0[[col]]) <- NULL
119+
row.names(data_long_2[[col]]) <- NULL
120+
expect_equal(
121+
df_0[[col]],
122+
data_long_2[[col]],
123+
tolerance = 1e-6
124+
)
125+
}
126+
127+
})
128+

‎vignettes/.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.html
2+
*.R

‎vignettes/conduct-ccw-analysis.Rmd

+222
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
---
2+
title: "How to Conduct a Clone Censor-Weight Survival Analysis using survivalCCW"
3+
author: Matthew Secrest
4+
output: rmarkdown::html_vignette
5+
vignette: >
6+
%\VignetteIndexEntry{How to Conduct a Clone Censor-Weight Survival Analysis using survivalCCW}
7+
%\VignetteEngine{knitr::rmarkdown}
8+
%\VignetteEncoding{UTF-8}
9+
---
10+
11+
```{r, include = FALSE}
12+
knitr::opts_chunk$set(
13+
collapse = TRUE,
14+
comment = "#>"
15+
)
16+
```
17+
18+
19+
# Clone Censor Weighting
20+
21+
This lightweight package describes how to conduct clone-censor weighting (CCW) to address the problem of immortal time bias in survival analysis. This vignette will walk through the applied tutorial published by [Maringe et al 2020](https://academic.oup.com/ije/article/49/5/1719/5835351). Refer to [Hernan and Robins 2016](https://doi.org/10.1093/aje/kwv254) and [Hernan et al 2016](https://doi.org/10.1016/j.jclinepi.2016.04.014) for more technical details.
22+
23+
# Context
24+
25+
CCW is useful in the presence of immortal person-time bias in observational studies. For instance, when comparing surgery recipients vs non-recipients in non-small cell lung cancer (NSCLC), the surgery group will have a longer survival time than the non-surgery group because the non-surgery group includes patients who died before they could receive surgery. This is a form of immortal time bias.
26+
27+
The CCW toy dataset published by Maringe uses this exact setting as the motivating example. Let's explore the dataset, which comes with `survivalCCW`.
28+
29+
```{r}
30+
library(survivalCCW)
31+
head(toy_df)
32+
```
33+
34+
Column descriptions can be found with `?toy_df`:
35+
36+
- `id`: patient identifier
37+
- `fup_obs`: observed follow-up time (time to death or 1 year if censored alive)
38+
- `death`: observed event of interest (all-cause death) 1: dead, 0: alive
39+
- `timetosurgery`: time to surgery (NA if no surgery)
40+
- `age`: age at diagnosis
41+
- `sex`: patient's sex
42+
- `perf`: performance status at diagnosis
43+
- `stage`: stage at diagnosis
44+
- `deprivation`: deprivation score
45+
- `charlson`: Charlson's comorbidity index
46+
- `emergency`: route to diagnosis
47+
48+
Note that this package addresses situations in which the covariates are all defined at baseline.
49+
50+
# Create clones
51+
52+
The first step is to create the clones. This can be done for any time-to-event outcome using the `survivalCCW` function `create_clones`. For `create_clones` to work, we need to pass a one-row-per-patient `data.frame` with the following columns:
53+
54+
- The id variable (in this case, `id`)
55+
- The traditional outcome variable which denotes censorship (0) or event (1) (in this case, `death`). Note that additional values are not yet permitted.
56+
- The time to the first event (in this case, `fup_obs`)
57+
- The exposure variable, with exposure defined **at any time prior to censorship/event** (in this case, `surgery`). Must be (0) or (1),
58+
- The time to exposure variable (in this case, `timetosurgery`)
59+
- The clinical eligibility window (in this case, we'll do 6 months)
60+
61+
All other columns will be propogated for each patient. Let's see what this looks like in practice.
62+
63+
```{r}
64+
65+
# Create clones
66+
clones <- create_clones(
67+
df = toy_df,
68+
id = 'id',
69+
event = 'death',
70+
time_to_event = 'fup_obs',
71+
exposure = 'surgery',
72+
time_to_exposure = 'timetosurgery',
73+
ced_window = 182.62
74+
)
75+
76+
head(clones)
77+
```
78+
79+
80+
Note that this object is just a `data.frame` with an additional custom class which future functions will evaluate:
81+
```{r}
82+
class(clones)
83+
```
84+
85+
86+
# Cast to long format
87+
Now we simply need to cast the data to long format. The `survivalCCW` function `cast_to_long` will do this for us.
88+
No additional arguments are needed (the `clones` object is an artifact that allows you to better see and understand the method):
89+
90+
```{r}
91+
clones_long <- cast_clones_to_long(clones)
92+
93+
head(clones_long, row.names = FALSE)
94+
```
95+
96+
Let's pick out a single patient and look at their data:
97+
98+
```{r}
99+
print(clones_long[clones_long$id == "P5913", ], row.names = FALSE)
100+
```
101+
102+
# Generate weights
103+
104+
Now we simply need to generate the weights. The `survivalCCW` function `generate_ccw()` will do this for us.
105+
106+
```{r}
107+
clones_long_weights <- generate_ccw(clones_long, predvars = c("age", "sex", "perf", "stage", "deprivation", "charlson", "emergency"))
108+
109+
head(clones_long_weights, row.names = FALSE)
110+
```
111+
112+
Let's pick out a single patient and look at their data:
113+
114+
```{r}
115+
print(clones_long_weights[clones_long_weights$id == "P5913", ], row.names = FALSE)
116+
```
117+
118+
# Evaluate outcomes
119+
120+
We now have everything we need to conduct a CCW analysis. For instance, we can pipe things together to evaluate the hazard ratio for surgery vs no surgery:
121+
122+
```{r}
123+
library(survival)
124+
df <- toy_df |>
125+
create_clones(
126+
id = 'id',
127+
event = 'death',
128+
time_to_event = 'fup_obs',
129+
exposure = 'surgery',
130+
time_to_exposure = 'timetosurgery',
131+
ced_window = 365.25/2
132+
) |>
133+
cast_clones_to_long() |>
134+
generate_ccw(c('age', 'sex', 'perf', 'stage', 'deprivation', 'charlson', 'emergency'))
135+
136+
coxph(Surv(t_start, t_stop, outcome) ~ clone, data = df, weights = weight_cox)
137+
138+
```
139+
140+
Note that we used `outcome` and not `death` in the `coxph()` model. Still, there is of course a problem with this analysis, as the cloning process renders the variance invalid. The simplest approach to addressing this is
141+
to bootstrap the variance. I have not made a function to do this yet, but leave the below as an example of how to do this.
142+
143+
```{r}
144+
library(boot)
145+
146+
boot_cox <- function(data, indices) {
147+
148+
# Make long data.frame with weights
149+
ccw_df <- data[indices, ] |>
150+
create_clones(
151+
id = 'id',
152+
event = 'death',
153+
time_to_event = 'fup_obs',
154+
exposure = 'surgery',
155+
time_to_exposure = 'timetosurgery',
156+
ced_window = 182.62
157+
) |>
158+
cast_clones_to_long() |>
159+
generate_ccw(c('age', 'sex', 'perf', 'stage', 'deprivation', 'charlson', 'emergency'))
160+
161+
162+
# Extract HR from CoxPH
163+
cox_ccw <- coxph(Surv(t_start, t_stop, outcome) ~ clone, data = ccw_df, weights = weight_cox)
164+
165+
hr <- cox_ccw |>
166+
coef() |>
167+
exp()
168+
169+
out <- c("hr" = hr)
170+
171+
# Create survfit objects for each of treated and untreated
172+
surv_1 <- survfit(Surv(t_start, t_stop, outcome) ~ 1L, data = ccw_df[ccw_df$clone == 1, ], weights = weight_cox)
173+
174+
surv_0 <- survfit(Surv(t_start, t_stop, outcome) ~ 1L, data = ccw_df[ccw_df$clone == 0, ], weights = weight_cox)
175+
176+
# RMST difference
177+
rmst_1 <- surv_1 |>
178+
summary(rmean = 365) |>
179+
(\(summary) summary$table)() |>
180+
(\(table) table["rmean"])()
181+
182+
rmst_0 <- surv_0 |>
183+
summary(rmean = 365) |>
184+
(\(summary) summary$table)() |>
185+
(\(table) table["rmean"])()
186+
187+
rmst_diff <- rmst_1 - rmst_0
188+
189+
out <- c(out, "rmst_diff" = rmst_diff)
190+
191+
# 1-year survival difference
192+
# Find the index of the time point closest to 1 year
193+
index_1yr_1 <- which.min(abs(surv_1$time - 365))
194+
index_1yr_0 <- which.min(abs(surv_0$time - 365))
195+
196+
# Get the 1-year survival probabilities
197+
surv_1_1yr <- surv_1$surv[index_1yr_1]
198+
surv_0_1yr <- surv_0$surv[index_1yr_0]
199+
200+
surv_diff_1yr <- surv_1_1yr - surv_0_1yr
201+
202+
out <- c(out, "surv_diff_1yr" = surv_diff_1yr)
203+
204+
}
205+
206+
boot_out <- boot(data = toy_df, statistic = boot_cox, R = 10)
207+
```
208+
209+
## Hazard ratios
210+
```{r}
211+
boot.ci(boot_out, type = "norm", index = 1)
212+
```
213+
214+
## RMST
215+
```{r}
216+
boot.ci(boot_out, type = "norm", index = 2)
217+
```
218+
219+
## 1-year survival
220+
```{r}
221+
boot.ci(boot_out, type = "norm", index = 3)
222+
```

0 commit comments

Comments
 (0)
Please sign in to comment.