Appendix B — The Love-431.R script

B.1 R setup for this appendix

Note

Appendix A lists all R packages and data sets used in the book, and also provides R session information.

B.2 Contents of Love-431.R script

B.3 The lovedist() function

lovedist <- function(x) {
  tibble::tibble(
    n = length(x),
    miss = sum(is.na(x)),
    mean = mean(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE),
    med = median(x, na.rm = TRUE),
    mad = mad(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    q25 = quantile(x, 0.25, na.rm = TRUE),
    q75 = quantile(x, 0.75, na.rm = TRUE),
    max = max(x, na.rm = TRUE),
  )
}

B.3.1 Sample Use

palmerpenguins::penguins |>
  reframe(lovedist(bill_depth_mm))
# A tibble: 1 × 10
      n  miss  mean    sd   med   mad   min   q25   q75   max
  <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1   344     2  17.2  1.97  17.3  2.22  13.1  15.6  18.7  21.5
palmerpenguins::penguins |>
  reframe(lovedist(bill_depth_mm), .by = species)
# A tibble: 3 × 11
  species       n  miss  mean    sd   med   mad   min   q25   q75   max
  <fct>     <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Adelie      152     1  18.3 1.22   18.4  1.19  15.5  17.5  19    21.5
2 Gentoo      124     1  15.0 0.981  15    1.19  13.1  14.2  15.7  17.3
3 Chinstrap    68     0  18.4 1.14   18.4  1.41  16.4  17.5  19.4  20.8

B.4 The twobytwo function

twobytwo <-
  function(a, b, c, d,
           namer1 = "Row1", namer2 = "Row2",
           namec1 = "Col1", namec2 = "Col2",
           conf.level = 0.95)
           # build 2 by 2 table and run Epi library's twoby2 command to summarize
           # from the row-by-row counts in a cross-tab
           # upper left cell is a, upper right is b,
           # lower left is c, lower right is d
           # names are then given in order down the rows then across the columns
           # use standard epidemiological format:
  # outcomes in columns, treatments in rows
  {
    .Table <- matrix(c(a, b, c, d), 2, 2,
      byrow = T,
      dimnames = list(
        c(namer1, namer2),
        c(namec1, namec2)
      )
    )
    Epi::twoby2(.Table, alpha = 1 - conf.level)
  }

B.4.1 Sample Use

twobytwo(23, 14, 12, 9, "Treatment 1", "Treatment 2", "Out 1", "Out 2",
  conf.level = 0.90
)
2 by 2 table analysis: 
------------------------------------------------------ 
Outcome   : Out 1 
Comparing : Treatment 1 vs. Treatment 2 

            Out 1 Out 2    P(Out 1) 90% conf. interval
Treatment 1    23    14      0.6216    0.4847   0.7415
Treatment 2    12     9      0.5714    0.3923   0.7336

                                   90% conf. interval
             Relative Risk: 1.0878    0.7472   1.5839
         Sample Odds Ratio: 1.2321    0.4936   3.0759
Conditional MLE Odds Ratio: 1.2277    0.4265   3.5045
    Probability difference: 0.0502   -0.1587   0.2620

             Exact P-value: 0.7834 
        Asymptotic P-value: 0.7074 
------------------------------------------------------

B.5 The saifs_ci() function

saifs_ci <-
  function(x, n, conf.level = 0.95, dig = 3) {
    p.sample <- round(x / n, digits = dig)

    p1 <- x / (n + 1)
    p2 <- (x + 1) / (n + 1)

    var1 <- (p1 * (1 - p1)) / n
    se1 <- sqrt(var1)
    var2 <- (p2 * (1 - p2)) / n
    se2 <- sqrt(var2)

    lowq <- (1 - conf.level) / 2
    tcut <- qt(lowq, df = n - 1, lower.tail = FALSE)

    lower.bound <- round(p1 - tcut * se1, digits = dig)
    upper.bound <- round(p2 + tcut * se2, digits = dig)
    tibble(
      sample_x = x,
      sample_n = n,
      sample_p = p.sample,
      lower = lower.bound,
      upper = upper.bound,
      conf_level = conf.level
    )
  }

B.5.1 Sample Usage

saifs_ci(x = 19, n = 25, conf.level = 0.95)
# A tibble: 1 × 6
  sample_x sample_n sample_p lower upper conf_level
     <dbl>    <dbl>    <dbl> <dbl> <dbl>      <dbl>
1       19       25     0.76 0.548 0.943       0.95