-
Notifications
You must be signed in to change notification settings - Fork 0
/
Stat_boxplot_custom.R
164 lines (142 loc) · 8.28 KB
/
Stat_boxplot_custom.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# modified from
# https://github.com/tidyverse/ggplot2/blob/master/R/stat-boxplot.r
# now takes qs argument instead of coef to extend the whiskers to a specific
# percentile
#' A box and whiskers plot that uses percentiles instead of IQR
#'
#' `stat_boxplot_custom()` modifies [ggplot2::stat_boxplot()] so that it
#' computes the extents of the whiskers based on specified percentiles, rather
#' than a multiple of the IQR.
#'
#' The upper and lower whiskers extend to the first, and last entries of the
#' `qs` parameter, respectively. Data beyond the whiskers are "outliers".
#'
#' @param qs The percentiles that are used to create the lower whisker, lower
#' hinge, middle bar, upper hinge, and upper whisker, respectively. The lower
#' and upper whiskers default to the 5th and 95th percentiles. The hinges
#' default to the 25th and 75th percentiles, and the middle bar defaults to
#' the median. These values should be in increasing order, i.e., the whisker
#' should be a smaller percentile than the lower hinge, and can only span the
#' values from 0 to 1 (inclusive).
#'
#' @inheritParams ggplot2::stat_boxplot
#'
#' @examples
#'
#' library(ggplot2)
#' p <- ggplot(mpg, aes(class, hwy))
#'
#' # show whiskers at 5th and 95th percentiles
#' p + stat_boxplot_custom(qs = c(.05, .25, .5, .75, .95))
#'
#' # show whiskers at 10th and 90th percentiles
#' p + stat_boxplot_custom(qs = c(.1, .25, .5, .75, .9))
#'
#' @export
#' ## Fixing boxplots
# Code adapted from: https://rdrr.io/github/BoulderCodeHub/CRSSIO/src/R/stat_boxplot_custom.R
# Full explanation can be found in the stat_boxplot_custom.R file
stat_boxplot_custom <- function(mapping = NULL, data = NULL,
geom = "boxplot", position = "dodge",
...,
qs = c(.05, .25, 0.5, 0.75, 0.95),
na.rm = FALSE,
orientation = NA,
show.legend = NA,
inherit.aes = TRUE) {
assert_that(
length(qs) == 5 && is.numeric(qs),
msg = "`qs` should be a numeric vector with 5 values."
)
assert_that(
all(qs == sort(qs)),
msg = "`qs` should be provided in ascending order."
)
assert_that(
all(qs <= 1) && all(qs >= 0),
msg = "`qs` should only span values [0, 1]."
)
ggplot2::layer(
data = data,
mapping = mapping,
stat = StatBoxplotCustom,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list(
na.rm = na.rm,
orientation = orientation,
qs = qs,
...
)
)
}
StatBoxplotCustom <- ggplot2::ggproto("StatBoxplotCustom", ggplot2::Stat,
required_aes = c("y|x"),
non_missing_aes = "weight",
setup_data = function(data, params) {
data <- ggplot2::flip_data(data, params$flipped_aes)
data$x <- ggplot2:::"%||%"(data$x, 0)
data <- ggplot2::remove_missing(
data,
na.rm = params$na.rm,
vars = "x",
name = "stat_boxplot_custom"
)
ggplot2::flip_data(data, params$flipped_aes)
},
setup_params = function(data, params) {
params$flipped_aes <- ggplot2::has_flipped_aes(data, params,
main_is_orthogonal = TRUE,
group_has_equal = TRUE,
main_is_optional = TRUE)
data <- ggplot2::flip_data(data, params$flipped_aes)
has_x <- !(is.null(data$x) && is.null(params$x))
has_y <- !(is.null(data$y) && is.null(params$y))
if (!has_x && !has_y) {
abort("stat_boxplot() requires an x or y aesthetic.")
}
params$width <- ggplot2:::"%||%"(
params$width,
(ggplot2::resolution(ggplot2:::"%||%"(data$x, 0) * 0.75))
)
if (is.double(data$x) && !ggplot2:::has_groups(data) && any(data$x != data$x[1L])) {
rlang::warn(glue::glue(
"Continuous {flipped_names(params$flipped_aes)$x} aesthetic -- did you forget aes(group=...)?"
))
}
params
},
extra_params = c("na.rm", "orientation"),
compute_group = function(data, scales, width = NULL, na.rm = FALSE,
qs = c(.05, .25, 0.5, 0.75, 0.95), flipped_aes = FALSE) {
data <- ggplot2::flip_data(data, flipped_aes)
if (!is.null(data$weight)) {
mod <- quantreg::rq(y ~ 1, weights = weight, data = data, tau = qs)
stats <- as.numeric(stats::coef(mod))
} else {
stats <- as.numeric(stats::quantile(data$y, qs))
}
names(stats) <- c("ymin", "lower", "middle", "upper", "ymax")
iqr <- diff(stats[c(2, 4)])
outliers <- (data$y < stats[1]) | (data$y > stats[5])
if (length(unique(data$x)) > 1)
width <- diff(range(data$x)) * 0.9
df <- as.data.frame(as.list(stats))
df$outliers <- list(data$y[outliers])
if (is.null(data$weight)) {
n <- sum(!is.na(data$y))
} else {
# Sum up weights for non-NA positions of y and weight
n <- sum(data$weight[!is.na(data$y) & !is.na(data$weight)])
}
df$notchupper <- df$middle + 1.58 * iqr / sqrt(n)
df$notchlower <- df$middle - 1.58 * iqr / sqrt(n)
df$x <- if (is.factor(data$x)) data$x[1] else mean(range(data$x))
df$width <- width
df$relvarwidth <- sqrt(n)
df$flipped_aes <- flipped_aes
ggplot2::flip_data(df, flipped_aes)
}
)