# © 2018 - Guillaume Desagulier
# Companion file to "Why corpus linguists should be wary of kidney stones and
# Simpson’s paradox," in Around the word, 10/01/2018,
# https://corpling.hypotheses.org/?p=326.
#
# Purpose: simulate three groups of points and plot them twice, first pooled
# and then split by group. Inside every group y is built as -0.6 * x plus
# noise, so the within-group relation is negative; the noise mean grows with
# the group's x centre (50, 100, 150), so the group means of y rise with x
# (about 20, 40, 60) and the pooled cloud trends upward. The grouping
# variable is the confound that reverses the apparent relation.

# Dependencies ----

# install the car package only when it is missing (avoids a download on every
# run and lets the script work offline once car is installed), then load it
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library(car)

# Simulated data ----

# we want 100 data points per group
n <- 100

# fixed seed so the figures are reproducible; the order of the rnorm() calls
# below must not change, otherwise the simulated values differ
set.seed(43)

# first group of data points
x1 <- rnorm(n, 50, 15)
y1 <- -0.6 * x1 + rnorm(n, 50, 30)

# second group of data points
x2 <- rnorm(n, 100, 15)
y2 <- -0.6 * x2 + rnorm(n, 100, 30)

# third group of data points
x3 <- rnorm(n, 150, 15)
y3 <- -0.6 * x3 + rnorm(n, 150, 30)

# all groups together
X <- c(x1, x2, x3)
Y <- c(y1, y2, y3)

# Plots ----

# we plot the data, ignoring the confounding variable
# (no marginal boxplots, no smoother)
scatterplot(X, Y, boxplots = "", smooth = FALSE)

# we assign three groups, in the same order as the points in X and Y
group <- c(rep("first", n), rep("second", n), rep("third", n))

# and plot the data again, one colour per group.
# NOTE: car >= 3.0 names the legend switch `legend`; the older `legend.plot`
# spelling is no longer a formal argument there and would not hide the legend.
scatterplot(
  X, Y,
  col = c("magenta", "cyan4", "dodgerblue"),
  groups = group,
  legend = FALSE,
  smooth = FALSE
)