# © 2018 - Guillaume Desagulier
# Companion file to "Why corpus linguists should be wary of kidney stones and Simpson’s paradox," in Around the word, 10/01/2018, https://corpling.hypotheses.org/?p=326.
# Setup ----
# Install the car package only if it is not already available, then load it.
# An unconditional install.packages() call would re-download the package (and
# require network access) every time the script is run.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library(car)

# Simulated data ----
# Number of data points PER GROUP (three groups, so 3 * n points in total).
n <- 100
# Fix the random seed so that the simulated data are reproducible.
set.seed(43)

# Within every group, y decreases with x (slope -0.6) plus Gaussian noise.
# The noise mean grows from group to group (50, 100, 150), which shifts each
# successive group upwards: the expected group means of y are 20, 40 and 60.

# first group of data points: x centred on 50
x1 <- rnorm(n, mean = 50, sd = 15)
y1 <- -0.6 * x1 + rnorm(n, mean = 50, sd = 30)
# second group of data points: x centred on 100
x2 <- rnorm(n, mean = 100, sd = 15)
y2 <- -0.6 * x2 + rnorm(n, mean = 100, sd = 30)
# third group of data points: x centred on 150
x3 <- rnorm(n, mean = 150, sd = 15)
y3 <- -0.6 * x3 + rnorm(n, mean = 150, sd = 30)

# all groups together
X <- c(x1, x2, x3)
Y <- c(y1, y2, y3)

# Plots ----
# Plot the pooled data, ignoring the confounding variable (group membership):
# no marginal boxplots, no smoother, only the default regression line.
scatterplot(X, Y, boxplots = "", smooth = FALSE)

# Assign each observation to its group; the labels follow the order in which
# X and Y were concatenated above (n "first", then n "second", then n "third").
group <- rep(c("first", "second", "third"), each = n)

# Plot the data again, this time by group, with one colour per group.
# NOTE: the legend is suppressed with `legend = FALSE`, the argument name used
# by car >= 3.0; `legend.plot` was the name in earlier versions of car.
scatterplot(
  X, Y,
  groups = group,
  col = c("magenta", "cyan4", "dodgerblue"),
  legend = FALSE,
  smooth = FALSE
)