# DATA 606 - Statistics & Probability - Spring 2023
#
# Summarizing Data Part 2
#
# Click here to open the slides (PDF).

# A formula is stored as an unevaluated symbolic object, so y, m, x and b do
# not have to exist for this assignment to succeed.
# NOTE(review): although this reads like the line equation y = m*x + b, in R's
# model-formula syntax `*` means "main effects plus interaction" and `+` means
# "add a term", not arithmetic -- worth pointing out to readers.
my_formula <- y ~ m * x + b
# Show the class of the object that `~` created.
class(my_formula)
# Print the formula itself.
my_formula
# List the names of the variables that appear in the formula.
all.vars(my_formula)

# Load the built-in mtcars data and describe mpg in terms of wt and cyl.
data(mtcars)
mtcars_formula <- mpg ~ wt + cyl

# Treat cyl as a categorical variable rather than a number.
mtcars[["cyl"]] <- factor(mtcars[["cyl"]])

# All columns named in the formula.
mtcars[, all.vars(mtcars_formula)]

# Only the first variable named in the formula (the left-hand side), returned
# as a plain vector rather than a one-column data frame.
mtcars[[all.vars(mtcars_formula)[1]]]

# Expand the formula against the data into a design matrix and inspect its
# class.
mm <- model.matrix(mtcars_formula, data = mtcars)
class(mm)

# One box-and-whisker plot summarising the whole mpg column.
boxplot(mtcars$mpg)
# Formula interface: one box of mpg for each value of cyl.
boxplot(mpg ~ cyl, data = mtcars)

library(ggplot2)

# Scatter plot of US retail price against piece count, coloured by
# availability, with reference lines drawn at the mean of each variable.
# NOTE(review): `legosets` is not created anywhere in the visible code --
# presumably it is loaded from a data package elsewhere; confirm before
# running this section.
mean_pieces <- mean(legosets$pieces, na.rm = TRUE)

ggplot(legosets, aes(x = pieces, y = US_retailPrice)) +
  # Horizontal line at the mean US retail price (missing values ignored).
  geom_hline(yintercept = mean(legosets$US_retailPrice, na.rm = TRUE)) +
  # Vertical line at the mean piece count computed above.
  geom_vline(xintercept = mean_pieces) +
  geom_point(aes(colour = availability))

# Descriptive statistics of the piece counts, computed separately for each
# availability group. The result is used further down as a data frame with
# (at least) the columns group1, mean and se.
# Per the psych documentation, mat = TRUE requests a single matrix-style
# result instead of a list, and skew = FALSE skips skew/kurtosis --
# TODO confirm against the installed psych version.
desc <- psych::describeBy(legosets$pieces,
						 group = legosets$availability,
						 mat = TRUE, skew = FALSE)
# Print the summary table.
desc

# Box plots of piece count per availability category on a log10 y axis,
# overlaid with each group's mean (blue point) and mean +/- one standard error
# (dark-green error bar) taken from the `desc` summary table.
# NOTE(review): the means and standard errors in `desc` come from the raw
# piece counts and are only positioned on the log axis, whereas the box plot
# statistics are computed after the log10 scale transformation -- confirm
# that mixing the two is intended.
# Alternative layout: append coord_flip() for horizontal boxes instead of
# rotating the category labels.
ggplot() +
  geom_boxplot(data = legosets, aes(x = availability, y = pieces)) +
  geom_errorbar(
    data = desc,
    aes(x = group1, ymin = mean - se, ymax = mean + se),
    color = "darkgreen"
  ) +
  geom_point(
    data = desc,
    aes(x = group1, y = mean),
    color = "blue", size = 3
  ) +
  scale_y_log10() +
  theme_minimal() +
  # hjust = 1 anchors each rotated label at its end, so the text finishes at
  # its own tick instead of being centred on it.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Histogram of US retail price with bins of width 20 (in the units of
# US_retailPrice).
ggplot(data = legosets) +
  geom_histogram(mapping = aes(x = US_retailPrice), binwidth = 20)

# Kernel density estimate of US retail price, drawn in blue on a log10 x axis.
ggplot(data = legosets) +
  geom_density(mapping = aes(x = US_retailPrice), colour = "blue") +
  scale_x_log10()