I really enjoyed this workout. I'm using these workouts primarily as a way to learn Quarto, and this one was really useful for that. I also used it as an opportunity to demonstrate one of my favorite visualization packages in R, ggpubr, which makes it easy to generate publication-ready statistical charts.
For those interested, here’s the Quarto code I used to generate the above document.
Click for R Code within Quarto
---
# Document metadata and render settings for the workout write-up.
title: "Enterprise DNA Data Analysis Workout 005"
author: "Brian Julius"
date: 2023-04-29
format: docx
theme: cyborg
editor: visual
warning: false
# chunk_output_type must be nested under editor_options; at column 0 it is
# parsed as an unrelated top-level key and editor_options is left empty.
editor_options:
  chunk_output_type: console
---
# Setup Chunk
```{r}
#| label: setup
#| include: false

# Attach the packages used by the chunks in this document.
library(tidyverse)
library(ggpubr)
library(ggthemes)

# Read the tips data set straight from GitHub, skipping the column that
# readr names `...1` (presumably an unnamed row-index column -- confirm).
tips <- read_csv(
  "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/07_Visualization/Tips/tips.csv",
  col_types = cols(...1 = col_skip())
)

# Convert the grouping columns to factors with explicit level order so that
# axes, facets and legends follow this order rather than alphabetical order.
tips$day <- factor(tips$day, levels = c("Thur", "Fri", "Sat", "Sun"))
tips$time <- factor(tips$time, levels = c("Lunch", "Dinner"))
tips$size <- factor(tips$size, levels = as.character(1:6))
```
## Structure of “tips” file to be used in analysis:
Note: Q1 incorporated in file import step
#| label: data-overview
#| echo: false
# Show the first five rows of `tips` as a formatted table.
# The label uses a hyphen instead of a space: chunk labels are reused for
# generated file names and references, where spaces are best avoided.
knitr::kable(head(tips, n = 5))
Q2: Plot the total_bill column histogram
#| label: Q2
#| echo: true
# Histogram of total_bill in 15 bins, with a rug along the axis and a
# marker for the mean.
gghistogram(
  data = tips,
  x = "total_bill",
  bins = 15,
  fill = "lightblue",
  add = "mean",
  rug = TRUE,
  main = "Distribution of Total Bill",
  xlab = "Total Bill",
  ylab = "Count"
)
Q3: Create a scatterplot presenting the relationship between total_bill and tip
#| label: Q3
#| echo: true
# Scatter of tip against total_bill, overlaid with a solid red regression
# line and its confidence interval.
ggscatter(
  data = tips,
  x = "total_bill",
  y = "tip",
  add = "reg.line",
  add.params = list(color = "red", linetype = "solid"),
  conf.int = TRUE,
  main = "Relationship Between Total Bill and Tip",
  xlab = "Total Bill",
  ylab = "Tip"
)
Q4: Create one image with the relationship of total_bill, tip and size
#| label: Q4
#| echo: true
# Scatter of tip against total_bill in which the `size` column drives both
# point colour and point size. The palette is written out blue-to-red
# (the reverse of the red-to-blue listing). Only the colour legend is kept,
# placed below the plot.
ggscatter(
  data = tips,
  x = "total_bill",
  y = "tip",
  color = "size",
  size = "size",
  palette = c(
    "#4575b4", "#91bfdb", "#e0f3f8",
    "#fee090", "#fc8d59", "#d73027"
  ),
  main = "Relationship of Total Bill, Tip and Size",
  xlab = "Total Bill",
  ylab = "Tip"
) +
  guides(size = guide_none()) +
  labs(color = "Size") +
  theme(legend.position = "bottom")
Q5: Present the relationship between days and total_bill value, differentiating the dots by sex
#| label: Q5
#| echo: true
# Strip chart of total_bill for each day, with points coloured by `sex`
# using a red/blue palette; the legend is titled "Gender" and sits below.
ggstripchart(
  data = tips,
  x = "day",
  y = "total_bill",
  color = "sex",
  palette = c("red", "blue"),
  main = "Distribution of Total Bill by Day",
  xlab = "",
  ylab = "Total Bill"
) +
  labs(color = "Gender") +
  theme(legend.position = "bottom")
Q6: Create a box plot presenting the total_bill per day differentiated by time
#| label: Q6
#| echo: true
# Box plots of total_bill for each day, filled by `time`. Both axis labels
# are left blank; the legend is titled "Meal" and placed below the plot.
ggboxplot(
  data = tips,
  x = "day",
  y = "total_bill",
  fill = "time",
  main = "Distribution of Total Bill by Day and Time",
  xlab = "",
  ylab = ""
) +
  labs(fill = "Meal") +
  theme(legend.position = "bottom")
Q7: Create two histograms (side by side) of the tip value for Lunch and Dinner
#| label: Q7
#| echo: true
# Side-by-side histograms of the tip value, one facet per `time` level.
# Q7 asks for the distribution of `tip`, so `tip` (not `total_bill`) is the
# plotted variable and the title names it accordingly. The legend is hidden
# because the facet strips already identify each panel.
gghistogram(
  data = tips,
  x = "tip",
  fill = "time",
  facet.by = "time",
  bins = 15,
  rug = TRUE,
  main = "Distribution of Tip by Time",
  xlab = "",
  ylab = ""
) +
  labs(fill = "Meal") +
  theme(legend.position = "none")
Q8: Create two scatterplots (Male and Female) presenting the total_bill value and tip relationship, differing by smoker/non-smoker
#| label: Q8
#| echo: true
# Scatter of tip against total_bill, faceted by `sex`, with points coloured
# by `smoker`. A solid red regression line with confidence interval is
# added; the legend is titled "Smoker" and placed below the plot.
ggscatter(
  data = tips,
  x = "total_bill",
  y = "tip",
  color = "smoker",
  palette = c("cyan", "navyblue"),
  facet.by = "sex",
  add = "reg.line",
  add.params = list(color = "red", linetype = "solid"),
  conf.int = TRUE,
  main = "Tip by Total Bill by Gender and Smoker",
  xlab = "Total Bill",
  ylab = "Tip"
) +
  labs(color = "Smoker") +
  theme(legend.position = "bottom")