R, e.g.: Bokal’s cheese balls
Recently, someone in the office (Bokal?) was eating cheese balls from a barrel.
My first reaction was “Is this food?” My second was slightly less judgmental and more fertile for R exploration “In what context does this food live?” This sent me off to the web and R. I found a fairly comprehensive data set for various food and plotted Bokal’s Cheese ball in context for fat, carbs and protein–mostly what food is (fiber?). I didn’t get mass-density information, so I plotted by energy density.
And the scatter plots show more clearly that Cheese balls are out on the edge–only possible with heroic technological means.
#!/usr/bin/env Rscript
#
# Scott Hendrickson
# 2012-01-27
#
library(ggplot2)
library(gridExtra)
# read in nutritional data
cnt<-read.table("./foodNutritionalData.csv", header=TRUE, sep=",")
cnt$edensity <- cnt$Energy/cnt$Weight
cnt$feratio <- cnt$Fat/cnt$Energy
cnt$peratio <- cnt$Protein/cnt$Energy
cnt$ceratio <- cnt$Carbohydrate/cnt$Energy
summary(cnt)
# single out Bokal's cheese balls
cnt$Type <- "Everything Else"
# this record was entered first in the file
cnt$Type[c(1)] <- "Bokal's Cheese Balls"
# cnt[1,]
#order and context by energy density
cntbyedensity <- cnt[order(cnt$edensity),]
ordr <- match("Bokal's Cheese Balls", cntbyedensity$Type)
print(cntbyedensity[ (ordr - 5): (ordr + 5), ], width = ,digits = 3)
# arrow - horizontal arrow pointing to the x value of point offset by aoffset
# point(x,y); point$dx, point$dy size; ad direction; xoffset arrow point offset; xhf, xhf arrow size fractions
arrow <- function(point, ad=1, xoffset=0, xhf=0.3, yhf=0.3) {
xhead <- xhf * point$dx
yhead <- (1. + yhf) * point$dy
data.frame(
x = c( point$x-ad*xoffset,
point$x-ad*xhead, point$x-ad*xhead,
point$x-ad*point$dx, point$x-ad*point$dx,
point$x-ad*xhead, point$x-ad*xhead,
point$x-ad*xoffset),
y = c( point$y,
point$y+yhead,
point$y-point$dy/2., point$y-point$dy/2.,
point$y+point$dy/2., point$y+point$dy/2.,
point$y-yhead,
point$y)
)
}
## Plots
pnt_o <- data.frame( x <- c(cnt[1,"edensity"]), y <- c(80), dx <- 4, dy <- 20 )
arrow_o <- arrow( pnt_o, -1 )
ptext_o <- data.frame(label="Bokal's Cheese Balls", x=pnt_o$x + 2, y=pnt_o$y)
lx_o <- c(cnt$edensity[1], cnt$edensity[1])
ly_o <- c(10, 90)
o <-qplot(edensity, data=cnt, geom="histogram",
binwidth = 0.2,
xlab="Energy Density (cal/g)",
ylab="Number of Foods") +
geom_line(aes(lx_o,ly_o), color="red") +
geom_polygon(aes(x,y), data=arrow_o, fill="yellow") +
geom_text(aes(x, y, label=label), ptext_o)
#
pnt_a <- data.frame( x <- c(cnt[1,"feratio"]), y <- c(150), dx <- 0.07, dy <- 60 )
arrow_a <- arrow( pnt_a , -1)
ptext_a <- data.frame(label="Bokal's Cheese Balls", x=pnt_a$x + 0.035, y=pnt_a$y)
lx_a <- c(cnt$feratio[1], cnt$feratio[1])
ly_a <- c(50, 200)
a <- qplot(feratio, data=cnt, geom="histogram",
binwidth = 0.004,
xlab="Fat/Energy (g/cal)",
ylab="Number of Foods") +
geom_line(aes(lx_a,ly_a), color="red") +
geom_polygon(aes(x,y), data=arrow_a, fill="yellow") +
geom_text(aes(x, y, label=label), ptext_a)
#
pnt_b <- data.frame( x <- c(cnt[1,"peratio"]), y <- c(110), dx <- 0.14, dy <- 30 )
arrow_b <- arrow( pnt_b ,-1 )
ptext_b <- data.frame(label="Bokal's Cheese Balls", x=pnt_b$x + 0.07, y=pnt_b$y)
lx_b <- c(cnt$peratio[1], cnt$peratio[1])
ly_b <- c(80, 140)
b <- qplot(peratio, data=cnt, geom="histogram",
binwidth = 0.005,
xlab="Protein/Energy (g/cal)",
ylab="Number of Foods") +
geom_line(aes(lx_b,ly_b), color="red") +
geom_polygon(aes(x,y), data=arrow_b, fill="yellow") +
geom_text(aes(x, y, label=label), ptext_b)
#
pnt_c <- data.frame( x <- c(cnt[1,"ceratio"]), y <- c(100), dx <- 0.24, dy <- 30 )
arrow_c <- arrow( pnt_c ,-1 )
ptext_c <- data.frame(label="Bokal's Cheese Balls", x=pnt_c$x + 0.12, y=pnt_c$y)
lx_c <- c(cnt$ceratio[1], cnt$ceratio[1])
ly_c <- c(20, 120)
c <- qplot(ceratio, data=cnt, geom="histogram",
binwidth = 0.005,
xlab="Carb/Energy (g/cal)",
ylab="Number of Foods") +
geom_line(aes(lx_c,ly_c), color="red") +
geom_polygon(aes(x,y), data=arrow_c, fill="yellow") +
geom_text(aes(x, y, label=label), ptext_c)
png(filename = "./denrat.png", width = 520, height = 900, units = 'px')
print(
grid.arrange(o, a, b, c, ncol=1)
)
dev.off()
# this would work better with volume density
#p <- ggplot(cnt, aes(Energy, Fat),
# xlab="Energy (cal)",
# ylab="Fat (g)")
# try some fat-energy relationships
#p + geom_point(aes(color = Type )) +
# scale_x_log2() +
# scale_y_log2()
#p + geom_point(aes(color = Type, size = edensity )) +
# scale_x_log2() +
# scale_y_log2()
# context for Bokal's cheese balls
q <- ggplot(cnt, aes(x = edensity))
q1 <- q +
geom_point(aes(edensity, feratio, color = Type)) +
xlab("Energy Density (cal/g)") +
ylab("Fat/Energy (g/cal)") +
geom_point(aes(edensity, feratio), data=cnt[1,], size = 4, color="red")
q2 <- q +
geom_point(aes(edensity, peratio, color = Type)) +
xlab("Energy Density (cal/g)") +
ylab("Protein/Energy (g/cal)") +
geom_point(aes(edensity, peratio), data=cnt[1,], size = 4, color="red")
q3 <- q +
geom_point(aes(edensity, ceratio, color = Type)) +
xlab("Energy Density (cal/g)") +
ylab("Carb/Energy (g/cal)") +
geom_point(aes(edensity, ceratio), data=cnt[1,], size = 4, color="red")
png(filename = "./relrat.png", width = 520, height = 900, units = 'px')
print(
grid.arrange(q1, q2, q3, ncol=1)
)
dev.off()
(Bonus here would be some R-ish way to define and see “near” foods”)






The answer to your first question is, of course, a resounding “no!”