##################################################################################
# script that uses the Pearson chi-square statistic to test if the number
# of murders in Chicago by weekday is consistent with being evenly distributed.
#
# Author: S Towers
#
# Created: Sept 5, 2013
#
# This script is not guaranteed to be free of bugs and/or errors.
#
# This script can be freely used and shared as long as the author and
# copyright information in this header remain intact.
##################################################################################
require("chron")
# Load the crime summary table and keep only the 2001-2012 study window.
vdat <- read.table("chicago_crime_summary.txt", header = TRUE)
vdat <- subset(vdat, year >= 2001 & year <= 2012)

# Convert month/day/year into a day count with julian() (chron package), then
# into a Date so that the day of the week can be extracted.
# NOTE(review): the as.Date() origin below assumes chron's default origin of
# 1970-01-01 has not been overridden via options(chron.origin = ...).
vdat$jul <- julian(vdat$month, vdat$day, vdat$year)
date <- as.Date(vdat$jul, origin = "1970-01-01")

# "%w" encodes the weekday as 0 (Sunday) through 6 (Saturday).
vdat$weekday <- as.numeric(format(date, "%w"))

# Sum column x1 (treated by this script as the murder count) within each
# weekday. aggregate() returns the groups sorted by weekday code, so the
# resulting vector runs Sunday..Saturday.
g <- aggregate(vdat$x1, by = list(vdat$weekday), FUN = "sum")
N_murder_per_weekday <- g[[2]]

# aggregate() silently omits weekdays that have no rows; the rest of the
# script assumes exactly seven counts in Sunday..Saturday order, so fail here
# with a clear message rather than mislabel or misalign the counts later.
if (length(N_murder_per_weekday) != 7) {
  stop("expected murder counts for all 7 weekdays, found ",
       length(N_murder_per_weekday), call. = FALSE)
}

# Total number of murders over the whole study window.
N <- sum(N_murder_per_weekday)
# Plot the observed number of murders per weekday together with the count
# expected if murders were spread evenly over the week.
#
# mult.fig() is provided by the sfsmisc package, which this script never
# attaches; calling it through its namespace keeps the figure setup from
# depending on sfsmisc having been loaded elsewhere in the session.
sfsmisc::mult.fig(1, main = "Murders in Chicago by weekday")
plot(seq(1, 7),
     N_murder_per_weekday,
     xlab = "",
     xaxt = "n",              # suppress default x axis; weekday labels added below
     xaxs = "i",              # no padding: axes span exactly xlim/ylim
     yaxs = "i",
     ylab = "Number of murders committed per weekday",
     type = "l",              # line plot
     xlim = c(0.5, 7.5),
     ylim = N * c(0.1, 0.2),  # 10%-20% of the total; an even share is 1/7
     col = 4,                 # blue
     lwd = 5)

# Expected count per weekday under the null hypothesis of an even
# distribution, drawn as a dotted red horizontal reference line.
Nexpect <- N / 7
abline(h = Nexpect, col = 2, lwd = 3, lty = 3)

# Label x positions 1..7 with weekday names, matching the Sunday-first order
# of N_murder_per_weekday.
vname <- c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")
axis(1, labels = vname, at = seq(1, 7))
# Pearson chi-square statistic comparing the observed weekday counts with the
# count expected under an even distribution (Nexpect for every weekday).
Q <- sum((N_murder_per_weekday - Nexpect)^2 / Nexpect)
cat("The Q-statistic testing the hypothesis of evenly distributed number of murders per weekday is Q =",Q,"\n")

# Degrees of freedom: number of categories minus one, because the expected
# counts are constrained to sum to the observed total.
dof <- length(N_murder_per_weekday) - 1

# Take the upper tail directly: 1 - pchisq(Q, dof) cancels to exactly 0 once
# the p-value drops below machine precision, which hides how strong the
# evidence against the null hypothesis actually is.
p_value <- pchisq(Q, df = dof, lower.tail = FALSE)
cat("The p-value testing the null hypothesis is ",p_value,"\n")