
# THE DATA

# 40 points from N(-1,1):
xnorm <-
c(0.75745087478711, -3.67833850920761, 0.773765294923852, -2.5166350926628, 
-0.866664528466648, -1.63695628038082, -2.69436694486401, -1.17670482134698, 
-0.93268038000603, -2.68863707210148, -2.55723562317876, -0.96943475222958, 
-0.502247844160163, -0.95460405243514, -1.35246808601944, 0.430539874991081, 
-0.216421013142778, -0.0735422349319686, 0.929234311847512, -2.55736988978725, 
0.659802037906305, 0.487098508940152, -2.25146570024105, -1.54900454926781, 
-0.0399614473021575, -1.27424418773441, -1.39537014482172, -0.378208795173341, 
-1.43844136511807, -2.03013360311522, -2.60644108937020, -1.03736744149929, 
-1.06954070965917, -1.97334401699724, 0.34551263753373, -1.00174729815340, 
0.227300814797052, -1.21912077204945, -2.36256531047837, -1.07478823528914
)

# 40 points from Exp(1):
xexp <-
c(0.225873099482754, 0.148239626311750, 0.163836174178869, 3.04540461285, 
2.72215233916853, 1.25697952970387, 0.413789347279817, 0.241668664384633, 
2.18483032420187, 4.2592981774477, 1.07056121710212, 0.708313713739526, 
1.05090031489254, 0.188016591127962, 0.970786548177612, 1.01574974502893, 
0.623112535104156, 0.0752273758896854, 1.42379735390819, 1.16037799600036, 
3.44145662320738, 2.70706842640851, 1.31681641652705, 0.663615722674876, 
0.179161738153013, 1.52543949513676, 0.117737954482436, 1.22918485007705, 
0.940607616864749, 0.384938156697899, 1.17735727499695, 0.812610192970475, 
0.971939288205176, 0.0429625739343464, 0.286606042180210, 1.36010397035153, 
0.384745879098773, 1.42643168746474, 2.45802981952641, 0.565702312625945
)

# 20 points from Unif[0,1]:
xunif <-
c(0.748017165577039, 0.363418926019222, 0.459732743212953, 0.0838238240685314, 
0.817100885789841, 0.792823903961107, 0.77300950884819, 0.366373649565503, 
0.464972200803459, 0.666654106695205, 0.58317914721556, 0.519858546787873, 
0.181064054602757, 0.743976694066077, 0.668645858066157, 0.940935818711296, 
0.288860949221998, 0.085076387040317, 0.854602423030883, 0.69708909583278
)

# collect them into a single sample of 100 data points:
xdata <- c(xnorm, xexp, xunif)
n <- length(xdata)

# define the true (mixture) density:
truedens = function(x) { return( 0.4 * dnorm(x,-1,1) + 0.4 * dexp(x) +
					0.2 * dunif(x) ) }

# compute the centered interval density estimator:
centintdens = function(x) {
    sum = 0
    for (i in 1:n) {
	if (abs(xdata[i]-x) < h)
	    sum = sum + 1
    }
    return(sum / 2 / n / h)
}

# choose a kernel:
K = function(x) {dnorm(x)}

# compute the kernel density estimator:
kerdens = function(x) {
    sum = 0
    for (i in 1:n) {
	sum = sum + K((x-xdata[i])/h)
    }
    return(sum / n / h)
}

# define my own plotting routine (avoiding problems of "plot" & "curve"):
# Note: STA410/2102 students do *NOT* need to study this function,
#         though you may use it if you wish.
plotfunction = function(ff, from=xlim[1], to=xlim[2], col="black",
	add=(dev.cur()!=1), xlim=c(0,1) ) {
    numpoints = 1000;
    xlist = ylist = NULL;
    ymin = +Inf;
    ymax = -Inf;
    for (i in 1:numpoints) {
        xval = from + (to-from) * i / numpoints;
        yval = ff(xval);
        xlist = c(xlist, xval);
        ylist = c(ylist, yval);
	if (yval < ymin)
	    ymin = yval;
	if (yval > ymax)
	    ymax = yval;
    }
    if (add==FALSE) {
	plot( c((from+to)/2, (ymin+ymax)/2), type='n',
		xlim=c(from,to), ylim=c(ymin,ymax), xlab="x",
		ylab=paste( deparse(substitute(ff)), "(x)", sep="" )
		);
    }
    lines(xlist, ylist, col=col);
}
# colors() shows list of colours ... e.g. magenta, hotpink1, turquoise, ...


# DO SOME PLOTTING (can copy-and-paste individual lines for individual plots):

# compute the left and right ends of the graph, and the data dots' y level:
range = max(xdata)-min(xdata)
leftend = min(xdata) - 0.1*range
rightend = max(xdata) + 0.1*range
ylevel = -0.2
ydata <- rep(ylevel,n)

# plot original data (at y-value = ylevel), and the y-axis black line:
plot(xdata, ydata, ylim=c(2*ylevel,1) )
lines(c(leftend,rightend), c(0,0), col="black")

# plot true density:
curve(truedens, leftend, rightend, add=TRUE, col="red")

# plot histogram:
h=0.2; hist(xdata, breaks=((-4/h):(5/h))*h, freq=FALSE, add=TRUE, border="blue")

# plot centered interval density:
h=0.2; plotfunction(centintdens, leftend, rightend, col="green")

# plot kernel density estimator:
h=0.2; curve(kerdens, leftend, rightend, add=TRUE, col="purple")

# try R's built-in density-estimation function:
zz = density(xdata) ; xR = zz$x ; yR = zz$y
lines(xR, yR, col="orange")

# do some comparisons
print( sum((yR - truedens(xR))^2) )
h=0.2; print( sum((kerdens(xR) - truedens(xR))^2) )
h=0.592; print( sum((kerdens(xR) - truedens(xR))^2) )
h=0.2
# sum((centintdens(xR) - truedens(xR))^2)
ss = 0 ; for (i in 1:length(xR)) { ss = ss +
		(centintdens(xR[i]) - truedens(xR[i]))^2 } ; print( ss )


