## ----setup, include=FALSE, cache=FALSE------------------------------
# set global chunk options
## this code is only required to set up knitr for reproducible documentation
## it has nothing to do with the exercise itself
## please ignore!
opts_knit$set(aliases=c(h = 'fig.height', w = 'fig.width'), out.format='latex', use.highlight=TRUE)
opts_chunk$set(fig.path='kgraph/ck', fig.align='center', fig.show='hold', prompt=FALSE, comment=NA, fig.width=6, fig.height=6, out.width='.7\\linewidth', size='scriptsize')
# options to be read by formatR
options(replace.assign=TRUE,width=70)


## ----echo=FALSE, results='hide'-------------------------------------
rm(list=ls())


## ----mhw1, child='mhw_1.Rnw'----------------------------------------

## ----set-parent-1, echo=FALSE, cache=FALSE--------------------------
set_parent('mhw.Rnw')


## ----eval=FALSE-----------------------------------------------------
## sort(round(runif(12, -1, 1), 2))


## ----echo=FALSE-----------------------------------------------------
sort(round(runif(12, -1, 1), 2))


## -------------------------------------------------------------------
sample <- runif(12, -1, 1)
print(sample)


## -------------------------------------------------------------------
sample <- round(sample, 2)
sample


## -------------------------------------------------------------------
(sample <- sort(sample))


## -------------------------------------------------------------------
(10 + 0)/2
(10 - 0)^2/12


## ----concordance=TRUE-----------------------------------------------
sample <- runif(20, min=0, max=10)
mean(sample)
var(sample)


## -------------------------------------------------------------------
ls()
rm(sample)
ls()


## ----eval=FALSE-----------------------------------------------------
## help(round)


## -------------------------------------------------------------------
runif(1)
sort(runif(12))
sort(runif(12, 0, 5))
sort(runif(12, min=0, max=5))
sort(runif(max=5, n=12, min=0))


## ----eval=FALSE-----------------------------------------------------
## help.search("principal component")


## ----eval=FALSE-----------------------------------------------------
## help(prcomp)


## ----eval=FALSE-----------------------------------------------------
## q()


## ----eval=F---------------------------------------------------------
## getwd()


## ----eval=FALSE-----------------------------------------------------
## file.show("mhw.csv")


## -------------------------------------------------------------------
mhw <- read.csv("mhw.csv")


## -------------------------------------------------------------------
str(mhw)


## -------------------------------------------------------------------
names(mhw)
colnames(mhw)


## -------------------------------------------------------------------
class(mhw)


## -------------------------------------------------------------------
summary(mhw$grain)
summary(mhw$straw)


## -------------------------------------------------------------------
dim(mhw)


## -------------------------------------------------------------------
mhw[1,]
   # third  column
   # third column of first row
length(mhw[,3])
summary(mhw[,3])
mhw[1, 3]


## -------------------------------------------------------------------
   # a named column
mhw[1, "grain"]


## -------------------------------------------------------------------
mhw[64,"grain"]
mhw[64, c("r", "c")]


## -------------------------------------------------------------------
mhw[1:3, 1:2]


## -------------------------------------------------------------------
summary(mhw[-(1:20),"grain"])
summary(mhw$grain[-(1:20)])


## -------------------------------------------------------------------
head(mhw[,3])
tail(mhw[,"grain"], n=10)
head(mhw$grain)


## -------------------------------------------------------------------
head(sort(mhw$straw), n=5)
head(order(mhw$straw), n=5)
head(mhw[order(mhw$straw), ] , n=5)


## -------------------------------------------------------------------
head(rev(sort(mhw$straw)), n=5)


## -------------------------------------------------------------------
head(sort(mhw$straw, decreasing=T), n=5)


## -------------------------------------------------------------------
tail(sort(mhw$straw), n=5)


## -------------------------------------------------------------------
(ix <- which(mhw$grain == max(mhw$grain)))
mhw[ix,]
(ix <- which(mhw$grain == min(mhw$grain)))
mhw[ix,]


## -------------------------------------------------------------------
(ix <- which.max(mhw$grain))
mhw[ix, ]
(ix <- which.min(mhw$grain))
mhw[ix, ]


## -------------------------------------------------------------------
mhw[which(mhw$straw > 8.8),]


## ----results='hide',echo=F------------------------------------------
col.right <- mhw[mhw$c==max(mhw$c),c("grain","straw","r")]
str(col.right)
(row.order <- order(col.right$grain, decreasing=T))
# no apparent order by rows
(col.right[row.order,])
t(col.right[row.order,])
# straw yields have some correspondence
dotchart(col.right[row.order,"straw"], main="Straw yields in decreasing grain-yield order")
# plot shows this also
rm(col.right, row.order)


## -------------------------------------------------------------------
save(mhw, file="mhw.RData")


## -------------------------------------------------------------------
stem(mhw$grain)


## -------------------------------------------------------------------
hist(mhw$grain)


## -------------------------------------------------------------------
hist(mhw$grain, breaks=seq(2.6, 5.2, by=.1), 
     col="lightblue", border="red",
     main="Mercer-Hall uniformity trial", 
     xlab="Grain yield, lb. per plot")
rug(mhw$grain)


## -------------------------------------------------------------------
seq(2.6, 5.2, by=.1)


## ----hist-with-density----------------------------------------------
hist(mhw$grain, breaks=seq(2.6, 5.2, by=.1), 
     col="lavender", border="darkblue",
     main="Mercer-Hall uniformity trial",
     freq=F,
#     probability=T,
     xlab="Grain yield, lb.\ per plot")
lines(density(mhw$grain), lwd=1.5)
lines(density(mhw$grain, adj=2), lwd=1.5, col="brown")
lines(density(mhw$grain, adj=.5), lwd=1.5, col="red")
text(2.5,0.95,"Default bandwidth", col="darkblue", pos=4)
text(2.5,0.90,"Double bandwidth", col="brown", pos=4)
text(2.5,0.85,"Half bandwidth", col="red", pos=4)


## ----enhanced-density, keep.source=T--------------------------------
plot(density(mhw$grain) ,xlab="Grain yield, lb.\ per plot",
     lwd=1.5, ylim=c(0,1), col="darkblue",
     main="Mercer-Hall uniformity trial")
rug(mhw$grain)
lines(density(mhw$grain, adj=2), lwd=1.5, col="brown")
lines(density(mhw$grain, adj=.5), lwd=1.5, col="red")
text(2.5,0.85,"Default bandwidth", col="darkblue", pos=4)
text(2.5,0.80,"Double bandwidth", col="brown", pos=4)
text(2.5,0.75,"Half bandwidth", col="red", pos=4)


## ----enhanced-hist-with-text, out.width='0.8\\textwidth'------------
h <- hist(mhw$grain, breaks = seq(2.6, 5.2, by=.2), plot=F)
str(h)
plot(h, col = heat.colors(length(h$mids))[length(h$count)-
            rank(h$count)+1],
     ylim = c(0, max(h$count)+5),
     main="Frequency histogram, Mercer & Hall grain yield",
     sub="Counts shown above bar, actual values shown with rug plot",
     xlab="Grain yield, lb. per plot")
rug(mhw$grain)
text(h$mids, h$count, h$count, pos=3)
rm(h)


## -------------------------------------------------------------------
plot(mhw$grain, mhw$straw)  


## ----out.width='0.8\\textwidth'-------------------------------------
plot(mhw$grain, mhw$straw, cex=0.8, pch=21, col="blue",
     bg="red", xlab="Grain yield, lb.\ plot-1",
     ylab="Straw yield, lb.\ per plot-1")
grid()
title(main="Mercer-Hall wheat uniformity trial")
abline(v=mean(mhw$grain), lty=2, col="blue")
abline(h=mean(mhw$straw), lty=2, col="blue")
points(mean(mhw$grain), mean(mhw$straw), pch=23, col="black",
       bg="brown", cex=2)
text(mean(mhw$grain), min(mhw$straw),
     paste("Mean:",round(mean(mhw$grain),2)), pos=4)
text(min(mhw$grain), mean(mhw$straw),
     paste("Mean:",round(mean(mhw$straw),2)), adj=c(0,-1))


## ----eval=FALSE-----------------------------------------------------
## plot(mhw$grain, mhw$straw)
## pts <- identify(mhw$grain, mhw$straw)

## ----echo=FALSE,results='hide'--------------------------------------
pts <- c(15, 35, 184, 284, 292, 295, 311, 337)


## -------------------------------------------------------------------
tmp <- mhw[ pts, ]
tmp[ order(tmp$grain), ]
rm(pts, tmp)


## -------------------------------------------------------------------
summary(mhw)
# summary(scale(mhw[,c("grain","straw")]))
summary(mhw$grain)


## -------------------------------------------------------------------
max(mhw$grain)
min(mhw$grain)
median(mhw$grain)
mean(mhw$grain)
quantile(mhw$grain, probs=c(.25, .75))


## -------------------------------------------------------------------
var(mhw$grain)
sd(mhw$grain)
IQR(mhw$grain)


## -------------------------------------------------------------------
require(e1071)
skewness(mhw$grain)
kurtosis(mhw$grain)
detach(package:e1071)


## -------------------------------------------------------------------
diff(range(mhw$grain))
diff(range(mhw$grain))/median(mhw$grain)


## -------------------------------------------------------------------
ix <- which.min(mhw$grain)
mhw[ix,"straw"]


## -------------------------------------------------------------------
   # find all 
row.names(subset(mhw, mhw$grain<3))
quantile(mhw$grain)
quantile(mhw$grain, seq(0, 1, .1))
mhw[mhw$grain < quantile(mhw$grain, .01), ]


## ----eval=FALSE-----------------------------------------------------
## fix(mhw)


## ----compute.gsr----------------------------------------------------
gsr <- mhw$grain/mhw$straw
summary(gsr)


## ----range.gsr------------------------------------------------------
range(gsr)
diff(range(gsr))/median(gsr)
diff(range(mhw$grain))/median(mhw$grain)


## ----add-gsr--------------------------------------------------------
mhw <- cbind(mhw, gsr)
str(mhw)  


## -------------------------------------------------------------------
ls()
rm(gsr)
ls()


## ----save_mhw2------------------------------------------------------
save(mhw, file="mhw2.RData")



## ----mhw1a, child='mhw_1a.Rnw'--------------------------------------

## ----set-parent-1a, echo=FALSE, cache=FALSE-------------------------
set_parent('mhw.Rnw')


## -------------------------------------------------------------------
plot(density(mhw$grain), col="darkblue",
     main="Grain yield, lb. per plot", lwd=1.5)
rug(mhw$grain, col="darkgreen")
grid()


## -------------------------------------------------------------------
plot(ecdf(mhw$grain), pch=1,
     xlab="Mercer & Hall, Grain yield, lb. per plot", 
     ylab="Cumulative proportion of plots",
     main="Empirical CDF",
     sub="Quantiles shown with vertical lines")
q <- quantile(mhw$grain, c(.05, .1, .25, .75, .9, .95))
abline(v=q, lty=2)
abline(v=median(mhw$grain), col="blue")
abline(v=max(mhw$grain), col="green")
abline(v=min(mhw$grain), col="green")
text(q, 0.5, names(q))
rm(q)


## ----keep.souce=T---------------------------------------------------
qqnorm(mhw$grain,
       main="Normal probability plot, grain yields (lb. plot-1)")
qqline(mhw$grain)
grid()


## -------------------------------------------------------------------
mean(mhw$grain)
sd(mhw$grain)


## -------------------------------------------------------------------
res <- 0.1
hist(mhw$grain, breaks=seq(round(min(mhw$grain),1)-res,
              round(max(mhw$grain),1)+res, by=res),
     col="lightblue", border="red", freq=F,
     xlab="Wheat grain yield, lb. per plot",
     main="Mercer & Hall uniformity trial",
     sub="Theoretical distribution (solid), empirical density (dashed)")
grid()
rug(mhw$grain)
x <- seq(min(mhw$grain)-res, max(mhw$grain)+res, by=.01)
lines(x, dnorm(x, mean(mhw$grain), sd(mhw$grain)), col="blue", lty=1, lwd=1.8) 
lines(density(mhw$grain), lty=2, lwd=1.8, col="black")
rm(res, x)


## -------------------------------------------------------------------
plot(density(mhw$grain), col="darkblue",
    main="Grain yield, lb. per plot", lwd=1.5, ylim=c(0,1),
    xlab=paste("Sample mean:",round(mean(mhw$grain), 3),
      "; s.d:", round(sd(mhw$grain),3)))
grid()
rug(mhw$grain)
curve(dnorm(x, mean(mhw$grain), sd(mhw$grain)), 2.5, 6, add=T,
      col="darkred", lwd=1.5)
text(2.5, 0.85, "Empirical", col="darkblue", pos=4)
text(2.5, 0.8, "Theoretical normal", col="darkred", pos=4)


## -------------------------------------------------------------------
shapiro.test(mhw$grain)


## -------------------------------------------------------------------
(t.q13 <- qt(c(.25, .75), length(mhw$grain) - 1))


## -------------------------------------------------------------------
(pe <- mean(mhw$grain) + t.q13 * sd(mhw$grain))


## -------------------------------------------------------------------
rel.error <- (diff(pe)/2) / mean(mhw$grain)
round(100 * rel.error, 2)


## -------------------------------------------------------------------
(yield.ha <- mean(mhw$grain)*500/(0.40469)/(2.2046226))


## -------------------------------------------------------------------
round(yield.ha * rel.error, 2)


## ----echo=FALSE, results="hide"-------------------------------------
ans.re <- rel.error
ans.yld <- yield.ha
ans.yld.pe <- round(yield.ha * rel.error, 2)


## -------------------------------------------------------------------
rm(t.q13, pe, rel.error, yield.ha)


## ----echo=FALSE, results="hide"-------------------------------------
rm(ans.re, ans.yld, ans.yld.pe)


## -------------------------------------------------------------------
plot(mhw$straw ~ mhw$grain,
     xlab="Grain yield (lb. plot-1)",
     ylab="Straw yield (lb. plot-1)",
     pch=20, col="darkblue", cex=1.2,
     sub="Medians shown with red dashed lines") 
title(main="Straw vs. grain yields, Mercer-Hall experiment")
grid()
abline(v=median(mhw$grain), lty=2, col="red")
abline(h=median(mhw$straw), lty=2, col="red")


## -------------------------------------------------------------------
colMeans(mhw[,c("grain","straw")])


## -------------------------------------------------------------------
var(mhw[,c("grain","straw")])


## -------------------------------------------------------------------
require(MASS)
sim.sample <- mvrnorm(length(mhw$grain), 
                      mu=colMeans(mhw[,c("grain","straw")]),
                      Sigma=var(mhw[,c("grain","straw")]))
head(sim.sample)
summary(sim.sample)
summary(mhw[,c("grain", "straw")])


## ----fig.width=12, fig.height=12------------------------------------
par(mfrow=c(2,2))
grain.lim = c(min(sim.sample[,"grain"], mhw$grain), 
              max(sim.sample[,"grain"], mhw$grain)) 
straw.lim = c(min(sim.sample[,"straw"], mhw$straw), 
              max(sim.sample[,"straw"], mhw$straw)) 
hist(mhw$grain, xlim=grain.lim,
     main="Grain (actual)", col="lightyellow",
     breaks=seq(grain.lim[1],grain.lim[2], length=17))
hist(sim.sample[,"grain"],xlim=grain.lim, 
     main="Grain (simulated)", col="cornsilk",
     breaks=seq(grain.lim[1],grain.lim[2], length=17))
hist(mhw$straw, xlim=straw.lim, 
     main="Straw (actual)", col="lightblue",
     breaks=seq(straw.lim[1],straw.lim[2], length=17))
hist(sim.sample[,"straw"],xlim=straw.lim,
     main="Straw (simulated)", col="lavender",
     breaks=seq(straw.lim[1],straw.lim[2], length=17))
par(mfrow=c(1,1))


## ----fig.width=12, fig.height=6, out.width='\\textwidth'------------
par(mfrow=c(1,2))
plot(sim.sample, 
     main="Simulated straw vs. grain yields", 
     xlab="Grain (lb. plot-1)", ylab="Straw (lb. plot-1)", 
     xlim=grain.lim, ylim=straw.lim, pch=20, col="blue")  
abline(v=median(sim.sample[,1]), lty=2, col="red")
abline(h=median(sim.sample[,2]), lty=2, col="red")
plot(mhw$grain, mhw$straw, 
     main="Actual straw vs. grain yields", 
     xlab="Grain (lb. plot-1)", ylab="Straw (lb. plot-1)", 
     xlim=grain.lim, ylim=straw.lim, pch=20, col="black")  
abline(v=median(mhw$grain), lty=2, col="red")
abline(h=median(mhw$straw), lty=2, col="red")
par(mfrow=c(1,1))


## -------------------------------------------------------------------
rm(sim.sample, grain.lim, straw.lim)


## -------------------------------------------------------------------
cor(mhw$grain, mhw$straw)
cor.test(mhw$grain, mhw$straw)


## -------------------------------------------------------------------
model.straw.grain <- lm(straw ~ grain, data = mhw)
summary(model.straw.grain)
coefficients(model.straw.grain)


## -------------------------------------------------------------------
plot(straw ~ grain, data=mhw)
title("Straw yield predicted by grain yield")
abline(model.straw.grain)
grid()
text(4.5, 4.5, paste("slope:",
                     round(coefficients(model.straw.grain)[2], 2)))


## ----fig.width=12, fig.height=7, out.width='0.8\\textwidth'---------
hist(residuals(model.straw.grain),
     main="Residuals from straw vs.\ grain linear model")
rug(residuals(model.straw.grain))


## -------------------------------------------------------------------
qqnorm(residuals(model.straw.grain),
       main="Residuals from straw vs.\ grain linear model")
qqline(residuals(model.straw.grain))
summary(residuals(model.straw.grain))


## -------------------------------------------------------------------
shapiro.test(residuals(model.straw.grain))


## -------------------------------------------------------------------
lim <- c(min(fitted(model.straw.grain), mhw$straw),
         max(fitted(model.straw.grain), mhw$straw))
plot(fitted(model.straw.grain), mhw$straw,
     xlab="Modelled", ylab="Actual", asp=1,
     xlim=lim, ylim=lim, pch=20,
     col=ifelse(
       (abs(fitted(model.straw.grain) - mhw$straw) < 1),
       "gray",
       ifelse(fitted(model.straw.grain) < mhw$straw, "blue","red")),
     cex=ifelse(
       (abs(fitted(model.straw.grain) - mhw$straw) < 1),1,1.3)
)
title("Actual vs. modelled straw yields")
grid()
abline(0,1)
rm(lim)


## -------------------------------------------------------------------
which.high.res <- which(abs(residuals(model.straw.grain)) > 1.8)
sort(residuals(model.straw.grain)[which.high.res])


## -------------------------------------------------------------------
high.res <- mhw[which.high.res,]
high.res[order(high.res$gsr),]
rm(which.high.res, high.res)


## -------------------------------------------------------------------
plot(model.straw.grain, which=1); grid()


## -------------------------------------------------------------------
plot(model.straw.grain, which=5); grid()


## -------------------------------------------------------------------
plot(model.straw.grain, which=6, id.n=10); grid()


## -------------------------------------------------------------------
t1 <- mean(mhw$grain); t2 <- sd(mhw$grain);
p.frame <- data.frame(grain=seq(t1-2*t2, t1+2*t2, t2))
predict(model.straw.grain, newdata=p.frame,
        interval="confidence", level=0.95)
predict(model.straw.grain, newdata=p.frame,
        interval="prediction", level=0.95)


## -------------------------------------------------------------------
p.frame <- data.frame(grain=seq(from=2,to=6,by=0.1))
pred.c <- predict(model.straw.grain, newdata=p.frame, 
                  interval="confidence", level=0.95)
pred.p <- predict(model.straw.grain, newdata=p.frame, 
                  interval="prediction", level=0.95)
plot(straw ~ grain, data=mhw, pch=20)
title(main="Straw yield predicted by grain yield", 
      sub="Prediction (blue) and confidence (red) intervals")
abline(model.straw.grain); grid()
lines(p.frame$grain, pred.c[, "lwr"], col = 2, lwd = 1.5)
lines(p.frame$grain, pred.c[, "upr"], col = 2, lwd = 1.5)
lines(p.frame$grain, pred.p[, "lwr"], col = 4, lwd = 1.5)
lines(p.frame$grain, pred.p[, "upr"], col = 4, lwd = 1.5)
points(mean(mhw$grain),mean(mhw$straw),pch=23,cex=2, bg="red")
abline(h=mean(mhw$straw), lty=2); abline(v=mean(mhw$grain), lty=2)


## -------------------------------------------------------------------
rm(t1,t2,p.frame,pred.c,pred.p)


## -------------------------------------------------------------------
model.grain.straw <- lm(grain ~ straw, data=mhw)
summary(model.grain.straw)
summary(model.straw.grain)


## -------------------------------------------------------------------
coefficients(model.straw.grain)["grain"]
1/coefficients(model.grain.straw)["straw"]


## ----fig.width=6, fig.height=6--------------------------------------
plot(straw ~ grain, pch=1, data=mhw,
     main="Mercer-Hall wheat yields",
     xlab="grain (lb. plot-1)", ylab="straw (lb. plot-1)")
title(sub="straw vs. grain: solid; grain vs. straw: dashed")
abline(model.straw.grain)
beta <- coefficients(model.grain.straw)
abline(-beta["(Intercept)"]/beta["straw"],
       1/beta["straw"], lty=2)
grid()
rm(beta)


## -------------------------------------------------------------------
var(mhw$straw)/var(mhw$grain)


## -------------------------------------------------------------------
struct.beta <- function(y, x, lambda) {
  a <- var(y)-lambda*var(x);
  c <- var(x,y);
  return((a + sqrt(a^2 + 4 * lambda * c^2))/(2*c))
}


## -------------------------------------------------------------------
print(paste("Forward:", 
            round(coefficients(model.straw.grain)["grain"],4)))
print(paste("Proportional:", 
            round(struct.beta(mhw$straw,
                              mhw$grain,var(mhw$straw)/var(mhw$grain)),4)))
print(paste("Inverse proportional:", 
            round(1/struct.beta(mhw$grain,mhw$straw,
                                var(mhw$grain)/var(mhw$straw)),4)))
print(paste("Orthogonal:", 
            round(struct.beta(mhw$straw,mhw$grain,1),4)))
print(paste("Inverse orthogonal:", 
            round(1/struct.beta(mhw$grain,mhw$straw,1),4)))
print(paste("Reverse:", 
            round(1/coefficients(model.grain.straw)["straw"],4)))


## ----fig.height=7, fig.width=7--------------------------------------
plot(straw ~ grain, main="Mercer-Hall wheat yields",
     sub="Regression slopes", xlab="grain (lb. plot-1)",
     ylab="straw (lb. plot-1)", data=mhw)
abline(model.straw.grain, col="blue")
beta <- coefficients(model.grain.straw)
abline(-beta["(Intercept)"]/beta["straw"]  , 1/beta["straw"],
       lty=2, col="green")
beta <- struct.beta(mhw$straw, mhw$grain, 1)
abline(mean(mhw$straw)-beta*mean(mhw$grain), beta, lty=3, col="red")
beta <- struct.beta(mhw$straw, mhw$grain,var(mhw$straw)/var(mhw$grain))
abline(mean(mhw$straw)-beta*mean(mhw$grain), beta, lty=4, col="brown")
lines(c(4,4.5),c(5,5), lty=1, col="blue") 
lines(c(4,4.5),c(4.4,4.4), lty=4, col="brown") 
lines(c(4,4.5),c(4.6,4.6), lty=3, col="red") 
lines(c(4,4.5),c(4.8,4.8), lty=2, col="green") 
grid()
text(4.5,5,paste("Forward:",
             round(coefficients(model.straw.grain)["grain"],4)),
     col="blue", pos=4)
text(4.5,4.4,paste("Proportional:",
round(struct.beta(mhw$straw,mhw$grain,var(mhw$straw)/var(mhw$grain)),4)),
     col="brown", pos=4)
text(4.5,4.6, paste("Orthogonal:",
                round(struct.beta(mhw$straw,mhw$grain,1),4)),
     col="red", pos=4)
text(4.5,4.8,paste("Reverse:",
               round(1/coefficients(model.grain.straw)["straw"],4)),
     col="green", pos=4)


## -------------------------------------------------------------------
model.straw.grain.0 <- lm(straw ~ grain - 1, data=mhw)
summary(model.straw.grain.0)


## ----show-with------------------------------------------------------
mean(mhw$grain)
with(mhw, mean(grain))


## ----fig.height=7, fig.width=7--------------------------------------
with(mhw, 
     plot(straw ~ grain, main="Mercer-Hall wheat yields",
          xlab="grain (lb. plot-1)", ylab="straw (lb. plot-1)",
          xlim=c(0,ceiling(max(grain))),
          ylim=c(0, ceiling(max(straw))), cex=0.8))
abline(model.straw.grain, col="blue")
abline(model.straw.grain.0, col="red")
grid()
text(4.5,4, paste("    With:",
             round(coefficients(model.straw.grain)["grain"],2)),
     col="blue", pos=4)
text(4.5,3.4,paste("Without:",
              round(coefficients(model.straw.grain.0)["grain"],2)),
     col="red", pos=4)
abline(v=mean(mhw$grain), col="darkgray", lty=2)
abline(h=mean(mhw$straw), col="darkgray", lty=2)
points(mean(mhw$grain), mean(mhw$straw), cex=2, pch=23, bg="red")
abline(h=coefficients(model.straw.grain)["(Intercept)"],
       col="darkgray", lty=2)
text(1,1,paste("Intercept:",
          round(coefficients(model.straw.grain)["(Intercept)"],2)),
     col="blue", pos=4)


## -------------------------------------------------------------------
mean(residuals(model.straw.grain.0))
mean(residuals(model.straw.grain))


## -------------------------------------------------------------------
(TSS <- sum((mhw$straw-mean(mhw$straw))^2))
(TSS0 <- sum(mhw$straw^2))
(RSS <- sum(residuals(model.straw.grain)^2))
(RSS0 <- sum(residuals(model.straw.grain.0)^2))
(R2 <- 1 - (RSS/TSS))     
(R20 <- 1 - (RSS0/TSS0))  
rm(TSS, TSS0, RSS, RSS0, R2, R20)


## -------------------------------------------------------------------
summary(model.straw.grain.0)$adj.r.squared
summary(model.straw.grain)$adj.r.squared


## -------------------------------------------------------------------
sqrt(sum(residuals(model.straw.grain)^2)/(length(mhw$straw)))
sqrt(sum(residuals(model.straw.grain.0)^2)/(length(mhw$straw)))


## ----add-in-north---------------------------------------------------
mhw <- cbind(mhw, in.north = (mhw$r < 11))
str(mhw)


## -------------------------------------------------------------------
summary(mhw$in.north)


## -------------------------------------------------------------------
   # plot halves in appropriate colours
with(mhw,
     plot(c, r, col=ifelse(in.north,"blue","darkslategrey"), 
          cex=1.3*straw/max(straw), pch=1,
          xlab="Column", ylab="Row", ylim=c(20,1),
          sub="North: blue; South: gray"))
title(main="Postplot of straw yields, coded by field half")
abline(h=10.5, lty=2)


## -------------------------------------------------------------------
par(mfrow=c(3,1))
boxplot(grain ~ in.north, names=c("S", "N"),
        main="Grain yield", horizontal=T, data=mhw)
boxplot(straw ~ in.north, names=c("S", "N"),
        main="Straw yield", horizontal=T, data=mhw)
boxplot(gsr ~ in.north, names=c("S", "N"),
        main="Grain/straw ratio", horizontal=T, data=mhw)
par(mfrow=c(1,1))


## -------------------------------------------------------------------
with(mhw, by(grain, in.north, summary))
with(mhw,      by(straw, in.north, summary))
with(mhw,      by(gsr, in.north, summary))
with(mhw,      by(grain, in.north, var))
with(mhw,      by(straw, in.north, var))
with(mhw,      by(gsr, in.north, var))


## -------------------------------------------------------------------
   # compare the means with a t-test
with(mhw, t.test(straw[in.north], straw[!in.north]))


## -------------------------------------------------------------------
model.straw.ns <- lm(straw ~ in.north, data=mhw)
summary(model.straw.ns)


## -------------------------------------------------------------------
anova(model.straw.ns)


## -------------------------------------------------------------------
qqnorm(residuals(model.straw.ns),
       main="Residuals from one-way ANOVA",
       sub="Straw yield vs. field half")
qqline(residuals(model.straw.ns))


## -------------------------------------------------------------------
quantile(mhw$grain, p=0.01, type=5)
mean(sort(mhw$grain)[5:6])


## -------------------------------------------------------------------
quantile(mhw$grain, p=0.01, type=5)


## -------------------------------------------------------------------
boot.q01 <- function(data, indices){
	obs <- data[indices,]
	return(quantile(obs$grain, p=0.01, type=5))
}


## -------------------------------------------------------------------
require(boot)
b.q01 <- boot(mhw, boot.q01, R=1024)
print(b.q01)


## ----fig.width=12, fig.height=6, out.width='\\textwidth'------------
plot(b.q01)


## -------------------------------------------------------------------
mean(b.q01$t)
b.q01$t0


## -------------------------------------------------------------------
(b.q01.ci <- boot.ci(b.q01, conf = 0.95, type=c("norm","basic")))


## ----eval=F---------------------------------------------------------
## struct.beta <- function(y, x, lambda) {
##   a <- var(y)-lambda*var(x);
##   c <- var(x,y);
##   return((a + sqrt(a^2 + 4 * lambda * c^2))/(2*c))
## }


## ----eval=F---------------------------------------------------------
## beta <- struct.beta(straw,grain,var(straw)/var(grain))
## alpha <- mean(straw)-beta*mean(grain)


## -------------------------------------------------------------------
boot.sr <- function (data, indices) {
	obs <- data[indices,]
        beta <- struct.beta(obs$straw,obs$grain,
                            var(obs$straw)/var(obs$grain))
        alpha <- mean(obs$straw)-beta*mean(obs$grain)
	return(c(beta, alpha))
}


## -------------------------------------------------------------------
b.sr <- boot(mhw, boot.sr, R=1024)
print(b.sr)


## ----fig.width=12, fig.height=6, out.width='\\textwidth'------------
plot(b.sr, index=1)


## ----fig.width=12, fig.height=6, out.width='\\textwidth'------------
plot(b.sr, index=2)


## -------------------------------------------------------------------
mean(b.sr$t[,1])
(b.sr.ci <- boot.ci(b.sr, conf = 0.95, type=c("norm","basic"), index=1))
mean(b.sr$t[,2])
(b.sr.ci <- boot.ci(b.sr, conf = 0.95, type=c("norm","basic"), index=2))


## ----echo=F, results="hide"-----------------------------------------
b.corr <- function(data,indices) { 	obs <- data[indices,]; return(cor(obs$grain, obs$straw)) }
boot.corr <- boot(mhw, b.corr, R=1024)
boot.corr.ci <- boot.ci(boot.corr, conf=0.95, type="basic")


## ----echo=F, fig.width=12, fig.height=6, out.width='\\textwidth'----
plot(boot.corr)


## -------------------------------------------------------------------
rm(model.grain.straw)
rm(boot.q01, b.q01, b.q01.ci, boot.sr, b.sr, b.sr.ci)
detach(package:boot)


## ----echo=F,results="hide"------------------------------------------
rm(boot.corr, b.corr, boot.corr.ci)



## ----mhw1ar, child='mhw_1ar.Rnw'------------------------------------

## ----set-parent-1ar, echo=FALSE, cache=FALSE------------------------
set_parent('mhw.Rnw')


## -------------------------------------------------------------------
mhw.c <- mhw


## -------------------------------------------------------------------
ix <- (mhw.c$r < 5) & (mhw.c$c <5)
rownames(mhw.c[ix,])
subset(mhw.c, ix)


## -------------------------------------------------------------------
mhw.c[ix, "grain"] <- mhw.c[ix, "grain"]/4


## -------------------------------------------------------------------
summary(mhw$grain); sd(mhw$grain)
summary(mhw.c$grain); sd(mhw.c$grain)


## ----fig.width=10, fig.height=5, out.width='\\textwidth'------------
par(mfrow=c(1,2))
hist(mhw$grain, xlab="Grain yield, lbs / plot",
     main="Actual", breaks=seq(0,6, by=.25))
rug(mhw$grain)
hist(mhw.c$grain, xlab="Grain yield, lbs / plot",
     main="Contaminated", breaks=seq(0,6, by=.25))
rug(mhw.c$grain)
par(mfrow=c(1,1))


## -------------------------------------------------------------------
mean(mhw$grain); mean(mhw.c$grain)
(mean(mhw.c$grain) - mean(mhw$grain))/mean(mhw$grain)*100
sd(mhw$grain); sd(mhw.c$grain)
(sd(mhw.c$grain) - sd(mhw$grain))/sd(mhw$grain)*100
median(mhw$grain); median(mhw.c$grain)
(median(mhw.c$grain) - median(mhw$grain))/median(mhw$grain)*100
IQR(mhw$grain); IQR(mhw.c$grain)
(IQR(mhw.c$grain) - IQR(mhw$grain))/IQR(mhw$grain)*100


## -------------------------------------------------------------------
head(cbind(mhw[, c("grain", "straw")],rank(mhw$grain),rank(mhw$straw)), n=8)


## ----fig.width=9, fig.height=9, out.width='\\textwidth'-------------
par(mfrow=c(2,2))
plot(rank(mhw$grain), rank(mhw$straw), 
     xlab="Grain rank", ylab="Straw rank",
     pch=1, main="Original")  
plot(mhw$grain, mhw$straw, 
     xlab="Grain (lbs / plot)", ylab="Straw (lbs / plot)",
     pch=20, main="Original")  
plot(rank(mhw.c$grain), rank(mhw.c$straw),
     xlab="Grain rank", ylab="Straw rank", 
     pch=1, main="Contaminated")  
plot(mhw.c$grain, mhw.c$straw, 
     xlab="Grain (lbs / plot)", ylab="Straw (lbs / plot)",
     pch=20, main="Contaminated")  
par(mfrow=c(1,1))


## -------------------------------------------------------------------
(c.p <- cor(mhw$grain, mhw.c$straw, method="pearson"))
(cc.p <- cor(mhw.c$grain, mhw.c$straw, method="pearson"))
(c.s <- cor(mhw$grain, mhw.c$straw, method="spearman"))
(cc.s <- cor(mhw.c$grain, mhw.c$straw, method="spearman"))


## -------------------------------------------------------------------
print(model.straw.grain)
(model.straw.grain.c <- lm(straw ~ grain, data=mhw.c))


## ----fig.width=9, fig.height=9, out.width='\\textwidth'-------------
par(mfrow=c(2,2))
plot(model.straw.grain.c)
par(mfrow=c(1,1))


## ----fig.width=7, fig.height=7--------------------------------------
with(mhw.c, plot(straw ~ grain))
abline(model.straw.grain.c)
abline(model.straw.grain, lty=2, col="blue")
legend(1.5, 8.5, legend=c("fit", "fit to uncontaminated"),
       lty=1:2, col=c("black","blue"))


## -------------------------------------------------------------------
require(MASS)
(model.straw.grain.c.r <- lqs(straw ~ grain, data=mhw.c))
sqrt(mean(residuals(model.straw.grain)^2))


## ----fig.width=7, fig.height=7--------------------------------------
with(mhw.c, plot(straw ~ grain))
abline(model.straw.grain.c.r)
abline(model.straw.grain.c, lty=3, col="red")
abline(model.straw.grain, lty=2, col="blue")
legend(1.5, 8.5,
       legend=c("robust fit", "linear fit", "fit to uncontaminated"), 
       lty=c(1,3,2), col=c("black","red","blue"))



## ----mhw1b, child='mhw_1b.Rnw'--------------------------------------

## ----set-parent-1b, echo=FALSE, cache=FALSE-------------------------
set_parent('mhw.Rnw')


## -------------------------------------------------------------------
summary(model.straw.ns)


## -------------------------------------------------------------------
summary(model.straw.grain)


## -------------------------------------------------------------------
model.straw.ns.grain <- lm(straw ~ in.north + grain, data=mhw)
summary(model.straw.ns.grain)


## -------------------------------------------------------------------
# scatterplot, coloured by zone
with(mhw,
     plot(straw ~ grain, 
          col=ifelse(in.north,"blue","slategray"),
          pch=20, xlab="grain (lbs plot-1)", ylab="straw (lbs plot-1)"))
title(main="Straw vs. grain yield")
title(sub="N half: blue, S half: grey; whole-field line: red")                  # S
abline(coefficients(model.straw.ns.grain)["(Intercept)"] ,
       coefficients(model.straw.ns.grain)["grain"], col="slategray")
# N
abline((coefficients(model.straw.ns.grain)["(Intercept)"] 
    +  coefficients(model.straw.ns.grain)["in.northTRUE"])
   , coefficients(model.straw.ns.grain)["grain"], col="blue")
# univariate line
abline(model.straw.grain, lty=2, col="red")
grid()


## -------------------------------------------------------------------
summary(model.straw.ns)$adj.r.squared
summary(model.straw.grain)$adj.r.squared
summary(model.straw.ns.grain)$adj.r.squared


## -------------------------------------------------------------------
anova(model.straw.ns.grain, model.straw.ns)
anova(model.straw.ns.grain, model.straw.grain)


## -------------------------------------------------------------------
model.straw.ns.grain.i <- lm(straw ~ in.north * grain, data=mhw)
summary(model.straw.ns.grain.i)


## -------------------------------------------------------------------
with(mhw,
     plot(straw ~ grain, 
          col=ifelse(in.north,"blue","slategray"),
          pch=20, xlab="grain (lbs plot-1)", ylab="straw (lbs plot-1)"))
title(main="Straw vs. grain, by field half")
title(sub="Interaction: solid lines; additive: dashed lines")
abline(lm(straw ~ grain, data=mhw, subset=in.north), col="blue")
abline(lm(straw ~ grain, data=mhw, subset=!in.north), col="slategray")
abline(coefficients(model.straw.ns.grain)["(Intercept)"] , 
       coefficients(model.straw.ns.grain)["grain"], col="slategray", lty=2)
# N
abline((coefficients(model.straw.ns.grain)["(Intercept)"] 
    +  coefficients(model.straw.ns.grain)["in.northTRUE"])
   , coefficients(model.straw.ns.grain)["grain"], col="blue", lty=2)
grid()


## -------------------------------------------------------------------
summary(model.straw.ns.grain)$adj.r.squared
summary(model.straw.ns.grain.i)$adj.r.squared
anova(model.straw.ns.grain.i, model.straw.ns.grain)


## ----fig.height=12, fig.width=12, out.width='\\textwidth'-----------
par(mfrow=c(2,2))
plot(model.straw.ns.grain, which=c(1,2,5,6), id.n=10)
par(mfrow=c(1,1))


## -------------------------------------------------------------------
(selected <- which(abs(rstandard(model.straw.ns.grain)) > 3))
rstandard(model.straw.ns.grain)[selected]


## ----keep.source=T--------------------------------------------------
mhw.hires <- cbind(mhw[selected,], 
                   sres = rstandard(model.straw.ns.grain)[selected])
rm(selected)
str(mhw.hires)


## -------------------------------------------------------------------
mhw.hires[order(mhw.hires$sres),]


## ----vis-large-resid------------------------------------------------
with(mhw,
     plot(c, r, ylim=c(20,1),
          cex=3*gsr/max(gsr), pch=20,
          col=ifelse(rstandard(model.straw.ns.grain) > 3, "brown",
              ifelse(rstandard(model.straw.ns.grain) < (-3), "red",
                     ifelse(in.north, "lightblue", "gray"))),
          xlab="column", ylab="row"))
abline(h=10.5)
title(main="Large residuals, straw yield vs.\ field half and grain yield")
title(sub="Positive: brown; negative: red")


## -------------------------------------------------------------------
model.straw.ns.grain.adj <- lm(straw ~ in.north + grain, data=mhw[-c(15,35),])
summary(model.straw.ns.grain.adj)
summary(model.straw.ns.grain)


## -------------------------------------------------------------------
model.straw.ns.grain.nest <- lm(straw ~ in.north / grain, data=mhw)
summary(model.straw.ns.grain.nest)
plot(straw ~ grain, data=mhw, 
     col=ifelse(mhw$in.north, "blue", "slategray"),
     pch=20, xlim=c(2.5,5.5), ylim=c(4,9.5))
coef <- coef(model.straw.ns.grain.nest)
abline(coef[1], coef[3], col="slategray")
abline(coef[1]+coef[2], coef[4], col="blue")
grid()


## -------------------------------------------------------------------
rm(model.straw.ns, model.straw.grain, model.straw.ns.grain)
rm(model.straw.ns.grain.adj, model.straw.ns.grain.i, model.straw.ns.grain.nest)
rm(struct.beta, beta, mhw.hires)



## ----mhw1bpc, child='mhw_1bpc.Rnw'----------------------------------

## ----set-parent-1bpc, echo=FALSE, cache=FALSE-----------------------
set_parent('mhw.Rnw')


## -------------------------------------------------------------------
pc <- prcomp(mhw[,c("grain","straw")], scale=T)
# pc <- prcomp(mhw[,c("grain","straw")], scale=F)
summary(pc)


## -------------------------------------------------------------------
pc$rotation


## ----fig.width=9, fig.height=9, out.width='\\textwidth'-------------
biplot(pc, pc.biplot=T)
abline(h=0, lty=2)
abline(v=0, lty=2)
grid()


## -------------------------------------------------------------------
summary(pc$x)
mhw[ix.max <- which.max(pc$x[,"PC1"]),]
pc$x[ix.max,]
mhw[ix.min <- which.min(pc$x[,"PC1"]),]
pc$x[ix.min,]


## -------------------------------------------------------------------
pc$sdev
pc$x[ix.max,]/pc$sdev
pc$x[ix.min,]/pc$sdev


## -------------------------------------------------------------------
mhw[ix.max <- which.max(pc$x[,"PC2"]),]
pc$x[ix.max,]/pc$sdev
mhw[ix.min <- which.min(pc$x[,"PC2"]),]
pc$x[ix.min,]/pc$sdev


## -------------------------------------------------------------------
pc$rotation[,1]*pc$sdev[1]
pc$rotation[,2]*pc$sdev[2]



## ----mhw1c, child='mhw_1c.Rnw'--------------------------------------

## ----set-parent-1c, echo=FALSE, cache=FALSE-------------------------
set_parent('mhw.Rnw')


## -------------------------------------------------------------------
head(row.names(mhw), n=10)


## -------------------------------------------------------------------
head(mhw, n=10)


## -------------------------------------------------------------------
dim(mhw)
(n <- dim(mhw)[1])
set.seed(123)
head(index.calib <- sort(sample(1:n, size=floor(n*3/4), replace=F)), n=12)
length(index.calib)


## -------------------------------------------------------------------
head(index.valid <- setdiff(1:n, index.calib), n=12)
length(index.valid)


## -------------------------------------------------------------------
setequal(union(index.calib, index.valid), 1:n)


## -------------------------------------------------------------------
cal.straw.grain <- lm(straw ~ grain, data=mhw, subset=index.calib)
summary(cal.straw.grain)


## ----keep.source=T--------------------------------------------------
model.straw.grain <- lm(straw ~ grain, data=mhw)
(coef(cal.straw.grain) - coef(model.straw.grain))
((coef(cal.straw.grain) - coef(model.straw.grain))
                    /coef(model.straw.grain))*100


## -------------------------------------------------------------------
pred <- predict.lm(cal.straw.grain, newdata=mhw[index.valid,])


## -------------------------------------------------------------------
actual <- mhw[index.valid, "straw"]


## ----fig.width=10, fig.height=5, out.width='\\textwidth'------------
summary(pred); summary(actual)
par(mfrow=c(1,2))
hist(pred, main="", xlab="Predicted straw yields, lb / plot",
     breaks=seq(4,9.2,by=0.4), freq=F, ylim=c(0,.8))
rug(pred)
hist(actual, main="", xlab="Actual straw yields, lb / plot",
     breaks=seq(4,9.2,by=0.4), freq=F, ylim=c(0,.8))
rug(actual)
par(mfrow=c(1,1))


## -------------------------------------------------------------------
plot(actual ~ pred, ylab="Actual", xlab="Predicted", asp=1,
     main="Mercer-Hall trial, straw yield, lbs/plot",
     xlim=c(4.5,9), ylim=c(4.5,9));
abline(0,1); grid()


## -------------------------------------------------------------------
(valid.msd <- sum((actual - pred)^2)/length(index.valid))
(valid.msd <- mean((actual - pred)^2))
(valid.rmsep <- sqrt(valid.msd))


## -------------------------------------------------------------------
(rmse <- sqrt(mean(residuals(cal.straw.grain)^2)))


## -------------------------------------------------------------------
(valid.bias <- (mean(pred) - mean(actual)))
(valid.sb <- valid.bias^2)
valid.sb/valid.msd*100


## -------------------------------------------------------------------
regr.actual.pred <- lm(actual ~ pred)
summary(regr.actual.pred)
plot(actual ~ pred, ylab="Actual", xlab="Predicted", asp=1,
     main="Mercer-Hall trial, straw yield, lbs/plot",
     xlim=c(4.5,9), ylim=c(4.5,9));
abline(regr.actual.pred, col="red")
abline(0,1); grid()
text(4.5, 8.5, paste("Gain:", round(coef(regr.actual.pred)[2], 2)),
     pos=4, col="red")
legend(7.5, 5, c("1:1","regression"), lty=1, col=c("black","red"))


## -------------------------------------------------------------------
regr.actual.pred.0 <- lm(I(actual - pred) ~ pred)
summary(regr.actual.pred.0)
plot(I(actual - pred) ~ pred, ylab="Actual - Predicted",
     xlab="Predicted",
     main="Mercer-Hall trial, straw yield, lbs/plot")
grid(); abline(regr.actual.pred.0, col="red"); abline(h=0)
text(5, 1.6, paste("Slope:",
                   round(coef(regr.actual.pred.0)[2], 2)),
     pos=4, col="red")
legend(7,3, c("1:1","regression"), lty=1, col=c("black","red"))


## -------------------------------------------------------------------
coef(regr.actual.pred)[2] - coef(regr.actual.pred.0)[2]


## -------------------------------------------------------------------
b <- coef(regr.actual.pred)[2]; names(b) <- NULL; print(b)


## -------------------------------------------------------------------
(valid.msd.pred <- mean((pred - mean(pred))^2))


## -------------------------------------------------------------------
(valid.nu <- (1 - b)^2 * valid.msd.pred)
valid.nu/valid.msd*100


## -------------------------------------------------------------------
(valid.msd.actual <- mean((actual - mean(actual))^2))


## -------------------------------------------------------------------
(r2 <- summary(regr.actual.pred)$r.squared)
(r2 <- cor(actual, pred)^2)
(valid.lc <- (1 - r2) * valid.msd.actual)
valid.lc/valid.msd * 100


## -------------------------------------------------------------------
print(valid.msd - (valid.sb + valid.nu + valid.lc))


## -------------------------------------------------------------------
cal.straw.grain.00 <- lm(straw ~ grain - 1, data=mhw, subset=index.calib)
summary(cal.straw.grain.00)


## -------------------------------------------------------------------
plot(straw ~ grain, data=mhw, subset=index.calib, xlim=c(0,6), ylim=c(0,9))
title("Mercer-Hall trial, calibration dataset")
abline(cal.straw.grain, lty=2)
abline(cal.straw.grain.00, col="red")
grid()
legend(4, 1, c("with intercept","no intercept"), 
       lty=c(2,1), col=c("black","red"))
text(0, 2.5, paste("Slope:",round(coef(cal.straw.grain)[2],2)), pos=4)
text(1, 0.5, paste("Slope:",round(coef(cal.straw.grain.00)[1],2)), col="red")


## -------------------------------------------------------------------
pred <- predict.lm(cal.straw.grain.00, newdata=mhw[index.valid,])
summary(pred)


## -------------------------------------------------------------------
regr.actual.pred.00 <- lm(actual ~ pred)
plot(actual ~ pred, ylab="Actual", xlab="Predicted", asp=1,
     main="Mercer-Hall trial, straw yield, lbs/plot",
     xlim=c(4.5,9), ylim=c(4.5,9));
abline(regr.actual.pred.00, col="red")
abline(0,1); grid()
text(4.5, 8.5, paste("Gain:",
                     round(coef(regr.actual.pred.00)[2], 2)),
     pos=4, col="red")
legend(7.5, 5, c("1:1","regression"), lty=1,
       col=c("black","red"))


## -------------------------------------------------------------------
(msd.00 <- mean((actual - pred)^2))
(rmsep.00 <- sqrt(msd.00))
(sb.00 <- (mean(pred) - mean(actual))^2)
(nu.00 <- (1 - coef(regr.actual.pred.00)[2])^2 * mean((pred - mean(pred))^2))
(lc.00 <- (1 - cor(actual, pred)^2) * mean((actual - mean(actual))^2))


## -------------------------------------------------------------------
sb.00/msd.00*100
nu.00/msd.00*100
lc.00/msd.00*100
msd.00 - (sb.00 + nu.00 + lc.00)


## ----eval=T---------------------------------------------------------
rm(n, index.valid, index.calib, actual)
rm(cal.straw.grain, pred)
rm(valid.msd, rmse)
rm(regr.actual.pred, regr.actual.pred.0, valid.bias, valid.sb, valid.lc)
rm(b, valid.nu, valid.msd.pred, valid.msd.actual, r2)
rm(cal.straw.grain.00, regr.actual.pred.00, msd.00, rmsep.00, sb.00, nu.00, lc.00)



## ----mhw1d, child='mhw_1d.Rnw'--------------------------------------

## ----set-parent-1d, echo=FALSE, cache=FALSE-------------------------
set_parent('mhw.Rnw')


## ----function.Xval--------------------------------------------------
Xval <- function(model, dset) {
  pred <- rep(0, nrow(dset))
  n <- length(coefficients(model))
  coef <- matrix(0, nrow=nrow(dset), ncol=n)
  colnames(coef) <- paste("b", as.character(0:(n-1)) , sep="")
  for (i in 1:nrow(dset)) {
    m <- lm(formula(model), data=dset[-i,]);
    pred[i] <- predict(m, newdata=dset[i,])
    coef[i,] <- coefficients(m)
  }
  return(list(pred=pred, coef=coef))
}


## ----function.Xval.apply--------------------------------------------
xval.fit <- Xval(model.straw.grain, mhw)
str(xval.fit)


## -------------------------------------------------------------------
lim <- range(xval.fit$pred, mhw$straw)
plot(mhw$straw ~ xval.fit$pred, asp=1, xlim=lim, ylim=lim, 
     xlab="LOOCV prediction", ylab="Actual")
abline(0,1)
grid()


## -------------------------------------------------------------------
xval.res <- xval.fit$pred - mhw$straw
summary(xval.res)
hist(xval.res, main="LOOCV residuals")
rug(xval.res)


## -------------------------------------------------------------------
sqrt(sum(xval.res^2)/nrow(mhw))
sqrt(sum(residuals(model.straw.grain)^2)/(model.straw.grain$df.residual))
print(valid.rmsep)


## -------------------------------------------------------------------
summary(xval.fit$coef, digits=5)
coefficients(model.straw.grain)


## ----echo=FALSE-----------------------------------------------------
round(sqrt(sum(xval.res^2)/nrow(mhw)),4)
round(valid.rmsep,4)
round(sqrt(sum(residuals(model.straw.grain)^2)/(model.straw.grain$df.residual)),4)
round(diff(range(xval.fit$coef[,2])),4)
round(coefficients(model.straw.grain)["grain"],4)
round(diff(range(xval.fit$coef[,1])),4)
round(coefficients(model.straw.grain)[1],4)



## ----mhw2, child='mhw_2.Rnw'----------------------------------------

## ----set-parent-2, echo=FALSE, cache=FALSE--------------------------
set_parent('mhw.Rnw')


## ----echo=FALSE, results="hide"-------------------------------------
options(prompt="> ", continue="+ ", digits=5, width=70)
par(mfrow=c(1,1))


## -------------------------------------------------------------------
with(mhw, plot(c, r, type="n", xlab="column", ylab="row", ylim=c(20,1),
     main="Layout of the Mercer-Hall uniformity trial"))
abline(v=1:25, lty=1, col="darkgray")
abline(h=1:20, lty=1, col="darkgray")
   # rownames() gives the plot number
with(mhw, text(c,r, rownames(mhw), cex=.5))


## -------------------------------------------------------------------
with(mhw,
     plot(c, r, pch=21, col="black", bg="lightblue", ylim=c(20,1),
          xlab="column", ylab="row",
          main="Mercer-Hall uniformity trial",
          sub="Area of circles proportional to grain yield",
          cex=2*grain/max(grain)))


## -------------------------------------------------------------------
   # compute cutoff points for the octiles
(q8 <- quantile(mhw$grain, seq(0, 1, length=9)))
   # classify each observation
grain.c <- cut(mhw$grain, q8, include.lowest=T, labels=F)
sort(unique(grain.c))


## -------------------------------------------------------------------
   # look at the colour ramp: these are RRGGBB from 00 to FF (hex)
terrain.colors(8)


## -------------------------------------------------------------------
print(0xf2/0xff)


## -------------------------------------------------------------------
with(mhw,
     plot(c, r, pch=20, cex=2, bg="lightblue", ylim=c(20,1),
          xlab="column", ylab="row",
          main="Mercer-Hall uniformity trial",
          sub="Colour of circles from low yield (green) to high (gray)",
          col=terrain.colors(8)[grain.c]))


## ----out.width='0.8\\textwidth'-------------------------------------
plot(wireframe(grain ~ r + c, data=mhw, drape=T,
               aspect=c(1,.2), col.regions=sp::bpy.colors(128),
               main="Grain yield, lb. per plot",
               screen= c(z=30, x=-60),
               xlab="N to S", ylab="W to E",
               sub="Looking SE from NW corner of field"))


## ----out.width='0.8\\textwidth'-------------------------------------
plot(wireframe(grain ~ r + c, data=mhw, drape=T,
               aspect=c(1,.08), col.regions=bpy.colors(128),
               main="Grain yield, lb. per plot",
               screen= c(z=270, x=-75), zlab="",
               xlab="N to S", ylab="W to E",
               sub="Looking N from S end of field"))


## -------------------------------------------------------------------
ha2ac <- 2.471054
ft2m <- .3048
(field.area <- 10000/ha2ac)


## -------------------------------------------------------------------
(plot.area <- field.area/500)
(plot.len <- sqrt(field.area)/20)
(plot.wid <- sqrt(field.area)/25)
rm(ha2ac, ft2m, field.area)


## -------------------------------------------------------------------
(tot.len <- plot.len*20)
(tot.wid <- plot.wid*25)
tot.len * tot.wid
rm(tot.len, tot.wid)


## -------------------------------------------------------------------
plot.wid/2
plot.len/2


## -------------------------------------------------------------------
nrow <- length(unique(mhw$r))
ncol <- length(unique(mhw$c))
sx <- seq(plot.wid/2, plot.wid/2+(ncol-1)*plot.wid, length=ncol)
sy <- seq(plot.len/2+(nrow-1)*plot.len, plot.len/2, length=nrow)
xy <- expand.grid(x=sx, y=sy)
rm(nrow, ncol, sx, sy)


## -------------------------------------------------------------------
require(sf); require(gstat); require(lattice)


## -------------------------------------------------------------------
mhw.sf <- cbind(mhw, xy)
names(mhw.sf)
mhw.sf <- st_as_sf(mhw.sf, coords = c("x", "y"))
class(mhw.sf)
summary(mhw.sf)
st_bbox(mhw.sf)


## -------------------------------------------------------------------
save(mhw.sf, file="mhw_spatial.RData")


## -------------------------------------------------------------------
library(ggplot2)
library(gridExtra)


## ----fig.width=6, fig.height=6, out.width='\\textwidth'-------------
g1 <- ggplot() +
    geom_sf(data = mhw.sf, aes(colour = grain), shape = 15, size = 3) +
    scale_colour_gradientn(colours = bpy.colors(8)) +
    labs(title = "Grain yield", colour = "lbs/plot")
#
g2 <- ggplot() +
    geom_sf(data = mhw.sf, aes(colour = straw), shape = 15, size = 3) +
    scale_colour_gradientn(colours = heat.colors(8)) +
    labs(title = "Straw yield", colour = "lbs/plot")
#
g3 <- ggplot() +
    geom_sf(data = mhw.sf, aes(colour = gsr), shape = 15, size = 3) +
    scale_colour_gradientn(colours = terrain.colors(8)) +
    labs(title = "Grain/straw ratio", colour = "")
grid.arrange(g1, g2, g3, nrow = 2)


## ----out.width='0.7\\textwidth'-------------------------------------
ggplot() +
    geom_sf(data = mhw.sf, aes(size=grain, colour = grain)) +
    scale_colour_gradientn(colours = bpy.colors(8)) +
    scale_size_binned(n.breaks = 8) +
    labs(title = "Grain yield", 
         subtitle="Symbol size proportional to yield",
         colour = "lbs/plot", size ="")


## -------------------------------------------------------------------
with(mhw, sort(by(grain, r, mean), decreasing=FALSE))
with(mhw, sort(by(grain, c, mean), d=F))


## -------------------------------------------------------------------
  # show the rows horizontally
boxplot(grain ~ r, horizontal=T,  data=mhw, xlim=c(20,1),
        ylab="Row number", xlab="Grain yield, lb. per plot")


## -------------------------------------------------------------------
# and the columns vertically
boxplot(grain ~ c, data=mhw,
        xlab="Column number", ylab="Grain yield, lb. per plot")


## ----fig.width=12, fig.height=6, out.width='\\textwidth'------------
g1 <- ggplot(data = mhw) +
    geom_boxplot(aes(y = as.factor(r), x = grain)) +
    scale_y_discrete(limits = rev) +
    labs(y = "Row number", x = "Grain yield, lbs/plot")
#
g2 <- ggplot(data = mhw) +
    geom_boxplot(aes(x = as.factor(c), y = grain)) +
    labs(x = "Column number", y = "Grain yield, lbs/plot")
grid.arrange(g1, g2, nrow = 1)


## ----ts1------------------------------------------------------------
ts1 <- lm(mhw.sf$grain ~ st_coordinates(mhw.sf))
summary(ts1)


## -------------------------------------------------------------------
rm(ts1)


## ----out.width='0.55\\textwidth'------------------------------------
v <- variogram(grain ~ 1, mhw.sf,
               cutoff=plot.wid*10, width=plot.wid)
print(plot(v, plot.numbers=T, pch=20, cex=1.5))


## -------------------------------------------------------------------
(vm <- vgm(0.15, "Sph", 5, 0.02))
(vm <- vgm(0.03, "Sph", 20, add.to=vm))
print(plot(v, model= vm, main="Estimated variogram model",
           pch = 20, cex = 1.5,
           plot.numbers = TRUE))


## -------------------------------------------------------------------
(vmf <- fit.variogram(v, vm))
print(plot(v, model= vmf, main="Fitted variogram model",
           pch = 20, cex = 1.5,
           plot.numbers = TRUE))


## ----rm-ts1---------------------------------------------------------
set.seed(4502)
head(mhw$grain)
head(s1 <- sample(mhw$grain, length(mhw$grain), replace=FALSE))
head(s2 <- sample(mhw$grain, length(mhw$grain), replace=FALSE))
head(s3 <- sample(mhw$grain, length(mhw$grain), replace=FALSE))
head(s1)
head(s2)
head(s3)


## ----fig.height=8, fig.width=8, out.width='\\textwidth'-------------
par(mfrow=c(2,2))
plot(mhw$c, mhw$r, pch=20, cex=2, bg="lightblue", ylim=c(20,1),
     xlab="column", ylab="row", main="Randomization 1",
     col=terrain.colors(8)[cut(s1, q8, include.lowest=T, labels=F)])
plot(mhw$c, mhw$r, pch=20, cex=2, bg="lightblue", ylim=c(20,1),
     xlab="column", ylab="row", main="Randomization 2",
     col=terrain.colors(8)[cut(s2, q8, include.lowest=T, labels=F)])
plot(mhw$c, mhw$r, pch=20, cex=2, bg="lightblue", ylim=c(20,1),
     xlab="column", ylab="row", main="Randomization 3",
     col=terrain.colors(8)[cut(s3, q8, include.lowest=T, labels=F)])
plot(mhw$c, mhw$r, pch=20, cex=2, bg="lightblue", ylim=c(20,1),
     xlab="column", ylab="row", main="Actual spatial distribution",
     col=terrain.colors(8)[grain.c])
par(mfrow=c(1,1))


## -------------------------------------------------------------------
s1 <- data.frame(s1); coordinates(s1) <- xy
s2 <- data.frame(s2); coordinates(s2) <- xy
s3 <- data.frame(s3); coordinates(s3) <- xy
pv <- plot(variogram(grain ~ 1, mhw.sf, cutoff=plot.wid*10,
                     width=plot.wid), 
           main="Real", pch = 20, cex = 1.5)
p1 <- plot(variogram(s1 ~ 1, s1, cutoff=plot.wid*10,
                     width=plot.wid),
           main="Simulation 1", pch = 20, cex = 1.5)
p2 <- plot(variogram(s2 ~ 1, s2, cutoff=plot.wid*10,
                     width=plot.wid),
           main="Simulation 2", pch = 20, cex = 1.5)
p3 <- plot(variogram(s3 ~ 1, s3, cutoff=plot.wid*10,
                     width=plot.wid),
           main="Simulation 3", pch = 20, cex = 1.5)


## ----fig.height=10, fig.width=10, out.width='0.8\\textwidth'--------
print(p1, split=c(1,1,2,2), more=T)
print(p2, split=c(2,1,2,2), more=T)
print(p3, split=c(1,2,2,2), more=T)
print(pv, split=c(2,2,2,2), more=F)


## -------------------------------------------------------------------
rm(xy, q8, grain.c, s1, s2, s3, pv, p1, p2, p3)


## -------------------------------------------------------------------
table(mhw.sf$in.north)
mhw.sf.ns <- split(mhw.sf, mhw.sf$in.north)
summary(mhw.sf.ns[["TRUE"]])
summary(mhw.sf.ns[["FALSE"]])


## -------------------------------------------------------------------
v.n <- variogram(grain ~ 1, mhw.sf.ns[["TRUE"]], cutoff=30)
v.s <- variogram(grain ~ 1, mhw.sf.ns[["FALSE"]], cutoff=30)


## -------------------------------------------------------------------
g.max = max(v$gamma, v.n$gamma, v.s$gamma)*1.2
plot.vgm.all <- plot(v, plot.numbers=T, pch=20,
                     main="All", ylim=c(0,g.max))
plot.vgm.N <- plot(v.n, plot.numbers=T, pch=20,
                   main="N half", ylim=c(0,g.max))
plot.vgm.S <- plot(v.s, plot.numbers=T, pch=20,
                   main="S half", ylim=c(0,g.max))


## ----fig.width=18, fig.height=6, out.width='\\textwidth'------------
   # split screen and show all three
   # must save the plots and then put them into one window
   # make a stretched window to compare all 3
#windows(h=8, w=20)  # might need smaller numbers on your screen
print(plot.vgm.all, split=c(1,1,3,1), more=T)
print(plot.vgm.N, split=c(2,1,3,1), more=T)
print(plot.vgm.S, split=c(3,1,3,1), more=F)


## -------------------------------------------------------------------
(vmS <- vgm(.14, "Sph", 20, .09))
(vmN <- vgm(.08, "Sph", 13 , .11))
(vmSf <- fit.variogram(v.s, vmN))
(vmNf <- fit.variogram(v.n, vmS))
plot.vgm.all <- plot(v, plot.numbers=T, pch=20, 
                     main="All", model=vmf, ylim=c(0,g.max))
plot.vgm.N <- plot(v.n, plot.numbers=T, pch=20, 
                   main="N half", model=vmNf, ylim=c(0,g.max))
plot.vgm.S <- plot(v.s, plot.numbers=T, pch=20, 
                   main="S half", model=vmSf, ylim=c(0,g.max))


## ----fig.width=18, fig.height=6, out.width='\\textwidth'------------
print(plot.vgm.all, split=c(1,1,3,1), more=T)
print(plot.vgm.N, split=c(2,1,3,1), more=T)
print(plot.vgm.S, split=c(3,1,3,1), more=F)


## -------------------------------------------------------------------
   # when done examining, clean up
rm(g.max, plot.vgm.all, plot.vgm.N, plot.vgm.S)


## -------------------------------------------------------------------
rm(v, v.n, v.s, vm, vmf, vmN, vmNf, vmS, vmSf)


## -------------------------------------------------------------------
rm(mhw.sf.ns)



## ----mhw2g, child='mhw_2g.Rnw'--------------------------------------

## ----set-parent-2g, echo=FALSE, cache=FALSE-------------------------
set_parent('mhw.Rnw')


## ----echo=FALSE, results='hide'-------------------------------------
options(prompt="> ", continue="+ ", digits=5, width=70)
par(mfrow=c(1,1))


## ----show.lm.coef---------------------------------------------------
coef(model.straw.grain <- lm(straw ~ grain, data=mhw.sf))


## ----out.width='0.7\\textwidth'-------------------------------------
mhw.sf$gls.res <- residuals(model.straw.grain)
.lim <- round(max(abs(mhw.sf$gls.res)),2) + 0.1
ggplot() +
    geom_sf(data = mhw.sf, aes(colour = gls.res), shape = 15, size = 3) +
    scale_colour_distiller(palette = "RdBu", limits = c(-.lim, .lim)) +
    labs(title = "straw ~ grain residuals, lb. per plot", colour = "")


## ----out.width='0.55\\textwidth'------------------------------------
vr <- variogram(gls.res ~ 1, loc=mhw.sf, cutoff=plot.wid*10, width=plot.wid)
plot(vr, pl=T, pch=20, cex=1.5)


## -------------------------------------------------------------------
(vgmr <- fit.variogram(vr, model=vgm(0.15, "Sph", 20, 0.05)))
plot(vr, model=vgmr, pch=20, cex=1.5)


## -------------------------------------------------------------------
dnorm(x=c(-1.96,-1,-0.5,0,.5,1,1.96), mean=0, sd=1)


## -------------------------------------------------------------------
s <- seq(0.4, 2, by=.2)
data.frame(sd=s, p=dnorm(x=1, mean=0, sd=s))


## ----fig.width=10, fig.height=5,  out.width='\\textwidth'-----------
tmp <- rainbow(length(s))
curve(dnorm(x, mean=0, s[1]), -3, 3, col=tmp[1],
      main="Normal probability density",
      sub="Varying the standard deviation",
      ylab="density",xlab="residual")
for (i in 2:length(s))
     curve(dnorm(x, mean=0, sd=s[i]), -3, 3,
           col=tmp[i], add=T)
grid()
abline(v=1, lty=2)
legend(-3, 1, s, lty=1, col=tmp)


## ----function.like--------------------------------------------------
like <- function(beta0, beta1, sigma, x, y) {
        s2 <- sigma^2; n <- length(y)
        # compute fits given the coefficients
        pred <- beta0 + beta1 * x
        loglike <- -(n/2)*(log(2*pi)) - 
            (n/2)*(log(s2)) - 
            (1/(2*s2))*(sum((y-pred)^2))
        return(loglike) 
	} # like


## ----lm.fit.params--------------------------------------------------
coefficients(lm(straw ~ grain, data=mhw))
summary(lm(straw ~ grain, data=mhw))$sigma


## ----params.grid----------------------------------------------------
coef <- round(coefficients(lm(straw ~ grain, data=mhw)),5)
(beta0 <- coef[1]*seq(0.9,1.1,by=.02))
(beta1 <- coef[2]*seq(0.9,1.1,by=.02))
(sigma <- round(summary(lm(straw ~ grain, data=mhw))$sigma,5)*
     seq(0.9,1.1,by=.02))
beta <- expand.grid(beta0=beta0, beta1=beta1, sigma=sigma)
beta$loglik <- 0
dim(beta)
rm(coef, beta0, beta1, sigma)


## -------------------------------------------------------------------
for (i in 1:length(beta$loglik))
  beta$loglik[i] <- like(beta[i,"beta0"], beta[i,"beta1"], beta[i,"sigma"],
                         mhw$grain, mhw$straw)
summary(beta$loglik)
(beta.mle <- beta[which.max(beta$loglik),])


## ----fig.width=15, fig.height=5, out.width='\\textwidth'------------
par(mfrow=c(1,3))
tmp <- beta[(beta$sigma==beta.mle$sigma),]
tmp <- tmp[(tmp$beta1==beta.mle$beta1),]
plot(tmp$loglik ~ tmp$beta0, type="b", xlab="intercept", ylab="log-likelihood",
     main="Constant slope, s.d.")
grid()
abline(v=beta.mle$beta0)
tmp <- beta[(beta$sigma==beta.mle$sigma),]
tmp <- tmp[(tmp$beta0==beta.mle$beta0),]
plot(tmp$loglik ~ tmp$beta1, type="b", xlab="slope", ylab="log-likelihood",
     main="Constant intercept, s.d.")
grid()
abline(v=beta.mle$beta1)
tmp <- beta[(beta$beta1==beta.mle$beta1),]
tmp <- tmp[(tmp$beta0==beta.mle$beta0),]
plot(tmp$loglik ~ tmp$sigma, type="b", xlab="s.d.", ylab="log-likelihood",
     main="Constant intercept, slope")
grid()
abline(v=beta.mle$sigma)
par(mfrow=c(1,1))


## ----fig.width=10, fig.height=5, out.width='\\textwidth'------------
tmp <- beta[(beta$sigma==beta.mle$sigma),]
wireframe(loglik ~ beta0 + beta1,
          data = tmp, aspect=c(1,.5), drape=T,
          main="Log-likelihood, constant s.d.")


## -------------------------------------------------------------------
rm(like, beta, i, beta.mle, tmp)


## -------------------------------------------------------------------
require(nlme)
help(gls)


## ----fit.gls--------------------------------------------------------
coords <- st_coordinates(mhw.sf)
mhw.xy <- cbind(mhw.sf, coords)
names(mhw.xy)
system.time(
            model.gls.straw.grain <-
            gls(model = straw ~ grain,
                data = mhw.xy,
                correlation = corSpher(
                  value = c(vgmr[2,"range"],vgmr[1,"psill"]),
                  form = ~X + Y, nugget=T))
            )


## ----summary.gls----------------------------------------------------
summary(model.gls.straw.grain)


## -------------------------------------------------------------------
coefficients(model.gls.straw.grain)
coefficients(model.straw.grain)


## ----logLik.gls-----------------------------------------------------
logLik(model.gls.straw.grain)
logLik(model.straw.grain)
AIC(model.gls.straw.grain)
AIC(model.straw.grain)


## ----plot.ols.gls, out.width='0.7\\textwidth'-----------------------
plot(straw ~ grain, data=mhw, pch=20,
     sub="black: OLS; red: GLS")
grid()
abline(model.straw.grain)
abline(model.gls.straw.grain, col="red")


## ----plot.ols.gls.2, out.width='0.7\\textwidth'---------------------
ggplot(data = mhw.sf, aes(x = grain, y = straw)) +
    geom_point() +
    geom_smooth(method = "lm") +
    geom_abline(aes(intercept = coefficients(model.gls.straw.grain)[1],
                    slope = coefficients(model.gls.straw.grain)[2]), 
                color = 'red', lwd = 1) +
    labs(title = "GLS (red) vs. OLS (black) regressions")


## ----eval=F, echo=F, results='hide'---------------------------------
## (vgmr.exp <- fit.variogram(vr, model=vgm(0.15, "Exp", 20*3, 0.05)))
## plot(vr, model=vgmr.exp)
## model.gls.straw.grain.exp <-
##   gls(model=straw ~ grain,
##       data=as.data.frame(mhw.sf),
##       correlation=corExp(
##         value=c(vgmr.exp[2,"range"],vgmr.exp[1,"psill"]),
##         form=~x+y, nugget=T))
## summary(model.gls.straw.grain.exp)
## coefficients(model.gls.straw.grain)-
##   coefficients(model.gls.straw.grain.exp)
## plot(straw ~ grain, data=mhw, pch=20,
##      sub="black: OLS; red: GLS (corSpher), green: GLS (corExp)")
## grid()
## abline(model.straw.grain)
## abline(model.gls.straw.grain, col="red")
## abline(model.gls.straw.grain.exp, col="darkgreen")
## rm(model.gls.straw.grain.exp, vgmr.exp)


## -------------------------------------------------------------------
library(sp)
library(spgwr)


## ----sf.to.sp-------------------------------------------------------
mhw.sp <- as(mhw.sf, "Spatial")
str(mhw.sp)


## -------------------------------------------------------------------
(bw <- gwr.sel(straw ~ grain, data=mhw.sp, adapt=F, verbose=F))


## ----fit.vgm.gsr----------------------------------------------------
(vmf.gsr <- fit.variogram(v.gsr <- variogram(gsr ~ 1, loc=mhw.sf),
                          model=vgm(0.004, "Sph", 10, 0.002)))


## -------------------------------------------------------------------
(gwr.sg <- gwr(straw ~ grain, data = mhw.sp, bandwidth = bw))


## ----out.width='0.8\\textwidth'-------------------------------------
gwr.coef <- as(gwr.sg$SDF,"SpatialPixelsDataFrame")
print(spplot(gwr.coef, zcol="grain",
             col.regions=bpy.colors(64),
             key.space="right", cuts=8,
             main="Slope: straw ~ grain"))


## -------------------------------------------------------------------
print(spplot(gwr.coef, zcol="gwr.e",
             col.regions=cm.colors(64),
             key.space="right", cuts=8,
             main="Slope: straw ~ grain"))


## ----show-gwr-r-vgm, out.width='0.55\\textwidth'--------------------
vr.gwr <- variogram(gwr.e ~ 1,
                    loc=as(gwr.coef, "SpatialPointsDataFrame"),
                    cutoff=12, width=0.5)
plot(vr.gwr, plot.numbers=TRUE, pch=20)


## -------------------------------------------------------------------
(vmf.r.gwr <- fit.variogram(vr.gwr,
                            model=vgm(0.10, "Sph", 8, 0.25)))
plot(vr.gwr, plot.numbers=TRUE, pch=20,
     model=vmf.r.gwr)


## -------------------------------------------------------------------
vr.gls <- variogram(gls.res ~ 1, loc=mhw.sf, 
                    cutoff= plot.wid*10, width=plot.wid)
(vmf.r.gls <- fit.variogram(vr.gls, model=vgm(0.1, "Sph", 5, 0.2)))


## -------------------------------------------------------------------
ylim.plot=c(0, max(vr.gwr$gamma, vr.gls$gamma, vr$gamma))
plot(gamma ~ dist, data=vr.gwr, ylim=ylim.plot,
     type="b", lty=2, col="red", xlab="separation, m",
     ylab="semivariance, (lbs plot-1)^2",
     main="Regression model residuals")
lines(variogramLine(vmf.r.gwr, maxdist=max(vr.gwr$dist)),
      col="red")
points(gamma ~ dist, data=vr, type="b", lty=2, col="blue")
lines(variogramLine(vgmr, maxdist=max(vr.gwr$dist)),
      col="blue")
points(gamma ~ dist, data=vr.gls, type="b", lty=2, col="green")
lines(variogramLine(vmf.r.gls, maxdist=max(vr.gwr$dist)),
      col="green")
legend(8,0.15,c("OLS", "GLS", "GWR"), lty=1,
       col=c("blue","green","red"))



## ----mhw2m, child='mhw_2m.Rnw'--------------------------------------

## ----set-parent-2m, echo=FALSE, cache=FALSE-------------------------
set_parent('mhw.Rnw')


## ----ggplot-scatter-------------------------------------------------
library(ggplot2)
ggplot(mhw, aes(grain, straw, col=gsr)) +
    geom_point()


## ----6clusters------------------------------------------------------
(gs.cluster <- kmeans(mhw[, c("grain","straw")],
                      centers=6, nstart = 20))


## ----ggplot-scatter-6-----------------------------------------------
mhw.sp$cluster6 <- as.factor(gs.cluster$cluster)
ggplot(data=mhw.sp@data, aes(grain, straw, color=cluster6)) +
    geom_point() +
    labs(colour="cluster6")


## ----show-6-zones, out.width='0.8\\textwidth'-----------------------
g <- ggplot(mhw.sp@data, aes(c, r, color=cluster6)) +
    geom_point(shape=15, size=5)
print(g)


## ----color-brewer---------------------------------------------------
require(RColorBrewer)
display.brewer.all(type="qual")


## ----show-6-zones-accent, out.width='0.8\\textwidth'----------------
g + scale_colour_brewer(name="cluster6", palette="Accent")



## ----mhw2a, child='mhw_2a.Rnw'--------------------------------------

## ----set-parent-2a, echo=FALSE, cache=FALSE-------------------------
set_parent('mhw.Rnw')


## ----echo=FALSE, results='hide'-------------------------------------
options(prompt="> ", continue="+ ", digits=5, width=70)
par(mfrow=c(1,1))
require(sp)
require(gstat)
require(lattice)


## ----fig=T----------------------------------------------------------
mhw.rc <- mhw; coordinates(mhw.rc) <- ~r +c
v.map <- variogram(grain ~ 1, loc=mhw.rc, map=TRUE, cutoff=10, width=1)
summary(v.map$map$var1)
class(v.map)
plot(v.map, col.regions=bpy.colors(64), 
     xlab="Row-wise (N-S) lag", ylab="Column-wise (W-E) lag")


## -------------------------------------------------------------------
c0 <- var(mhw$grain)
v.map$map$cov <- c0  - v.map$map$var1
v.map$map$cor <- v.map$map$cov/c0
v.map$map$cor[which(is.na(v.map$map$cor))] <- 1
summary(v.map$map$cor)


## ----fig=T, keep.source=T-------------------------------------------
str(v.map$map@data)
n <- sqrt(length(v.map$map$cor))
v.map.mat <- matrix(v.map$map$cor, nrow=n, ncol=n)
plot(wireframe(v.map.mat, drape=T, aspect=c(1,.25),
               screen=c(z=225, x=-60), ylab="Column-wise (W-E) lag",
               xlab="Row-wise (N-S) lag", zlab="rho(h)",
               main="Autocorrelation surface, grain yields",
               col.regions=bpy.colors(72)))


## -------------------------------------------------------------------
(c <- ceiling(n/2))
(v.map.mat[c:n,c])
(v.map.mat[c,c:n])


## ----fig.width=8, fig.height=4, out.width='\\textwidth'-------------
par(mfrow=c(1,2))
str(v.map.mat)
plot(v.map.mat[c,c:n], type="h", ylim=c(-.2,1),
     main="Along columns (E-W)", ylab=expression(rho),
     col="blue", xlab="lag (rows)")
abline(h=0)
plot(v.map.mat[c:n,c], type="h", ylim=c(-.2,1),
     main="Along rows (N-S)", ylab=expression(rho),
     col="darkgreen", xlab="lag (columns)")
abline(h=0)
par(mfrow=c(1,1))


## -------------------------------------------------------------------
# centre grain yields at zero
# organize as an array
m <- max(mhw$r); n <- max(mhw$c)
str(mhw.grain.matrix <- matrix(data = (mhw.rc$grain - mean(mhw.rc$grain)),
                               nrow = m, ncol = n))


## -------------------------------------------------------------------
## p, q lags in row, column direction
## positive column lags
cpq <- function(p, q, mat) {
  s <- 0
  for (i in 1:(m - p))
    for (j in 1:(n - q))
      s <- s + (mat[i,j]) * (mat[i+p, j+q])
  return((1/((m - p) * (n - q))) * s)
  }
## negative column lags
cpmq <- function(p, q, mat) {
  s <- 0
  for (i in 1:(m - p))
    for (j in (q+1):n)
      s <- s + (mat[i,j]) * (mat[i+p, j-q])
  return((1/((m - p) * (n - q))) * s)
  }


## -------------------------------------------------------------------
## symmetry relations
## c(-p,q) = c(p, -q); c(-p, -q) = c(p, q)
## compute for the desired combination of lags
max.l <- 14; d <- 2*max.l + 1
ch <- matrix(0, d, d)
for (lag.r in 1:max.l)
  for (lag.c in 1:max.l) {
    ## c(p, -q) : lower-left
    ch[(max.l+1) + lag.r, (max.l +1) - lag.c] <- 
        (cpmq(lag.r, lag.c, mhw.grain.matrix))
    ## c(-p, q) : upper-right
    ch[(max.l+1) - lag.r, (max.l+1) + lag.c] <- 
        ch[(max.l+1) + lag.r, (max.l +1) - lag.c]
  }
for (lag.r in 0:max.l)
  for (lag.c in 0:max.l) {
    ## c(p, q) : lower-right, including centre
    ch[(max.l+1) + lag.r, (max.l+1) + lag.c] <- 
        (cpq(lag.r, lag.c, mhw.grain.matrix))
    ## c(-p, -q) : upper-left
    ch[(max.l+1) - lag.r, (max.l+1) - lag.c] <- 
        ch[(max.l+1) + lag.r, (max.l+1) + lag.c]
  }
ch <- ch/var(mhw$grain)
summary(as.vector(ch))


## ----out.width='0.9\\textwidth'-------------------------------------
plot(wireframe(ch, drape=T, aspect=c(1,.25),
               screen=c(z=225, x=-60), ylab="Column-wise (W-E) lag",
               xlab="Row-wise (N-S) lag", zlab="rho(h)",
               main="Autocorrelation surface, grain yields",
               auto.key=T,
               col.regions=bpy.colors(72)))


## -------------------------------------------------------------------
grs <- function(r=0, s=0, t, L, cor.mat) {
  ## smoothing function: Bartlett lag window
  w <- function(x, y) {
    h <- sqrt(x^2 + y^2)
    return(ifelse(h <= L, 1 - (h/L),0))
  }
  max.p <- dim(cor.mat)[1]; max.q <- dim(cor.mat)[2]
  centre.p <- ((max.p-1)/2)+1; centre.q <- ((max.q-1)/2)+1
  sum <- 0
  for (q in -L:L) {
    if (centre.q+q+1 > max.q) break
    for (p in -L:L) {
      if (centre.p+p+1 > max.p) break
      ## cor.mat is dimensioned 1.. 2L+1
      ## p, q are dimensions -L...0... L
      ## so add offset to the matrix subscripts, from centre out
      s1 <- (cor.mat[centre.p+p,centre.q+q] * 
             w(p,q) * cos((pi/t)*((r*p) + (s*q))))
#     print(paste("x=",centre.p+p+L+1, "y=",centre.q+q+L+1,
#                 "s1=",round(s1,4)))
      sum <- sum + s1
    }
  }
  return(sum/(4*pi^2))
}


## -------------------------------------------------------------------
# theta <- 50; dens <- rep(0, 2*theta+1)
theta <- 50; dens <- rep(0, theta+1)
#for (r in -theta:theta)
# for (r in 0:theta)
#  dens[theta+r+1] <- grs(r, 0, t=theta, L=10 ,cor.mat=ch)
for (s in 0:theta)
  dens[s+1] <- grs(0, s, t=theta, L=10 ,cor.mat=ch)


## ----fig.width=8, fig.height=5, out.width='0.9\\textwidth'----------
plot(dens ~ seq(0, 0.5, length=theta+1), type="p", 
     main="Spectral density, W-E", sub="window size 10", 
     ylab="density", xlab="frequency, cycles", pch=20, cex=0.6)
dens.smooth <- spline(dens)
lines(dens.smooth$y ~ seq(0, 0.5, length=length(dens.smooth$x)), lty=1)
grid()


## ----keep.source=T--------------------------------------------------
l.s <- seq(4,14,by=2)
theta <- 50
dens <- matrix(rep(0, length(l.s)*(theta+1)),
               nrow=length(l.s))


## -------------------------------------------------------------------
for (i in 1:length(l.s)) {
  for (s in 0:theta) {
    dens[i,s+1] <- grs(0, s, theta, l.s[i] ,ch)
  }
}


## ----fig.width=8, fig.height=5, out.width='\\textwidth'-------------
plot(dens[1,] ~ seq(0, 0.5, length=theta+1), type="n",
     main="Spectral density, W-E", ylab="density", xlab="frequency, cycles",
     ylim=c(min(0,dens), max(dens)*1.1))
for (i in 1:length(l.s)) {
  dens.smooth <- spline(dens[i,])
  lines(dens.smooth$y ~ seq(0, 0.5, length=length(dens.smooth$x)), lty=i)
  text(0,max(dens[i,]),paste("L =",l.s[i],sep=""), pos=3)
}
grid()


## ----fig.width=8, fig.height=5, out.width='0.8\\textwidth'----------
theta <- 50; dens <- rep(0, theta+1)
for (r in 0:theta)
  dens[r+1] <- grs(r, 0, t=theta, L=10 ,cor.mat=ch)
plot(dens ~ seq(0, 0.5, length=theta+1), type="p",
     main="Spectral density, N-S", sub="window size 10",
     ylab="density", xlab="frequency, cycles", pch=20, cex=0.6)
dens.smooth <- spline(dens)
lines(dens.smooth$y ~ seq(0, 0.5, length=length(dens.smooth$x)), lty=1)
grid()


## ----out.width='0.8\\textwidth'-------------------------------------
theta=25
dens <- matrix(rep(0, (theta+1)^2), nrow=theta+1)
for (s in 0:theta)
  for (r in 0:theta)
    dens[r+1,s+1] <- grs(r, s, theta, 10 ,ch)
wireframe(dens, drape=T, aspect=c(1,.35), screen=c(z=225, x=-60),
          xlab="N-S frequency",
          ylab="E-W frequency", zlab="density",
          auto.key=T,
          col.regions=topo.colors(72)
          )


## ----eval=T---------------------------------------------------------
rm(mhw.rc, v.map, c0, v.map.mat, m, n, c, max.l, lag.r, lag.c)
rm(r, s, theta, dens, dens.smooth, l.s, grs, ch)



## ----mhw3, child='mhw_3.Rnw'----------------------------------------

## ----set-parent-3, echo=FALSE, cache=FALSE--------------------------
set_parent('mhw.Rnw')


## -------------------------------------------------------------------
round(100 * sd(mhw$grain)/mean(mhw$grain),1)


## -------------------------------------------------------------------
cv <- function(x) { round(100*sd(x) / mean(x),1) }
class(cv)


## -------------------------------------------------------------------
cv(mhw$grain)


## ----keep.source=T--------------------------------------------------
plot.len; plot.wid; plot.area


## -------------------------------------------------------------------
plots <- data.frame(acre.fraction=c(500, 250, 125, 100, 50, 25, 10),
      adj.rows=c(1, 2, 4, 1, 2, 4, 10),
      adj.col=c(1, 1, 1, 5, 5, 5, 5))
row.names(plots) <- c("1/500", "1/250", "1/125", "1/100", "1/50", "1/25", "1/10")
str(plots)


## -------------------------------------------------------------------
plots <- cbind(plots, len = plot.len*plots$adj.row)
plots <- cbind(plots, wid = plot.wid*plots$adj.col)
plots <- cbind(plots, area = plots$len * plots$wid)
plots


## -------------------------------------------------------------------
head(mhw$r%%2, 20)


## -------------------------------------------------------------------
tmp <- unstack(mhw, grain ~ r%%2); str(tmp)
grain.250 <- tmp$X0 + tmp$X1; rm(tmp)
str(grain.250)
cv(grain.250)


## -------------------------------------------------------------------
plots.250 <- data.frame(r = seq(1.5, 19.5, by=2),
      c=rep(1:25, each=10),
      grain = grain.250)
str(plots.250)


## -------------------------------------------------------------------
with(mhw,
     plot(plots.250$c, plots.250$r, pch=20, cex=2,
          bg="lightblue", xlab="column", ylab="row",
          main="Grain yield of 1/250 acre plots",
          sub="Colour of circles from low yield (green) to high (gray)",
          xlim=c(1, 25), ylim=c(20, 1),
          col=terrain.colors(8)[cut(grain,
              quantile(grain, seq(0, 1, length=9)),
              include.lowest=T, labels=F)]))


## -------------------------------------------------------------------
tmp <- unstack(mhw, grain ~ r%%4); str(tmp)
grain.125 <- apply(tmp, 1, sum)
rm(tmp)
str(grain.125)
cv(grain.125)
plots.125 <- data.frame(r = seq(2, 18, by=4),
      c=rep(1:25, each=10),
      grain = grain.125)
str(plots.125)


## -------------------------------------------------------------------
tmp <- unstack(mhw, grain ~ c %% 5)
grain.100 <- apply(tmp, 1, sum)
rm(tmp)
cv(grain.100)
plots.100 <- data.frame(r = rep(1:20, each=5),
      c=seq(3, 23, by=5),
      grain = grain.100)
str(plots.100)


## -------------------------------------------------------------------
tmp <- unstack(plots.100, grain ~ r %% 2)
grain.50 <-  apply(tmp, 1, sum)
rm(tmp)
cv(grain.50)
plots.50 <- data.frame(r = rep(seq(1.5, 19.5, by=2), each=5),
      c=seq(3, 23, by=5),
      grain = grain.50)
str(plots.50)


## -------------------------------------------------------------------
tmp <- unstack(plots.100, grain ~ r %% 4)
grain.25 <-  apply(tmp, 1, sum)
rm(tmp)
cv(grain.25)
plots.25 <- data.frame(r = rep(seq(2.5, 18.5, by=4), each=5),
      c=seq(3, 23, by=5),
      grain = grain.25)
str(plots.25)


## -------------------------------------------------------------------
tmp <- unstack(plots.100, grain ~ r %% 10)
grain.10 <-  apply(tmp, 1, sum)
rm(tmp)
cv(grain.10)
plots.10 <- data.frame(r = rep(seq(5.5, 15.5, by=10), each=5),
      c=seq(3, 23, by=5),
      grain = grain.10)
str(plots.10)


## -------------------------------------------------------------------
summary(mhw$grain)
   summary(plots.250$grain)/2
   summary(plots.125$grain)/4
   summary(plots.100$grain)/5
   summary(plots.50$grain)/10
   summary(plots.25$grain)/20
   summary(plots.10$grain)/50


## -------------------------------------------------------------------
print(size.cv <- data.frame(area = plots$area, cv = c(
   cv(mhw$grain), cv(plots.250$grain), cv(plots.125$grain),
   cv(plots.100$grain), cv(plots.50$grain), cv(plots.25$grain),
   cv(plots.10$grain))))
plot(size.cv$area, size.cv$cv, xlab="Plot size, m^2",
     ylab="Coefficient of variation, %",
     main="Plot size vs. CV, Mercer-Hall grain", type="b",
     xlim=c(0,600))
grid()
text(size.cv$area, size.cv$cv, pos=4,
     paste(
        plots$adj.rows,
        " row",
        ifelse(plots$adj.rows==1,"","s"),
        ", ",
        plots$adj.col,
        " column",
         ifelse(plots$adj.col==1,"","s"),
         sep=""))


## ----eval=F---------------------------------------------------------
## rm(cv,
##    plot.len, plot.wid, plot.area,plots,
##    plots.250, plots.125, plots.100, plots.50,
##    plots.25, plots.10,
##    grain.250, grain.125, grain.100, grain.50,
##    grain.25, grain.10, size.cv)


## -------------------------------------------------------------------
plots[,c("len", "wid", "area")]


## ----echo=F, results='hide'-----------------------------------------
rm(cv,
   plots,
   plots.250, plots.125, plots.100, plots.50, plots.25, plots.10, grain.250, grain.125, grain.100, grain.50, grain.25,  grain.10,
   size.cv
   )



## ----mhwb, child='mhw_b.Rnw'----------------------------------------

## ----set-parent-b, echo=FALSE, cache=FALSE--------------------------
set_parent('mhw.Rnw')


## -------------------------------------------------------------------
head(colours(), 16)


## ----fig.height=6, fig.width=12, out.width='\\textwidth'------------
plot(seq(1:length(colors())), rep(2, length(colours())), type="h",
     lwd=2, col=colors(), ylim=c(0,1), xlab="Colour number",
     ylab="", yaxt="n",
     main="Colours available with the colour() function") 


## ----eval=F---------------------------------------------------------
## abline(h=0.5, lwd=3)
## (selected <- identify(seq(1:length(colors())),
##                       rep(0.5, length(colors()))))
## colors()[selected]; rm(selected)


## -------------------------------------------------------------------
palette(); palette()[2]


## -------------------------------------------------------------------
boxplot(mhw$straw ~ mhw$r, col=mhw$r, xlab="row",
        ylab="Straw yield, lbs plot-1")


## -------------------------------------------------------------------
palette(gray(seq(0,.9,len=20))); palette()
boxplot(mhw$straw ~ mhw$r, col=mhw$r,
        xlab="row", ylab="Straw yield, lbs plot-1")
palette("default"); palette()


## -------------------------------------------------------------------
as.numeric("0xe6")/as.numeric("0xff")


