## ----setup, include=FALSE, cache=FALSE-----------------------------------
# set global chunk options
## this code is only required to set up knitr for reproducible documentation
## it has nothing to do with the exercise itself
## please ignore!
opts_knit$set(aliases=c(h = 'fig.height', w = 'fig.width'), out.format='latex', use.highlight=TRUE)
opts_chunk$set(fig.path='kgraph/gsintro', fig.align='center', fig.show='hold', prompt=FALSE, comment=NA, fig.width=6, fig.height=6, out.width='.7\\linewidth', size='scriptsize')
# options to be read by formatR
options(replace.assign=TRUE,width=70)


## ----echo=FALSE, results='hide'------------------------------------------
rm(list=ls())


## ----child-gs_load, child='gs_load.Rnw'----------------------------------

## ----load-set-parent, echo=FALSE, cache=FALSE----------------------------
set_parent('gs_intro.Rnw')


## ----eval=FALSE----------------------------------------------------------
install.packages(c("sp", "gstat"), dependencies=TRUE)


## ----load-first-expr-----------------------------------------------------
2*pi/360


## ----load-load-libraries-------------------------------------------------
library(sp)
library(gstat)


## ------------------------------------------------------------------------
search()


## ----load-show-datasets, eval=FALSE--------------------------------------
data()


## ----load-show-datasets-2, eval=FALSE------------------------------------
data(package="sp")


## ------------------------------------------------------------------------
ls()
data("meuse")
ls()


## ----load-str-df---------------------------------------------------------
str(meuse)


## ----load-show-df-as-matrix----------------------------------------------
dim(meuse)
dim(as.matrix(meuse))
str(as.matrix(meuse))
head(as.matrix(meuse))


## ----eval=FALSE----------------------------------------------------------
help(meuse)


## ----load-summary-meuse--------------------------------------------------
summary(meuse)


## ----child-gs_break, child='gs_break.Rnw'--------------------------------

## ----break-set-parent, echo=FALSE, cache=FALSE---------------------------
set_parent('gs_intro.Rnw')


## ----eval=FALSE----------------------------------------------------------
q()


## ------------------------------------------------------------------------
library(sp)
library(gstat)


## ----child-gs_uni, child='gs_uni.Rnw'------------------------------------

## ----uni-set-parent, echo=FALSE, cache=FALSE-----------------------------
set_parent('gs_intro.Rnw')


## ------------------------------------------------------------------------
meuse$zinc
sort(meuse$zinc)


## ----uni-show-hist-Zn, out.width='0.65\\textwidth'-----------------------
hist(meuse$zinc, breaks=16)
rug(meuse$zinc)
summary(meuse$zinc)


## ----uni-show-hist-logZn, out.width='0.65\\textwidth'--------------------
meuse$logZn <- log10(meuse$zinc)
hist(meuse$logZn, breaks=16)
rug(meuse$logZn)
summary(meuse$logZn)


## ------------------------------------------------------------------------
head(meuse$zinc)
head(meuse$logZn)


## ------------------------------------------------------------------------
meuse$logCu <- log10(meuse$copper)
str(meuse)


## ----child-gs_bi, child='gs_bi.Rnw'--------------------------------------

## ----bi-set-parent, echo=FALSE, cache=FALSE------------------------------
set_parent('gs_intro.Rnw')


## ----bi-plot-logZnlogCu--------------------------------------------------
plot(meuse$logZn ~ meuse$logCu)


## ----bi-find-unusual-1---------------------------------------------------
which(meuse$logZn < 2.6)
which(meuse$logCu > 1.6)


## ----bi-find-unusual-2---------------------------------------------------
which((meuse$logZn < 2.6) & (meuse$logCu > 1.6))


## ----bi-find-unusual-3---------------------------------------------------
ix <- which((meuse$logZn < 2.6) & (meuse$logCu > 1.6))


## ----bi-show-unusual-----------------------------------------------------
meuse[ix, ]


## ----bi-row.names.meuse--------------------------------------------------
print(row.names(meuse))


## ----m-lzn-lcu-----------------------------------------------------------
m.lzn.lcu <- lm(logZn ~ logCu, data=meuse)


## ----bi-m-lzn-lcu-matrix-------------------------------------------------
model.matrix(m.lzn.lcu)[1:6,]
meuse$logCu[1:6]


## ------------------------------------------------------------------------
ls()
class(meuse)
class(m.lzn.lcu)


## ------------------------------------------------------------------------
summary(m.lzn.lcu)


## ----bi-plot-hist-LznLcu-resid, out.width='0.6\\textwidth'---------------
hist(residuals(m.lzn.lcu))


## ----bi-plot-m-lzn-lcu, out.width='\\textwidth', fig.width=12, fig.height=4----
par(mfrow=c(1,3))
plot(m.lzn.lcu, which=c(1,2,5))
par(mfrow=c(1,1))


## ----find-largest-resids-------------------------------------------------
(which.ix <- which(abs(residuals(m.lzn.lcu)) > 0.3))
# the records in the data frame that correspond to large absolute residuals
meuse[which.ix,]
# these large absolute residuals
residuals(m.lzn.lcu)[which.ix]
# the largest one
which.max(abs(residuals(m.lzn.lcu))[which.ix])


## ----bi-show.base.colours------------------------------------------------
palette()


## ----bi-plot-lZn-lCu-ffreq, out.width='0.75\\textwidth', keep.source=TRUE----
plot(meuse$logZn ~ meuse$logCu, asp=1, col=meuse$ffreq, pch=20,
     xlab="log10(Cu ppm)", ylab="log10(Zn ppm)")
abline(m.lzn.lcu)
legend("topleft", legend=c("2 years","10 years", "50 years"),
       pch=20, col=1:3)


## ----bi-ffreq.table------------------------------------------------------
table(meuse$ffreq)


## ----bi-boxplot-Zn-ffreq, out.width='0.6\\textwidth', keep.source=T------
boxplot(meuse$logZn ~ meuse$ffreq, xlab="Flood frequency class",
        ylab="log10-Zn ppm",
        main="Metal concentration per flood frequency class",
        boxwex=0.5, col="lightblue")


## ----bi-m-lzn-ff---------------------------------------------------------
m.lzn.ff <- lm(logZn ~ ffreq, data=meuse)


## ----bi-m-lzn-ff-matrix--------------------------------------------------
model.matrix(m.lzn.ff)[1:6,]


## ----bi-summary-lzn-ff---------------------------------------------------
summary(m.lzn.ff)


## ----bi-means------------------------------------------------------------
pred.ff <- predict(m.lzn.ff, newdata=data.frame(ffreq=as.factor(c(1:3))), se.fit=TRUE,
                   interval="prediction")
pred.ff$fit
pred.ff$se.fit


## ----bi-m-mixed----------------------------------------------------------
m.lzn.ff.lcu <- lm(logZn ~ ffreq + logCu, data=meuse)
summary(m.lzn.ff.lcu)


## ----bi-m-mixed-anova----------------------------------------------------
anova(m.lzn.ff.lcu, m.lzn.lcu)


## ----echo=FALSE, results='hide'------------------------------------------
# save anova results for answers
tmp.anova.result <- anova(m.lzn.ff.lcu, m.lzn.lcu)
-tmp.anova.result$Df[2]
-round(tmp.anova.result$"Sum of Sq"[2],3)
round(tmp.anova.result$"Pr(>F)"[2],2)


## ----bi-m-mixed-i--------------------------------------------------------
m.lzn.ff.lcu.i <- lm(logZn ~ ffreq * logCu, data=meuse)
summary(m.lzn.ff.lcu.i)
anova(m.lzn.ff.lcu.i, m.lzn.lcu)


## ----echo=FALSE, results='hide'------------------------------------------
# save anova results for answers
tmp.anova.result.i <- anova(m.lzn.ff.lcu.i, m.lzn.lcu)
-tmp.anova.result.i$Df[2]
-round(tmp.anova.result.i$"Sum of Sq"[2],3)
round(tmp.anova.result.i$"Pr(>F)"[2],2)


## ----bi-plot-mixed-i, keep.source=T--------------------------------------
with(meuse, plot(logZn ~ logCu, col=ffreq, pch = 20,
                 xlab = "log10(Cu)", ylab = "log10(Zn)"))
legend("topleft", legend=levels(meuse$ffreq), pch=20,
       col=1:3)
title(main = "Relation between log10(Zn) and log10(Cu)")
title(sub = 
          "Interaction: solid lines; per class; single: dashed line")
abline(lm(logZn ~ logCu, data = meuse),col = "purple",
       lty=3, lwd=2.5)
abline(lm(logZn ~ logCu, data = meuse,
          subset=(meuse$ffreq==1)),col = 1)
abline(lm(logZn ~ logCu, data = meuse,
          subset=(meuse$ffreq==2)),col = 2)
abline(lm(logZn ~ logCu, data = meuse,
          subset=(meuse$ffreq==3)),col = 3)


## ----child-gs_sp, child='gs_sp.Rnw'--------------------------------------

## ----sp-set-parent, echo=FALSE, cache=FALSE------------------------------
set_parent('gs_intro.Rnw')


## ----sp-load-load-libraries----------------------------------------------
library(sp)
library(gstat)


## ----show-classes--------------------------------------------------------
class(1); class("A"); class(TRUE)
class(list(1, "A", TRUE))
class(as.matrix(1:9, nrow=3))
class(meuse)
class(meuse$ffreq)


## ----assign-coords-------------------------------------------------------
class(meuse)
coordinates(meuse) <- c("x","y")
class(meuse)


## ------------------------------------------------------------------------
str(meuse)


## ----spatvar-plot-meuse-riv----------------------------------------------
plot(meuse, asp=1, pch=1)
data(meuse.riv)
lines(meuse.riv)


## ----convert-meuse-grid--------------------------------------------------
data(meuse.grid)
class(meuse.grid)
coordinates(meuse.grid) <-c ("x","y")
gridded(meuse.grid) <- TRUE; fullgrid <- TRUE
class(meuse.grid)


## ----child-gs_trees, child='gs_trees.Rnw'--------------------------------

## ----trees-set-parent, echo=FALSE, cache=FALSE---------------------------
set_parent('gs_intro.Rnw')


## ----trees-load-if-necess, echo=FALSE, results='hide'--------------------
require(sp)
if (!exists("meuse")) data(meuse)
if (!exists("meuse$logZn")) meuse$logZn <- log10(meuse$zinc)


## ----trees-load-rpart----------------------------------------------------
library(rpart)
library(rpart.plot)


## ----trees-comp-rpart----------------------------------------------------
m.lzn.rp <- rpart(logZn ~ ffreq + dist + elev,
                  data=meuse,
                  minsplit=2,
                  cp=0.003)


## ----trees-print-unpruned------------------------------------------------
print(m.lzn.rp)


## ----trees-plot-unpruned, out.width='\\textwidth'------------------------
rpart.plot(m.lzn.rp, digits=3, type=4, extra=1)


## ----trees-count-leaves--------------------------------------------------
sum(m.lzn.rp$frame$var == '<leaf>')
sum(m.lzn.rp$frame$var !="<leaf>")


## ----trees-import-rpart--------------------------------------------------
x <- m.lzn.rp$variable.importance 
data.frame(variableImportance = 100 * x / sum(x))


## ----trees-plotcp-rp, out.width='\\textwidth'----------------------------
printcp(m.lzn.rp)
plotcp(m.lzn.rp)


## ----trees-find-min-xerror-----------------------------------------------
head(cp.table <- m.lzn.rp[["cptable"]],8)
(cp.ix <- which.min(cp.table[,"xerror"]))
print(cp.table[cp.ix,])
cp.min <- cp.table[cp.ix,"CP"]


## ----trees-cp-table-min--------------------------------------------------
(cp.min.plus.sd <- cp.table[cp.ix,"xerror"] + cp.table[cp.ix,"xstd"])
cp.ix.sd <- min(which(cp.table[,"xerror"] < cp.min.plus.sd))
print(cp.table[cp.ix.sd,])
cp.min.sd <- cp.table[cp.ix.sd,"CP"]


## ----trees-prune-rpart---------------------------------------------------
(m.lzn.rpp <- prune(m.lzn.rp, cp=cp.min))


## ----trees-plot-rpart----------------------------------------------------
rpart.plot(m.lzn.rpp, digits=3, type=4, extra=1)


## ----trees-predict-rpart, out.width='0.75\\textwidth', keep.source=TRUE----
p.rpp <- predict(m.lzn.rpp, newdata=meuse)
length(unique(p.rpp))
summary(r.rpp <- meuse$logZn - p.rpp)
sqrt(sum(r.rpp^2)/length(r.rpp))
plot(meuse$logZn ~ p.rpp, asp=1, pch=20, xlab="fitted", ylab="actual",
     xlim=c(2,3.3), ylim=c(2,3.3),
     main="log10(Zn), Meuse topsoils, Regression Tree")
grid()
abline(0,1)


## ----gam-idw-elev, keep.source=T-----------------------------------------
tmp <- idw(elev ~ 1, locations=meuse,
           nmax=16, idp=2, newdata=meuse.grid)
meuse.grid$elev <- tmp$var1.pred; rm(tmp)
pts.s <- list("sp.points", meuse, col="darkgreen",
              pch=1, cex=2*meuse$elev/max(meuse$elev))
print(spplot(meuse.grid, zcol="elev",
             col.regions=heat.colors(64),
             main="Elevation (m)",
             sub="Interpolated by IDW^2, 16 neighbours",
             sp.layout = list(pts.s)))
summary(meuse$elev)
summary(meuse.grid$elev)


## ----trees-predict-map---------------------------------------------------
rt.grid <- predict(m.lzn.rpp, newdata=meuse.grid)
meuse.grid$rt.pred <- rt.grid; rm(rt.grid)
spplot(meuse.grid, zcol="rt.pred",
       main="Regression tree prediction, log10(Zn)")


## ----trees-set-seed-classtree, echo=FALSE--------------------------------
# to get consistent results for comments
set.seed(6345789)


## ----trees-build-classtree-----------------------------------------------
m.ff.rp <- rpart(ffreq ~ dist + elev,
                  data=meuse,
                  minsplit=2,
#                  method="class",
                  cp=0)


## ----trees-show-classtree-cp---------------------------------------------
printcp(m.ff.rp)
plotcp(m.ff.rp)
cp.table.class <- m.ff.rp[["cptable"]]
# total variance explained is sum of the CP for all split
sum(cp.table.class[,"CP"])


## ----trees-root-node-error-----------------------------------------------
(n <- length(m.ff.rp$y))
(class.majority <- which.max(m.ff.rp$parms$prior))
(class.majority.proportion <- 
     m.ff.rp$parms$prior[class.majority])
(1 - (class.majority.proportion))


## ----trees-hide-classtree-cp, echo=FALSE,results='hide'------------------
# for the answers, others can see from the table
cp.ix.class <- which.min(cp.table.class[,"xerror"])
print(cp.table.class[cp.ix.class,])
(cp.min <- cp.table.class[cp.ix.class,"CP"])


## ----trees-prune-classtree-----------------------------------------------
(m.ff.rpp <- prune(m.ff.rp, cp=cp.table.class[3,"CP"]))
printcp(m.ff.rpp)


## ----trees-show-pruned-classtree, out.width='\\textwidth'----------------
par(mfrow=c(1,2))
rpart.plot(m.ff.rpp, type=4, extra=1)
rpart.plot(m.ff.rpp, type=4, extra=4)
par(mfrow=c(1,1))


## ----trees-model-rf, keep.source=TRUE------------------------------------
library(randomForest)
m.lzn.rf <- randomForest(logZn ~ ffreq + dist+ elev, data=meuse,
                         importance=T, na.action=na.omit, mtry=2)
print(m.lzn.rf)


## ----trees-mse-rf--------------------------------------------------------
plot(m.lzn.rf)


## ----trees-varimp-rf-----------------------------------------------------
importance(m.lzn.rf, type=1)
varImpPlot(m.lzn.rf, type=1)


## ----trees-predict-rf, out.width='0.75\\textwidth', keep.source=TRUE-----
p.rf <- predict(m.lzn.rf, newdata=meuse)
length(unique(p.rf))
summary(r.rpp <- meuse$logZn - p.rf)
sqrt(sum(r.rpp^2)/length(r.rpp))
plot(meuse$logZn ~ p.rf, asp=1, pch=20, xlab="fitted", ylab="actual",
     xlim=c(2,3.3), ylim=c(2,3.3),
     main="log10(Zn), Meuse topsoils, Random Forest")
grid()
abline(0,1)


## ----trees-oob-rf, keep.source=TRUE--------------------------------------
p.rf.oob <- predict(m.lzn.rf)
summary(r.rpp.oob <- meuse$logZn - p.rf.oob)
sqrt(sum(r.rpp.oob^2)/length(r.rpp.oob))
plot(meuse$logZn ~ p.rf.oob, asp=1, pch=20,
     xlab="Out-of-bag cross-validation estimates",
     ylab="actual", xlim=c(2,3.3), ylim=c(2,3.3),
     main="log10(Zn), Meuse topsoils, Random Forest")
grid()
abline(0,1)


## ----rf-predict-map------------------------------------------------------
rf.grid <- predict(m.lzn.rf, newdata=meuse.grid)
meuse.grid$rf.pred <- rf.grid; rm(rf.grid)
spplot(meuse.grid, zcol="rf.pred",
       main="Random forest prediction, log10(Zn)")


## ----trees-build-classRF-------------------------------------------------
library(randomForest)
m.ff.rf <- randomForest(ffreq ~ dist + elev,
                        data=meuse,
                        importance=T,
#                        type="class",
                        mtry=2)
print(m.ff.rf)
importance(m.ff.rf, type=1)


## ----trees-error-rate-classRF, out.width='0.9\\textwidth', fig.width=10, fig.height=7----
plot(m.ff.rf)
palette()


## ----trees-oob-classRF---------------------------------------------------
p.rf <- predict(m.ff.rf)
table(meuse$ffreq) # observed
table(p.rf)        # predicted
(p.rf.cm <- table(p.rf, meuse$ffreq, dnn=c("Predicted","Observed")))


## ----trees-oob-classRF-matrix--------------------------------------------
(d <-diag(p.rf.cm))
(row.sums <- apply(p.rf.cm, MARGIN=1, "sum"))
(mu.purity <- d/row.sums)


## ----child-gs_spatvar, child='gs_spatvar.Rnw'----------------------------

## ----spatvar-set-parent, echo=FALSE, cache=FALSE-------------------------
set_parent('gs_intro.Rnw')


## ----spatvar-plot-meuse-Zn-riv-------------------------------------------
plot(meuse, asp=1, cex=4*meuse$zinc/max(meuse$zinc), pch=1)
lines(meuse.riv)


## ------------------------------------------------------------------------
n <- length(meuse$logZn)
n*(n-1)/2


## ------------------------------------------------------------------------
dim(coordinates(meuse))
coordinates(meuse)[1,]
coordinates(meuse)[2,]
(sep <- dist(coordinates(meuse)[1:2,]))
(gamma <- 0.5 * (meuse$logZn[1] - meuse$logZn[2])^2)


## ----show-vgm-cloud, out.width='0.55\\textwidth'-------------------------
(vc <- variogram(logZn ~ 1, meuse, cutoff=72, cloud=TRUE))
plot(vc$gamma ~ vc$dist, xlim=c(0,80), pch=20, cex=2,
     ylab="semivariance, (log10(Zn)^2)", xlab="separation, m")
grid()


## ----which-vgm-cloud-large-----------------------------------------------
order(vc$gamma, decreasing=TRUE)
order(vc$gamma, decreasing=TRUE)[1]
which.max(vc$gamma)
(unusual.pair <- vc[which.max(vc$gamma),])
meuse[138, "logZn"]; meuse[76, "logZn"]
(gamma.76.138 <- 0.5 * (meuse@data[138, "logZn"] - meuse@data[76, "logZn"])^2)


## ----spatvar-show-vgm-logZn, out.width='0.55\\textwidth'-----------------
(v <- variogram(logZn ~ 1, meuse, cutoff=1300, width=90))
print(plot(v, plot.numbers=T))


## ----spatvar-showvgms, out.width='\\textwidth'---------------------------
print(show.vgms())


## ----spatvar-plot-guess-vgm-model, out.width='0.55\\textwidth'-----------
vm <- vgm(psill=0.12,model="Sph",range=850,nugget=0.01)
print(plot(v, pl=T, model=vm))


## ----spatvar-plot-fitted-vgm-model---------------------------------------
(vmf <- fit.variogram(v, vm))
print(plot(v, pl=T, model=vmf))


## ----child-gs_ok, child='gs_ok.Rnw'--------------------------------------

## ----ok-set-parent, echo=FALSE, cache=FALSE------------------------------
set_parent('gs_intro.Rnw')


## ------------------------------------------------------------------------
k40 <- krige(logZn ~ 1, locations=meuse, newdata=meuse.grid, model=vmf)


## ------------------------------------------------------------------------
str(k40)


## ----ok-plot-k40pred, keep.source=T--------------------------------------
print(spplot(k40, "var1.pred", asp=1, col.regions=bpy.colors(64),
             main="OK prediction, log-ppm Zn"))


## ----ok-bpy--------------------------------------------------------------
head(bpy.colors(64))
bpy.colors(64)[32]


## ----ok-plot-k40var, keep.source=T---------------------------------------
print(spplot(k40, "var1.var",
             col.regions=cm.colors(64),
             asp=1,
             main="OK prediction variance, log-ppm Zn^2"))


## ----ok-plot-k40pred-pts, keep.source=T----------------------------------
pts.s <- list("sp.points", meuse, col="white",
              pch=1, cex=4*meuse$zinc/max(meuse$zinc))
print(spplot(k40, "var1.pred", asp=1, col.regions=bpy.colors(64),
             main="OK prediction, log-ppm Zn",
             sp.layout = list(pts.s)
             ))


## ----ok-plot-k40var-pts, keep.source=T-----------------------------------
pts.s <- list("sp.points", meuse, col="black", pch=20)
print(spplot(k40, zcol="var1.var",
             col.regions=cm.colors(64), asp=1,
             main="OK prediction variance, log-ppm Zn^2",
             sp.layout = list(pts.s)
             ))



## ----child-gs_ik, child='gs_ik.Rnw'--------------------------------------

## ----ik-set-parent, echo=FALSE, cache=FALSE------------------------------
set_parent('gs_intro.Rnw')


## ----ik-compute-indicator------------------------------------------------
meuse$zn.i <- (meuse$zinc < 150)
summary(meuse$zn.i)


## ----ik-show-i-vi--------------------------------------------------------
vi <- variogram(zn.i ~ 1, location=meuse, cutoff=1500)
print(plot(vi, pl=T))


## ----ik-show-i-vimf------------------------------------------------------
(vimf <- fit.variogram(vi, vgm(0.12, "Sph", 1300, 0)))
print(plot(vi, pl=T, model=vimf))


## ----ik-spplot-k40i-pred, out.width='0.65\\textwidth', keep.source=T-----
k40.i <- krige(zn.i ~ 1, loc=meuse,
               newdata=meuse.grid, model=vimf)
print(spplot(k40.i, zcol="var1.pred",
             col.regions=heat.colors(64), asp=1,
             main="Probability Zn < 150"))


## ----ik-spplot-k40i-var, out.width='0.65\\textwidth', keep.source=T------
k40.i$var1.sd <- sqrt(k40.i$var1.var)
print(spplot(k40.i, zcol="var1.sd",
             col.regions=cm.colors(64), asp=1,
             main="standard deviation, Probability Zn < 150"))


## ----ik-safe-80pct-------------------------------------------------------
k40.i$safe150.80pct <- ifelse((k40.i$var1.pred <= 0.8), TRUE, FALSE)
summary(k40.i$safe150.80pct)
print(spplot(k40.i, zcol="safe150.80pct", asp=1,
             main="(p >= 0.8) below critical level (Zn < 150)",
             colorkey=FALSE,
             col.regions=c("lightgreen", "gray")))


## ----child-gs_rk, child='gs_rk.Rnw'--------------------------------------

## ----rk-set-parent, echo=FALSE, cache=FALSE------------------------------
set_parent('gs_intro.Rnw')


## ----rk-summary-meuse-grid-ffreq, keep.source=T--------------------------
summary(meuse.grid$ffreq)
print(spplot(meuse.grid, zcol="ffreq",
             col.regions=topo.colors(3),
             main="Flooding frequency"))


## ----rk-spplot-kffreq-pred, out.width='0.55\\textwidth', keep.source=T----
k.ffreq <- krige(logZn ~ ffreq, locations=meuse,
                 newdata=meuse.grid, model=NULL)
print(spplot(k.ffreq, zcol="var1.pred",
             col.regions=bpy.colors(64),
             main="prediction by flood frequency, log-ppm Zn"))


## ----rk-spplot-kffreq-var, out.width='0.55\\textwidth', keep.source=T----
print(spplot(k.ffreq, zcol="var1.var",
             col.regions=cm.colors(64),
             main="prediction variance, log-ppm Zn^2"))


## ----rk-show-ffreq-vr, out.width='0.55\\textwidth', keep.source=T--------
(vr <- variogram(logZn ~ ffreq, location=meuse,
                 cutoff=1300, width=90))
print(plot(vr, plot.numbers=T,
           main="Residuals, flood frequency co-variable"))


## ------------------------------------------------------------------------
(vrmf <- fit.variogram(vr, vgm(psill=0.08, model="Sph", range=700, nugget=0.01)))
print(vmf)


## ----rk-show-ffreq-vr-model, out.width='0.55\\textwidth', keep.source=T----
print(plot(vr, plot.numbers=T, model=vrmf,
           main="Residuals, flood frequency co-variable"))


## ----keep.source=T-------------------------------------------------------
kr40 <- krige(logZn ~ ffreq, locations=meuse,
              newdata=meuse.grid, model=vrmf)


## ----rk-spplot-kr40, keep.source=T---------------------------------------
print(spplot(kr40, "var1.pred", asp=1,
             col.regions=bpy.colors(64),
             main="KED-ffreq prediction, log-ppm Zn"))


## ----keep.source=T-------------------------------------------------------
(zmax <- max(k40$var1.pred,kr40$var1.pred))
(zmin <- min(k40$var1.pred,kr40$var1.pred))
(zmax <- round(zmax, 1) + 0.1)
(zmin <- round(zmin, 1) - 0.1)
(ramp <- seq(from=zmin, to=zmax, by=.1))
p1 <- spplot(k40, "var1.pred", asp=1, main="OK prediction",
             at=ramp, col.regions=bpy.colors(64))
p2 <- spplot(kr40, "var1.pred", asp=1, main="KED-ffreq prediction",
             at=ramp, col.regions=bpy.colors(64))


## ----rk-compare-k40-kr40, out.width='\\textwidth', fig.width=10, fig.height=5----
plot(p1, split=c(1,1,2,1), more=T)
plot(p2, split=c(2,1,2,1), more=F)


## ----rk-spplot-kr40-var, out.width='0.65\\textwidth', keep.source=T------
print(spplot(kr40, "var1.var", asp=1,
             col.regions=cm.colors(64),
             main="KED prediction variance, log-ppm Zn^2"))


## ------------------------------------------------------------------------
summary(kr40$var1.var)
summary(k40$var1.var)


## ------------------------------------------------------------------------
zmax <- round(max(k40$var1.var,kr40$var1.var), 3) + 0.001
zmin <- round(min(k40$var1.var,kr40$var1.var), 3) - 0.001
(ramp <- seq(from=zmin, to=zmax, by=.005))
p1 <- spplot(k40, "var1.var",
             col.regions=cm.colors(64),
             asp=1, at=ramp,
             main="OK prediction variance, log-ppm Zn")
p2 <- spplot(kr40, "var1.var",
             col.regions=cm.colors(64),
             asp=1, at=ramp,
             main="KED-ffreq prediction variance, log-ppm Zn")


## ----rk-compare-k40-kr40-var, out.width='\\textwidth', fig.width=10, fig.height=5----
plot(p1, split=c(1,1,2,1), more=T)
plot(p2, split=c(2,1,2,1), more=F)


## ------------------------------------------------------------------------
names(meuse.grid)
intersect(names(meuse), names(meuse.grid))


## ----rk-spplot-meuse-grid-dist, out.width='0.55\\textwidth', keep.source=T----
print(spplot(meuse.grid, zcol="dist",
             main="Normalized distance to river",
             col.regions=topo.colors(64)))


## ----rk-plot-Zn-dist, out.width='0.55\\textwidth'------------------------
plot(logZn ~ dist, data=meuse@data, col=meuse$ffreq)
legend(x=0.7, y=3.2, legend=c("1","2","3"), pch=1, col=1:3)


## ----rk-m-lzn-dist-------------------------------------------------------
m.lzn.dist <- lm(logZn ~ dist, data=meuse)
summary(m.lzn.dist)


## ----keep.source=T-------------------------------------------------------
k.dist <- krige(logZn ~ dist, locations=meuse,
                newdata=meuse.grid, model=NULL)
p1 <- spplot(k.dist, zcol="var1.pred", col.regions=bpy.colors(64),
             main="prediction, log-ppm Zn")
p2 <- spplot(k.dist, zcol="var1.var",
             col.regions=cm.colors(64),
             main="prediction variance, log-ppm Zn^2")


## ----rk-spplot-kdist, out.width='\\textwidth', fig.width=12, fig.height=6----
require(lattice)
tmp <- lattice.options()
lattice.options(layout.widths =
       list(key.right = list(x = 3, units = "cm", data = NULL)))
print(p1, split=c(1,1,2,1), more=T)
print(p2, split=c(2,1,2,1), more=F)
lattice.options(tmp)


## ----rk-m-lzn-ff-dist, keep.source=T-------------------------------------
m.lzn.ff.dist <- lm(logZn ~ ffreq + dist, data=meuse)
m.lzn.ff.dist.i <- lm(logZn ~ ffreq * dist, data=meuse)
anova(m.lzn.ff.dist.i, m.lzn.ff.dist, m.lzn.dist, m.lzn.ff)


## ----rk-AIC--------------------------------------------------------------
AIC(m.lzn.dist, m.lzn.ff, m.lzn.ff.dist, m.lzn.ff.dist.i)


## ------------------------------------------------------------------------
summary(m.lzn.ff.dist.i)


## ----rk-plot-m-lzn-ff-dist-i, out.width='\\textwidth', fig.width=12, fig.height=4----
par(mfrow=c(1,3))
plot(m.lzn.ff.dist.i, which=c(1,2,5))
par(mfrow=c(1,1))


## ----rk-find-worst-row-name----------------------------------------------
(ix <- which(row.names(meuse@data) == "76"))
meuse@data[ix,]


## ----rk-find-bad-rows----------------------------------------------------
# which row numbers correspond to the observations witH large residuals?
(bad.pt <- which(row.names(meuse@data) %in% c("76","51","157")))
# where are they?
coordinates(meuse)[bad.pt,]
# make a logical vector of all rows, whether they have large residuals
#  or not
is.row.bad <- (row(meuse@data)[,1] %in% bad.pt)


## ----rk-show-bad-pts, out.width='0.8\\textwidth', keep.source=T----------
colours.ffreq = c("red","orange","green")
plot(coordinates(meuse), asp=1,
     col=colours.ffreq[meuse$ffreq],
     # select print character, large residual or not?
     # 20 = filled circle; 1 = open circle
     pch=ifelse(is.row.bad, 20, 1),
     # symbol size proportional to Zn concentration
     cex=4*meuse$zinc/max(meuse$zinc),
     main="Suspicious regression residuals (solid circles)",
     sub="Symbol size proportional to Zn concentration")
grid()
legend(178000, 333000, pch=1, col=colours.ffreq,
       legend=c("Frequent", "Occasional", "Rare"))
text(coordinates(meuse)[bad.pt,],
    c("76","51","157"), pos=4)


## ------------------------------------------------------------------------
rm(bad.pt, bad.row)


## ----rk-plot-vr2, out.width='0.55\\textwidth'----------------------------
(vr2 <- variogram(logZn ~ ffreq*dist, location=meuse, cutoff=1300, width=90))
print(plot(vr2, plot.numbers=T, main="Residuals, ffreq*dist"))


## ----rk-vrm2f------------------------------------------------------------
(vrm2f <- fit.variogram(vr2, vgm(psill=0.04, model="Sph", range=700, nugget=0.01)))
vrmf
vmf


## ----rk-plot-vrm2f, out.width='0.55\\textwidth'--------------------------
print(plot(vr2, plot.numbers=T, model=vrm2f, main="Residuals, ffreq*dist"))


## ----rk-kr240------------------------------------------------------------
kr240 <- krige(logZn ~ ffreq*dist, locations=meuse, newdata=meuse.grid, model=vrm2f)


## ----rk-plot-kr240, keep.source=T----------------------------------------
print(spplot(kr240, "var1.pred", asp=1,
             col.regions=bpy.colors(64),
             main="KED-ffreq*dist prediction, log-ppm Zn"))


## ----keep.source=T-------------------------------------------------------
zmax <- round(max(k40$var1.pred,
                  kr40$var1.pred,
                  kr240$var1.pred), 1) + 0.1
zmin <- round(min(k40$var1.pred,
                  kr40$var1.pred,
                  kr240$var1.pred), 1) - 0.1
ramp <- seq(from=zmin, to=zmax, by=.1)
p1 <- spplot(k40, "var1.pred", asp=1, col.regions=bpy.colors(64),
             main="OK prediction, log-ppm Zn", at=ramp)
p2 <- spplot(kr40, "var1.pred", asp=1, col.regions=bpy.colors(64),
             main="KED-ffreq prediction, log-ppm Zn", at=ramp)
p3 <- spplot(kr240, "var1.pred", asp=1, col.regions=bpy.colors(64),
             main="KED-ffreq*dist prediction, log-ppm Zn", at=ramp)


## ----rk-plot-k40kr40kr240-pred, out.width='\\textwidth', fig.width=12, fig.height=4----
plot(p1, split=c(1,1,3,1), more=T)
plot(p2, split=c(2,1,3,1), more=T)
plot(p3, split=c(3,1,3,1), more=F)


## ------------------------------------------------------------------------
summary(kr240$var1.var)
summary(kr40$var1.var)
summary(k40$var1.var)


## ----keep.source=T-------------------------------------------------------
zmax <- round(max(k40$var1.var,
                  kr40$var1.var,
                  kr240$var1.var), 3) + 0.001
zmin <- round(min(k40$var1.var,
                  kr40$var1.var,
                  kr240$var1.var), 3) - 0.001
(ramp <- seq(from=zmin, to=zmax, by=.005))
p1 <- spplot(k40, "var1.var",
             col.regions=cm.colors(64),
             asp=1, at=ramp,
             main="OK pred.var., log-ppm^2 Zn")
p2 <- spplot(kr40, "var1.var",
             col.regions=cm.colors(64),
             asp=1, at=ramp,
             main="KED-ffreq pred.var, log-ppm^2 Zn")
p3 <- spplot(kr240, "var1.var",
             col.regions=cm.colors(64),
             asp=1, at=ramp,
             main="KED-ffreq*dist pred.var, log-ppm^2 Zn")


## ----rk-plot-k40kr40kr240-var, out.width='\\textwidth', fig.width=12, fig.height=4----
plot(p1, split=c(1,1,3,1), more=T)
plot(p2, split=c(2,1,3,1), more=T)
plot(p3, split=c(3,1,3,1), more=F)


## ----child-gs_rkgls, child='gs_rkgls.Rnw'--------------------------------

## ----rkgls-set-parent, echo=FALSE, cache=FALSE---------------------------
set_parent('gs_intro.Rnw')


## ----rkgls-compute-prop-nugget-------------------------------------------
vrm2f[2,"range"]
(prop.nugget <- vrm2f[1,"psill"]/sum(vrm2f[,"psill"]))


## ----rkgls-fit-reml, keep.source=TRUE------------------------------------
library(nlme)
m.gls <- gls(model=logZn ~ ffreq * dist,
             data=as.data.frame(meuse),
             correlation=corSpher(
                 form=~x + y,
                 nugget=TRUE,
                 value=c(vrm2f[2,"range"], prop.nugget),
                 ))


## ----rkgls-summary-reml--------------------------------------------------
summary(m.gls)


## ----rkgls-compare-ols-gls-coeff, keep.source=TRUE-----------------------
coef(m.gls); coef(m.lzn.ff.dist.i)
# percent change
round(100*(coefficients(m.gls)
           - coefficients(m.lzn.ff.dist.i))/
      coefficients(m.lzn.ff.dist.i),2)


## ----rkgls-ci-reml-------------------------------------------------------
intervals(m.gls, level=0.95)$coef


## ----rkgls-summary-intervals---------------------------------------------
intervals(m.gls, level=0.95)$corStruct
prop.nugget; vrm2f[2,"range"]


## ----rkgls-plot-gls-fits, keep.source=TRUE, fig=TRUE---------------------
plot(meuse$logZn ~ predict(m.gls),
     col=meuse$ffreq, pch=20, asp=1,
     xlab="Fitted by GLS",
     ylab="Actual",
     main="log10(Zn), ppm")
legend("topleft", levels(meuse$ffreq), pch=20, col=1:4)
grid()
abline(0,1)


## ----rkgls-bubble-diffs, fig=TRUE, height=6, asp=1-----------------------
meuse$diff.gls.ols.resid <- residuals(m.gls) - residuals(m.lzn.ff.dist.i)
summary(meuse$diff.gls.ols.resid)
bubble(meuse, zcol="diff.gls.ols.resid", pch=1,
       main="GLS residual - OLS residual")


## ----rkgls-predict-gls---------------------------------------------------
meuse.grid$ols.pred <- predict(m.lzn.ff.dist.i, newdata=meuse.grid)
meuse.grid$gls.pred <- predict(m.gls, newdata=meuse.grid)
summary(meuse.grid$ols.pred)
summary(meuse.grid$gls.pred)
meuse.grid$diff.ols.gls.pred <- meuse.grid$ols.pred - meuse.grid$gls.pred
summary(meuse.grid$diff.ols.gls.pred)


## ----rkgls-show-ols-gls-compare, keep.source=TRUE------------------------
zmax <- round(max(meuse.grid$ols.pred,
                  meuse.grid$gls.pred), 1) + 0.1
zmin <- round(min(meuse.grid$ols.pred,
                  meuse.grid$gls.pred), 1) - 0.1
ramp <- seq(from=zmin, to=zmax, by=.1)
p1 <- spplot(meuse.grid, zcol="ols.pred", asp=1,
             col.regions=bpy.colors(64),
             main="OLS prediction, log10-ppm Zn", at=ramp)
p2 <- spplot(meuse.grid, zcol="gls.pred", asp=1,
             col.regions=bpy.colors(64),
             main="GLS prediction, log10-ppm Zn", at=ramp)
p3 <- spplot(meuse.grid, zcol="diff.ols.gls.pred", asp=1,
             col.regions=topo.colors(64),
             main="Difference OLS-GLS, log10-ppm Zn")


## ----rkgls-spplot-ols-gls-diff, out.width='\\textwidth', fig.width=12, fig.height=4----
plot(p1, split=c(1,1,3,1), more=T)
plot(p2, split=c(2,1,3,1), more=T)
plot(p3, split=c(3,1,3,1), more=F)


## ----rkgls-build-gls-resid-vgm-------------------------------------------
meuse$gls.resid <- residuals(m.gls)
(p.nugget <- intervals(m.gls)$corStruct["nugget","est."])
(t.sill <- var(meuse$gls.resid))
(nugget <- t.sill * p.nugget)
(vmf.r.gls <- vgm(psill=t.sill-nugget,
                      model="Sph",
                      range=intervals(m.gls)$corStruct["range","est."],
                      nugget=nugget))
vrm2f  # compare with residual variogram from OLS


## ----rkgls-gls-resid-vgm, fig=TRUE, width=6, height=4, keep.source=TRUE----
v.r.gls <- variogram(gls.resid ~ 1, loc=meuse, cutoff=1500, width=90)
# panel function to also show variogram and fitted model from OLS residuals
mypanel <- function(x, y, ...) {
    vgm.panel.xyplot(x, y, plot.numbers=TRUE, ...)
    panel.pointPairs(vr2$dist, vr2$gamma, col="red")
    panel.lines(variogramLine(vrm2f, maxdist=1500), lty=2, col='red')
    }
plot(v.r.gls, pl=T, model=vmf.r.gls, 
     main="Variogram model fitted to GLS residuals",
     panel = mypanel)


## ----rkgls-gls-resid-krige, keep.source=TRUE-----------------------------
k.gls.r <- krige(gls.resid ~ 1, loc=meuse,
                 newdata=meuse.grid, model=vmf.r.gls)
summary(k.gls.r)
k.gls.r$rk.gls.pred <- 
    meuse.grid$gls.pred + k.gls.r$var1.pred
k.gls.r$diff.rk.gls.ked <- 
    k.gls.r$rk.gls.pred - kr240$var1.pred
summary(k.gls.r$diff.rk.gls.ked)
zmax <- round(max(k.gls.r$rk.gls.pred,
                  kr240$var1.pred), 1) + 0.1
zmin <- round(min(k.gls.r$rk.gls.pred,
                  kr240$var1.pred), 1) - 0.1
ramp <- seq(from=zmin, to=zmax, by=.1)
p1 <- spplot(k.gls.r, zcol="rk.gls.pred", asp=1,
             col.regions=bpy.colors(64),
             main="RK/GLS prediction, log10-ppm Zn", at=ramp)
p2 <- spplot(kr240, "var1.pred", asp=1,
             col.regions=bpy.colors(64),
             main="KED prediction, log-ppm Zn", at=ramp)
p3 <- spplot(k.gls.r, zcol="diff.rk.gls.ked", asp=1,
             col.regions=topo.colors(64),
             main="Difference RK/GLS - KED, log10-ppm Zn")


## ----rkgls-spplot-rkgls-ked-diff, out.width='\\textwidth', fig.width=12, fig.height=4----
plot(p1, split=c(1,1,3,1), more=T)
plot(p2, split=c(2,1,3,1), more=T)
plot(p3, split=c(3,1,3,1), more=F)


## ----child-gs_cv, child='gs_cv.Rnw'--------------------------------------

## ----cv-set-parent, echo=FALSE, cache=FALSE------------------------------
set_parent('gs_intro.Rnw')


## ----results='hide'------------------------------------------------------
kcv.ok <- krige.cv(logZn ~ 1, locations=meuse, model=vmf)
kcv.rk <- krige.cv(logZn ~ ffreq, locations=meuse, model=vrmf)
kcv.rk2 <- krige.cv(logZn ~ ffreq*dist, locations=meuse, model=vrm2f)


## ------------------------------------------------------------------------
summary(kcv.ok)


## ------------------------------------------------------------------------
summary(kcv.ok$residual)
summary(kcv.rk$residual)
summary(kcv.rk2$residual)


## ------------------------------------------------------------------------
sqrt(sum(kcv.ok$residual^2)/length(kcv.ok$residual))
sqrt(sum(kcv.rk$residual^2)/length(kcv.rk$residual))
sqrt(sum(kcv.rk2$residual^2)/length(kcv.rk$residual))


## ----keep.source=TRUE----------------------------------------------------
(zmax <- round(max(kcv.ok$residual,kcv.rk$residual,
                   kcv.rk2$residual),2) + 0.01)
(zmin <- round(min(kcv.ok$residual,
                   kcv.rk$residual,kcv.rk2$residual),2) - 0.01)
ramp <-  quantile(c(kcv.ok$residual,
                    kcv.rk$residual,kcv.rk2$residual),
                  probs=seq(0, 1, by=0.1))
p1 <- bubble(kcv.ok, zcol="residual",
             main="OK X-validation residuals",
             key.entries=ramp, pch=1)
p2 <- bubble(kcv.rk, zcol="residual",
             main="KED ffreq X-validation residuals",
             key.entries=ramp, pch=1)
p3 <- bubble(kcv.rk2, zcol="residual",
             main="KED ffreq*dist X-validation residuals",
             key.entries=ramp, pch=1)


## ----cv-plot-kcv, out.width='\\textwidth', fig.width=12, fig.height=4----
plot(p1, split=c(1,1,3,1), more=T)
plot(p2, split=c(2,1,3,1), more=T)
plot(p3, split=c(3,1,3,1), more=F)


## ------------------------------------------------------------------------
rm(zmax, zmin, ramp, p1, p2, p3)


## ----child-gs_gam, child='gs_gam.Rnw'------------------------------------

## ----gam-set-parent, echo=FALSE, cache=FALSE-----------------------------
set_parent('gs_intro.Rnw')


## ----gam-load-data, echo=FALSE, results='hide'---------------------------
require(sp)
require(gstat)
if (!exists("meuse")) {  data(meuse); coordinates(meuse) <- ~ x + y}
if (!exists("meuse$logZn")) meuse$logZn <- log10(meuse$zinc)


## ----gam-load-mgcv-------------------------------------------------------
library(mgcv)


## ----gam-plot-Zn-elev-dist, out.width='\\textwidth', fig.width=10, fig.height=5----
library(ggplot2)
p1 <- qplot(x=dist, y=logZn, data=meuse@data, geom=c("point", "smooth")) # , method="loess"
p2 <- qplot(x=elev, y=logZn, data=meuse@data, geom=c("point", "smooth")) # , method="loess"
require(gridExtra)
grid.arrange(p1, p2, ncol=2)


## ----gam-smooth-one-add--------------------------------------------------
library(mgcv)
m.g.dist <- gam(logZn ~ s(dist), data=meuse)
summary(m.g.dist)
summary(residuals(m.g.dist))
m.g.elev <- gam(logZn ~ s(elev), data=meuse)
summary(m.g.elev)
summary(residuals(m.g.elev))


## ----gam-plot-gam, out.width='\\textwidth', fig.width=10, fig.height=5----
par(mfrow=c(1,2))
plot.gam(m.g.dist, residuals=T, pch=20)
abline(h=0, lty=2)
plot.gam(m.g.elev, residuals=T, pch=20)
abline(h=0, lty=2)
par(mfrow=c(1,1))


## ----gam-scatter-elev-dist, fig.width=8, fig.height=4--------------------
qplot(x=dist, y=elev, data=meuse@data, geom=c("point", "smooth", "rug"))
cor(meuse$elev, meuse$dist, method='pearson')
cor(meuse$elev, meuse$dist, method='spearman')


## ------------------------------------------------------------------------
m.g.dist.elev <- gam(logZn ~ s(dist) + s(elev), data=meuse)
m.g.dist.elev.i <- gam(logZn ~ s(dist) + s(elev) + ti(dist,elev), data=meuse)
summary(m.g.dist.elev)
summary(m.g.dist.elev.i)
summary(residuals(m.g.dist.elev))
summary(residuals(m.g.dist.elev.i))


## ----gam-m-g-dist-elev, out.width='\\textwidth', fig.width=10, fig.height=5----
plot.gam(m.g.dist.elev, pages=1)


## ----gam-m-g-dist-elev-i, out.width='\\textwidth', fig.width=10, fig.height=10----
plot.gam(m.g.dist.elev.i, pages=1)


## ----gam-vis-gam, keep.source=T------------------------------------------
vis.gam(m.g.dist.elev, theta=+60,
        plot.type="persp", color="terrain")


## ----gam-vis-gam-se, keep.source=T---------------------------------------
vis.gam(m.g.dist.elev, theta=+60, color="terrain", se=1.96)


## ----gam-spplot-k-g-i, out.width='\\textwidth', fig.width=10, fig.height=5, keep.source=T----
tmp <- predict.gam(object=m.g.dist.elev.i, newdata=meuse.grid, se.fit=TRUE)
names(tmp)
meuse.grid$k.g.i <- tmp$fit
meuse.grid$k.g.i.se <- tmp$se.fit
p1 <- spplot(meuse.grid, zcol="k.g.i",
             col.regions=bpy.colors(64),
             main="Predicted log(Zn) ppm, GAM")
p2 <- spplot(meuse.grid, zcol="k.g.i.se",
             col.regions=cm.colors(64),
             main="Standard error of prediction")
grid.arrange(p1, p2, ncol=2)


## ------------------------------------------------------------------------
summary(m.dist.elev.i <- lm(logZn ~ dist*elev, data=meuse))


## ----gam-m-dist-elev-i, out.width='\\textwidth', fig.width=12, fig.height=4----
par(mfrow=c(1,3))
plot(m.dist.elev.i, which=c(1,2,5))
par(mfrow=c(1,1))


## ----gam-plot-k-i, out.width='\\textwidth', fig.width=10, fig.height=5, keep.source=T----
tmp <- predict.lm(object=m.dist.elev.i,
                  newdata=meuse.grid, se.fit=TRUE)
meuse.grid$k.i <- tmp$fit
meuse.grid$k.i.se <- tmp$se.fit
p1 <- spplot(meuse.grid, zcol="k.i",
             col.regions=bpy.colors(64),
             main="Predicted log(Zn) ppm, linear")
p2 <- spplot(meuse.grid, zcol="k.i.se",
             col.regions=cm.colors(64),
             main="Standard error of prediction")
plot(p1, split=c(1,1,2,1), more=T)
plot(p2, split=c(2,1,2,1), more=F)


## ----gam-highlev-hires, out.width='0.55\\textwidth', fig.width=4, fig.height=6----
ix <- which(row.names(meuse)=="76")
meuse[ix,c("zinc","elev","dist")]
log10(meuse@data[ix,"zinc"])
fitted(m.dist.elev.i)[ix]
fitted(m.g.dist.elev.i)[ix]
plot(coordinates(meuse), asp=1, pch=21, cex=4*meuse$zinc/max(meuse$zinc),
     bg=ifelse(row.names(meuse)=="76","red","gray"))
data(meuse.riv)
lines(meuse.riv)
grid()


## ----gam-diff-lm-gam, out.width='0.65\\textwidth', keep.source=T---------
meuse.grid$diff <- meuse.grid$k.g.i - meuse.grid$k.i
print(spplot(meuse.grid, zcol="diff",
             col.regions=terrain.colors(64),
             main="Difference, GAM prediction - linear prediction"))


## ----gam-plot-gam-resid, out.width='\\textwidth', fig.width=10, fig.height=5----
meuse$resid.gam <- residuals(m.g.dist.elev.i)
p1 <- bubble(meuse, zcol="resid.gam", pch=1)
vr <- variogram(resid.gam ~ 1, locations=meuse)
p2 <- plot(vr, plot.numbers=T)
plot(p1, split=c(1,1,2,1), more=T)
plot(p2, split=c(2,1,2,1), more=F)


## ------------------------------------------------------------------------
max(vr$gamma)/max(variogram(logZn ~ 1, locations=meuse)$gamma)


## ----echo=FALSE, results='hide'------------------------------------------
(vrmf <- fit.variogram(vr, model=vgm(0.15, "Sph", 800, 0.05)))
tmp <- krige(resid.gam ~ 1, locations=meuse, newdata=meuse.grid, model=vrmf, beta=0)
meuse.grid$gam.r.sk <- tmp$var1.pred
meuse.grid$gam.r.sk.var <- tmp$var1.var
meuse.grid$rk.gam <- meuse.grid$k.g.i + meuse.grid$gam.r.sk


## ----gam-spplot-gam-r-sk, out.width='0.65\\textwidth', echo=FALSE--------
pts.s <- list("sp.points", meuse,
              pch=1, cex=2*abs(meuse$resid.gam)/max(abs(meuse$resid.gam)),
              col=ifelse(meuse$resid.gam > 0,"white", "black"))
plot(spplot(meuse.grid, zcol="gam.r.sk", main="GAM residuals interpolated by SK",
            sub="positive: white, negative: black",
       col.regions=heat.colors(64),
              sp.layout = list(pts.s)))


## ----gam-pplot-meuse-rk-gam, out.width='0.65\\textwidth', echo=FALSE-----
plot(spplot(meuse.grid, zcol="rk.gam", main="RK using GAM",
       col.regions=bpy.colors(64)))



## ----child-gs_ans, child='gs_ans.Rnw'------------------------------------

## ----ans-set-parent, echo=FALSE, cache=FALSE-----------------------------
set_parent('gs_intro.Rnw')


## ----compute-ffreq-Zn-mean-----------------------------------------------
tapply(meuse$logZn, meuse$ffreq, mean)


## ----show.lowest.pred.variance.distance----------------------------------
ix <- which.min(k.dist$var1.var)
k.dist@data[ix,]
meuse.grid@data[ix, "dist"]
mean(meuse$dist)


