## ----setup, include=FALSE, cache=FALSE-------------------------------------------------------------------------------------------------
# set global chunk options
## this code is only required to set up knitr for reproducible documentation
## it has nothing to do with the exercise itself, please ignore!
opts_knit$set(aliases=c(h = 'fig.height', w = 'fig.width'), out.format='latex', use.highlight=TRUE)
opts_chunk$set(fig.path = 'kgraph/RegioMapPrep', fig.align = 'center', fig.show = 'hold', prompt = FALSE,
               fig.width = 6, fig.height = 6, out.width = '.7\\linewidth', size = 'scriptsize')
# when fig.cap needs to use objects in the current chunk
opts_knit$set(eval.after = 'fig.cap')
## comment=NA, 
## this does not comment out the output
## options to be read by formatR                                       
options(replace.assign = TRUE, width = 70)  # code width, no > prompt
options(warn = -1)  # no warnings
## supress warnings about rgdal being archived.
options("rgdal_show_exportToProj4_warnings"="none")


## ----echo=FALSE, results='hide'--------------------------------------------------------------------------------------------------------
rm(list=ls())


## ----child-data, child='MappingRegionalClimate_prepare.Rnw'----------------------------------------------------------------------------

## ----data-set-parent, echo=FALSE, cache=FALSE------------------------------------------------------------------------------------------
set_parent('MappingRegionalClimate_DataPrep.Rnw')


## ----setwd-mac-no, eval=FALSE----------------------------------------------------------------------------------------------------------
## setwd("~/ds/NEweather")


## ----setwd-win, eval=FALSE-------------------------------------------------------------------------------------------------------------
## setwd("M://css6200/dgr2/datasets/NEweather")


## ----list-files-no, eval=F-------------------------------------------------------------------------------------------------------------
## list.files(".", pattern=".shp$")


## ----list-files-yes, echo=F, eval=T----------------------------------------------------------------------------------------------------
## this is the file specification on the author's system; he organizes datasets into a subdirectory named `ds', and then
##   each data source into its own sub-subdirectory
list.files("./ds/NEweather", pattern=".shp$")


## ----load-packages---------------------------------------------------------------------------------------------------------------------
require(sf)


## ----load-all-usa-no, eval=FALSE-------------------------------------------------------------------------------------------------------
## usa <- st_read(dsn=".", layer="gdd50_7100j",
##                int64_as_string = FALSE, quiet = TRUE)
## print(usa)


## ----load-all-usa-yes, echo=FALSE------------------------------------------------------------------------------------------------------
## this is for the author's system
usa <- st_read(dsn="./ds/NEweather", layer="gdd50_7100j",
               int64_as_string = FALSE, quiet = TRUE)
print(usa)


## ----crs-all-usa-----------------------------------------------------------------------------------------------------------------------
st_crs(usa)
st_bbox(usa)


## ----crs-all-usa-proj4-----------------------------------------------------------------------------------------------------------------
st_crs(usa)$proj4string


## ----str-all-usa-----------------------------------------------------------------------------------------------------------------------
names(usa)


## ----num-to-char-ids-------------------------------------------------------------------------------------------------------------------
str(usa)
usa$STATION_ID <- as.character(usa$STATION_ID)
usa$STATION_NA <- as.character(usa$STATION_NA)
usa$STN_NAME <- as.character(usa$STN_NAME)
usa$OID_<- as.character(usa$OID_)
usa$COOP_ID <- as.character(usa$COOP_ID)
summary(usa)


## ----list-states-----------------------------------------------------------------------------------------------------------------------
length(unique(usa$STATE))
unique(usa$STATE)


## ----select-ne-------------------------------------------------------------------------------------------------------------------------
dim(usa)[1]  # number of rows in the full dataset
ix <- (usa$STATE %in% c("NY","VT","NJ","PA"))  # logical expression
str(ix)      # each row is either TRUE or FALSE
sum(ix)      # number of rows selected
head(which(ix))  # first six row numbers of stations in these states
ne <- usa[ix,]  # select only these states' records
dim(ne)[1]   # number of rows in the restricted dataset


## ----adjust-levels---------------------------------------------------------------------------------------------------------------------
class(ne$STATE)
levels(ne$STATE)
ne$STATE <- factor(ne$STATE)
levels(ne$STATE)


## ----new-bbox--------------------------------------------------------------------------------------------------------------------------
st_bbox(ne)


## ----check-zerodist--------------------------------------------------------------------------------------------------------------------
(d <- sp::zerodist(as_Spatial(ne)))


## ----check-zerodist-usa----------------------------------------------------------------------------------------------------------------
(d <- zerodist(as_Spatial(usa)))
usa[d, ]


## ----adjust-coords---------------------------------------------------------------------------------------------------------------------
st_coordinates(usa)[d[1],]; st_coordinates(usa)[d[4],]
usa[d[1], c("STATE", "STATION_NA")]; usa[d[4], c("STATE", "STATION_NA")]
st_geometry(usa)[d[4]] <- st_sfc(st_point(c(-84.99, 40.42)))
usa[d[1],4:5] <- c(40.42, -84.99)  # this is the order in the dataframe


## ----eval=FALSE------------------------------------------------------------------------------------------------------------------------
## usa <- (usa[-d[4],])


## ----show-proj4------------------------------------------------------------------------------------------------------------------------
st_crs(ne)
st_crs(ne)$proj4string


## ----find-albers-parameters------------------------------------------------------------------------------------------------------------
st_bbox(ne)
round(median(st_bbox(ne)[c("xmin","xmax")]),1)
round(median(st_bbox(ne)[c("ymin","ymax")]),1)
floor(st_bbox(ne)["ymin"]+1)
ceiling(st_bbox(ne)["ymin"]-1)


## ----make-albers-----------------------------------------------------------------------------------------------------------------------
(ne.crs <-
  st_crs("+proj=aea +lat_0=42.5 +lat_1=39
         +lat_2=44 +lon_0=-76 +ellps=WGS84 +units=m"))$proj4string
ne.m <- st_transform(ne, ne.crs)


## ----convert-to-df---------------------------------------------------------------------------------------------------------------------
class(ne.m)
class(ne.coords <- st_coordinates(ne.m))
class(ne.df <- st_drop_geometry(ne.m))
ne.df <- cbind(ne.df, ne.coords)
names(ne.df)
rm(ne.coords)


## ----coord-names-----------------------------------------------------------------------------------------------------------------------
names(ne.df)[which(names(ne.df) == "X")] <- "E"
names(ne.df)[which(names(ne.df) == "Y")] <- "N"
names(ne.df)


## ----get-state-boundaries-no, eval=FALSE-----------------------------------------------------------------------------------------------
## state <- st_read(dsn="./cb_2022_us_state_500k", "cb_2022_us_state_500k"
##                  , quiet = TRUE)
## print(state)


## ----get-state-boundaries-yes, eval=T, echo=F------------------------------------------------------------------------------------------
## for the author's system
state <- st_read(dsn="./ds/NEweather/cb_2022_us_state_500k", 
                 layer = "cb_2022_us_state_500k", quiet = TRUE)
print(state)


## ----limit-to-ne-----------------------------------------------------------------------------------------------------------------------
dim(state)
state.ne <- state[state$STUSPS %in% c('NY','NJ', 'PA', 'VT'),]
dim(state.ne)
st_bbox(state.ne)


## ----state-CRS-------------------------------------------------------------------------------------------------------------------------
state.ne.m <- st_transform(state.ne, st_crs(ne.m))
str(state.ne.m)
st_bbox(state.ne.m)


## ----load-4km-grid-a-no, eval=FALSE----------------------------------------------------------------------------------------------------
## require(terra)
## dem.ne.m <- terra::rast("./dem_ne_4km.tif")
## class(dem.ne.m)
## str(dem.ne.m)
## summary(values(dem.ne.m))


## ----load-4km-grid-a-yes, eval=T, echo=F-----------------------------------------------------------------------------------------------
## for the author's system
require(terra)
dem.ne.m <- terra::rast("./ds/NEweather/dem_ne_4km.tif")
class(dem.ne.m)
summary(values(dem.ne.m))


## ----remove-negatives-4km-grid---------------------------------------------------------------------------------------------------------
values(dem.ne.m) <- pmax(0, values(dem.ne.m))
summary(values(dem.ne.m))


## --------------------------------------------------------------------------------------------------------------------------------------
terra::image(dem.ne.m, col = topo.colors(64))


## ----mask-to-states--------------------------------------------------------------------------------------------------------------------
dem.ne4.m <- terra::mask(dem.ne.m, state.ne.m)
terra::image(dem.ne4.m)


## ----convert-4km-sf--------------------------------------------------------------------------------------------------------------------
# convert to `sf` with coordinate geometry
class(dem.ne.m)
dim(dem.ne.m)
v <- as.points(dem.ne.m)
class(v)
dem.ne.m.sf <- st_as_sf(v)
plot(dem.ne.m.sf, main = "4km DEM, feet a.s.l.")
dim(dem.ne.m.sf)
class(dem.ne.m.sf)
names(dem.ne.m.sf)[1] <- "ELEVATION_" # to match with dataframe
summary(dem.ne.m.sf)
rm(v)


## ----convert-4km-df--------------------------------------------------------------------------------------------------------------------
# convert to `sf` with coordinate geometry
dem.ne.m.df <- as.data.frame(dem.ne.m, xy = TRUE)
dem.ne4.m.df <- as.data.frame(dem.ne4.m, xy = TRUE)
str(dem.ne.m.df)
# make the field names compatible with the point dataset
names(dem.ne.m.df) <- c("E", "N", "ELEVATION_")
names(dem.ne4.m.df) <- c("E", "N", "ELEVATION_")


## ----load.lakes-no, eval=FALSE---------------------------------------------------------------------------------------------------------
## lakes <- st_read("./LakesOntarioErie.gpkg")

## ----load.lakes-yes, echo=FALSE--------------------------------------------------------------------------------------------------------
lakes <- st_read("./ds/NEweather/LakesOntarioErie.gpkg")


## ----load.lakes-show, fig.height=4, fig.width=4----------------------------------------------------------------------------------------
st_crs(lakes)$proj4string
lakes <- st_transform(lakes, crs=st_crs(ne.m))
plot(lakes["name_fr"], main = "Great Lakes bordering NY and PA")


## ----show.dist.lakes.stations, fig.width=6, fig.height=6-------------------------------------------------------------------------------
dd <- sf::st_distance(lakes, ne.m)
dim(dd) # distance to each lake
ne.m$dist.lakes <- apply(dd, 2, min)
summary(ne.m$dist.lakes)
ne.df$dist.lakes <- ne.m$dist.lakes
ggplot(ne.m) +
  geom_sf(data=lakes) + 
  geom_sf(data=state.ne.m) + 
  geom_sf(aes(color=dist.lakes))


## ----plot.dist.lakes.stations, fig.width=6, fig.height=4-------------------------------------------------------------------------------
plot(ne.m$dist.lakes, ne.m$ANN_GDD50, col=ne.m$STATE, pch=20, 
     xlab="Distance from Great Lakes, m", ylab="Annual GDD50")
legend("bottomright", legend=levels(ne.df$STATE), pch=20, col=1:4)


## ----show.dist.lakes.grid, fig.width=6, fig.height=6-----------------------------------------------------------------------------------
dd <- st_distance(lakes, dem.ne.m.sf)
dem.ne.m.sf$dist.lakes <- apply(dd, 2, min)
dem.ne.m.df$dist.lakes <- dem.ne.m.sf$dist.lakes
plot(dem.ne.m.sf["dist.lakes"], 
     main = "Distance to Great Lakes, m")


## ----load-coast-no,eval=FALSE----------------------------------------------------------------------------------------------------------
## coast <- st_read("./AtlanticCoastLine.gpkg", quiet = TRUE)


## ----load-coast-yes,echo=FALSE---------------------------------------------------------------------------------------------------------
coast <- st_read("./ds/NEweather/AtlanticCoastLine.gpkg", quiet = TRUE)


## ----load.coast, fig.height=6, fig.width=6---------------------------------------------------------------------------------------------
names(coast)
coast <- st_transform(coast, ne.crs)
plot(coast["featurecla"], key.pos = 1,
     col = "black",
     main = "Atlantic Ocean coastline bordering NY and NJ")


## ----show.dist.coast.stations, fig.width=6, fig.height=6-------------------------------------------------------------------------------
dd <- st_distance(coast, ne.m)
ne.m$dist.coast <- apply(dd, 2, min)
ne.df$dist.coast <- ne.m$dist.coast 
ggplot(ne.m) +
  geom_sf(data=lakes) + 
  geom_sf(data=state.ne.m) + 
  geom_sf(aes(color=dist.coast))


## ----plot.dist.coast.stations, fig.width=6, fig.height=4-------------------------------------------------------------------------------
plot(ne.m$dist.coast, ne.m$ANN_GDD50, col=ne.m$STATE, pch=20,
     xlab="Distance from coast, m", ylab="Annual GDD50")
legend("bottomleft", legend=levels(ne.df$STATE), pch=20, col=1:4)


## ----show.dist.coast.grid, fig.width=6, fig.height=6-----------------------------------------------------------------------------------
dd <- st_distance(coast, dem.ne.m.sf)
dem.ne.m.sf$dist.coast <- apply(dd, 2, min)
dem.ne.m.df$dist.coast <- dem.ne.m.sf$dist.coast
plot(dem.ne.m.sf["dist.coast"],
     main = "Distance to Atlantic Ocean coastline, m")


## ----import.mrvbf-no, eval=FALSE-------------------------------------------------------------------------------------------------------
## mrvbf <- rast("./mrvbf_ne_4km.sdat")
## print(mrvbf)


## ----import.mrvbf-yes, echo=FALSE------------------------------------------------------------------------------------------------------
mrvbf <- rast("./ds/NEweather/mrvbf_ne_4km.sdat")
print(mrvbf)


## ----import.mrvbf-plot, fig.height=7, fig.width=7, fig.cap="Multiresolution Index of Valley Bottom Flatness"---------------------------
plot(mrvbf, asp=1, col=bpy.colors(32), main = "MRVBF")
st_crs(mrvbf)$proj4string == st_crs(ne.m)$proj4string


## ----import.tri.3-no, eval=FALSE-------------------------------------------------------------------------------------------------------
## tri3 <- rast("./dem_ne_4km_TRI3_IDW2.sdat")
## print(tri3)


## ----import.tri.3-yes, echo=FALSE------------------------------------------------------------------------------------------------------
tri3 <- rast("./ds/NEweather/dem_ne_4km_TRI3_IDW2.sdat")
print(tri3)


## ----import.tri.3-plot, fig.height=7, fig.width=7, fig.cap="Terrain Ruggedness Index"--------------------------------------------------
# replace NA with 0's
length(ix <- which(is.na(values(tri3))))
if (length(ix) > 0) { values(tri3)[ix] <- 0 }
plot(tri3, asp=1, col=topo.colors(16), main = "TRI")
st_crs(tri3)$proj4string == st_crs(ne.m)$proj4string


## ----import.pop.dens-no, fig.height=6, fig.width=6, eval=FALSE-------------------------------------------------------------------------
## pop2pt5.world <- log10(rast(
##   "./gpw_v4_population_density_rev11_2000_2pt5_min.tif")+1)
## pop15.world <- log10(rast(
##   "./gpw_v4_population_density_rev11_2000_15_min.tif")+1)
## print(pop2pt5.world)
## print(pop15.world)


## ----import.pop.dens, fig.height=6, fig.width=6, echo=FALSE----------------------------------------------------------------------------
pop2pt5.world <- log10(rast(
  "./ds/NEweather/gpw_v4_population_density_rev11_2000_2pt5_min.tif")+1)
pop15.world <- log10(rast(
  "./ds/NEweather/gpw_v4_population_density_rev11_2000_15_min.tif")+1)
print(pop2pt5.world)
print(pop15.world)


## ----crop.pop.dens---------------------------------------------------------------------------------------------------------------------
print(st_bbox(ne))
pop15.ne <- terra::crop(pop15.world, st_bbox(ne))
pop2pt5.ne <- terra::crop(pop2pt5.world, st_bbox(ne))
names(pop15.ne) <- "pop15"
names(pop2pt5.ne) <- "pop2pt5"
print(pop15.ne)
print(pop2pt5.ne)


## ----transform.pop.dens----------------------------------------------------------------------------------------------------------------
pop15.ne.m <- project(pop15.ne, ne.crs$proj4string)
print(pop15.ne.m) # resolution approx. 23.3 x 23.3  km
pop2pt5.ne.m <- project(pop2pt5.ne, ne.crs$proj4string)
print(pop2pt5.ne.m)  # approx. 3.9 x 3.9 km


## ----show.pop.dens, out.width="0.8\\linewidth", fig.height=6, fig.width=6, fig.cap = "Log10(Population density) per square km"---------
ymax <- max(values(pop15.ne), values(pop2pt5.ne), na.rm = TRUE)
plot(pop15.ne.m, main = "15' cells",
     col=rev(heat.colors(64)), range = c(0, ymax))
plot(pop2pt5.ne.m, main = "2.5' cells",
     col=rev(heat.colors(64)), range = c(0, ymax))


## ----extract-covars-stations-----------------------------------------------------------------------------------------------------------
ix.vals <- terra::extract(mrvbf, vect(ne.m))
eval(parse(text=paste("ne.m$mrvbf <- ix.vals$",
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("ne.df$mrvbf <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
ix.vals <- terra::extract(tri3, vect(ne.m))
eval(parse(text=paste("ne.m$tri3 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("ne.df$tri3 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
ix.vals <- terra::extract(pop15.ne.m, vect(ne.m))
eval(parse(text=paste("ne.m$pop15 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("ne.df$pop15 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
ix.vals <- terra::extract(pop2pt5.ne.m, vect(ne.m))
eval(parse(text=paste("ne.m$pop2pt5 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("ne.df$pop2pt5 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
rm(ix.vals)


## ----remove-na-pop---------------------------------------------------------------------------------------------------------------------
length(ix <- which(is.na(ne.m$pop15)))
if (length(ix) > 0) {
  ne.m[ix,"pop15"] <- 0; ne.df[ix,"pop15"] <- 0 
}
length(ix <- which(is.na(ne.m$pop2pt5)))
if (length(ix) > 0) {
  ne.m[ix,"pop2pt5"] <- 0; ne.df[ix,"pop2pt5"] <- 0
}


## ----add-covars-ds---------------------------------------------------------------------------------------------------------------------
ix.vals <- terra::extract(mrvbf, vect(dem.ne.m.sf))
eval(parse(text=paste("dem.ne.m.sf$mrvbf <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("dem.ne.m.df$mrvbf <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
ix.vals <- terra::extract(tri3, vect(dem.ne.m.sf))
eval(parse(text=paste("dem.ne.m.sf$tri3 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("dem.ne.m.df$tri3 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
ix.vals <- terra::extract(pop15.ne.m, vect(dem.ne.m.sf))
eval(parse(text=paste("dem.ne.m.sf$pop15 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("dem.ne.m.df$pop15 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
ix.vals <- terra::extract(pop2pt5.ne.m, vect(dem.ne.m.sf))
eval(parse(text=paste("dem.ne.m.sf$pop2pt5 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
eval(parse(text=paste("dem.ne.m.df$pop2pt5 <- ix.vals$", 
                      names(ix.vals)[2], sep="")))
summary(dem.ne.m.sf)
rm(ix.vals)


## ----remove-na-pop-grid----------------------------------------------------------------------------------------------------------------
length(ix <- which(is.na(dem.ne.m.df$pop15)))
if (length(ix) > 0) {
  dem.ne.m.sf[ix,"pop15"] <- 0; dem.ne.m.df[ix,"pop15"] <- 0 
}
length(ix <- which(is.na(dem.ne.m.df$pop2pt5)))
if (length(ix) > 0) {
  dem.ne.m.sf[ix,"pop2pt5"] <- 0; dem.ne.m.df[ix,"pop2pt5"] <- 0
}


## ----save-extended-dataset-no, eval=FALSE----------------------------------------------------------------------------------------------
## save(ne, ne.m, ne.df, state.ne, state.ne.m,
##      dem.ne.m.sf, dem.ne.m.df, dem.ne4.m, ne.crs,
##      file="./StationsDEM_covariates.RData")


## ----save-extended-dataset-yes, echo=FALSE---------------------------------------------------------------------------------------------
save(ne, ne.m, ne.df, state.ne, state.ne.m, 
     dem.ne.m.sf, dem.ne.m.df, dem.ne4.m, ne.crs,
     file="./ds/NEweather/StationsDEM_covariates.RData")


