Load the packages and data we will use.
rm(list=ls(all=TRUE))
unlink(".RData")
pacman::p_load(sp, rgdal, rgeos, tmap, raster)
shp <- readOGR("./data/soil_data_CIAT.shp")
plot(shp, pch = 19, col = "red")
View the coordinate reference system and extent of the shapefile.
crs(shp)
## CRS arguments:
## +proj=utm +zone=36 +datum=WGS84 +units=m +no_defs
extent(shp)
## class : Extent
## xmin : 705125
## xmax : 720125
## ymin : 72875
## ymax : 83375
Load the necessary packages and convert the data to a point pattern (ppp) object.
library(spatstat) # Used for the dirichlet tessellation function
library(maptools) # Used for conversion from SPDF to ppp
library(raster) # Used to clip out Thiessen polygons
# Create a tessellated surface
dat.pp <- as(dirichlet(as.ppp(shp)), "SpatialPolygons")
Assign the projection of the original data to the tessellated surface.
proj4string(dat.pp) <- crs(shp)
plot(dat.pp, pch = 19)
The polygons extend beyond the sampled area, so load the study boundary and clip the Thiessen polygons to it.
bdy <- readOGR("./data/soil_boundary.shp")
dat.pp <- intersect(dat.pp, bdy)
plot(dat.pp, pch = 19)
Assign the clay content of the sample points to the Thiessen polygons.
int.Z <- over(dat.pp, shp[,"Clay"], fn=mean, na.rm=TRUE)
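Since each Thiessen polygon should contain exactly one sample point, a quick sanity check (our own addition) is that the number of polygons matches the number of points.
# Sanity check: one clay value per polygon (assumes no points were lost when clipping)
length(dat.pp) == nrow(shp)
summary(int.Z$Clay)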
Map the Thiessen polygons and the sampled clay points.
tm_shape(dat.pp) + tm_borders(alpha=.5, col = "black") +
tm_shape(shp) + tm_dots(col = "red", scale = 2.5, shape = 16)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
Create a SpatialPolygonsDataFrame object with Clay content,
thiessen <- SpatialPolygonsDataFrame(dat.pp, int.Z)
then visualize our point data using the newly formed Thiessen polygons.
tm_shape(thiessen) + tm_fill(col = "Clay", style = "quantile", palette = "Reds", title = "Clay content") + tm_borders(alpha=.3, col = "black") +
tm_shape(shp) + tm_dots(col = "black", scale = 2.5) +
tm_layout(legend.position = c("left", "top"), legend.text.size = 1.05, legend.title.size = 1.2, frame = FALSE)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
Load the necessary packages and define a regular sample grid within the study boundary.
unloadNamespace("spatstat")
pacman::p_load(gstat)
grid <- spsample(bdy, type = 'regular', n = 10000)
## Warning in proj4string(obj): CRS object has comment, which is lost in output
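As a quick check (our own addition), plot the grid points over the boundary to confirm coverage.
# Quick visual check of the sampling grid
plot(bdy)
points(grid, pch = ".", col = "grey40")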
Perform the clay content interpolation using the idw() function from the gstat package.
idw <- idw(shp$Clay ~ 1, shp, newdata= grid)
## [inverse distance weighted interpolation]
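For intuition, idw() predicts each grid point as a distance-weighted average of all observations, with weights proportional to 1/d^idp (the default power is 2). A minimal by-hand sketch for the first grid point (for illustration only; the variable names are our own):
# IDW by hand for the first grid point: prediction = sum(w * z) / sum(w), with w = 1/d^p
p <- 2                                                                 # default power used by idw()
d <- spDists(coordinates(grid)[1, , drop = FALSE], coordinates(shp))  # distances to all sample points
w <- 1 / as.vector(d)^p                                               # inverse distance weights
sum(w * shp$Clay) / sum(w)                                            # should be close to idw$var1.pred[1]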
Before visualizing the IDW result, we transform it into a data frame and rename the column headers.
idw.output = as.data.frame(idw)
names(idw.output)[1:3] <- c("long", "lat", "prediction")
Convert the output into a raster for visualization.
pacman::p_load(raster)
# create spatial points data frame
spg <- idw.output
coordinates(spg) <- ~ long + lat
# coerce to SpatialPixelsDataFrame
gridded(spg) <- TRUE
# coerce to raster
raster_idw <- raster(spg)
# set the projection to match the source data (UTM zone 36, WGS84)
projection(raster_idw) <- crs(shp)
We can quickly plot the raster to check that it looks reasonable.
plot(raster_idw, main= "Clay content")
3D plots can also be used to visualize the predictions, using the raster::persp() function.
persp(raster_idw, col=terrain.colors(100, rev = TRUE))
# this package lets us create interactive 3d visualisations
pacman::p_load(rgl)
# we need to first convert the raster to a matrix object type
idw2 <- as.matrix(raster_idw)
persp3d(idw2, col = "red")
Now we are ready to plot the raster. We will use the tm_raster() function from the tmap package. The instructions below create a smoothed surface: we use 100 colour breaks and turn the legend off, which produces a smooth colour gradient. You can experiment by changing the breaks (n) to 7 and turning the legend back on (legend.show) to make the predicted values easier to interpret; an example follows the map below.
pacman::p_load(tmap)
tm_shape(raster_idw) + tm_raster("prediction", style = "quantile", n = 100, palette = "Reds", legend.show = FALSE) +
# Overlay the boundary to provide some orientation
tm_shape(bdy) + tm_borders(alpha=1, col = "blue")
Overlay the original clay observations, as this shows how the IDW surface relates to the sampled values.
tm_shape(raster_idw) + tm_raster("prediction", style = "quantile", n = 100, palette = "-Reds",legend.show = FALSE) +
tm_shape(bdy) + tm_borders(alpha=1, col="black") +
tm_shape(shp) + tm_bubbles(size = "Clay", col = "Clay", palette = "-Purples", contrast=1, style = "quantile", legend.size.show = FALSE, title.col = "Clay content (%)") +
tm_layout(legend.position = c("left", "top"), legend.text.size = 1.1, legend.title.size = 1.4, frame = FALSE, legend.bg.color = "white", legend.bg.alpha = 0.5)
The choice of the power parameter (idp) can be subjective. To fine-tune it, you can perform a leave-one-out cross-validation routine to measure the error in the interpolated values.
# Leave-one-out cross-validation: predict each point from all of the others
IDW.out <- vector(length = length(shp))
for (i in 1:length(shp)) {
  IDW.out[i] <- idw(Clay ~ 1, shp[-i, ], shp[i, ], idp = 2.0)$var1.pred
}
Plot the observed against the predicted values in a scatterplot to evaluate accuracy. If the fit is not sufficient, you can change the idp parameter and evaluate again (a sketch for comparing several values follows the RMSE computation below).
OP <- par(pty="s", mar=c(4,3,0,0))
plot(IDW.out ~ shp$Clay, asp=1, xlab="Observed", ylab="Predicted", pch=16,
col=rgb(0,0,0,0.5))
abline(lm(IDW.out ~ shp$Clay), col = "red", lwd = 2, lty = 2)
abline(0,1)
par(OP)
The root mean square error (RMSE) of the cross-validated predictions can then be computed from IDW.out.
sqrt(sum((IDW.out - shp$Clay)^2) / length(shp$Clay))
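To compare several candidate powers directly, the leave-one-out routine can be wrapped in a loop and the RMSE computed for each. A minimal sketch (the candidate idp values and variable names are our own choices):
# Leave-one-out RMSE for a few candidate powers (smaller is better)
idp.values <- c(1, 1.5, 2, 2.5, 3)
rmse.by.idp <- sapply(idp.values, function(p) {
  pred <- sapply(1:length(shp), function(i) {
    idw(Clay ~ 1, shp[-i, ], shp[i, ], idp = p)$var1.pred
  })
  sqrt(mean((pred - shp$Clay)^2))
})
data.frame(idp = idp.values, rmse = rmse.by.idp)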
We can also create a 95% confidence interval map of the interpolation model to illustrate the uncertainty around it. Here we’ll use the jackknife technique to estimate a confidence interval at each unsampled point. First, create an interpolation surface.
img <- gstat::idw(Clay~1, shp, newdata=grid, idp=2.0)
## [inverse distance weighted interpolation]
n <- length(shp)
Zi <- matrix(nrow = length(img$var1.pred), ncol = n)
Leave out one point at a time and repeat the interpolation (n times in total, once for each point).
st <- stack()
for (i in 1:n){
  Z1 <- gstat::idw(Clay~1, shp[-i,], newdata=grid, idp=2.0)
  # coerce to SpatialPixelsDataFrame
  gridded(Z1) <- TRUE
  # coerce to raster and add it to the stack
  st <- addLayer(st, raster(Z1, layer=1))
  # Calculate the pseudo-value Z at j
  Zi[,i] <- n * img$var1.pred - (n-1) * Z1$var1.pred
}
Compute the jackknife estimator of the parameter Z at each location j.
Zj <- as.matrix(apply(Zi, 1, sum, na.rm=T) / n )
# Compute (Zi* - Zj)^2
c1 <- apply(Zi,2,'-',Zj) # Compute the difference
c1 <- apply(c1^2, 1, sum, na.rm=T ) # Sum the square of the difference
# Compute the 95% confidence interval (1.96 times the jackknife standard error)
CI <- sqrt( 1/(n*(n-1)) * c1) * 1.96
# Create (CI / interpolated value) raster
gridded(img) <- TRUE
img.sig <- img
img.sig$v <- CI /img$var1.pred
Clip the confidence raster to the boundary and plot the map.
r <- raster(img.sig, layer="v")
r.m <- mask(r, bdy)
# Plot the map
tm_shape(r.m) + tm_raster(n = 10, style = "kmeans", palette = "-Blues",title="95% confidence interval \n(Clay [%])") +
tm_shape(shp) + tm_dots(size=0.2) +
tm_legend(legend.outside=TRUE)