Several cultivars from literature within Zambia and Malawi were tested and compared to observed production information in a bid to select a dominant variety for maize yield prediction.
Load Zambia’s ministry of agriculture maize forecasts data as reference.
Load cultivars data. Basically, 12 maize cultivars were tested between 2010–2022 to
## [1] "Province" "District" "planted_ha" "harvested_ha"
## [5] "production_MT" "yield_MT_ha" "Year"
Convert District and province names to upper case.
## [1] "CENTRAL" "COPPERBELT" "EASTERN" "LUAPULA"
## [5] "LUSAKA" "MUCHINGA" "NORTH-WESTERN" "NORTHERN"
## [9] "SOUTHERN" "WESTERN"
x |
---|
CHADIZA |
CHAMA |
CHAVUMA |
CHIBOMBO |
CHIENGE |
CHILILABOMBWE |
Load information on different cultivars production potential as predicted by RHEA.
files <- list.files(paste0(root, "Cultivars_Selection/Zambia/"), full.names = T)
f <- read.csv(files[1], stringsAsFactors = FALSE)
df_list <- list()
for(i in 1:length(files)){
f <- read.csv(files[i], stringsAsFactors = FALSE)
f$cultivar <- sub("\\_.*", "", basename(files[i]))
temp <- sub("_v2", "", tools::file_path_sans_ext(basename(files[i])))
f$Year <- sub(".*\\_|.csv.*", "", temp)
df_list[[i]] <- f
}
df <- do.call(rbind, df_list)
df$cname[df$cname=="CHIENGI"] <- "CHIENGE"
df$cname[df$cname=="SHANGOMBO"] <- "SHANG'OMBO"
df$gwad <- df$gwad/1000
Check missing names.
c <- sort(unique(ref$District))
c[!c %in% sort(unique(df$cname))]
## [1] "CHILILABOMBWE" "IKELENGE" "KALOMO" "LIVINGSTONE"
## [5] "LUFWANYAMA" "MAFINGA"
Merge RHEAS and observed MOA data
rh_q <- aggregate(gwad~cname+cultivar+Year, data=df, FUN = 'quantile', probs=c(0.25, 0.5, .75, .95), na.rm=T)
rh_q <- do.call(data.frame, rh_q)
names(rh_q) <- gsub("\\.","",names(rh_q))
rh_m <-aggregate(gwad~cname+cultivar+Year, data=df, FUN = mean, na.rm=T) #aggregate(df[,"gwad"], df[,c("cname","cultivar", "Year")], mean, na.rm=T)
names(rh_m)[4] <- "gwadmean"
rh <- rh_q
rh$gwadmean <- rh_m$gwadmean
names(rh)[1] <- "District"
dff <- merge(ref,rh, by=c("District", "Year"))
We can use the Root Mean Square Error (RMSE) and mean absolute percentage error (MAPE) to evaluate the models accuracy. RMSE is given as:
\[ \text{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^n y-\widehat{y}}, \] where \(\widehat{y}\) and \(y\) are predicted yields and observed yields respectively while n is the number of fitted points.
What about Relative root mean square error (RRMSE):
\[ \text{RRMSE} = \frac{\sqrt{\frac{1}{n} \sum_{i=1}^n y-\widehat{y}}}{\frac{1}{n} \sum_{i=1}^n y}, \]
According to Li et al. 2013, the performance of the model is excellent when RRMSE < 10%; good if 10% < RRMSE < 20%; fair if 20% < RRMSE < 30%; and poor if RRMSE ≥ 30%.
Here validation measures are computed for each cultivar at distruict level based on historically observed maize yields.
val <- data.frame()
Cultivar <- sort(unique(dff$cultivar))
dists <- sort(unique(dff$District))
for(i in 1:length(Cultivar)){
for(j in 1:length(dists)){
#print(paste0("cultivar",i, " in ", dists[j]))
temp <- subset(dff, cultivar==paste0("cultivar",i) & District==dists[j])
rmse_m <- rmse(temp$gwadmean-temp$yield_MT_ha)
rrmse_m <- rrmse(temp$gwadmean, temp$yield_MT_ha)
rmse25 <- rmse(temp$gwad25-temp$yield_MT_ha)
rrmse25 <- rrmse(temp$gwad25, temp$yield_MT_ha)
rmse50 <- rmse(temp$gwad50-temp$yield_MT_ha)
rrmse50 <- rrmse(temp$gwad50, temp$yield_MT_ha)
rmse75 <- rmse(temp$gwad75-temp$yield_MT_ha)
rrmse75 <- rrmse(temp$gwad75, temp$yield_MT_ha)
rmse95 <- rmse(temp$gwad95-temp$yield_MT_ha)
rrmse95 <- rrmse(temp$gwad95, temp$yield_MT_ha)
output <- c(i, dists[j], rmse_m, rrmse_m, rmse25, rrmse25, rmse50, rrmse50, rmse75, rrmse75, rmse95, rrmse95)
val <- rbind(val, output)
}
}
colnames(val) <-c("Cultivar", "District", "RMSE_mean", "RRMSE_mean", "RMSE25", "RRMSE25", "RMSE50", "RRMSE50", "RMSE75", "RRMSE75", "RMSE95", "RRMSE95")
#knitr::kable(val,caption = "Cultivar assessement based on ensemble mean, 25%. 50%, 75% and 95% percentiles respectively.")
Find cultivars with least RMSE in each district.
vals <- data.frame()
for(i in 1:length(dists)){
temp <- subset(val, District==dists[i])
output <- c(temp$Cultivar[which.min(temp$RMSE_mean)], dists[i], min(temp$RMSE_mean))
vals <- rbind(vals, output)
}
colnames(vals) <-c("Cultivar", "District", "RMSE_mean")
vals$RMSE_mean <- as.numeric(vals$RMSE_mean)
knitr::kable(vals, caption = "Cultivar assessement based on ensemble RMSE of average yields from RHEAS ensemble.")
Cultivar | District | RMSE_mean |
---|---|---|
6 | CHADIZA | 0.6064343 |
6 | CHAMA | 0.5777490 |
6 | CHAVUMA | 0.5596623 |
6 | CHIBOMBO | 0.7035040 |
6 | CHIENGE | 0.4663884 |
6 | CHILUBI | 0.3677825 |
6 | CHINGOLA | 0.9324916 |
7 | CHINSALI | 0.3867010 |
6 | CHIPATA | 0.5365719 |
6 | CHOMA | 1.0944982 |
6 | CHONGWE | 0.6050852 |
6 | GWEMBE | 1.3816591 |
7 | ISOKA | 0.3886291 |
6 | ITEZHI-TEZHI | 0.9713836 |
6 | KABOMPO | 0.3189672 |
6 | KABWE | 0.7519922 |
6 | KAFUE | 0.7180780 |
6 | KALABO | 1.9247793 |
6 | KALULUSHI | 0.4937504 |
6 | KAOMA | 0.9382855 |
6 | KAPIRI MPOSHI | 0.4170042 |
6 | KAPUTA | 0.9510975 |
7 | KASAMA | 0.4650728 |
6 | KASEMPA | 0.4986089 |
6 | KATETE | 1.1148522 |
7 | KAWAMBWA | 0.4455134 |
6 | KAZUNGULA | 1.4945964 |
6 | KITWE | 0.4530231 |
6 | LUANGWA | 2.0208272 |
6 | LUANSHYA | 0.3022950 |
6 | LUKULU | 1.6053645 |
6 | LUNDAZI | 0.4129054 |
7 | LUSAKA | 1.9083075 |
7 | LUWINGU | 0.3803144 |
6 | MAMBWE | 1.2494377 |
6 | MANSA | 0.5109675 |
6 | MASAITI | 0.4684266 |
6 | MAZABUKA | 0.7795253 |
6 | MBALA | 0.4668374 |
6 | MILENGE | 0.4092623 |
7 | MKUSHI | 0.5582931 |
6 | MONGU | 1.8196755 |
6 | MONZE | 1.0015455 |
7 | MPIKA | 0.3654331 |
6 | MPONGWE | 0.5162312 |
6 | MPOROKOSO | 0.4565665 |
6 | MPULUNGU | 0.6963483 |
7 | MUFULIRA | 0.4446564 |
6 | MUFUMBWE | 0.4054934 |
6 | MUMBWA | 0.9861472 |
7 | MUNGWI | 0.3373367 |
6 | MWENSE | 0.3080217 |
7 | MWINILUNGA | 0.3985817 |
6 | NAKONDE | 0.5181622 |
6 | NAMWALA | 1.3721491 |
6 | NCHELENGE | 0.4052449 |
6 | NDOLA | 0.4791634 |
6 | NYIMBA | 0.9451651 |
6 | PETAUKE | 0.8828479 |
6 | SAMFYA | 0.5795937 |
6 | SENANGA | 2.0153028 |
6 | SERENJE | 0.4536121 |
6 | SESHEKE | 1.9522623 |
6 | SHANG’OMBO | 2.0119840 |
6 | SIAVONGA | 1.6581150 |
6 | SINAZONGWE | 1.6579345 |
6 | SOLWEZI | 0.8111611 |
6 | ZAMBEZI | 0.7376848 |
knitr::kable(sort(unique(vals$Cultivar)), caption = "Revelevant cultivars for Zambia.")
x |
---|
6 |
7 |
Visualize RMSE spatial distribution.
Cultivars 6 and 7 seem to show lower RMSE. These cultivars corresponds to the following names.
No. | Cultivar Name |
---|---|
1 | SC627 |
2 | SC403 |
3 | PAN53 |
4 | DKC8053 |
5 | PHB30G1 |
6 | DKC8033 |
7 | SC627 |
8 | ZMS 606 |
9 | PHB 30G19 |
10 | PHB 30B50 |
11 | SC513 |
12 | MRI624 |