Introduction

Several cultivars from literature within Zambia and Malawi were tested and compared to observed production information in a bid to select a dominant variety for maize yield prediction.

Reference data

Load Zambia’s ministry of agriculture maize forecasts data as reference.

Load cultivars data. Basically, 12 maize cultivars were tested between 2010–2022 to

## [1] "Province"      "District"      "planted_ha"    "harvested_ha" 
## [5] "production_MT" "yield_MT_ha"   "Year"

Convert District and province names to upper case.

##  [1] "CENTRAL"       "COPPERBELT"    "EASTERN"       "LUAPULA"      
##  [5] "LUSAKA"        "MUCHINGA"      "NORTH-WESTERN" "NORTHERN"     
##  [9] "SOUTHERN"      "WESTERN"
Districts
x
CHADIZA
CHAMA
CHAVUMA
CHIBOMBO
CHIENGE
CHILILABOMBWE

Cultivars production

Load information on different cultivars production potential as predicted by RHEA.

files <- list.files(paste0(root, "Cultivars_Selection/Zambia/"), full.names = T)
f <- read.csv(files[1], stringsAsFactors =  FALSE)
df_list <- list()
for(i in 1:length(files)){
  f <- read.csv(files[i], stringsAsFactors =  FALSE)
  f$cultivar <- sub("\\_.*", "", basename(files[i]))
  temp <- sub("_v2", "", tools::file_path_sans_ext(basename(files[i])))
  f$Year <- sub(".*\\_|.csv.*", "", temp)
  df_list[[i]] <- f
}

df <- do.call(rbind, df_list)
df$cname[df$cname=="CHIENGI"] <- "CHIENGE"
df$cname[df$cname=="SHANGOMBO"] <- "SHANG'OMBO"
df$gwad <- df$gwad/1000

Check missing names.

c <- sort(unique(ref$District))
c[!c %in% sort(unique(df$cname))]
## [1] "CHILILABOMBWE" "IKELENGE"      "KALOMO"        "LIVINGSTONE"  
## [5] "LUFWANYAMA"    "MAFINGA"

Merge RHEAS and observed MOA data

rh_q <- aggregate(gwad~cname+cultivar+Year, data=df, FUN = 'quantile', probs=c(0.25, 0.5, .75, .95), na.rm=T)
rh_q <- do.call(data.frame, rh_q)
names(rh_q) <- gsub("\\.","",names(rh_q))
rh_m <-aggregate(gwad~cname+cultivar+Year, data=df, FUN = mean, na.rm=T) #aggregate(df[,"gwad"], df[,c("cname","cultivar", "Year")], mean, na.rm=T)
names(rh_m)[4] <- "gwadmean"
rh <- rh_q
rh$gwadmean <- rh_m$gwadmean
names(rh)[1] <- "District"
dff <- merge(ref,rh, by=c("District", "Year"))

Validation

RMSE

We can use the Root Mean Square Error (RMSE) and mean absolute percentage error (MAPE) to evaluate the models accuracy. RMSE is given as:

\[ \text{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^n y-\widehat{y}}, \] where \(\widehat{y}\) and \(y\) are predicted yields and observed yields respectively while n is the number of fitted points.

RRMSE

What about Relative root mean square error (RRMSE):

\[ \text{RRMSE} = \frac{\sqrt{\frac{1}{n} \sum_{i=1}^n y-\widehat{y}}}{\frac{1}{n} \sum_{i=1}^n y}, \]

According to Li et al. 2013, the performance of the model is excellent when RRMSE < 10%; good if 10% < RRMSE < 20%; fair if 20% < RRMSE < 30%; and poor if RRMSE ≥ 30%.

Compute validation measures

Here validation measures are computed for each cultivar at distruict level based on historically observed maize yields.

val <- data.frame()
Cultivar <- sort(unique(dff$cultivar))
dists <- sort(unique(dff$District))
for(i in 1:length(Cultivar)){
  for(j in 1:length(dists)){
    #print(paste0("cultivar",i, " in ", dists[j]))
    temp <- subset(dff, cultivar==paste0("cultivar",i) & District==dists[j])
    rmse_m <- rmse(temp$gwadmean-temp$yield_MT_ha)
    rrmse_m <- rrmse(temp$gwadmean, temp$yield_MT_ha)
    rmse25 <- rmse(temp$gwad25-temp$yield_MT_ha)
    rrmse25 <- rrmse(temp$gwad25, temp$yield_MT_ha)
    rmse50 <- rmse(temp$gwad50-temp$yield_MT_ha)
    rrmse50 <- rrmse(temp$gwad50, temp$yield_MT_ha)
    rmse75 <- rmse(temp$gwad75-temp$yield_MT_ha)
    rrmse75 <- rrmse(temp$gwad75, temp$yield_MT_ha)
    rmse95 <- rmse(temp$gwad95-temp$yield_MT_ha)
    rrmse95 <- rrmse(temp$gwad95, temp$yield_MT_ha)
    output <- c(i, dists[j], rmse_m, rrmse_m, rmse25, rrmse25, rmse50, rrmse50, rmse75, rrmse75, rmse95, rrmse95)
    val <-  rbind(val, output)
  }
  
}


colnames(val) <-c("Cultivar", "District", "RMSE_mean", "RRMSE_mean", "RMSE25", "RRMSE25", "RMSE50", "RRMSE50", "RMSE75", "RRMSE75", "RMSE95", "RRMSE95")

#knitr::kable(val,caption = "Cultivar assessement based on ensemble mean, 25%. 50%, 75% and 95% percentiles respectively.")

Find cultivars with least RMSE in each district.

vals <- data.frame()
for(i in 1:length(dists)){
  temp <- subset(val, District==dists[i])
  output <- c(temp$Cultivar[which.min(temp$RMSE_mean)], dists[i], min(temp$RMSE_mean))
  vals <-  rbind(vals, output)
}

colnames(vals) <-c("Cultivar", "District", "RMSE_mean")
vals$RMSE_mean <- as.numeric(vals$RMSE_mean)

knitr::kable(vals, caption = "Cultivar assessement based on ensemble RMSE of average yields from RHEAS ensemble.")
Cultivar assessement based on ensemble RMSE of average yields from RHEAS ensemble.
Cultivar District RMSE_mean
6 CHADIZA 0.6064343
6 CHAMA 0.5777490
6 CHAVUMA 0.5596623
6 CHIBOMBO 0.7035040
6 CHIENGE 0.4663884
6 CHILUBI 0.3677825
6 CHINGOLA 0.9324916
7 CHINSALI 0.3867010
6 CHIPATA 0.5365719
6 CHOMA 1.0944982
6 CHONGWE 0.6050852
6 GWEMBE 1.3816591
7 ISOKA 0.3886291
6 ITEZHI-TEZHI 0.9713836
6 KABOMPO 0.3189672
6 KABWE 0.7519922
6 KAFUE 0.7180780
6 KALABO 1.9247793
6 KALULUSHI 0.4937504
6 KAOMA 0.9382855
6 KAPIRI MPOSHI 0.4170042
6 KAPUTA 0.9510975
7 KASAMA 0.4650728
6 KASEMPA 0.4986089
6 KATETE 1.1148522
7 KAWAMBWA 0.4455134
6 KAZUNGULA 1.4945964
6 KITWE 0.4530231
6 LUANGWA 2.0208272
6 LUANSHYA 0.3022950
6 LUKULU 1.6053645
6 LUNDAZI 0.4129054
7 LUSAKA 1.9083075
7 LUWINGU 0.3803144
6 MAMBWE 1.2494377
6 MANSA 0.5109675
6 MASAITI 0.4684266
6 MAZABUKA 0.7795253
6 MBALA 0.4668374
6 MILENGE 0.4092623
7 MKUSHI 0.5582931
6 MONGU 1.8196755
6 MONZE 1.0015455
7 MPIKA 0.3654331
6 MPONGWE 0.5162312
6 MPOROKOSO 0.4565665
6 MPULUNGU 0.6963483
7 MUFULIRA 0.4446564
6 MUFUMBWE 0.4054934
6 MUMBWA 0.9861472
7 MUNGWI 0.3373367
6 MWENSE 0.3080217
7 MWINILUNGA 0.3985817
6 NAKONDE 0.5181622
6 NAMWALA 1.3721491
6 NCHELENGE 0.4052449
6 NDOLA 0.4791634
6 NYIMBA 0.9451651
6 PETAUKE 0.8828479
6 SAMFYA 0.5795937
6 SENANGA 2.0153028
6 SERENJE 0.4536121
6 SESHEKE 1.9522623
6 SHANG’OMBO 2.0119840
6 SIAVONGA 1.6581150
6 SINAZONGWE 1.6579345
6 SOLWEZI 0.8111611
6 ZAMBEZI 0.7376848
knitr::kable(sort(unique(vals$Cultivar)), caption = "Revelevant cultivars for Zambia.")
Revelevant cultivars for Zambia.
x
6
7

Visualize RMSE spatial distribution.

Cultivars 6 and 7 seem to show lower RMSE. These cultivars corresponds to the following names.

No. Cultivar Name
1 SC627
2 SC403
3 PAN53
4 DKC8053
5 PHB30G1
6 DKC8033
7 SC627
8 ZMS 606
9 PHB 30G19
10 PHB 30B50
11 SC513
12 MRI624