Several cultivars from literature within Zambia and Malawi were tested and compared to observed production information in a bid to select a dominant variety for maize yield prediction.
Load malawi’s ministry of agriculture maize forecasts data as reference.
Load cultivars data. Basically, 12 maize cultivars were tested between 2010–2022 to
## [1] "Year" "Division" "District"
## [4] "EPA" "Season.start.date" "Season.end.date"
## [7] "Total.Area_ha" "Yield_kg_ha" "Production_MT"
## [10] "Comments" "yield_MT_ha"
Convert District and province names to upper case.
## NULL
x |
---|
BALAKA |
BLANTYRE |
CHIKWAWA |
CHIRADZULU |
CHITIPA |
DEDZA |
Load information on different cultivars production potential as predicted by RHEA.
files <- list.files(paste0(root, "Cultivars_Selection/malawi/"), full.names = T)
f <- read.csv(files[1], stringsAsFactors = FALSE)
df_list <- list()
for(i in 1:length(files)){
f <- read.csv(files[i], stringsAsFactors = FALSE)
f$cultivar <- sub("\\_.*", "", basename(files[i]))
temp <- sub("_v2", "", tools::file_path_sans_ext(basename(files[i])))
f$Year <- sub(".*\\_|.csv.*", "", temp)
df_list[[i]] <- f
}
df <- do.call(rbind, df_list)
df$cname <- toupper(df$cname)
df$gwad <- df$gwad/1000
Check missing names.
c <- sort(unique(ref$District))
c[!c %in% sort(unique(df$cname))]
## [1] "DOWA EAST" "DOWA WEST" "DOWAEAST" "LILONGWE EAST"
## [5] "LILONGWE WEST" "MZIMBA NORTH" "MZIMBA SOUTH" "NKHATABAY"
Correct spellings
df$cname[df$cname=="DOWA EAST"] <- "DOWA EAST"
df$cname[df$cname=="DOWAEAST"] <- "DOWA EAST"
df$cname[df$cname=="DOWAEST"] <- "DOWA EAST"
c <- sort(unique(ref$District))
c[!c %in% sort(unique(df$cname))]
## [1] "DOWA EAST" "DOWA WEST" "DOWAEAST" "LILONGWE EAST"
## [5] "LILONGWE WEST" "MZIMBA NORTH" "MZIMBA SOUTH" "NKHATABAY"
Merge RHEAS and observed MOA data
rh_q <- aggregate(gwad~cname+cultivar+Year, data=df, FUN = 'quantile', probs=c(0.25, 0.5, .75, .95), na.rm=T)
rh_q <- do.call(data.frame, rh_q)
names(rh_q) <- gsub("\\.","",names(rh_q))
rh_m <-aggregate(gwad~cname+cultivar+Year, data=df, FUN = mean, na.rm=T) #aggregate(df[,"gwad"], df[,c("cname","cultivar", "Year")], mean, na.rm=T)
names(rh_m)[4] <- "gwadmean"
rh <- rh_q
rh$gwadmean <- rh_m$gwadmean
names(rh)[1] <- "District"
ref.agg <-aggregate(yield_MT_ha~District+Year, data=ref, FUN = mean, na.rm=T)
dff <- merge(ref.agg,rh, by=c("District", "Year"))
We can use the Root Mean Square Error (RMSE) and mean absolute percentage error (MAPE) to evaluate the models accuracy. RMSE is given as:
\[ \text{RMSE} = \sqrt{\frac{1}{n} \sum_{i=1}^n y-\widehat{y}}, \] where \(\widehat{y}\) and \(y\) are predicted yields and observed yields respectively while n is the number of fitted points.
What about Relative root mean square error (RRMSE):
\[ \text{RRMSE} = \frac{\sqrt{\frac{1}{n} \sum_{i=1}^n y-\widehat{y}}}{\frac{1}{n} \sum_{i=1}^n y}, \]
According to Li et al. 2013, the performance of the model is excellent when RRMSE < 10%; good if 10% < RRMSE < 20%; fair if 20% < RRMSE < 30%; and poor if RRMSE ≥ 30%.
Here validation measures are computed for each cultivar at district level based on historically observed maize yields.
val <- data.frame()
Cultivar <- sort(unique(dff$cultivar))
dists <- sort(unique(dff$District))
for(i in 1:length(Cultivar)){
for(j in 1:length(dists)){
#print(paste0("cultivar",i, " in ", dists[j]))
temp <- subset(dff, cultivar==paste0("cultivar",i) & District==dists[j])
rmse_m <- rmse(temp$gwadmean-temp$yield_MT_ha)
rrmse_m <- rrmse(temp$gwadmean, temp$yield_MT_ha)
pcor_m <- cor(temp$gwadmean, temp$yield_MT_ha, method = "pearson")
bias <- mean(temp$gwadmean-temp$yield_MT_ha)
rmse25 <- rmse(temp$gwad25-temp$yield_MT_ha)
rrmse25 <- rrmse(temp$gwad25, temp$yield_MT_ha)
rmse50 <- rmse(temp$gwad50-temp$yield_MT_ha)
rrmse50 <- rrmse(temp$gwad50, temp$yield_MT_ha)
rmse75 <- rmse(temp$gwad75-temp$yield_MT_ha)
rrmse75 <- rrmse(temp$gwad75, temp$yield_MT_ha)
rmse95 <- rmse(temp$gwad95-temp$yield_MT_ha)
rrmse95 <- rrmse(temp$gwad95, temp$yield_MT_ha)
output <- c(i, dists[j], rmse_m, rrmse_m, pcor_m, rmse25, bias, rrmse25, rmse50, rrmse50, rmse75, rrmse75, rmse95, rrmse95)
val <- rbind(val, output)
}
}
colnames(val) <-c("Cultivar", "District", "RMSE_mean", "RRMSE_mean", "pcor", "bias", "RMSE25", "RRMSE25", "RMSE50", "RRMSE50", "RMSE75", "RRMSE75", "RMSE95", "RRMSE95")
#knitr::kable(val,caption = "Cultivar assessement based on ensemble mean, 25%. 50%, 75% and 95% percentiles respectively.")
Find cultivars with least RMSE in each district.
vals <- data.frame()
for(i in 1:length(dists)){
#Flter out district
temp <- subset(val, District==dists[i])
#Order dataframe in decreasing correlation value
index <- order(temp$pcor, decreasing = T)
#pick dataframe with first four highest correlation
temp <- temp[index[1:4],]
output <- c(temp$Cultivar[which.min(temp$RMSE_mean)], dists[i], min(temp$RMSE_mean))
vals <- rbind(vals, output)
}
colnames(vals) <-c("Cultivar", "District", "RMSE_mean")
vals$RMSE_mean <- as.numeric(vals$RMSE_mean)
knitr::kable(vals, caption = "Cultivar assessement based on ensemble RMSE of average yields from RHEAS ensemble.")
Cultivar | District | RMSE_mean |
---|---|---|
4 | BALAKA | 1.9476243 |
7 | BLANTYRE | 1.0963509 |
6 | CHIKWAWA | 0.9564860 |
6 | CHIRADZULU | 0.5628901 |
4 | CHITIPA | 0.9194430 |
4 | DEDZA | 1.0916843 |
6 | KARONGA | 1.4471756 |
3 | KASUNGU | 1.2141267 |
4 | LIKOMA | 1.7249400 |
4 | MACHINGA | 1.6977333 |
6 | MANGOCHI | 1.1892109 |
3 | MCHINJI | 1.1222119 |
11 | MULANJE | 1.3987127 |
1 | MWANZA | 1.8924553 |
3 | MZIMBA | 0.8027455 |
6 | NENO | 0.6600554 |
4 | NKHOTAKOTA | 0.3653033 |
6 | NSANJE | 1.7372848 |
6 | NTCHEU | 0.6221345 |
1 | NTCHISI | 0.6699413 |
1 | PHALOMBE | 0.8223237 |
4 | RUMPHI | 0.5433124 |
4 | SALIMA | 1.0642157 |
6 | THYOLO | 0.7453994 |
6 | ZOMBA | 0.5932956 |
knitr::kable(sort(unique(vals$Cultivar)), caption = "Revelevant cultivars for Malawi")
x |
---|
1 |
11 |
3 |
4 |
6 |
7 |
Visualize RMSE spatial distribution.
Cultivars 4, 6 and 7 seem to show lower RMSE on mean of RHEAS ensemble yield forecasts. These cultivars corresponds to the following names.
No. | Cultivar Name |
---|---|
1 | SC627 |
2 | SC403 |
3 | PAN53 |
4 | DKC8053 |
5 | PHB30G1 |
6 | DKC8033 |
7 | SC627 |
8 | ZMS 606 |
9 | PHB 30G19 |
10 | PHB 30B50 |
11 | SC513 |
12 | MRI624 |