Download global daily weather data and interpolate over study area

In this post, you will first of all be able to download weather station data from all over the world formatted in an appropriate manner for easy manipulation using the package GSODR. And get quick view on some variable. Secondly, interpolate these data over a given area using the inverse distance weighted approach.

The code is executable in the R programming tool.

Global Surface Summary of the Day (‘GSOD’)

Daily global surface summary product are produced by the National Climatic Data Center (NCDC) These data include quality controlled daily, monthly, seasonal, and yearly measurements of temperature, precipitation, wind, and degree days as well as radar data and 30-year Climate Normals. The data can be downloaded from the Climate Data Online using this link : www.ncdc.noaa.gov/cdo-web/ Historical data for 30809 stations are available.

Framework of GSOD

The data summaries here are based on data exchanged under the World Meteorological Organization (WMO) World Weather Watch Program according to WMO Resolution 40 (Cg-XII). Get further for the description in this link: http://www7.ncdc.noaa.gov/CDO/GSOD_DESC.txt.

LOAD Packages

#Load library
library(tmap)
library(GSODR) #Global Surface Summary of the Day ('GSOD') Weather
library("dplyr")
library(hydroTSM)
library(skimr)#summary statistics
library(raster)
library(maps)
library(ggplot2)

Set working directory

setwd("C:/Users/DEROOT/Blog/N2")

GSOD Weather Station Data Inventories

inventory <- get_inventory()
as_tibble(inventory)

Retrieve stations information of a particular country

As example I will use Benin republic (in West-Africa).

Download all weather stations for the year 2018 in Benin using get_GSOD

tbar_benin_2018<-get_GSOD(years=2018, station=Benin_stat_id$STNID)

as_tibble(tbar_benin_2018)
#Sub important station ID character (remove *- 99999*)

tbar_benin_2018$STNID<-substr(tbar_benin_2018$STNID,1,6)
Benin_stat_id$STNID<-substr(Benin_stat_id$STNID,1,6)

as_tibble(Benin_stat_id)

Climograph based on precipitation and temperature data for a given each station

Quick view on data

  tbar_benin_2018$DATE<-as.Date(tbar_benin_2018$YEARMODA,"%Y-%m-%d")

  #Subset of tbar_benin_2018
  climdf=tbar_benin_2018[,c('TEMP',"MAX",'MIN','PRCP','LONGITUDE','LATITUDE','STNID')]
  clim_xts=xts(x = climdf, order.by =tbar_benin_2018$DATE)
  
  #convert content to numeric
  storage.mode(clim_xts) <- "double"
  
# Plot climograph for the first station as example
  climdt=subset(clim_xts,STNID==Benin_stat_id$STNID[1])
  climograph(pcp=climdt$PRCP,tmx=climdt$MAX,tmn=climdt$TEMP,
             main=paste("Climograph",Benin_stat_id$NAME[1]),
             na.rm=TRUE,pcp.col='green',tmean.col='red')

2. Perform an inverse distance weighted interpolation using the new tmap packages

Create new dataset

#Create new dataset
clim_dt=climdf
    clim_dt$Year=format(tbar_benin_2018$DATE,'%Y')
    
    #summarise weather paramters in a yearly basis
    clim_dt=clim_dt%>% group_by(STNID,LONGITUDE,LATITUDE,Year) %>% summarise(
                                                              TEMP=mean(TEMP),
                                                              MAX=mean(MAX, na.rm=TRUE),
                                                              MIN=mean(MIN, na.rm=TRUE),
                                                              PRCP=sum(PRCP, na.rm=TRUE))
    
    clim_dt

Get geographic data

study_area<- getData("GADM", country=c("Benin"), level=0)
    #study_area1<- getData("GADM", country=c("Benin"), level=1)#first level subdivision

plot(study_area,main='BENIN')

Grid point distribution

attach(clim_dt)

(spatial_Benin<- ggplot(clim_dt, aes(x =LONGITUDE, y =LATITUDE,
                                      colour =TEMP))+ geom_point())

    detach(clim_dt)

Format the grid points (stations) to right coordinate system

stations=SpatialPoints(clim_dt[,c("LONGITUDE","LATITUDE")],
    proj4string=CRS("+proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"))

#Creation of a SpatialPointsDataFrame variable 
    stations = SpatialPointsDataFrame(stations,clim_dt)

    class(stations)
## [1] "SpatialPointsDataFrame"
## attr(,"package")
## [1] "sp"

Check if the study area and the grid poinst (stattions) have same coordinate

crs(study_area)
## CRS arguments:
##  +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0
crs(stations)
## CRS arguments:
##  +proj=utm +zone=30 +datum=WGS84 +units=m +no_defs +ellps=WGS84
## +towgs84=0,0,0

Distribution of grid point on the study area

plot(study_area, col='lightblue')
plot(stations, main="Map of Plot Locations",add=TRUE)

IDW : Interpolation function

study_area: geographic file in SpatialPointsDataFrameformat

param:parameter interpolated

idp : inverse distance weighting power

stations: weather data in SpatialPointsDataFrame format

idw_interpol=function(study_area,stations,param,idp){
  
  lon_range <- as.numeric(c(extent(study_area)[1],extent(study_area)[2]))  # min/max longitude of the interpolation area
  lat_range <- as.numeric(c(extent(study_area)[3], extent(study_area)[4]))  # min/max latitude of the interpolation area
  
  # Grid creation
  grd <- expand.grid(x = seq(from = lon_range[1],to = lon_range[2],by = 0.1),
                     y = seq(from = lat_range[1], to = lat_range[2],by = 0.1))
  
  # Convert grd object to a matrix and then turn into a spatial points object
  coordinates(grd) <- ~x + y
  
  # Create SpatialPixel object
  gridded(grd)     <- TRUE
  fullgrid(grd)    <- TRUE  
  
  #### view grid with points overlayed
  plot(grd, cex = 1.5, col = "grey")
  plot(stations,pch = 15,col = "red",cex = 1, add = TRUE)
  plot(study_area,add = TRUE)
  
  #Extend projection attributes of stations to the empty grid
  proj4string(grd) <- proj4string(stations)
  
  # interpolate the maximum temperature
  clim_idw<-  gstat::idw(formula =param ~ 1,
                         locations = stations,
                         newdata = grd,
                         idp = 1)
  
  # Create raster object of the interplotaion and then clip to study area
  r       <- raster(clim_idw)
  r.m     <- mask(r,study_area)
  
  return(r.m)
}

Perform idw_interpol function on maxium temperature

r.m=idw_interpol(study_area,stations,stations$MAX,idp = 1)

## [inverse distance weighted interpolation]

Thematic map of the interpolated maximum temperature

pal =c("#FFFFC8", "#FCE7A0", "#F7C252" ,"#F39300", "#EB5500" ,"#BE1D00", "#7D0025")

tmap_mode("plot")
## tmap mode set to plotting
tm_shape(r.m,xlim=c(extent(r.m)[1],extent(r.m)[2]+0.5),
             ylim=c(extent(r.m)[3]-0.3,extent(r.m)[4]+0.5),unit='km')+
  tm_scale_bar(position=c("left", "bottom"))+
  tm_raster(n=10,palette =pal,auto.palette.mapping = FALSE, title="Temperature \n[C]") + 
  tm_shape(stations)+tm_dots(size=0.4) +
  tm_compass(position=c("right", "top"))+tm_grid(lines=FALSE)+
  tm_layout(scale = .8,legend.position= c("right","bottom"),
             legend.bg.color = "skyblue", legend.bg.alpha = .2, 
            legend.frame = "gray50")