R script to calculate spectrograms

## PART 0 - SCRIPT INFORMATION SECTION ----------------------------------------

#' The script can be used to calculate power spectral density estimates (PSD) 
#' for the entire time covered by the data.
#' 
#' It cuts the period of interest into time snippets, imports the seismic 
#' signals for these snippets, deconvolves them, demeans and detrends, 
#' filters (bandpass filter) and tapers them, and calculates a spectrogram 
#' using the Welch method. The resulting list of PSDs corresponding 
#' to the time sinppets is stacked to a matrix. The frequency dimension of the 
#' output matrix is aggregated to avoid data overload. For this, prior to the 
#' time snippet analysis, one such snippet is used to calculate a spectrogram 
#' and extract the frequency vector as template for aggregation (i.e., getting 
#' its length). For plotting, the spectrogram is scaled using the user-defined 
#' zlim range. The plot is written to a jpeg file. All essential script output 
#' is saved as a *.rda file. 
#' 
#' All setup parameters of the script are defined in PART 1, in the object 
#' 'pars'. The period of interest 't_start' and 't_stop' must be provided as
#' POSIXct format. The duration of each time snippet is set in seconds. To 
#' calculate the spectra only for some hours of a day (e.g., only during 
#' night time to minimise anthorpogenic signal contamination), these hours
#' can be specified in 'hours_ok'. To use all hours use 'hours_ok = 0:23'. The 
#' PSD calculation is realised in a multicore environment. The fraction of 
#' cores (CPUs) to use can be set in the parameter list. The output length of 
#' the frequency dimension is set by 'length_f' (without aggregation it would
#' be 720000 for one hour of data at 200 Hz sampling frequency). he working 
#' directory is set by 'dir'. See the documentation of 'aux_getevent' for the 
#' conventions. This also holds for the parameters 'station' and 'component'.
#' For the deconvolution it is necessary to specify the 'sensor', 'logger' and 
#' 'gain' information. Filtering needs the corner frequencies 'f_filter' of the 
#' Butterworth bandpass filter, order is set to 2 by default. Tapering amount 
#' is set by the fraction of the signal length 'p_taper'. The PSD parameters 
#' include toggeling the Welch option 'welch', the sub window size to calculate 
#' individual spectra within the hourly window 'window_sub' and their overlaps  
#' 'overlap_sub'. The parameter 'zlim' specifies the output graph's colour 
#' scale range. The jpeg properties are set by 'width', 'height', 'res' 
#' (resolution) and the file name 'name'. Finally, 'data_name' denotes the 
#' file name of the rda file that saves the parameter list, the initial PSD, 
#' time and frequency vector, as well as the aggregated versions.
#' 
#' Author: Michael Dietze, Section 5.1 GFZ Potsdam (mdietze@gfz-potsdam.de)
#' 
#' Script version: 0.1.0, 23 August 2018
#' 
#' Changes to previous versions
#'   - not applicable
#' 
#' Requirements & dependencies: 
#'   - R 3.4.4
#'   - eseis 0.5.0
#'   - parallel 3.4.4
#'   - fields 9.6 

## PART 1 - SETTINGS SECTION --------------------------------------------------

## set working directory
setwd(dir = "~/00_PROJECTS/2018_saevaran/data/")

## load packages
library("eseis")
library("parallel")

## set analysis parameters
pars <- list(t_start = "2018-03-24",
             t_stop = "2018-06-15",
             hours_ok = c(22, 23, 0, 1),
             duration = 3600,
             p_cores = 0.5,
             length_f = 2000,
             dir = paste0("/sec51/data/mdietze/2018_saevaran/",
                          "data/seismic/sac/"),
             station = "UME04",
             component = "BHZ",
             sensor = "TC120s",
             logger = "Cube3extBOB",
             gain = 4,
             f_filter = c(1, 190),
             p_taper = 0.01,
             welch = TRUE,
             window_sub = 360,
             overlap_sub = 0.5,
             zlim = c(-190, -120),
             jpg_name = "../plots/psd_total.jpg",
             jpg_width = 4000, 
             jpg_height = 2000,
             jpg_res = 300,
             data_name = paste0("/home/mdietze/00_PROJECTS/",
                                "2018_saevaran/data/psd_total_night_2.rda"))

## PART 2 - INITIATION OF ANALYSIS --------------------------------------------

## create time slice vector
t <- seq(from = as.POSIXct(x = pars$t_start, tz = "UTC"), 
         to = as.POSIXct(x = pars$t_stop, tz = "UTC") - 0.1, 
         by = pars$duration)

## extract hours of the dates
t_hour <- as.numeric(format(x = t, 
                            format = "%H", 
                            tz = "UTC"))

## keep only hours specified in parameters
t <- t[!is.na(match(x = t_hour, table = pars$hours_ok))]

## calculate test PSD to get frequency vector
## read dummy file
s <- try(eseis::aux_getevent(start = t[floor(length(t) / 2)], 
                             duration = pars$duration, 
                             station = pars$station, 
                             component = pars$component, 
                             dir = pars$dir))

## calculate dummy spectrogram
p <- try(eseis::signal_spectrogram(data = s, 
                                   Welch = pars$welch,
                                   window = pars$duration - 1,
                                   overlap = 0, 
                                   window_sub = pars$window_sub,
                                   overlap_sub = pars$overlap_sub))

## extract frequency vector
f <- try(p$PSD$f)

## append frequency vector length to parameter list
pars[[length(pars) + 1]] <- length(f)
names(pars)[length(pars)] <- "length_f"

## create aggregation index vector
i_agg <- floor(seq(from = 1, 
                   to = length(f), 
                   length.out = pars$length_f))

## initiate cluster
cores <- parallel::detectCores()
n_cpu <- floor(cores * pars$p_cores)
cores <- ifelse(cores < n_cpu, cores, n_cpu)
cl <- parallel::makeCluster(getOption("mc.cores", cores))

## print information
print(paste("Preparation finished.",
            length(t), 
            "PSDs to calculate in total, using", 
            cores, 
            "CPUs. Estimated processing time:", 
            round(x = 0.3413938 * length(t) / 60 * 
                    3600 / pars$duration, 
                  digits = 1),
            "min. Starting job NOW."))

## PART 3 - PROCESSING THE DATA -----------------------------------------------

## get time before processing
t_sys_0 <- Sys.time()

## process all time slices
psd_list <- parallel::parLapply(cl = cl, X = t, fun = function(t, pars) {
  
  ## read file
  s <- try(eseis::aux_getevent(start = t, 
                               duration = pars$duration, 
                               station = pars$station, 
                               component = pars$component, 
                               dir = pars$dir))
  
  ## deconvolve data
  s <- try(eseis::signal_deconvolve(data = s, 
                                    sensor = pars$sensor, 
                                    logger = pars$logger, 
                                    gain = pars$gain))
  
  ## demean and detrend data
  s <- try(eseis::signal_demean(data = s))
  s <- try(eseis::signal_detrend(data = s))
  
  ## filter signal
  s <- try(eseis::signal_filter(data = s, f = pars$f_filter))
  
  ## taper signal
  s <- try(eseis::signal_taper(data = s, p = pars$p_taper))
  
  ## calculate spectrogram
  p <- try(eseis::signal_spectrogram(data = s, 
                                     Welch = pars$welch,
                                     window = pars$duration - 1,
                                     overlap = 0, 
                                     window_sub = pars$window_sub,
                                     overlap_sub = pars$overlap_sub))
  
  ## extract spectrogram matrix
  p <- try(p$PSD$S[,1]) 
  
  ## return output
  if(class(p) == "try-error") {
    
    return(rep(x = NA, times = pars$length_f))
  } else {
    
    return(p)
  }
  
}, pars)

## stop cluster
try(parallel::stopCluster(cl = cl))

## get time after processing
t_sys_1 <- Sys.time()

## get time difference
dt <- as.numeric(difftime(time1 = t_sys_1, 
                          time2 = t_sys_0, 
                          units = "mins"))

## get number of successfull calculations
p_ok <- try(do.call(c, lapply(X = psd_list, FUN = function(psd_list) {
  
  sum(is.na(psd_list)) == 0
})))

## print processing information
try(print(paste("Processing finished. Time: ",
                round(x = dt, digits = 1),
                " min. ",
                sum(p_ok),
                " successfull calculations (",
                round(x = sum(p_ok) / length(p_ok) * 100, digits = 1),
                " %).", 
                sep = "")))

## convert list to matrix
psd <- try(do.call(rbind, psd_list))

## aggregate data
f_agg <- f[i_agg]
psd_agg <- psd[, i_agg]

## scale psd for plotting
psd_plot <- psd_agg
psd_plot[psd_plot < pars$zlim[1]] <- pars$zlim[1]
psd_plot[psd_plot > pars$zlim[2]] <- pars$zlim[2]


## PART 4 - SAVING AND PLOTTING -----------------------------------------------

## save the raw data
save(pars, psd, t, f, f_agg, psd_agg, file = pars$data_name)

## open plot device
jpeg(filename = pars$jpg_name,
     width = pars$jpg_width, 
     height = pars$jpg_height,
     res = pars$jpg_res)

try(fields::image.plot(x = t, 
                       y = f_agg,
                       z = psd_plot, 
                       zlim =pars$zlim))

## close plot device
dev.off()
R script to calculate spectrograms

Michael Dietze

August 24, 2018