## PART 0 - SCRIPT INFORMATION SECTION ----------------------------------------
#' Script to paste seismic data snippets into hourly file templates
#'
#' This script can be used to generate regularly spaced hourly SAC files from
#' a set of shorter file snippets. Data gaps are interpolated linearly. The
#' script was written as a patch solver. It should be used with care and
#' the scientist should be aware of the consequences of the data manipulation.
#'
#' Author: Michael Dietze, Section 4.6 GFZ Potsdam (mdietze@gfz-potsdam.de)
#'
#' Script version: 0.1.0, 05 Spetember 2019
#'
#' Changes to previous versions
#' - not applicable
#'
#' Requirements & dependencies:
#' - R 3.4.4
#' - eseis 0.5.0
## PART 1 - DEFINITION OF OBJECTS AND IMPORT OF DATA --------------------------
## load required package
library(eseis)
## define input and output directories
dir_sac = "/sec51/data/mdietze/2018_hochvogel/data/seismic/sac_redate/"
dir_out <- "/sec51/data/mdietze/2018_hochvogel/data/seismic/sac_antenna_hourly/"
## define time range to be processed
t_range <- as.POSIXct(x = c("2018-07-09",
"2018-08-01"),
tz = "UTC")
## read station infor file
stations <- read.table(file = paste0("/sec51/data/mdietze/2018_hochvogel/",
"data/seismic/station/station_info_",
"HVGL18_final_summit_utm_01.txt"),
header = TRUE,
stringsAsFactors = FALSE)[-7,]
## PART 2 - PROCESSING OF FILES -----------------------------------------------
## assign station IDs
ID_sac <- stations$ID
## list all files to process
files_sac <- list.files(path = dir_sac,
recursive = TRUE,
full.names = TRUE,
pattern = ".SAC")
## isolate sac file name from path
files_sac_name <- do.call(c, lapply(X = files_sac, FUN = function(x) {
strsplit(x = x, split = "/")[[1]][12]
}))
## convert file name to time stamp
files_sac_time <- as.POSIXct(x = substr(x = files_sac_name,
start = 7,
stop = 21),
format = "%y.%j.%H.%M.%S",
tz = "UTC")
## create vector of hourly time snippets of output files
t <- seq(from = t_range[1],
to = t_range[2],
by = 3600)
## loop through all hourly time segments
for(i in 16:length(t)) {
## create empty output data object
s_empty <- lapply(X = 1:6, FUN = function(a) {
return(rep(NA, times = 3600 * 200))
})
## assign station IDs to output object
names(s_empty) <- ID_sac
## generate time vector
t_empty <- seq(from = t[i],
by = 1/200,
length.out = 3600 * 200)
## isolate input files that can fill the output object
files_sac_i <- files_sac[files_sac_time >= t[i] - 6 * 60 &
files_sac_time <= (t[i] + 3600)]
## remove NA values, if present
files_sac_i <- na.exclude(files_sac_i)
## continue if any files are present for time snippet to work on
if(length(files_sac_i) > 0) {
## process each station individually
for(j in 1:length(ID_sac)) {
## isolate files for station to work on
files_sac_i_j <- files_sac_i[grepl(x = files_sac_i,
pattern = ID_sac[j])]
## process each file individually
for(k in 1:length(files_sac_i_j)) {
## read and prepare files
s <- eseis::read_sac(file = files_sac_i_j[k])
## define file-based start and end times of the data
s_start <- s$meta$starttime
s_stop <- s_start + s$meta$dt * s$meta$n
## toggle indices with data TRUE
t_paste <- t_empty >= s_start & t_empty < s_stop
## fill signal vector with existing data
if(sum(t_paste) > 0) {
s_empty[[j]][t_paste] <- s$signal[1:sum(t_paste)]
}
}
## identify data gaps
s_na <- is.na(s_empty[[j]])
## interpolate data gaps if present
if(sum(s_na) > 0) {
## account for first and last value being NA
if(s_na[1] == TRUE) {
s_na[1] <- FALSE
s_empty[[j]][1] <- mean(s_empty[[j]], na.rm = TRUE)
}
if(s_na[length(s_na)] == TRUE) {
s_na[length(s_na)] <- FALSE
s_empty[[j]][length(s_na)] <- mean(s_empty[[j]], na.rm = TRUE)
}
## find transitions to NA
s_na_trans <- c(FALSE, diff(s_na))
## identify start and end points of data gaps
s_na_start <- which(s_na_trans == 1) - 1
s_na_stop <- which(s_na_trans == -1)
## if gaps exist, fill them with linear sequence from margins
if(length(s_na_start) > 0) {
for(k in 1:length(s_na_start)) {
s_empty[[j]][s_na_start[k]:s_na_stop[k]] <-
seq(from = s_empty[[j]][s_na_start[k]],
to = s_empty[[j]][s_na_stop[k]],
length.out = length(s_na_start[k]:s_na_stop[k]))
}
}
}
}
## check/create output directory year
try(if(dir.exists(paths = paste0(dir_out,
format(x = t[i],
format = "%Y"))) == FALSE) {
dir.create(paste0(dir_out, format(x = t[i], format = "%Y")))
})
## check/create output directory JD
try(if(dir.exists(paths = paste0(dir_out,
format(x = t[i],
format = "%Y/%j"))) == FALSE) {
dir.create(paste0(dir_out, format(x = t[i], format = "%Y/%j")))
})
## save merged hourly data sets
for(j in 1:length(s_empty)) {
write_sac(data = s_empty[[j]],
file = paste0(dir_out,
format(x = t[i],
format = "%Y/%j/"),
names(s_empty)[j], ".",
format(x = t[i],
format = "%y.%j.%H.%M.%S"),
".BHZ.SAC"),
time = t[i],
component = "BHZ",
unit = 1,
station = names(s_empty)[j],
network = "HV",
dt = 1/200)
}
}
## print progress
print(paste(i, "of", length(t)))
}