## PART 0 - DOCUMENTATION AND INFORMATION -------------------------------------
#'
#' This script screens all Cube files and the SAC files into which they
#' were converted using the R package 'eseis'. It returns numeric and
#' graphical information about the time coverage of the data by station,
#' as well as essential setup parameters of the Cube data loggers (i.e,
#' gain, sampling rate and number of recording channels).
#'
#' The script can be customised and extended if needed. It was primarily
#' written to get an overview of the data coverage and properties for one
#' specific project and assumes other projects have the data organised in
#' the same way. Any way, make sure that the paths are correctly adjusted,
#' that the required gipptools are installed, and that the directories
#' for saving plots and data sets are adequate.
#'
#' The flags (first list element of the parameter list 'par') allow
#' using already calculated data or recalculating everything from scratch.
#' Also, plots can be toggled on or off.
#'
#' Author: Michael Dietze (mdietze@gfz-potsdam.de)
#' Version: 0.1.0
#' Date of last modification: 2019-03-15
## PART 1 - SET PARAMETERS ----------------------------------------------------
pars <- list(
flag = list(SAC_PROCESS = TRUE,
SAC_SAVE = TRUE,
SAC_PLOT = TRUE,
CUBE_PROCESS = TRUE,
CUBE_SAVE = TRUE,
CUBE_PLOT = TRUE),
path = list(sac = "hochvogel/data/seismic/sac/",
cube = "hochvogel/data/seismic/cube/",
gipptools = "software/gipptools-2018.310/",
save_sac = "hochvogel/data/sac_periods_of_interest.rda",
save_cube = "hochvogel/data/cube_setups.rda",
jpeg_sac = "hochvogel/plot/speriods_of_interest.jpg",
jpeg_cube = "hochvogel/plots/cube_parameters.jpg"))
## PART 2 - SCRIPT INITIATION -------------------------------------------------
## define additional function (might be included to 'eseis' package, later)
aux_cubeinfo <- function(file, gipptools) {
## get cube info output
x <- system(command = paste0(gipptools,
"bin/cubeinfo ",
file),
intern = TRUE)
## remove comments and empty rows
x <- x[grepl(x = x,
pattern = "# ---") == FALSE]
x <- x[nchar(x) > 0]
## separate parameter names and values
x_parameter <- trimws(substr(x = x,
start = 3,
stop = 26),
which = "left")
x_value <- trimws(substr(x = x,
start = 30,
stop = 100),
which = "right")
## organise output as data frame
x_tabular <- as.data.frame(rbind(x_value),
stringsAsFactors = FALSE)
names(x_tabular) <- x_parameter
rownames(x_tabular) <- ""
## return output
return(x_tabular)
}
## PART 3 - ANALYSIS OF SAC FILE TIME COVERAGE --------------------------------
if(pars$flag$SAC_PROCESS == TRUE) {
## get all files, with and without paths
files_path <- list.files(path = pars$path$sac,
full.names = TRUE,
recursive = TRUE)
files <- list.files(path = pars$path$sac,
full.names = FALSE,
recursive = TRUE)
## extract time stamp
files_time <- substr(x = files,
start = 15,
stop = 30)
## account for effect of different station names
files_time <- ifelse(test = substr(x = files_time,
start = 1,
stop = 1) == ".",
yes = substr(x = files_time,
start = 2,
stop = 16),
no = substr(x = files_time,
start = 1,
stop = 15))
## convert time string to POSIX format
files_start <- as.POSIXct(x = files_time,
format = "%y.%j.%H.%M.%S",
tz = "UTC")
## get station IDs
files_station <- do.call(c, lapply(X = files, FUN = function(files) {
x <- strsplit(x = files, split = "/")[[1]][3]
x <- strsplit(x = x, split = ".", fixed = TRUE)[[1]][1]
return(x)
}))
## get components
files_component <- do.call(c, lapply(X = files, FUN = function(files) {
x <- strsplit(x = files[1], split = "/")[[1]][3]
x <- strsplit(x = x, split = ".", fixed = TRUE)[[1]]
return(x[length(x) - 1])
}))
## compile results
file_data <- data.frame(time = files_start,
station = files_station,
component = files_component,
stringsAsFactors = FALSE)
## separate by station and order meaningfully
stations_unique <- unique(file_data$station)
stations_unique <- stations_unique[c(2:7, 1, 8:12)]
file_by_stat <- lapply(X = stations_unique, FUN = function(sta, file_data) {
file_data[file_data$station == sta,]
}, file_data)
## get total time span of data coverage
t_range <- range(files_start)
## define periods of interest for further analysis
t_interest <- data.frame(
start = as.POSIXct(c(file_by_stat[[1]]$time[730],
file_by_stat[[12]]$time[1]), tz = "UTC"),
stop = as.POSIXct(c(file_by_stat[[1]]$time[6090],
file_by_stat[[8]]$time[6980]), tz = "UTC"),
SA_1 = c(TRUE, FALSE),
SA_2 = c(TRUE, FALSE),
SA_3 = c(TRUE, FALSE),
SA_4 = c(TRUE, FALSE),
SA_5 = c(TRUE, FALSE),
SA_6 = c(TRUE, FALSE),
HVGL1 = c(TRUE, FALSE),
HVGL2 = c(TRUE, TRUE),
HVGL3 = c(TRUE, TRUE),
HVGL4 = c(TRUE, TRUE),
SA_21 = c(FALSE, TRUE),
SA_22 = c(FALSE, TRUE))
t_interest_network <-
data.frame(
start = as.POSIXct(c(file_by_stat[[1]]$time[730],
file_by_stat[[1]]$time[6090]),
tz = "UTC"),
stop = as.POSIXct(c(file_by_stat[[1]]$time[6090],
file_by_stat[[8]]$time[6980]),
tz = "UTC"),
SA_1 = c(TRUE, FALSE),
SA_2 = c(TRUE, FALSE),
SA_3 = c(TRUE, FALSE),
SA_4 = c(TRUE, FALSE),
SA_5 = c(TRUE, FALSE),
SA_6 = c(TRUE, FALSE),
HVGL1 = c(TRUE, FALSE),
HVGL2 = c(TRUE, TRUE),
HVGL3 = c(TRUE, TRUE),
HVGL4 = c(TRUE, TRUE),
SA_21 = c(FALSE, TRUE),
SA_22 = c(FALSE, TRUE))
} else {
load(file = pars$path$save_sac)
}
## optionally save results
if(pars$flag$SAC_PROCESS == TRUE) {
save(t_interest, t_interest_network, file = pars$path$save_sac)
}
## PART 4 - ANALYSIS OF CUBE FILE PARAMETERS ----------------------------------
if(pars$flag$CUBE_PROCESS == TRUE) {
## list all cube files in data directory with full paths
files_cube_path <- list.files(path = pars$path$cube,
recursive = TRUE,
full.names = TRUE)
## list all cube files in data directory without full paths
files_cube_name <- list.files(path = pars$path$cube,
recursive = TRUE)
## check for files to exclude
i_out <- grepl(x = files_cube_path, pattern = "config") |
grepl(x = files_cube_path, pattern = "CONFIG")
## remove unsuitable files
files_cube_path <- files_cube_path[!i_out]
files_cube_name <- files_cube_name[!i_out]
## get cube info data from all files
data_cube <- lapply(X = files_cube_path,
FUN = aux_cubeinfo,
gipptools = pars$path$gipptools)
## remove unsuitable output (e.g., processed config files)
i_OK <- do.call(c, lapply(data_cube, FUN = ncol)) == 20
data_cube_ok <- data_cube[i_OK]
## convert list to data frame
data_cube_ok <- do.call(rbind, data_cube_ok)
## append file names and paths
data_cube <- cbind(files_cube_name[i_OK],
files_cube_path[i_OK],
data_cube_ok)
} else {
load(file = pars$path$save_cube)
}
## optionally save results
if(pars$flag$CUBE_SAVE == TRUE) {
save(data_cube, file = pars$path$save_cube)
}
## group data by logger ID
cube_id <- substr(x = data_cube$`recording unit`, start = 3, stop = 5)
cube_id_unique <- unique(cube_id)
cube_data_by_cube <-
lapply(X = cube_id_unique, FUN = function(id, data_cube, cube_id) {
data_cube[cube_id == id,]
}, data_cube, cube_id)
names(cube_data_by_cube) <- cube_id_unique
## PART 5 - PLOTS -------------------------------------------------------------
if(pars$flag$SAC_PLOT == TRUE) {
## set up plot area
par(mfcol = c(6, 2))
## plot data coverage by station with all available data
for(i in 1:length(stations_unique)) {
plot(file_by_stat[[i]]$time,
y = rep(1, length(file_by_stat[[i]]$time)),
pch = 15,
xlim = t_range,
main = stations_unique[i])
}
## plot two periods of interest separately
## set up plot device and area
jpeg(filename = pars$path$jpeg_sac,
width = 2000,
height = 1200,
res = 100)
par(mfcol = c(5, 3))
## plot data coverage by station for the two periods of interest
station_ok <- names(t_interest)[t_interest[1,] == TRUE]
file_by_stat_ok <- file_by_stat[t_interest[1,3:14] == TRUE]
for(j in 1:10) {
try(plot(file_by_stat_ok[[j]]$time,
y = rep(1, length(file_by_stat_ok[[j]]$time)),
pch = 15,
xlim = c(t_interest$start[1], t_interest$stop[1]),
main = stations_unique[j],
col = 4))
}
file_by_stat_ok <- file_by_stat
file_by_stat_ok <- file_by_stat_ok[t_interest[2,3:14] == TRUE]
for(j in 1:5) {
try(plot(file_by_stat_ok[[j]]$time,
y = rep(1, length(file_by_stat_ok[[j]]$time)),
pch = 15,
xlim = c(t_interest$start[2], t_interest$stop[2]),
main = stations_unique[t_interest[2,3:14] == TRUE][j],
col = 3))
}
dev.off()
}
if(pars$flag$CUBE_PLOT == TRUE) {
## set up plot device and area
jpeg(filename = pars$path$jpeg_cube,
width = 2000,
height = 1200,
res = 100)
par(mfcol = c(1, 3))
## plot
boxplot(lapply(cube_data_by_cube,
FUN = function(x) {
as.numeric(x$`amplification factor`)
}),
main = "Gain")
boxplot(lapply(cube_data_by_cube,
FUN = function(x) {
as.numeric(substr(x$`sample rate`, 1, 3))
}),
main = "Sample rate")
boxplot(lapply(cube_data_by_cube,
FUN = function(x) {
as.numeric(nchar(x$`recorded channels`) / 3 + 1/3)
}),
main = "Number of channels")
## close plot device
dev.off()
}