## PART 0 - DOCUMENTATION AND INFORMATION -------------------------------------
#' 
#' This script screens all Cube files and the SAC files into which they
#' were converted using the R package 'eseis'. It returns numeric and 
#' graphical information about the time coverage of the data by station, 
#' as well as essential setup parameters of the Cube data loggers (i.e,
#' gain, sampling rate and number of recording channels).
#' 
#' The script can be customised and extended if needed. It was primarily 
#' written to get an overview of the data coverage and properties for one 
#' specific project and assumes other projects have the data organised in 
#' the same way. Any way, make sure that the paths are correctly adjusted,
#' that the required gipptools are installed, and that the directories 
#' for saving plots and data sets are adequate.
#' 
#' The flags (first list element of the parameter list 'par') allow 
#' using already calculated data or recalculating everything from scratch. 
#' Also, plots can be toggled on or off.
#' 
#' Author: Michael Dietze (mdietze@gfz-potsdam.de)
#' Version: 0.1.0
#' Date of last modification: 2019-03-15

## PART 1 - SET PARAMETERS ----------------------------------------------------
pars <- list(
  flag = list(SAC_PROCESS = TRUE,
              SAC_SAVE = TRUE,
              SAC_PLOT = TRUE,
              CUBE_PROCESS = TRUE,
              CUBE_SAVE = TRUE,
              CUBE_PLOT = TRUE),
  path = list(sac = "hochvogel/data/seismic/sac/",
              cube = "hochvogel/data/seismic/cube/",
              gipptools = "software/gipptools-2018.310/",
              save_sac = "hochvogel/data/sac_periods_of_interest.rda",
              save_cube = "hochvogel/data/cube_setups.rda",
              jpeg_sac = "hochvogel/plot/speriods_of_interest.jpg",
              jpeg_cube = "hochvogel/plots/cube_parameters.jpg"))

## PART 2 - SCRIPT INITIATION -------------------------------------------------

## define additional function (might be included to 'eseis' package, later)
aux_cubeinfo <- function(file, gipptools) {
  
  ## get cube info output
  x <- system(command = paste0(gipptools, 
                               "bin/cubeinfo ", 
                               file), 
              intern = TRUE)
  
  ## remove comments and empty rows
  x <- x[grepl(x = x, 
               pattern = "# ---") == FALSE]
  x <- x[nchar(x) > 0]
  
  ## separate parameter names and values
  x_parameter <- trimws(substr(x = x, 
                               start = 3, 
                               stop = 26), 
                        which = "left")
  
  x_value <- trimws(substr(x = x, 
                           start = 30, 
                           stop = 100), 
                    which = "right")
  
  ## organise output as data frame
  x_tabular <- as.data.frame(rbind(x_value), 
                             stringsAsFactors = FALSE)
  names(x_tabular) <- x_parameter
  rownames(x_tabular) <- ""
  
  ## return output
  return(x_tabular)
}

## PART 3 - ANALYSIS OF SAC FILE TIME COVERAGE --------------------------------
if(pars$flag$SAC_PROCESS == TRUE) {
  
  ## get all files, with and without paths
  files_path <- list.files(path = pars$path$sac, 
                           full.names = TRUE, 
                           recursive = TRUE)
  
  files <- list.files(path = pars$path$sac, 
                      full.names = FALSE, 
                      recursive = TRUE)
  
  ## extract time stamp
  files_time <- substr(x = files, 
                       start = 15, 
                       stop = 30)
  
  ## account for effect of different station names
  files_time <- ifelse(test = substr(x = files_time,
                                     start = 1, 
                                     stop = 1) == ".",
                       yes = substr(x = files_time, 
                                    start = 2, 
                                    stop = 16),
                       no = substr(x = files_time, 
                                   start = 1, 
                                   stop = 15))
  
  ## convert time string to POSIX format
  files_start <- as.POSIXct(x = files_time, 
                            format = "%y.%j.%H.%M.%S", 
                            tz = "UTC")
  
  ## get station IDs
  files_station <- do.call(c, lapply(X = files, FUN = function(files) {
    
    x <- strsplit(x = files, split = "/")[[1]][3]
    x <- strsplit(x = x, split = ".", fixed = TRUE)[[1]][1]
    return(x)
  }))
  
  ## get components
  files_component <- do.call(c, lapply(X = files, FUN = function(files) {
    
    x <- strsplit(x = files[1], split = "/")[[1]][3]
    x <- strsplit(x = x, split = ".", fixed = TRUE)[[1]]
    return(x[length(x) - 1])
  }))
  
  ## compile results
  file_data <- data.frame(time = files_start, 
                          station = files_station, 
                          component = files_component, 
                          stringsAsFactors = FALSE)
  
  ## separate by station and order meaningfully
  stations_unique <- unique(file_data$station)
  stations_unique <- stations_unique[c(2:7, 1, 8:12)]
  file_by_stat <- lapply(X = stations_unique, FUN = function(sta, file_data) {
    
    file_data[file_data$station == sta,]
  }, file_data)
  
  ## get total time span of data coverage
  t_range <- range(files_start)
  
  ## define periods of interest for further analysis
  t_interest <- data.frame(
    start = as.POSIXct(c(file_by_stat[[1]]$time[730],
                         file_by_stat[[12]]$time[1]), tz = "UTC"),
    stop = as.POSIXct(c(file_by_stat[[1]]$time[6090],
                        file_by_stat[[8]]$time[6980]), tz = "UTC"),
    SA_1 = c(TRUE, FALSE),
    SA_2 = c(TRUE, FALSE),
    SA_3 = c(TRUE, FALSE),
    SA_4 = c(TRUE, FALSE),
    SA_5 = c(TRUE, FALSE),
    SA_6 = c(TRUE, FALSE),
    HVGL1 = c(TRUE, FALSE),
    HVGL2 = c(TRUE, TRUE),
    HVGL3 = c(TRUE, TRUE),
    HVGL4 = c(TRUE, TRUE),
    SA_21 = c(FALSE, TRUE),
    SA_22 = c(FALSE, TRUE))
  
  t_interest_network <- 
    data.frame(
      start = as.POSIXct(c(file_by_stat[[1]]$time[730],
                           file_by_stat[[1]]$time[6090]),
                         tz = "UTC"),
      stop = as.POSIXct(c(file_by_stat[[1]]$time[6090],
                          file_by_stat[[8]]$time[6980]), 
                        tz = "UTC"),
      SA_1 = c(TRUE, FALSE),
      SA_2 = c(TRUE, FALSE),
      SA_3 = c(TRUE, FALSE),
      SA_4 = c(TRUE, FALSE),
      SA_5 = c(TRUE, FALSE),
      SA_6 = c(TRUE, FALSE),
      HVGL1 = c(TRUE, FALSE),
      HVGL2 = c(TRUE, TRUE),
      HVGL3 = c(TRUE, TRUE),
      HVGL4 = c(TRUE, TRUE),
      SA_21 = c(FALSE, TRUE),
      SA_22 = c(FALSE, TRUE))
  
} else {
  
  load(file = pars$path$save_sac)
}

## optionally save results
if(pars$flag$SAC_PROCESS == TRUE) {
  
  save(t_interest, t_interest_network, file = pars$path$save_sac)
}


## PART 4 - ANALYSIS OF CUBE FILE PARAMETERS ----------------------------------
if(pars$flag$CUBE_PROCESS == TRUE) {
  
  ## list all cube files in data directory with full paths
  files_cube_path <- list.files(path = pars$path$cube, 
                           recursive = TRUE, 
                           full.names = TRUE)
  
  ## list all cube files in data directory without full paths
  files_cube_name <- list.files(path = pars$path$cube, 
                                recursive = TRUE)
  
  ## check for files to exclude
  i_out <- grepl(x = files_cube_path, pattern = "config") | 
    grepl(x = files_cube_path, pattern = "CONFIG")
  
  ## remove unsuitable files
  files_cube_path <- files_cube_path[!i_out]
  files_cube_name <- files_cube_name[!i_out]
  
  ## get cube info data from all files
  data_cube <- lapply(X = files_cube_path, 
                      FUN = aux_cubeinfo, 
                      gipptools = pars$path$gipptools)
  
  ## remove unsuitable output (e.g., processed config files)
  i_OK <- do.call(c, lapply(data_cube, FUN = ncol)) == 20
  data_cube_ok <- data_cube[i_OK] 
  
  ## convert list to data frame
  data_cube_ok <- do.call(rbind, data_cube_ok)
  
  ## append file names and paths
  data_cube <- cbind(files_cube_name[i_OK],
                     files_cube_path[i_OK],
                     data_cube_ok)
} else {
  
  load(file = pars$path$save_cube)
}

## optionally save results
if(pars$flag$CUBE_SAVE == TRUE) {
  
  save(data_cube, file = pars$path$save_cube)
}

## group data by logger ID
cube_id <- substr(x = data_cube$`recording unit`, start = 3, stop = 5)
cube_id_unique <- unique(cube_id)
cube_data_by_cube <- 
  lapply(X = cube_id_unique, FUN = function(id, data_cube, cube_id) {
    
    data_cube[cube_id == id,]
  }, data_cube, cube_id)
names(cube_data_by_cube) <- cube_id_unique

## PART 5 - PLOTS -------------------------------------------------------------
if(pars$flag$SAC_PLOT == TRUE) {

  ## set up plot area
  par(mfcol = c(6, 2))
  
  ## plot data coverage by station with all available data
  for(i in 1:length(stations_unique)) {
    
    plot(file_by_stat[[i]]$time, 
         y = rep(1, length(file_by_stat[[i]]$time)), 
         pch = 15,
         xlim = t_range,
         main = stations_unique[i])
  }
  
  ## plot two periods of interest separately
  
  ## set up plot device and area
  jpeg(filename = pars$path$jpeg_sac, 
       width = 2000, 
       height = 1200,
       res = 100)
  
  par(mfcol = c(5, 3))
  
  ## plot data coverage by station for the two periods of interest
  station_ok <- names(t_interest)[t_interest[1,] == TRUE]
  
  file_by_stat_ok <- file_by_stat[t_interest[1,3:14] == TRUE]
  
  for(j in 1:10) {
    
    try(plot(file_by_stat_ok[[j]]$time, 
             y = rep(1, length(file_by_stat_ok[[j]]$time)), 
             pch = 15,
             xlim = c(t_interest$start[1], t_interest$stop[1]),
             main = stations_unique[j],
             col = 4))
  }
  
  file_by_stat_ok <- file_by_stat
  file_by_stat_ok <- file_by_stat_ok[t_interest[2,3:14] == TRUE]
  
  for(j in 1:5) {
    
    try(plot(file_by_stat_ok[[j]]$time, 
             y = rep(1, length(file_by_stat_ok[[j]]$time)), 
             pch = 15,
             xlim = c(t_interest$start[2], t_interest$stop[2]),
             main = stations_unique[t_interest[2,3:14] == TRUE][j], 
             col = 3))
  }
  
  dev.off()
}


if(pars$flag$CUBE_PLOT == TRUE) {
  
  ## set up plot device and area
  jpeg(filename = pars$path$jpeg_cube, 
       width = 2000, 
       height = 1200,
       res = 100)
  par(mfcol = c(1, 3))

  ## plot 
  boxplot(lapply(cube_data_by_cube, 
                 FUN = function(x) {
                   as.numeric(x$`amplification factor`)
                   }),
          main = "Gain")
  
  boxplot(lapply(cube_data_by_cube, 
                 FUN = function(x) {
                   as.numeric(substr(x$`sample rate`, 1, 3))
                   }),
          main = "Sample rate")
  
  boxplot(lapply(cube_data_by_cube, 
                 FUN = function(x) {
                   as.numeric(nchar(x$`recorded channels`) / 3 + 1/3)
                   }),
          main = "Number of channels")
  
  ## close plot device
  dev.off()
}