Cropping data recorded before deployment and after retrieval

Editable script

Here is a script you can use if you would prefer to have all of the cropping code in a single editable script rather than in an installable package. Click the clipboard icon in the top right corner of the code chunk to copy the code, open a new R script in RStudio, paste the code there, and save the script wherever you would like to store it on your computer.

# load necessary libraries
library(readxl)
# all the following six libraries can be loaded either in a single line using
# library(tidyverse)
# or separately as below
library(lubridate) # for handling dates
library(stringr)
library(readr)
library(dplyr)
library(tidyr)
library(ggplot2)

# ---- Setup: file locations and lookup table ----
# base directory for this season's data; all other folders are built from it
base_loc <- "data/2022_summer/"

# directory containing your raw data
rawdata_loc <- paste0(base_loc, "1_raw_csv/")

# directory where your cropped data will be (or currently is) stored
cropped_loc <- paste0(base_loc, "2_cropped_csv/")

# directory where plots of cropped and raw data will be stored
croppedplots_loc <- paste0(base_loc, "2_cropped_plots/")

# create the output directories if they don't exist yet, so that writing the
# cropped csv files and plots later doesn't fail on a fresh project
# (existing directories are left untouched)
for (out_dir in c(cropped_loc, croppedplots_loc)) {
  dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)
}

# name of your LDRTimes file that has the lookup table with deployment/retrieval dates
ldrtimes_fn <- "LDRTimes_summer22.xlsx"

# check that R can find your raw data files
# Get all temperature data filenames
# note: `pattern` is a regular expression, not a glob; "\\.csv$" matches only
#       filenames that end in ".csv"
# IMPORTANT: filenames must be in the format sitename_medium_deployseason_deployyear.csv
#            e.g. NolanLower_air_sum_23.csv
csv_files <- list.files(path = rawdata_loc, pattern = "\\.csv$")
# this is now a character vector of all filenames; we haven't read in the data
# yet, but make sure this lists all the raw files you want to crop
csv_files

# read in LDR file and take a look at it
# note: this assumes the LDR file is in the folder indicated by rawdata_loc
ldrtimes <- readxl::read_xlsx(paste0(rawdata_loc, ldrtimes_fn))

# once you're sure that file paths are working and your ldrtimes looks right,
# crop the files!

# ---- Crop each raw file ----
# For every raw csv file: read it, parse its timestamps, look up the matching
# deployment/retrieval times in ldrtimes, keep only the records strictly
# between those two times, write the cropped data to cropped_loc, and save a
# raw-vs-cropped plot to croppedplots_loc.
# Stops with an error if a file matches zero or several rows of ldrtimes;
# skips a file (with a warning) if its deployment window is missing or invalid.
i <- 0
for (this.file in csv_files) {
  i <- i + 1
  #this.file = csv_files[1] # uncomment to troubleshoot within loop
  cat(paste0("Reading file ", i, " of ", length(csv_files), ": ", this.file), fill = TRUE)

  # filename without its extension; used to name the output files so that
  # every raw file gets its own cropped csv AND its own plot
  file.base <- stringr::str_split_i(this.file, "[.]", 1)

  # extract metadata from the filename
  # (expected format: sitename_medium_deployseason_deployyear.csv)
  filename.parts <- stringr::str_split_1(this.file, "[_.]")
  csv.site <- filename.parts[1]
  csv.media <- filename.parts[2]
  csv.season <- filename.parts[3]
  csv.year <- filename.parts[4]

  # read the raw data once
  this.data <- readr::read_csv(paste0(rawdata_loc, this.file),
                               skip = 2, # skip the first two lines of the file
                               col_select = 1:3, # read only the first three columns of data
                               col_names = FALSE, # don't try to name columns from a row of the file
                               show_col_types = FALSE) %>% # suppresses print message
    dplyr::rename("row.num" = X1,
                  "datetime" = X2,
                  "temperature" = X3)

  # convert the character-format datetime to an R POSIXct object
  # the timestamps are month/day/year followed by either hh:mm:ss or hh:mm,
  # so try mdy_hms first and fall back to mdy_hm only for the values that
  # mdy_hms could not parse (quiet = TRUE: failures come back as NA rather
  # than as warnings, and are reported once below)
  raw.datetime <- as.character(this.data$datetime)
  parsed.datetime <- lubridate::mdy_hms(raw.datetime, quiet = TRUE)
  needs.hm <- is.na(parsed.datetime) & !is.na(raw.datetime)
  if (any(needs.hm)) {
    parsed.datetime[needs.hm] <- lubridate::mdy_hm(raw.datetime[needs.hm],
                                                   quiet = TRUE)
  }
  n.unparsed <- sum(is.na(parsed.datetime) & !is.na(raw.datetime))
  if (n.unparsed > 0) {
    warning(n.unparsed, " datetime value(s) in ", this.file,
            " could not be parsed as mdy_hms or mdy_hm and were set to NA.",
            call. = FALSE, immediate. = TRUE)
  }
  this.data$datetime <- parsed.datetime

  # look up the deployment window for this file
  deploy.retrieval <- ldrtimes %>%
    # select the row(s) of ldrtimes that match this datafile
    # should be exactly one row, but if there are no rows or multiple rows that
    # match, this step will pull that many rows
    dplyr::filter(site == csv.site, deploy_season == csv.season,
                  deploy_year == csv.year, media == csv.media) %>%
    # keep just the deploy_time and retrieval_time variables/columns
    dplyr::select(deploy_time, retrieval_time)

  if (nrow(deploy.retrieval) == 0) {
    stop("no rows of ldrtimes matched this csv file.")
  }
  if (nrow(deploy.retrieval) > 1) {
    stop("multiple rows of ldrtimes matched this csv file.")
  }

  deploy <- deploy.retrieval$deploy_time
  retrieval <- deploy.retrieval$retrieval_time

  # a missing or reversed window cannot be cropped; skip this file instead of
  # writing out whatever cropped.data was left over from the previous file
  if (is.na(deploy) || is.na(retrieval) || retrieval <= deploy) {
    warning("Skipping ", this.file,
            ": retrieval_time must be later than deploy_time in ldrtimes.",
            call. = FALSE, immediate. = TRUE)
    next
  }

  # crop the data: keep records strictly between deployment and retrieval
  cropped.data <- dplyr::filter(this.data,
                                datetime > deploy,
                                datetime < retrieval)

  # write cropped csv files to cropped folder
  readr::write_csv(cropped.data,
                   file = paste0(cropped_loc, file.base, "_cropped.csv"))

  # Create a dataframe of the raw and cropped data
  # rows outside the deployment window have no match in cropped.data, so their
  # cropped.temp is NA and they are drawn only as raw data
  cropvraw <- dplyr::left_join(this.data, cropped.data, by = c("row.num", "datetime")) %>%
    dplyr::rename(raw.temp = temperature.x,
                  cropped.temp = temperature.y) %>% # rename temperature from each file
    # create new column of data type (raw or cropped for plotting in ggplot)
    tidyr::pivot_longer(cols = raw.temp:cropped.temp,
                        names_to = "type", values_to = "temp")

  cropvraw.plot <- ggplot2::ggplot(cropvraw,
                                   ggplot2::aes(x = datetime,
                                                y = temp,
                                                color = type)) +
    ggplot2::geom_line(na.rm = TRUE) +
    ggplot2::geom_point(na.rm = TRUE) +
    ggplot2::labs(title = paste0(" Raw versus Cropped data"),
                  x = "Date", y = "Temperature (C)") +
    ggplot2::theme(axis.text = ggplot2::element_text(colour = "black", size = (12)))

  # name the plot after the full raw filename (not just the site) so that
  # e.g. air and water files from the same site don't overwrite each other
  ggplot2::ggsave(paste0(croppedplots_loc, file.base, "_rawvscroppeddata.png"),
                  cropvraw.plot,
                  width = 11, height = 8.5, units = "in")

}; cat("Done.", fill = TRUE)

Demo using `dataQCtools::crop_raw_data()`

In development

library(dataQCtools)

Editable script

Demo using dataQCtools::crop_raw_data()

Demo using `dataQCtools::crop_raw_data()`