### --- PDC Explorer Visualization ---
### Developed by Patrick Bosworth and Greg Vey
### Initial development: 2023 January through 2023 March
### Release candidate development 2023 May
#### Changes: Production API released. New URL, rate limits (500/day), admin API changed URL. 
#### Not yet implemented:Add caching functions, logos, and funding statement: "made possible by funding from Compute Ontario"

## Install and define libraries needed for the visualization 
## Uncomment the install lines if your development environment does not have the libraries installed.
#install.packages("shiny")
#install.packages("shinyWidgets")
#install.packages("leaflet")
#install.packages("leaflet.extras2")
#install.packages("stringr")
#install.packages("httr")
#install.packages("jsonlite")
#install.packages("tidyverse")
#install.packages("shiny.i18n")
#install.packages("rsconnect")
library(shiny)
library(shinyWidgets)
library(leaflet)
library(leaflet.extras2)
library(stringr)
library(httr)
library(jsonlite)
library(tidyverse)
library(shiny.i18n)
library(rsconnect)

## The following lines define files that must be included when the app is packaged for deployment:
## pdc.png; metadata.csv; translationPDC.json
# Define the icon that appears within rectangles on the map
pdcIcon <- makeIcon(iconUrl = "pdc.png")
computeOntarioIcon <- makeIcon(iconUrl = "COlogo.png")
# Flat file backup used in case the API is unresponsive
backupMetadata <- read.csv("metadata.csv") # CSV file is updated as of 2023-05-10
#metadata <- backupMetadata  # Temporary workaround for API/caching. 
# Language translation setup
i18n <- Translator$new(translation_json_path = "translationPDC.json")
i18n$set_translation_language("en")
english <- TRUE

## Due to new API rate limits, program will only use the backed-up metadata.csv file
## Once caching functions are complete, program will only request minimally from the API
## --- Get Programs List from API ---
# Retrieve from API the list of all PDC programs by composing a GET request 
progURL <- "https://polardata.ca/api/admin/programs/list"  # URL is new, admin access otherwise the same
response <-
  GET(progURL, add_headers(
    .headers = c("PDC-ADMIN-USER" = "pdcAdmin", "X-API-KEY" = "raldbthar")
  ))
json_text <- content(response, "text")
programList <- fromJSON(json_text, flatten = TRUE)

## --- Get Complete Metadata from API ---
# Iterate through the list of PDC programs, compose an API GET request, and store the metadata into a dataframe 
metadataURL <- "https://polardata.ca/api/metadata/program/"  # URL is new 10-May 2023
metadata <- data.frame()
programMenu <- list()
authorList <- list()
index <- 1
for (p in programList$"programs") {
  skip_to_next <- FALSE
  page <- 0
  status <- 200
  listed <- FALSE
  tryCatch({
    while (status == 200) {
      apiCall <-
        paste0(
          metadataURL,
          str_replace_all(str_replace_all(p, "/", ""), " ", "%20"),
          "?page=",
          page
        )
      response <- GET(apiCall)
      status = status_code(response)
      if (status == 200) {
        json_text <- content(response, "text", encoding = "UTF-8")
        json_data <- fromJSON(json_text, flatten = TRUE)
        dat <-
          data.frame(
            json_data$"itemListElement",
            .id = str_replace_all(p, "Qubec-Ocan", "Québec-Océan")
          )
        metadata <- bind_rows(metadata, dat)
        if (!listed) {
          programMenu[[index]] <- str_replace_all(p, "Qubec-Ocan", "Québec-Océan")
          index <- index + 1
          listed <- TRUE
        }
        page <- page + 1
      }
    }
  }, error = function(e) {
    skip_to_next <<- TRUE
  })
  if (skip_to_next) {
    next
  }
}
# Check if API was unresponsive, and use flat file backup if the API server goes down
if (length(metadata) < 10) {
  metadata <- backupMetadata 
}
metadata$item.keywords <- vapply(metadata$item.keywords, paste, collapse = ", ", character(1L))
##write.csv(metadata, "metadata.csv") # to make a flat-file backup of the metadata, pre-modification


## --- Data Adjustment Section --- 
# Split temporal coverage into separate start and end dates, with standard formatting
metadata <- separate(data = metadata, col = item.temporalCoverage, into = c("coverageStart", "coverageEnd"), sep = "\\/", remove = FALSE)
metadata[metadata == "Not Defined"] <- NA
metadata$coverageStart <- as.Date(metadata$coverageStart, format = "%Y-%b-%d")
metadata$coverageEnd <- as.Date(metadata$coverageEnd, format = "%Y-%b-%d")

# Also fix dates in other columns
metadata$item.datePublished <- as.Date(metadata$item.datePublished)
metadata$item.dateModified <- as.Date(metadata$item.dateModified)

# Pull out the CCIN identifier from item.url - the non-ID part is fixed width of 57 chars
# Note: CCIN IDs are duplicated over 1000 times. Generally, the Project column is different
metadata$ccinID <- str_sub(metadata$item.url, 57,)

# Pull out map lat/long coordinates into their own columns, making map processing faster
metadata <- separate(data = metadata, col = item.spatialCoverage.geo.box, into = c("lat1", "lng1", "lat2", "lng2"), sep = " ", remove = FALSE, convert = TRUE)

# Convert item.keywords into columns by looping through each item.keyword in the dataframe
# The loop will attempt to skip badly formatted entries (see ccID 11277) where improper UTF-8 formatting mangles the item.keyword list
# Afterward, the columns are renamed for simplicity
kwGroups <- c("Geographic locations", "Health sciences and contaminants", "Inuktitut keywords", "Methods", "Natural sciences", "Northern communities", "Social sciences, economics and policy", "Transdisciplinary")
metadata[kwGroups] <- "None"
for (i in 1:length(metadata$item.keywords)) {
  for (j in 1:length(metadata$item.keywords[[i]])) {
    group <- str_split_i(metadata$item.keywords[[i]][[j]], " -> ", 1)
    keyw <- str_split_i(metadata$item.keywords[[i]][[j]], " -> ", 2)
    if(!is.null(metadata[[group]][[i]]))
      if(is.na(metadata[[group]][[i]])) {
        metadata[[group]][[i]] <- keyw
      } else {
        metadata[[group]][[i]] <- paste(metadata[[group]][[i]], ", ", keyw, sep = "")
      }
    else {
      next
    }
  }
}

# Misc Metadata cleaning and helper columns
metadata <- metadata %>% rename("kwGeogLoc" = "Geographic locations",
                                "kwHealthContam" = "Health sciences and contaminants",
                                "kwInuktitut" = "Inuktitut keywords", 
                                "kwMethods" = "Methods", 
                                "kwNatSci" = "Natural sciences", 
                                "kwNorthComm" = "Northern communities", 
                                "kwSocEconPol" = "Social sciences, economics and policy", 
                                "kwTransdisc" = "Transdisciplinary")
metadata$Link <- paste0('<a href="', metadata$item.url, '" target="_blank">', metadata$item.url, '</a>')
metadata$ccinID <- as.integer(metadata$ccinID) # this ensures good sorting in the table view
# Other helper variables
authorList <- as.list(sort(unique(metadata$item.creator.name)))
# Create list of all tags, with associated tag category, for use in the tags inputPicker
tagsListGeogLoc <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwGeogLoc), ", ")))))
colnames(tagsListGeogLoc)[1] <- "Tags"
tagsListGeogLoc$category <- "Geographic"
tagsListHealthContam <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwHealthContam), ", ")))))
colnames(tagsListHealthContam)[1] <- "Tags"
tagsListHealthContam$category <- "Health/Contaminants"
tagsListInuktitut <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwInuktitut), ", ")))))
colnames(tagsListInuktitut)[1] <- "Tags"
tagsListInuktitut$category <- "Inuktitut"
tagsListMethods <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwMethods), ", ")))))
colnames(tagsListMethods)[1] <- "Tags"
tagsListMethods$category <- "Methods"
tagsListNatSci <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwNatSci), ", ")))))
colnames(tagsListNatSci)[1] <- "Tags"
tagsListNatSci$category <- "Natural sciences"
tagsListNorthComm <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwNorthComm), ", ")))))
colnames(tagsListNorthComm)[1] <- "Tags"
tagsListNorthComm$category <- "Northern communities"
tagsListSocEconPol <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwSocEconPol), ", ")))))
colnames(tagsListSocEconPol)[1] <- "Tags"
tagsListSocEconPol$category <- "Social sciences, economics and policy"
tagsListTransdisc <- as.data.frame(sort(unique(unlist(strsplit(as.character(metadata$kwTransdisc), ", ")))))
colnames(tagsListTransdisc)[1] <- "Tags"
tagsListTransdisc$category <- "Transdisciplinary"
tagsList <- bind_rows(tagsListGeogLoc, tagsListHealthContam, tagsListInuktitut, tagsListMethods, tagsListNatSci, tagsListNorthComm, tagsListSocEconPol, tagsListTransdisc)
tagsList <- tagsList[tagsList$Tags != "None", ]
tagsListCategories <- as.list(tagsList$category)
tagsList <- as.list(tagsList$Tags)
rm(tagsListGeogLoc, tagsListHealthContam, tagsListInuktitut, tagsListMethods, tagsListNatSci, tagsListNorthComm, tagsListSocEconPol, tagsListTransdisc)

programList <- as.list(sort(unique(metadata$.id)))
# 2023-May-29 - rename the .id column
colnames(metadata)[which(names(metadata) == ".id")] <- "Project"
tabularCols <- c("ccinID", "Project", "item.name", "item.datePublished", "item.dateModified", "coverageStart", "coverageEnd", "item.creator.name", "item.creator.contactPoint.email", "Link")


## --- Shiny UI Section ---
# --- Setup UI to control map, filters---
ui <- fluidPage(
  # Translation uses browser-side approach with the below and update_lang function to refresh the whole display
  shiny.i18n::usei18n(i18n),
  # Sidebar layout with input and output definitions ----
  sidebarLayout(
    sidebarPanel(
      titlePanel(i18n$t("PDC Explorer"), windowTitle = "PDC Explorer"),
      # Translation switcher button and associated live language refresh code
      actionButton("refresh", "Française", icon("refresh")),
      helpText(i18n$t("Adjust filters to view projects by study site. Click on a bounding box for more details. Send boxes to the back by clicking.")),
      # Programs filter
      pickerInput(
        inputId = "program",
        label = i18n$t("Research Programs"),
        choices = programList, # 
        multiple = TRUE,
        selected = "Amundsen Science-Amundsen Science",
        options = pickerOptions(
          actionsBox = TRUE, 
          liveSearch = TRUE,
          deselectAllText = i18n$t("Deselect All"), selectAllText = i18n$t("Select All"),
          noneSelectedText = "Nothing selected" # i18n doesnt work here - displays html instead of actual translation
        )
      ),
      # Date range filter
      sliderInput(
        inputId = "daterange2", 
        label = i18n$t("Publishing Date Range:"),
        min = as.Date("2000-01-01"),  
        max = Sys.Date(),
        timeFormat = "%Y",
        value = c(as.Date("2005-01-01"), Sys.Date())
      ),
      # Author name filter
      pickerInput(
        inputId = "author",
        label = i18n$t("Author Name"),
        choices = authorList,
        multiple = TRUE,
        selected = authorList,
        options = pickerOptions(
          actionsBox = TRUE, 
          liveSearch = TRUE, 
          deselectAllText = i18n$t("Deselect All"), selectAllText = i18n$t("Select All"),
          noneSelectedText = "Nothing selected", # i18n doesn't work here - displays html instead of actual translation
          noneResultsText = i18n$t("No results matched ")
        )
      ),
      # Tags filter
      pickerInput(
        inputId = "tags",
        label = i18n$t("Tags Search"),
        choices = tagsList,
        choicesOpt = list(subtext = tagsListCategories),
        multiple = TRUE,
        selected = tagsList,
        options = pickerOptions(
          actionsBox = TRUE, 
          liveSearch = TRUE, 
          deselectAllText = i18n$t("Deselect All"), selectAllText = i18n$t("Select All"),
          noneSelectedText = "Nothing selected" # i18n doesnt work here - displays html instead of actual translation
        )
      ),
      # Misc displays in the sidebar
      textOutput("displayCount"),
      actionButton("resetButton", i18n$t("Reset Filters")),
      downloadButton("downloadData", i18n$t("Download as CSV")),
      helpText(i18n$t("Made possible by funding from Compute Ontario")),
      img(src="https://images.squarespace-cdn.com/content/v1/5fdabf184103dc3d39174616/a64032f7-f7d9-4871-8495-631a53d4bb06/ComputeOntario_LOGO_300dpi_RGB.png?format=1500w",width=43,style="display: block; margin-left: auto; margin-right: auto; margin-top: 10px;"),
      width = 3 # out of 12 units ~ 1/4 of the window
      # sidebarPanel ends here
    ),
    # Define map area to the right/underneath of the sidebar
    mainPanel(  # map and table go here
      tags$style(type = "text/css", "#map {height: calc(100vh - 100px) !important;}"),
      width = 9, # out of 12 units ~ 3/4 of the window
      # Output: Tabset:a map and a tabular view
      tabsetPanel(type = "tabs",
                  tabPanel(i18n$t("Map"), leafletOutput("map")),
                  tabPanel(i18n$t("Table"), dataTableOutput("table"))
      )
    )
  ), # Layout ends here
  title = i18n$t("PDC Data Explorer")
)

#----------------------------------------------------------------

# --- Setup Server ---
server <- function(input, output, session) {
  
  # Reactive expression for the data subsetted to what the user selected
  # If the user filters out all responses, return a blank frame
  filteredMeta <- reactive({
    tempMeta <- metadata
    if (is.null(input$program)) {
      tempMeta <- tempMeta[tempMeta$Project %in% "Amundsen Science-Amundsen Science",] # name of default program changed 2023-May
    } else {
      tempMeta <- tempMeta[tempMeta$Project %in% input$program &
                             tempMeta$item.creator.name %in% input$author & 
                             tempMeta$item.datePublished >= input$daterange2[1] & 
                             tempMeta$item.datePublished <= input$daterange2[2] &
                             grepl(paste(input$tags, collapse='|'), tempMeta$item.keywords), ]
    }
    if (nrow(tempMeta) == 0) { 
      tempMeta[1, ] <- NA # this is a blank frame which keeps the map from erroring out
    }
    return(tempMeta)
  })
  # Define the Leaflet map content, including base tiles, rectangles, labels, and PDC icon marker
  output$map <- renderLeaflet({
    # Display rectangles and generate popups/markers if search/input params match the user-selected
    leaflet(width = "100%", height = "100%") %>%
      # Map setup requirements
      setView(lng = -100, lat = 72, zoom = 3) %>%
      addMiniMap() %>%
      addProviderTiles(providers$OpenStreetMap, group = "OSM") %>%
      addProviderTiles(providers$Stamen.TonerLite, group = "Toner Lite") %>%
      addProviderTiles(providers$CartoDB.DarkMatter, group = "Dark Matter") %>%
      addLayersControl(baseGroups = c("Colour", "Toner Lite", "Dark Matter")) %>%
      addEasyprint(options = easyprintOptions(
        title = 'Download Map',
        position = 'bottomleft',
        exportOnly = TRUE
      )) %>%
      addRectangles(
        data = filteredMeta(),
        lng1 = ~ lng1,
        lat1 = ~ lat1,
        lng2 = ~ lng2,
        lat2 = ~ lat2,
        noClip = TRUE,
        weight = 2,
        fillColor = "transparent",
        smoothFactor = 0,
        highlightOptions = highlightOptions(
          color = "green",
          weight = 4,
          fillColor = "green",
          fillOpacity = 0.2,
          sendToBack = TRUE
        ), 
        label = ~ substr(item.name, 1, 80), # TODO: this label does not wrap, so long names will roll off the screen
        popup = ~ paste0("<b>", item.name, ": </b>",
                         "<br>", i18n$t("Author: "), item.creator.name,
                         "<br>", i18n$t("Published: "), item.datePublished,
                         "<br>", i18n$t("Time Coverage: "), coverageStart, " to ", coverageEnd,
                         "<br><br>", item.description,
                         "<br><br><a href='", item.url, "' target='_blank'>", i18n$t("Jump to Metadata Record</a>")
        ),
      ) %>%
      addMarkers(
        data = filteredMeta(),
        lng =  ~ lng1,
        lat =  ~ lat1,
        icon = pdcIcon,
        #label = ~ str_wrap(item.name, width = 10),
        #labelOptions = labelOptions(noHide = FALSE),
      )
  })
  # Define the table
  output$table <- renderDataTable(filteredMeta()[,tabularCols],
                                  escape = FALSE,
                                  options = list(
                                    paging = FALSE, 
                                    filter = 'top'
                                  ))
  
  # Display a count of the number of results after filtering
  output$displayCount <- renderText({
    if(is.na(filteredMeta()[1,1])){ # this is a special case for when the filters exclude all results
      paste("Results: 0")
    } else {
      paste("Results:", nrow(filteredMeta())) 
    }
  })
  # Reset the filters to default when the reset button is pressed
  observeEvent(input$resetButton, {
    updateTextInput(session, "program", value = "Amundsen Science-Amundsen Science") # name changed 2023-May
    updateTextInput(session, "daterange2", value = c(as.Date("2000-01-01"), Sys.Date()))
    updateTextInput(session, "author", value = authorList)
    updateTextInput(session, "tags", value = tagsList)
  })
  # Downloadable csv of selected dataset
  output$downloadData <- downloadHandler(
    filename = function() {
      paste("metadata", Sys.Date(), ".csv", sep = "")
    },
    content = function(file) {
      # To output metadata to a CSV the list of keywords must be flattened
      metadata2 <- filteredMeta()
      # Update some fields here
      exportCols <- c("ccinID", "Project","item.name", "item.datePublished", "item.dateModified", "item.description", "item.url", "item.keywords", "item.temporalCoverage", 
                      "coverageStart", "coverageEnd", "item.license", "item.creator..type", "item.creator.name", "item.creator.url", "item.creator.contactPoint..type", 
                      "item.creator.contactPoint.contactType", "item.creator.contactPoint.telephone", "item.creator.contactPoint.email", "item.includedInDataCatalog..type", 
                      "item.includedInDataCatalog.name", "item.spatialCoverage..type", "item.spatialCoverage.geo..type", "item.spatialCoverage.geo.box", 
                      "lat1", "lng1", "lat2", "lng2", "kwGeogLoc", "kwHealthContam", "kwInuktitut", "kwMethods", "kwNatSci", "kwNorthComm", "kwSocEconPol", "kwTransdisc", "Link")
      metadata2 <- metadata2[,exportCols] 
      metadata2$item.keywords <- vapply(metadata2$item.keywords, paste, collapse = ", ", character(1L)) # flatten list to text
      write.csv(metadata2, file, row.names = FALSE)
    }
  )
  # Language switch observer
  observeEvent(input$refresh, {
    if(english == TRUE) {
      shiny.i18n::update_lang("fr")
      updateActionButton(session, "refresh", label = "English")
      english <<- FALSE
    }
    else {
      shiny.i18n::update_lang("en")
      updateActionButton(session, "refresh", label = "Française")
      english <<- TRUE
    }
  })
}

# --- Run Visualization ---
shinyApp(ui = ui, server = server)