kdaily · May 1, 2020 14:45
diff --git a/semi-annual-usage-report.Rmd b/semi-annual-usage-report.Rmd
 ---
 title: "AMP-AD Semi-Annual Report"
 author: "Kenneth Daily"
 date: "`r format(Sys.time(), '%B %d, %Y')`"
 output: html_document
 ---

 ```{r setup, include=FALSE}
 # Hide code, messages, and warnings for every chunk in the rendered report.
 knitr::opts_chunk$set(
   echo = FALSE,
   message = FALSE,
   warning = FALSE
 )

 library(tidyverse)
 library(lubridate)
 library(synapser)
 library(synapseusagereports)

 # Columns retained from the raw usage CSV files read later in the report.
 keep_cols <- c(
   "id", "DATE", "TIMESTAMP",
   "NODE_TYPE", "NAME", "recordType", "date", "userId",
   "dateGrouping", "monthYear"
 )

 # Log in to Synapse; no credentials are passed here.
 # NOTE(review): presumably relies on cached/configured credentials - confirm.
 synLogin()
 ```

 ```{r}
 # Before knitting, copy the usage CSV files (from S3 or wherever they are
 # stored) into this directory.
 data_dir <- "/tmp/usagestats"

 # Paths of the two extracts for the reporting period. file.path() joins the
 # components with "/" and returns plain character vectors.
 download_file <- file.path(
   data_dir,
   "another-syn2580853-20190601-20191201-download.csv"
 )
 filedownloadrecord_file <- file.path(
   data_dir,
   "another-syn2580853-20190601-20191201-filedownloadrecord.csv"
 )

 # Previous reporting period, kept for reference:
 # download_file <- file.path(data_dir, "syn2580853_20181201-20190601-download.csv")
 # filedownloadrecord_file <- file.path(data_dir, "syn2580853_20181201-20190601-filedownloadrecord.csv")
 ```

 ```{r readdata}
 # Read both usage extracts, keeping only the columns named in `keep_cols`.
 # any_of() replaces the superseded one_of(): columns that are absent from a
 # file are skipped instead of raising an error.
 download_data <- read_csv(download_file) %>%
   select(any_of(keep_cols))
 # Every row of the file-download-record extract is labelled as a download.
 fdr_data <- read_csv(filedownloadrecord_file) %>%
   select(any_of(keep_cols)) %>%
   mutate(recordType = "download")

 # Combine download and fdr data. bind_rows() matches columns by name and
 # fills missing ones with NA, so the two extracts do not need an identical
 # column set (which the tolerant selection above does not guarantee).
 query_data <- bind_rows(download_data, fdr_data) %>%
   distinct() %>%
   mutate(id = paste0("syn", id),
          userId = as.character(userId),
          quarter = lubridate::quarter(DATE),
          year = lubridate::year(DATE),
          quarteryear = glue::glue("{year} Q{quarter}"))

 # Team IDs handed to processTeamMemberList().
 # NOTE(review): presumably the order sets team precedence for users who
 # belong to more than one team - confirm against synapseusagereports.
 team_order <- c(3320424, 273957)
 user_list <- processTeamMemberList(team_order)

 all_users <- getQueryUserProfiles(query_data,
                                   useTeamGrouping = TRUE,
                                   userList = user_list)

 # No `by` is given, so the join uses every column the two tables share.
 # NOTE(review): presumably that is only `userId` - confirm and make explicit.
 query_data <- query_data %>%
   left_join(all_users)

 # Per-(file, user) de-duplication is currently disabled; every record is kept.
 # To re-enable it, pipe query_data through:
 #   group_by(id, userId) %>% slice(1) %>% ungroup()
 query_data_single <- query_data

 # All data files in this file view are the ones we're interested in
 file_view_id <- "syn11346063"
 file_view_res <- synTableQuery(glue::glue("SELECT id,study from {file_view_id}"))

 file_view_df <- file_view_res$asDataFrame() %>%
   tibble::as_tibble() %>%
   select(id, study)

 # Attach the study annotation to each record; records whose id is not in the
 # file view get study = NA.
 query_data_annotated <- dplyr::left_join(query_data_single,
                                          file_view_df, by = "id")

 # Test here for number of entities without a study annotated
 # query_data_annotated %>% filter(is.na(study)) %>% summarize(n=n_distinct(id))

 # Only the records that carry a study annotation.
 query_data_annotated_filtered <- query_data_annotated %>%
   filter(!is.na(study))

 ```

 ```{r}
 # One-row table with one column per team name holding that team's record count.
 res <- pivot_wider(
   count(query_data_annotated, teamName),
   names_from = teamName,
   values_from = n
 )
 ```

 ```{r}
 # Share of records from the AMP-AD Consortium, out of the consortium and
 # registered-user counts combined.
 prop_ampad <- local({
   ampad_n <- res$`AMP-AD Consortium`
   registered_n <- res$`Registered Synapse User`
   ampad_n / (ampad_n + registered_n)
 })
 ```
 ## Summary

 ```{r}
 # Totals over every record, whether or not it has a study annotation.
 summarize(
   query_data_annotated,
   nrecords = n(),
   nusers = n_distinct(userId),
   nfiles = n_distinct(id),
   min_date = min(DATE),
   max_date = max(DATE)
 )
 ```

 ```{r}
 # Same totals, restricted to records that have a study annotation.
 summarize(
   query_data_annotated_filtered,
   nrecords = n(),
   nusers = n_distinct(userId),
   nfiles = n_distinct(id),
   min_date = min(DATE),
   max_date = max(DATE)
 )
 ```

 Between `r min(query_data_annotated$dateGrouping)` and `r max(query_data_annotated$dateGrouping)` there are:

 - `r nrow(query_data_annotated)` downloads.
 - `r round(prop_ampad, digits = 2) * 100`% are from the AMP-AD Consortium.

 ## Downloads by study

 ```{r}
 # Interactive table of per-study usage: distinct users, download records,
 # distinct files, and downloads per file.
 bystudy <- DT::datatable(
   query_data_annotated %>%
     group_by(study) %>%
     summarize(
       Users = n_distinct(userId),
       Downloads = n(),
       Files = n_distinct(id)
     ) %>%
     ungroup() %>%
     mutate(`Downloads Per File` = Downloads / Files) %>%
     select(Study = study, Files, Users, Downloads, `Downloads Per File`),
   options = list(pageLength = 75, lengthChange = FALSE)
 )

 bystudy
 ```

 ```{r}
 # Download records and distinct users for each dateGrouping value.
 summarize(
   group_by(query_data_annotated, dateGrouping),
   Downloads = n(),
   Users = n_distinct(userId)
 )
 ```
	---
	title: "AMP-AD Semi-Annual Report"
	author: "Kenneth Daily"
	date: "`r format(Sys.time(), '%B %d, %Y')`"
	output: html_document
	---

	```{r setup, include=FALSE}
	# Hide code, messages, and warnings for every chunk in the rendered report.
	knitr::opts_chunk$set(
	  echo = FALSE,
	  message = FALSE,
	  warning = FALSE
	)

	library(tidyverse)
	library(lubridate)
	library(synapser)
	library(synapseusagereports)

	# Columns retained from the raw usage CSV files read later in the report.
	keep_cols <- c(
	  "id", "DATE", "TIMESTAMP",
	  "NODE_TYPE", "NAME", "recordType", "date", "userId",
	  "dateGrouping", "monthYear"
	)

	# Log in to Synapse; no credentials are passed here.
	# NOTE(review): presumably relies on cached/configured credentials - confirm.
	synLogin()
	```

	```{r}
	# Before knitting, copy the usage CSV files (from S3 or wherever they are
	# stored) into this directory.
	data_dir <- "/tmp/usagestats"

	# Paths of the two extracts for the reporting period. file.path() joins the
	# components with "/" and returns plain character vectors.
	download_file <- file.path(
	  data_dir,
	  "another-syn2580853-20190601-20191201-download.csv"
	)
	filedownloadrecord_file <- file.path(
	  data_dir,
	  "another-syn2580853-20190601-20191201-filedownloadrecord.csv"
	)

	# Previous reporting period, kept for reference:
	# download_file <- file.path(data_dir, "syn2580853_20181201-20190601-download.csv")
	# filedownloadrecord_file <- file.path(data_dir, "syn2580853_20181201-20190601-filedownloadrecord.csv")
	```

	```{r readdata}
	# Read both usage extracts, keeping only the columns named in `keep_cols`.
	# any_of() replaces the superseded one_of(): columns that are absent from a
	# file are skipped instead of raising an error.
	download_data <- read_csv(download_file) %>%
	  select(any_of(keep_cols))
	# Every row of the file-download-record extract is labelled as a download.
	fdr_data <- read_csv(filedownloadrecord_file) %>%
	  select(any_of(keep_cols)) %>%
	  mutate(recordType = "download")

	# Combine download and fdr data. bind_rows() matches columns by name and
	# fills missing ones with NA, so the two extracts do not need an identical
	# column set (which the tolerant selection above does not guarantee).
	query_data <- bind_rows(download_data, fdr_data) %>%
	  distinct() %>%
	  mutate(id = paste0("syn", id),
	         userId = as.character(userId),
	         quarter = lubridate::quarter(DATE),
	         year = lubridate::year(DATE),
	         quarteryear = glue::glue("{year} Q{quarter}"))

	# Team IDs handed to processTeamMemberList().
	# NOTE(review): presumably the order sets team precedence for users who
	# belong to more than one team - confirm against synapseusagereports.
	team_order <- c(3320424, 273957)
	user_list <- processTeamMemberList(team_order)

	all_users <- getQueryUserProfiles(query_data,
	                                  useTeamGrouping = TRUE,
	                                  userList = user_list)

	# No `by` is given, so the join uses every column the two tables share.
	# NOTE(review): presumably that is only `userId` - confirm and make explicit.
	query_data <- query_data %>%
	  left_join(all_users)

	# Per-(file, user) de-duplication is currently disabled; every record is kept.
	# To re-enable it, pipe query_data through:
	#   group_by(id, userId) %>% slice(1) %>% ungroup()
	query_data_single <- query_data

	# All data files in this file view are the ones we're interested in
	file_view_id <- "syn11346063"
	file_view_res <- synTableQuery(glue::glue("SELECT id,study from {file_view_id}"))

	file_view_df <- file_view_res$asDataFrame() %>%
	  tibble::as_tibble() %>%
	  select(id, study)

	# Attach the study annotation to each record; records whose id is not in the
	# file view get study = NA.
	query_data_annotated <- dplyr::left_join(query_data_single,
	                                         file_view_df, by = "id")

	# Test here for number of entities without a study annotated
	# query_data_annotated %>% filter(is.na(study)) %>% summarize(n=n_distinct(id))

	# Only the records that carry a study annotation.
	query_data_annotated_filtered <- query_data_annotated %>%
	  filter(!is.na(study))

	```

	```{r}
	# One-row table with one column per team name holding that team's record count.
	res <- pivot_wider(
	  count(query_data_annotated, teamName),
	  names_from = teamName,
	  values_from = n
	)
	```

	```{r}
	# Share of records from the AMP-AD Consortium, out of the consortium and
	# registered-user counts combined.
	prop_ampad <- local({
	  ampad_n <- res$`AMP-AD Consortium`
	  registered_n <- res$`Registered Synapse User`
	  ampad_n / (ampad_n + registered_n)
	})
	```
	## Summary

	```{r}
	# Totals over every record, whether or not it has a study annotation.
	summarize(
	  query_data_annotated,
	  nrecords = n(),
	  nusers = n_distinct(userId),
	  nfiles = n_distinct(id),
	  min_date = min(DATE),
	  max_date = max(DATE)
	)
	```

	```{r}
	# Same totals, restricted to records that have a study annotation.
	summarize(
	  query_data_annotated_filtered,
	  nrecords = n(),
	  nusers = n_distinct(userId),
	  nfiles = n_distinct(id),
	  min_date = min(DATE),
	  max_date = max(DATE)
	)
	```

	Between `r min(query_data_annotated$dateGrouping)` and `r max(query_data_annotated$dateGrouping)` there are:

	- `r nrow(query_data_annotated)` downloads.
	- `r round(prop_ampad, digits = 2) * 100`% are from the AMP-AD Consortium.

	## Downloads by study

	```{r}
	# Interactive table of per-study usage: distinct users, download records,
	# distinct files, and downloads per file.
	bystudy <- DT::datatable(
	  query_data_annotated %>%
	    group_by(study) %>%
	    summarize(
	      Users = n_distinct(userId),
	      Downloads = n(),
	      Files = n_distinct(id)
	    ) %>%
	    ungroup() %>%
	    mutate(`Downloads Per File` = Downloads / Files) %>%
	    select(Study = study, Files, Users, Downloads, `Downloads Per File`),
	  options = list(pageLength = 75, lengthChange = FALSE)
	)

	bystudy
	```

	```{r}
	# Download records and distinct users for each dateGrouping value.
	summarize(
	  group_by(query_data_annotated, dateGrouping),
	  Downloads = n(),
	  Users = n_distinct(userId)
	)
	```