Created
May 1, 2020 14:45
-
-
Save kdaily/69234c977fccefb2e59f03368e426a0d to your computer and use it in GitHub Desktop.
Semi-annual report for AMP-AD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
title: "AMP-AD Semi-Annual Report"
author: "Kenneth Daily"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output: html_document
---

```{r setup, include=FALSE}
# Chunk defaults for the whole report: do not echo code, messages or warnings.
knitr::opts_chunk$set(
  echo = FALSE,
  message = FALSE,
  warning = FALSE
)

# Attach order is kept as-is because later packages mask earlier ones.
library(tidyverse)
library(lubridate)
library(synapser)
library(synapseusagereports)

# Columns retained from each usage-record CSV when it is read.
keep_cols <- c(
  "id", "DATE", "TIMESTAMP",
  "NODE_TYPE", "NAME", "recordType", "date", "userId",
  "dateGrouping", "monthYear"
)

# Log in to Synapse before any query is issued.
synLogin()
```

```{r}
# Before knitting, download the usage-record CSV files (from S3 or another
# location) into this directory.
data_dir <- "/tmp/usagestats"

# File-name prefix shared by both exports for the reporting period. To report
# on a different period change only this value, e.g. the previous period was
# "syn2580853_20181201-20190601".
report_prefix <- "another-syn2580853-20190601-20191201"

# file.path() joins the directory and file name with "/".
download_file <- file.path(
  data_dir,
  paste0(report_prefix, "-download.csv")
)
filedownloadrecord_file <- file.path(
  data_dir,
  paste0(report_prefix, "-filedownloadrecord.csv")
)
```

```{r readdata}
# Read both usage-record exports, keeping only the columns listed in
# keep_cols. any_of() skips names a file does not contain; it replaces the
# superseded one_of().
download_data <- read_csv(download_file) %>%
  select(any_of(keep_cols))

# Every row of the file-download-record export is labelled as a download.
fdr_data <- read_csv(filedownloadrecord_file) %>%
  select(any_of(keep_cols)) %>%
  mutate(recordType = "download")

# Combine download and fdr data. Because missing columns are tolerated above,
# the two tables may not have identical column sets: bind_rows() matches
# columns by name and fills the gaps with NA, where rbind() would stop with an
# error.
query_data <- bind_rows(download_data, fdr_data) %>%
  distinct() %>%
  mutate(
    id = paste0("syn", id),
    userId = as.character(userId),
    quarter = lubridate::quarter(DATE),
    year = lubridate::year(DATE),
    # `year` and `quarter` here are the columns created just above.
    quarteryear = glue::glue("{year} Q{quarter}")
  )
# Synapse team ids, passed to processTeamMemberList() in this order.
team_order <- c(3320424, 273957)
user_list <- processTeamMemberList(team_order)

# User information for the records in query_data, grouped by team.
all_users <- getQueryUserProfiles(
  query_data,
  useTeamGrouping = TRUE,
  userList = user_list
)

# Attach the user information to every record. No `by` is supplied, so the
# join uses whatever column names the two tables have in common.
# NOTE(review): presumably that is `userId` only - confirm and make it explicit.
query_data <- left_join(query_data, all_users)

# Reduction to a single record per (id, userId) pair is currently disabled:
query_data_single <- query_data # %>%
# group_by(id, userId) %>%
# slice(1) %>%
# ungroup() %>%
# All data files in this file view are the ones we're interested in
file_view_id <- "syn11346063"
file_view_res <- synTableQuery(
  glue::glue("SELECT id,study from {file_view_id}")
)

# Keep just the two queried columns of the result, as a tibble.
file_view_df <- select(
  tibble::as_tibble(file_view_res$asDataFrame()),
  id, study
)

# Add the study of each file to the records; records whose id is not in the
# file view get study = NA.
query_data_annotated <- query_data_single %>%
  dplyr::left_join(file_view_df, by = "id")

# Test here for number of entities without a study annotated
# query_data_annotated %>% filter(is.na(study)) %>% summarize(n=n_distinct(id))

# Records restricted to files that have a study.
query_data_annotated_filtered <- filter(query_data_annotated, !is.na(study))
```

```{r}
# Record counts per team, reshaped into a single row with one column per
# teamName value.
res <- pivot_wider(
  count(query_data_annotated, teamName),
  names_from = "teamName",
  values_from = "n"
)
```

```{r}
# Share of records from the AMP-AD Consortium among records from the two
# teams. `res` only has a column for a team that has at least one record, so
# an absent team is counted as 0 instead of yielding NULL (which would make
# the proportion a zero-length vector and the inline text render empty).
team_total <- function(team_name) {
  if (team_name %in% names(res)) res[[team_name]] else 0
}
n_ampad <- team_total("AMP-AD Consortium")
n_registered <- team_total("Registered Synapse User")
prop_ampad <- n_ampad / (n_ampad + n_registered)
```

## Summary

```{r}
# Totals and date range over all records, with or without a study.
summarize(
  query_data_annotated,
  nrecords = n(),
  nusers = n_distinct(userId),
  nfiles = n_distinct(id),
  min_date = min(DATE),
  max_date = max(DATE)
)
```

```{r}
# The same totals and date range, restricted to records whose file has a study.
summarize(
  query_data_annotated_filtered,
  nrecords = n(),
  nusers = n_distinct(userId),
  nfiles = n_distinct(id),
  min_date = min(DATE),
  max_date = max(DATE)
)
```

Between `r min(query_data_annotated$dateGrouping)` and `r max(query_data_annotated$dateGrouping)` there are:

- `r nrow(query_data_annotated)` downloads.
- `r round(prop_ampad, digits = 2) * 100`% are from the AMP-AD Consortium.

## Downloads by study

```{r}
# Interactive table with one row per study: distinct files, distinct users,
# download records, and download records per file.
bystudy <- DT::datatable(
  query_data_annotated %>%
    group_by(study) %>%
    summarize(
      `Users` = n_distinct(userId),
      Downloads = n(),
      Files = n_distinct(id)
    ) %>%
    ungroup() %>%
    mutate(`Downloads Per File` = Downloads / Files) %>%
    select(Study = study, Files, Users, Downloads, `Downloads Per File`),
  options = list(
    pageLength = 75,
    lengthChange = FALSE
  )
)

# Print the widget so it is rendered in the report.
bystudy
```

```{r}
# Download records and distinct users for each dateGrouping value.
summarize(
  group_by(query_data_annotated, dateGrouping),
  Downloads = n(),
  Users = n_distinct(userId)
)
```
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment