Last active
December 21, 2019 06:10
-
-
Save rrodrigueznt/0312c2bc3015f990ba93e3fea434e86c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
library(readxl) | |
library(openxlsx) # extremelly slow, but it guess much better column types for locally edited | |
library(dplyr, warn.conflicts = FALSE) | |
library(stringr) | |
library(lubridate, warn.conflicts = FALSE) | |
library(ggplot2) | |
# | |
setwd("C:/Users/Ricardo Rodríguez/Universidade de Santiago de Compostela/IGFAE-Metrics - Documents/DataReceivedFromUSC/") | |
# | |
setwd("/Users/rrodriguez/Universidade de Santiago de Compostela/igfae-metrics---documents/DataReceivedFromUSC/") | |
# | |
## The difference between income and expenses is simple: income is the money your business takes in and expenses are what it spends money on. Your net income is generally your revenue, or all the money coming into your business, minus all of your expenses. | |
## https://smallbusiness.chron.com/difference-between-income-expense-26088.html | |
# | |
readxl::excel_sheets("2019-03-05_Dotacions_IPs_IGFAE_aData.xlsx") | |
# | |
IGFAEincome_20190305 <- openxlsx::read.xlsx("2019-03-05_Dotacions_IPs_IGFAE_aData.xlsx", sheet = "IPs IGFAE", detectDates = TRUE) | |
IGFAEincome_20190320 <- openxlsx::read.xlsx("2019-03-20_Dotacions_IPs_IGFAE_aData.xlsx", sheet = "IPs IGFAE", detectDates = TRUE) | |
IGFAEincome_20190328 <- openxlsx::read.xlsx("2019-03-28_Dotacions_IPs_IGFAE_aData.xlsx", sheet = "IPs IGFAE", detectDates = TRUE) | |
# | |
readxl::excel_sheets("Staff_IGFAE_1999-2018_FINAL.xlsx") | |
# | |
IGFAEincomeHc <- openxlsx::read.xlsx("Staff_IGFAE_1999-2018_FINAL.xlsx", sheet = "RRHH1999-2018", detectDates = TRUE) | |
IGFAEexpensesHc <- openxlsx::read.xlsx("Staff_IGFAE_1999-2018_FINAL.xlsx", sheet = "Contratados", detectDates = TRUE, ) | |
names(IGFAEexpensesHc) <- c('Rexistro','Titulo','TotalIngresos','IP','NIFip','NomeContratado','NIFcon','Categoría','TipoContrato','Prorroga','DtaDesde','DtaAta','DtaBaixa') | |
# | |
IGFAEincomeHcSs01 <- IGFAEincomeHc %>% dplyr::filter(stringr::str_detect(TipoContrato, "Cajal|Parga")) %>% dplyr::mutate(DtaFinal = lubridate::year(DtaFinal), DtaInicial = lubridate::year(DtaInicial), Who = stringr::str_extract(NomeInvestigador, "^.{15}")) %>% dplyr::mutate(Titulo=replace(Titulo, DocumentoId=='033332678C', 'RAMÓN Y CAJAL 2017')) %>% dplyr::select('TipoContrato','DocumentoId','Who','Titulo') %>% dplyr::arrange(TipoContrato, DocumentoId) %>% dplyr::distinct() %>% dplyr::filter(!stringr::str_detect(Who, "Casarejos|Ave Pernas")) %>% dplyr::mutate(CallYear = as.numeric(stringr::str_extract_all(Titulo, "[0-9]+"))) %>% dplyr::mutate(CallTitle=dplyr::case_when(stringr::str_detect(TipoContrato, "Cajal") ~ 'Ramón y Cajal' , stringr::str_detect(TipoContrato, "Parga") ~ 'Parga Pondal')) %>% dplyr::add_row(TipoContrato='Contratados Ramón y Cajal' , DocumentoId='044840037L' , Who='Cid Vidal, Xabi' , Titulo='Programa RAMÓN Y CAJAL 2016' , CallTitle='Ramón y Cajal' , CallYear=2016) | |
# | |
IGFAEincomeHcSs02 <- IGFAEincomeHcSs01 %>% dplyr::select('CallYear','CallTitle' ) %>% dplyr::add_row(CallYear=2002) %>% dplyr::add_row(CallYear=2008) %>% dplyr::add_row(CallYear=2009) %>% dplyr::add_row(CallYear=2011) %>% dplyr::add_row(CallYear=2013) %>% dplyr::add_row(CallYear=2014) %>% dplyr::add_row(CallYear=2015) | |
# | |
g01 <- ggplot(IGFAEincomeHcSs02, aes(as.character(CallYear),fill = CallTitle)) | |
g01 + geom_bar(na.rm = FALSE) + | |
scale_fill_manual(values=c("Parga Pondal"="slategray2", "Ramón y Cajal"="steelblue2"), guide="none") + | |
xlab("\ncall") + | |
ylab("n ") + | |
theme(plot.title = element_text(size=14, face = "bold"), | |
legend.title=element_text(size=12), | |
legend.text=element_text(size=12), | |
axis.text=element_text(size=12), | |
axis.title.x=element_text(size=14, face="bold"), | |
axis.title.y=element_text(size=14, face="bold", angle = 0) | |
) | |
# | |
## Be carefull with the concepts behind gem_histogram() and scale_x_continuous; the scale breaks allows to split the x continuous axis | |
## but this is not what we want! See g01 above! | |
## https://stackoverflow.com/questions/14138247/ggplot-geom-bar-vs-geom-histogram | |
## g02 <- ggplot(IGFAEincomeHcSs02, aes(CallYear,fill = CallTitle)) | |
## g02 + geom_histogram() + scale_x_continuous(breaks = c(2001:2017)) + labs(x = NULL, y = NULL) | |
# | |
IGFAEincome6060 <- IGFAEincome_20190305 %>% dplyr::filter(Organica == 6060) %>% dplyr::select('Organica','Funcional','Economica','Anualidade','Importe') %>% dplyr::group_by(Funcional) %>% dplyr::filter(Funcional == 'G1KE') | |
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment