Last active
March 16, 2022 13:35
-
-
Save k3yavi/a486647c35158a8296cec543ed9b526f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"--2019-10-31 07:37:55-- ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/gencode.vM23.transcripts.fa.gz\n", | |
" => ‘gencode.vM23.transcripts.fa.gz.1’\n", | |
"Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.197.74\n", | |
"Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.197.74|:21... connected.\n", | |
"Logging in as anonymous ... Logged in!\n", | |
"==> SYST ... done. ==> PWD ... done.\n", | |
"==> TYPE I ... done. ==> CWD (1) /pub/databases/gencode/Gencode_mouse/release_M23 ... done.\n", | |
"==> SIZE gencode.vM23.transcripts.fa.gz ... 54441060\n", | |
"==> PASV ... done. ==> RETR gencode.vM23.transcripts.fa.gz ... done.\n", | |
"Length: 54441060 (52M) (unauthoritative)\n", | |
"\n", | |
" 0K .......... .......... .......... .......... .......... 0% 332K 2m40s\n", | |
" 50K .......... .......... .......... .......... .......... 0% 648K 2m1s\n", | |
" 53050K .......... .......... .......... .......... .......... 99% 35.8M 0s\n", | |
" 53100K .......... .......... .......... .......... .......... 99% 66.4M 0s\n", | |
" 53150K .......... ..... 100% 15.8M=3.3s\n", | |
"\n", | |
"2019-10-31 07:37:59 (15.9 MB/s) - ‘gencode.vM23.transcripts.fa.gz.1’ saved [54441060]\n", | |
"\n", | |
"--2019-10-31 07:37:59-- ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz\n", | |
" => ‘GRCm38.primary_assembly.genome.fa.gz.1’\n", | |
"Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.197.74\n", | |
"Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.197.74|:21... connected.\n", | |
"Logging in as anonymous ... Logged in!\n", | |
"==> SYST ... done. ==> PWD ... done.\n", | |
"==> TYPE I ... done. ==> CWD (1) /pub/databases/gencode/Gencode_mouse/release_M23 ... done.\n", | |
"==> SIZE GRCm38.primary_assembly.genome.fa.gz ... 773507376\n", | |
"==> PASV ... done. ==> RETR GRCm38.primary_assembly.genome.fa.gz ... done.\n", | |
"Length: 773507376 (738M) (unauthoritative)\n", | |
"\n", | |
" 0K .......... .......... .......... .......... .......... 0% 281K 44m45s\n", | |
" 50K .......... .......... .......... .......... .......... 0% 617K 32m34s\n", | |
"755300K .......... .......... .......... .......... .......... 99% 17.4M 0s\n", | |
"755350K .......... .......... ........ 100% 17.2M=59s\n", | |
"\n", | |
"2019-10-31 07:38:59 (12.5 MB/s) - ‘GRCm38.primary_assembly.genome.fa.gz.1’ saved [773507376]\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"%%bash\n", | |
"# -nc: skip a file that is already present, so a re-run never saves a '.1' copy that later cells would ignore.\n", | |
"#      (Delete a partially downloaded file by hand to force a fresh fetch.)\n", | |
"# -nv: print one summary line per file instead of the full progress log.\n", | |
"wget -nc -nv ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/gencode.vM23.transcripts.fa.gz\n", | |
"wget -nc -nv ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M23/GRCm38.primary_assembly.genome.fa.gz" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%%bash\n", | |
"set -euo pipefail\n", | |
"# decoys.txt = first space-delimited token of every genome FASTA header, without the leading '>'.\n", | |
"# One pipeline (no process substitution, no in-place sed): a failed decompression now aborts the cell\n", | |
"# instead of leaving a truncated list, and 'gunzip -c' / plain sed work on both GNU and BSD userlands.\n", | |
"gunzip -c GRCm38.primary_assembly.genome.fa.gz | grep '^>' | cut -d ' ' -f 1 | sed 's/^>//' > decoys.txt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%%bash\n", | |
"# Build the combined reference: transcript sequences first, genome (decoy) sequences after them.\n", | |
"# The two gzip files are joined as-is; back-to-back gzip members still form a valid gzip stream.\n", | |
"cat gencode.vM23.transcripts.fa.gz \\\n", | |
"    GRCm38.primary_assembly.genome.fa.gz \\\n", | |
"    > gentrome.fa.gz" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Threads = 12\n", | |
"Vertex length = 31\n", | |
"Hash functions = 5\n", | |
"Filter size = 68719476736\n", | |
"Capacity = 2\n", | |
"Files: \n", | |
"salmon_index/ref_k31_fixed.fa\n", | |
"--------------------------------------------------------------------------------\n", | |
"Round 0, 0:68719476736\n", | |
"Pass\tFilling\tFiltering\n", | |
"1\t126\t224\t\n", | |
"2\t261\t4\n", | |
"True junctions count = 14963867\n", | |
"False junctions count = 1213676\n", | |
"Hash table size = 16177543\n", | |
"Candidate marks count = 338137689\n", | |
"--------------------------------------------------------------------------------\n", | |
"Reallocating bifurcations time: 8\n", | |
"True marks count: 336727201\n", | |
"Edges construction time: 300\n", | |
"--------------------------------------------------------------------------------\n", | |
"Distinct junctions = 14963867\n", | |
"\n", | |
"for info, total work write each : 2.331 total work inram from level 3 : 4.322 total work raw : 25.000 \n", | |
"Bitarray 11923680384 bits (100.00 %) (array + ranks )\n", | |
"final hash 225456 bits (0.00 %) (nb in final hash 671)\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[2019-10-31 07:39:52.034] [jLog] [info] building index\n", | |
"out : salmon_index\n", | |
"[2019-10-31 07:39:53.725] [puff::index::jointLog] [info] Running fixFasta\n", | |
"\n", | |
"[Step 1 of 4] : counting k-mers\n", | |
"[2019-10-31 07:39:59.287] [puff::index::jointLog] [warning] It appears that this may be a GENCODE transcriptome (from analyzing the separators in the FASTA header). However, you have not set '|' as a header separator. If this is a GENCODE transcriptome, consider passing --gencode to the pufferfish index command.\n", | |
"\n", | |
"\n", | |
"[2019-10-31 07:39:59.303] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000191703.1|ENSMUSG00000103282.1|OTTMUSG00000050290.1|OTTMUST00000127724.1|Gm37275-201|Gm37275|30|processed_pseudogene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:00.143] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000229312.1|ENSMUSG00000056486.18|OTTMUSG00000013428.7|OTTMUST00000171565.1|Chn1-211|Chn1|20|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:01.007] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179793.1|ENSMUSG00000095386.1|-|-|Gm17662-201|Gm17662|24|protein_coding|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:01.384] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000200713.1|ENSMUSG00000107308.1|OTTMUSG00000056069.1|OTTMUST00000138636.1|Gm43234-201|Gm43234|18|processed_pseudogene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:01.688] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178537.1|ENSMUSG00000095668.1|OTTMUSG00000051351.2|OTTMUST00000129883.2|Trbd1-201|Trbd1|12|TR_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:01.688] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178862.1|ENSMUSG00000094569.1|OTTMUSG00000051360.2|OTTMUST00000129892.2|Trbd2-201|Trbd2|14|TR_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:02.770] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000165700.1|ENSMUSG00000091041.1|-|-|Gm17720-201|Gm17720|27|protein_coding|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.120] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179520.1|ENSMUSG00000094028.1|OTTMUSG00000051523.3|OTTMUST00000130131.3|Ighd4-1-201|Ighd4-1|11|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.120] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179883.1|ENSMUSG00000094552.1|OTTMUSG00000051524.2|OTTMUST00000130132.2|Ighd3-2-201|Ighd3-2|16|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.120] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000195858.1|ENSMUSG00000096420.2|OTTMUSG00000051529.2|OTTMUST00000130137.2|Ighd5-6-202|Ighd5-6|10|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.120] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179932.1|ENSMUSG00000096420.2|OTTMUSG00000051529.2|-|Ighd5-6-201|Ighd5-6|12|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.120] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000180001.1|ENSMUSG00000095656.1|OTTMUSG00000051530.2|OTTMUST00000130138.2|Ighd2-8-201|Ighd2-8|17|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.120] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178815.1|ENSMUSG00000094957.1|OTTMUSG00000051531.2|OTTMUST00000130141.2|Ighd5-5-201|Ighd5-5|10|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.120] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000177965.1|ENSMUSG00000094057.1|OTTMUSG00000051538.2|OTTMUST00000130152.2|Ighd2-7-201|Ighd2-7|17|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178909.1|ENSMUSG00000094268.1|OTTMUSG00000051557.2|OTTMUST00000130174.2|Ighd5-8-201|Ighd5-8|29|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000177646.1|ENSMUSG00000096884.1|OTTMUSG00000051571.2|OTTMUST00000130189.2|Ighd5-4-201|Ighd5-4|10|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178230.1|ENSMUSG00000096250.1|OTTMUSG00000051569.2|OTTMUST00000130197.2|Ighd2-6-201|Ighd2-6|17|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178483.1|ENSMUSG00000095592.1|OTTMUSG00000051572.2|OTTMUST00000130199.2|Ighd5-7-201|Ighd5-7|29|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179262.1|ENSMUSG00000093876.1|OTTMUSG00000051581.2|OTTMUST00000130180.2|Ighd5-3-201|Ighd5-3|10|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178549.1|ENSMUSG00000095897.1|OTTMUSG00000051560.2|OTTMUST00000130181.2|Ighd2-5-201|Ighd2-5|17|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000193012.1|ENSMUSG00000103203.1|OTTMUSG00000051561.2|OTTMUST00000130184.2|Gm37327-201|Gm37327|29|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179166.1|ENSMUSG00000096396.1|OTTMUSG00000051562.2|OTTMUST00000130185.2|Ighd5-2-201|Ighd5-2|10|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179560.1|ENSMUSG00000095444.1|OTTMUSG00000051582.2|OTTMUST00000130217.2|Ighd2-4-201|Ighd2-4|17|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178902.1|ENSMUSG00000096301.1|OTTMUSG00000051583.1|OTTMUST00000130218.1|Ighd6-2-201|Ighd6-2|29|IG_D_pseudogene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000192089.1|ENSMUSG00000102532.1|OTTMUSG00000051589.1|OTTMUST00000130224.1|Gm37227-201|Gm37227|10|IG_pseudogene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000177839.1|ENSMUSG00000096568.1|OTTMUSG00000051584.2|OTTMUST00000130219.2|Ighd2-3-201|Ighd2-3|17|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000170385.1|ENSMUSG00000095399.1|OTTMUSG00000051585.1|OTTMUST00000130220.1|Ighd6-1-201|Ighd6-1|29|IG_D_pseudogene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000103439.1|ENSMUSG00000076630.1|OTTMUSG00000051586.2|OTTMUST00000130221.2|Ighd1-1-201|Ighd1-1|23|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000180266.1|ENSMUSG00000093818.1|OTTMUSG00000051588.2|OTTMUST00000130223.2|Ighd3-1-201|Ighd3-1|17|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000178811.1|ENSMUSG00000095549.1|OTTMUSG00000051590.1|OTTMUST00000130225.1|Ighd5-1-201|Ighd5-1|10|IG_D_pseudogene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.135] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000103441.1|ENSMUSG00000076632.1|OTTMUSG00000051591.2|OTTMUST00000130226.2|Gm16968-201|Gm16968|23|IG_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.488] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000231713.1|ENSMUSG00000032727.14|OTTMUSG00000019751.6|OTTMUST00000173336.1|Mier3-208|Mier3|16|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.488] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000231466.1|ENSMUSG00000032727.14|OTTMUSG00000019751.6|OTTMUST00000173337.1|Mier3-207|Mier3|18|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.555] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000226589.1|ENSMUSG00000021767.19|OTTMUSG00000043207.9|OTTMUST00000170357.1|Kat6b-213|Kat6b|18|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.587] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000228114.1|ENSMUSG00000006522.17|OTTMUSG00000035523.5|OTTMUST00000170497.1|Itih3-206|Itih3|14|protein_coding|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.612] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000226226.1|ENSMUSG00000021798.14|OTTMUSG00000069987.3|OTTMUST00000170519.1|Ldb3-206|Ldb3|17|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.685] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000196221.1|ENSMUSG00000096749.2|OTTMUSG00000054129.2|OTTMUST00000134478.2|Trdd1-202|Trdd1|9|TR_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.686] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000179664.1|ENSMUSG00000096749.2|OTTMUSG00000054129.2|-|Trdd1-201|Trdd1|11|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.686] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000177564.1|ENSMUSG00000096176.1|OTTMUSG00000054130.2|OTTMUST00000134479.2|Trdd2-201|Trdd2|16|TR_D_gene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.686] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000103739.3|ENSMUSG00000076927.3|OTTMUSG00000054279.1|OTTMUST00000134692.1|Traj1-201|Traj1|29|TR_J_pseudogene|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.803] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000226961.1|ENSMUSG00000033004.15|OTTMUSG00000034690.3|OTTMUST00000170059.1|Mycbp2-208|Mycbp2|15|protein_coding|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:04.963] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000227223.1|ENSMUSG00000060794.8|OTTMUSG00000069888.2|OTTMUST00000169180.1|Tssk5-202|Tssk5|18|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:05.068] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000226615.1|ENSMUSG00000044250.8|OTTMUSG00000069720.1|OTTMUST00000168599.1|Pced1b-203|Pced1b|10|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:05.316] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000226153.1|ENSMUSG00000033450.8|OTTMUSG00000034757.2|OTTMUST00000168259.1|Tagap-202|Tagap|14|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:05.380] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000227002.1|ENSMUSG00000035435.17|OTTMUSG00000031262.3|OTTMUST00000168155.1|Abca17-203|Abca17|9|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"[2019-10-31 07:40:05.426] [puff::index::jointLog] [warning] Entry with header [ENSMUST00000226172.1|ENSMUSG00000002249.21|OTTMUSG00000024245.6|OTTMUST00000167695.2|Tead3-208|Tead3|11|lncRNA|], had length less than equal to the k-mer length of 31 (perhaps after poly-A clipping)\n", | |
"\n", | |
"[2019-10-31 07:41:09.442] [puff::index::jointLog] [warning] Removed 1603 transcripts that were sequence duplicates of indexed transcripts.\n", | |
"[2019-10-31 07:41:09.442] [puff::index::jointLog] [warning] If you wish to retain duplicate transcripts, please use the `--keepDuplicates` flag\n", | |
"[2019-10-31 07:41:09.444] [puff::index::jointLog] [info] Replaced 78,088,277 non-ATCG nucleotides\n", | |
"[2019-10-31 07:41:09.444] [puff::index::jointLog] [info] Clipped poly-A tails from 753 transcripts\n", | |
"wrote 140775 cleaned references\n", | |
"seqHash 256 : 6f92253eb7397009ce667653d94538bc1f0bd85fad71e5c45a7395f6cfe07ffe\n", | |
"seqHash 512 : dcbc04f4ca9b00b551dd2fb47c5c5adf4773ce09e589af3c32611a59216cf0d9733b5534a486d295ab01e315d9cee4efa8109039a6a92c34dfc584f7104d996a\n", | |
"nameHash 256 : 04f5965de3bd77b32ab9edb145ba349e7b187a3a80af10ae49c4e7651d7669c0\n", | |
"nameHash 512 : 58d3dba414d4acc3fa3c81f8306c1c09ecb9ded91b9706ad6cef63d222740630a169f7093de2d6d34237e5fc7d31a74cf2bba7d2d2ac9b9bf883edc717f99210\n", | |
"[2019-10-31 07:41:14.700] [puff::index::jointLog] [info] Filter size not provided; estimating from number of distinct k-mers\n", | |
"[2019-10-31 07:41:46.386] [puff::index::jointLog] [info] ntHll estimated 2272527625 distinct k-mers, setting filter size to 2^36\n", | |
"approximateContigTotalLength: 1120132586\n", | |
"counters:\n", | |
"3124333 946 850 35\n", | |
"contig count: 25054792 element count: 3027293225 complex nodes: 3126164\n", | |
"size: 3027293225\n", | |
"# of ones in rank vector: 25054791\n", | |
"size: 3027293225\n", | |
"[2019-10-31 08:03:30.189] [puff::index::jointLog] [info] Setting the index/BinaryGfa directory salmon_index\n", | |
"size = 3027293225\n", | |
"-----------------------------------------\n", | |
"| Loading contigs | Time = 497.28 ms\n", | |
"-----------------------------------------\n", | |
"size = 3027293225\n", | |
"-----------------------------------------\n", | |
"| Loading contig boundaries | Time = 245.75 ms\n", | |
"-----------------------------------------\n", | |
"Number of ones: 25054791\n", | |
"Number of ones per inventory item: 512\n", | |
"Inventory entries filled: 48936\n", | |
"[2019-10-31 08:03:36.644] [puff::index::jointLog] [info] Done wrapping the rank vector with a rank9sel structure.\n", | |
"[2019-10-31 08:03:47.922] [puff::index::jointLog] [info] contig count for validation: 25054791\n", | |
"[2019-10-31 08:04:22.119] [puff::index::jointLog] [info] Total # of Contigs : 25,054,791\n", | |
"[2019-10-31 08:04:22.119] [puff::index::jointLog] [info] Total # of numerical Contigs : 25,054,791\n", | |
"[2019-10-31 08:06:21.460] [puff::index::jointLog] [info] \n", | |
"Total # of segments we have position for : 25,054,791\n", | |
"[2019-10-31 08:06:23.012] [puff::index::jointLog] [info] total contig vec entries 346,506,409\n", | |
"[2019-10-31 08:06:23.012] [puff::index::jointLog] [info] bits per offset entry 29\n", | |
"[2019-10-31 08:06:45.809] [puff::index::jointLog] [info] there were 1,974,686 equivalence classes\n", | |
"[2019-10-31 08:07:33.112] [puff::index::jointLog] [info] # segments = 25,054,791\n", | |
"[2019-10-31 08:07:33.112] [puff::index::jointLog] [info] total length = 3,027,293,225\n", | |
"[2019-10-31 08:07:33.555] [puff::index::jointLog] [info] Reading the reference files ...\n", | |
"[2019-10-31 08:08:16.031] [puff::index::jointLog] [info] positional integer width = 32\n", | |
"[2019-10-31 08:08:16.031] [puff::index::jointLog] [info] seqSize = 3,027,293,225\n", | |
"[2019-10-31 08:08:16.031] [puff::index::jointLog] [info] rankSize = 3,027,293,225\n", | |
"[2019-10-31 08:08:16.031] [puff::index::jointLog] [info] edgeVecSize = 0\n", | |
"[2019-10-31 08:08:16.031] [puff::index::jointLog] [info] num keys = 2,275,649,495\n", | |
"\r", | |
"[Building BooPHF] 1.09 % elapsed: 0 min 1 sec remaining: 1 min 32 sec\r", | |
"[Building BooPHF] 1.1 % elapsed: 0 min 1 sec remaining: 1 min 32 sec\r", | |
"[Building BooPHF] 99.8 % elapsed: 1 min 8 sec remaining: 0 min 0 sec\r", | |
"[Building BooPHF] 100 % elapsed: 1 min 8 sec remaining: 0 min 0 sec\n", | |
"[2019-10-31 08:09:24.054] [puff::index::jointLog] [info] mphf size = 1421.44 MB\n", | |
"[2019-10-31 08:09:24.054] [puff::index::jointLog] [info] chunk size = 252,274,436\n", | |
"[2019-10-31 08:09:24.076] [puff::index::jointLog] [info] chunk 0 = [0, 252,274,436)\n", | |
"[2019-10-31 08:09:24.076] [puff::index::jointLog] [info] chunk 1 = [252,274,436, 504,548,881)\n", | |
"[2019-10-31 08:09:24.076] [puff::index::jointLog] [info] chunk 2 = [504,548,881, 756,823,336)\n", | |
"[2019-10-31 08:09:24.076] [puff::index::jointLog] [info] chunk 3 = [756,823,336, 1,009,097,772)\n", | |
"[2019-10-31 08:09:24.076] [puff::index::jointLog] [info] chunk 4 = [1,009,097,772, 1,261,372,208)\n", | |
"[2019-10-31 08:09:24.076] [puff::index::jointLog] [info] chunk 5 = [1,261,372,208, 1,513,646,644)\n", | |
"[2019-10-31 08:09:24.076] [puff::index::jointLog] [info] chunk 6 = [1,513,646,644, 1,765,921,080)\n", | |
"[2019-10-31 08:09:24.085] [puff::index::jointLog] [info] chunk 7 = [1,765,921,080, 2,018,195,516)\n", | |
"[2019-10-31 08:09:24.085] [puff::index::jointLog] [info] chunk 8 = [2,018,195,516, 2,270,469,952)\n", | |
"[2019-10-31 08:09:24.085] [puff::index::jointLog] [info] chunk 9 = [2,270,469,952, 2,522,744,388)\n", | |
"[2019-10-31 08:09:24.085] [puff::index::jointLog] [info] chunk 10 = [2,522,744,388, 2,775,018,824)\n", | |
"[2019-10-31 08:09:24.085] [puff::index::jointLog] [info] chunk 11 = [2,775,018,824, 3,027,293,195)\n", | |
"[2019-10-31 08:10:53.218] [puff::index::jointLog] [info] finished populating pos vector\n", | |
"[2019-10-31 08:10:53.218] [puff::index::jointLog] [info] writing index components\n", | |
"[2019-10-31 08:12:16.922] [puff::index::jointLog] [info] finished writing dense pufferfish index\n", | |
"[2019-10-31 08:12:18.849] [jLog] [info] done building index\n" | |
] | |
} | |
], | |
"source": [ | |
"%%bash\n", | |
"# -t: combined transcriptome+genome FASTA, -d: names of the decoy (genome) sequences, -p: threads, -i: output dir.\n", | |
"# --gencode: the transcript headers are '|'-delimited GENCODE records; use only the text before the first '|'\n", | |
"#            as the reference name (salmon itself warns about this when the flag is missing).\n", | |
"salmon index -t gentrome.fa.gz -d decoys.txt -p 12 -i salmon_index --gencode" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment