Created
April 3, 2018 15:35
-
-
Save ricalanis/b663c80889746cc29cbee57e28419ded to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"curl 'http://pgjesin.gob.mx:8090/desaparecidos/Lista_Desaparecidos.asp' -H 'Pragma: no-cache' -H 'Origin: http://pgjesin.gob.mx:8090' -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: es-ES,es;q=0.9,en;q=0.8' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36' -H 'Content-Type: application/x-www-form-urlencoded' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' -H 'Cache-Control: no-cache' -H 'Referer: http://pgjesin.gob.mx:8090/desaparecidos/Busqueda_Desaparecidos.asp?estado=8&nomestado=Chihuahua' -H 'Cookie: ASPSESSIONIDQQQTCSBT=EDHJEPHBGABFNIGJDGLJLAHP' -H 'Connection: keep-alive' --data 'Estados=8&Tipos=0&Sexos=0&Edad1=0&Edad2=100&txtpaterno=&txtmaterno=&txtNombre=&rgfotos=1&rgordenado=1&Complexion=0&Tez=0&Pelo=0&Color=0&Labios=0&Cara=0&Ojos=0&Ojoscolor=0&Boca=0&Cejas=0&Nariz=0&Frente=0&txtropa=&txtse%F1as=&Submit1=Buscar' --compressed" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from selenium import webdriver\n", | |
"from selenium.webdriver.common.keys import Keys\n", | |
"from bs4 import BeautifulSoup\n", | |
"import requests\n", | |
"import pandas\n", | |
"import simplejson" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"driver = webdriver.Chrome()\n", | |
"driver.get(\"http://pgjesin.gob.mx:8090/desaparecidos/Busqueda_Desaparecidos.asp?estado=0&nomestado=Todos\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"button_start = driver.find_element_by_id(\"Submit1\")\n", | |
"button_start.click()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Collecting PyPDF\n", | |
" Downloading pyPdf-1.13.tar.gz\n", | |
"Building wheels for collected packages: PyPDF\n", | |
" Running setup.py bdist_wheel for PyPDF ... \u001b[?25ldone\n", | |
"\u001b[?25h Stored in directory: /Users/rdalanist/Library/Caches/pip/wheels/44/8b/98/27e1f4566acef44a7fbc2b0db6a10e2a493833e2d2e34fa110\n", | |
"Successfully built PyPDF\n", | |
"Installing collected packages: PyPDF\n", | |
"Successfully installed PyPDF-1.13\n" | |
] | |
} | |
], | |
"source": [ | |
"!pip3 install PyPDF" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "ModuleNotFoundError", | |
"evalue": "No module named 'pypdf'", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m-------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-34-fc75eae59478>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpypdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pypdf'" | |
] | |
} | |
], | |
"source": [ | |
"import pypdf" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"1\n", | |
"3\n", | |
"4\n", | |
"5\n", | |
"6\n", | |
"7\n", | |
"8\n", | |
"9\n", | |
"10\n", | |
"11\n", | |
"12\n", | |
"13\n", | |
"14\n", | |
"15\n", | |
"16\n", | |
"17\n", | |
"18\n", | |
"19\n", | |
"20\n", | |
"21\n", | |
"22\n", | |
"23\n", | |
"24\n", | |
"25\n", | |
"26\n", | |
"27\n", | |
"28\n", | |
"29\n", | |
"30\n", | |
"31\n", | |
"32\n", | |
"34\n", | |
"36\n", | |
"37\n", | |
"38\n", | |
"39\n", | |
"40\n", | |
"41\n", | |
"42\n", | |
"43\n", | |
"44\n", | |
"45\n", | |
"46\n", | |
"47\n", | |
"48\n", | |
"49\n", | |
"50\n", | |
"51\n", | |
"52\n", | |
"53\n", | |
"54\n", | |
"55\n", | |
"56\n", | |
"58\n", | |
"59\n", | |
"60\n", | |
"61\n", | |
"62\n", | |
"63\n", | |
"64\n", | |
"65\n", | |
"66\n", | |
"67\n", | |
"69\n", | |
"70\n", | |
"71\n", | |
"72\n", | |
"73\n", | |
"74\n", | |
"75\n", | |
"76\n", | |
"77\n", | |
"78\n", | |
"79\n", | |
"80\n", | |
"81\n", | |
"82\n", | |
"83\n", | |
"84\n", | |
"85\n", | |
"86\n", | |
"87\n", | |
"88\n", | |
"89\n", | |
"90\n", | |
"91\n", | |
"92\n", | |
"93\n", | |
"94\n", | |
"95\n", | |
"96\n", | |
"97\n", | |
"98\n", | |
"99\n", | |
"100\n", | |
"101\n", | |
"102\n", | |
"103\n", | |
"104\n", | |
"105\n", | |
"106\n", | |
"107\n", | |
"108\n", | |
"109\n", | |
"110\n", | |
"111\n", | |
"112\n", | |
"113\n", | |
"114\n", | |
"115\n", | |
"116\n", | |
"117\n", | |
"118\n", | |
"119\n", | |
"120\n", | |
"121\n", | |
"122\n", | |
"123\n", | |
"124\n", | |
"125\n", | |
"126\n", | |
"127\n", | |
"128\n", | |
"130\n", | |
"131\n", | |
"132\n", | |
"133\n", | |
"134\n", | |
"135\n", | |
"136\n", | |
"137\n", | |
"138\n", | |
"139\n", | |
"140\n", | |
"141\n", | |
"142\n", | |
"143\n", | |
"144\n", | |
"145\n", | |
"146\n", | |
"147\n", | |
"148\n", | |
"149\n", | |
"150\n", | |
"151\n", | |
"152\n", | |
"153\n", | |
"154\n", | |
"155\n", | |
"156\n", | |
"157\n", | |
"158\n", | |
"159\n", | |
"160\n", | |
"161\n", | |
"162\n", | |
"163\n", | |
"164\n", | |
"165\n", | |
"166\n", | |
"167\n", | |
"168\n", | |
"169\n", | |
"170\n", | |
"171\n", | |
"172\n", | |
"173\n", | |
"174\n", | |
"175\n", | |
"176\n", | |
"177\n", | |
"178\n", | |
"179\n", | |
"180\n", | |
"181\n", | |
"182\n", | |
"183\n", | |
"184\n", | |
"186\n", | |
"187\n", | |
"188\n", | |
"189\n", | |
"190\n", | |
"191\n", | |
"192\n", | |
"193\n", | |
"194\n", | |
"195\n", | |
"196\n", | |
"197\n", | |
"198\n", | |
"199\n", | |
"200\n", | |
"201\n", | |
"202\n", | |
"203\n", | |
"204\n", | |
"205\n", | |
"209\n", | |
"210\n", | |
"211\n", | |
"212\n", | |
"213\n", | |
"214\n", | |
"215\n", | |
"216\n", | |
"217\n", | |
"218\n", | |
"219\n", | |
"220\n", | |
"221\n", | |
"222\n", | |
"223\n", | |
"224\n", | |
"225\n", | |
"226\n", | |
"227\n", | |
"228\n", | |
"229\n", | |
"230\n", | |
"231\n", | |
"232\n", | |
"233\n", | |
"234\n", | |
"235\n", | |
"236\n", | |
"237\n", | |
"238\n", | |
"239\n", | |
"240\n", | |
"241\n", | |
"242\n", | |
"243\n", | |
"244\n", | |
"245\n", | |
"246\n", | |
"247\n", | |
"248\n", | |
"249\n", | |
"250\n", | |
"251\n", | |
"252\n", | |
"253\n", | |
"254\n", | |
"255\n", | |
"256\n", | |
"257\n", | |
"258\n", | |
"259\n", | |
"260\n", | |
"261\n", | |
"262\n", | |
"263\n", | |
"264\n", | |
"265\n", | |
"266\n", | |
"267\n", | |
"268\n", | |
"269\n", | |
"270\n", | |
"272\n", | |
"273\n", | |
"274\n", | |
"275\n", | |
"276\n", | |
"277\n", | |
"278\n", | |
"280\n", | |
"281\n", | |
"282\n", | |
"283\n", | |
"284\n", | |
"285\n", | |
"286\n", | |
"287\n", | |
"288\n", | |
"289\n", | |
"290\n", | |
"291\n", | |
"292\n", | |
"293\n", | |
"294\n", | |
"296\n", | |
"298\n", | |
"299\n", | |
"300\n", | |
"301\n", | |
"302\n", | |
"303\n", | |
"304\n", | |
"305\n", | |
"306\n", | |
"307\n", | |
"308\n", | |
"309\n", | |
"310\n", | |
"311\n", | |
"312\n", | |
"313\n", | |
"314\n", | |
"315\n", | |
"316\n", | |
"317\n", | |
"318\n", | |
"319\n", | |
"320\n", | |
"321\n", | |
"322\n", | |
"323\n", | |
"324\n", | |
"326\n", | |
"327\n", | |
"328\n", | |
"329\n", | |
"330\n", | |
"331\n", | |
"332\n", | |
"333\n", | |
"334\n", | |
"335\n", | |
"336\n", | |
"338\n", | |
"339\n", | |
"340\n", | |
"341\n", | |
"342\n", | |
"343\n", | |
"344\n", | |
"345\n", | |
"346\n", | |
"347\n", | |
"348\n", | |
"349\n", | |
"350\n", | |
"351\n", | |
"352\n", | |
"353\n", | |
"354\n", | |
"355\n", | |
"356\n", | |
"357\n", | |
"358\n", | |
"359\n", | |
"360\n", | |
"361\n", | |
"362\n", | |
"363\n", | |
"364\n", | |
"365\n", | |
"366\n", | |
"367\n", | |
"368\n", | |
"369\n", | |
"370\n", | |
"371\n", | |
"372\n", | |
"373\n", | |
"374\n", | |
"375\n", | |
"376\n", | |
"377\n", | |
"378\n", | |
"379\n", | |
"380\n", | |
"381\n", | |
"382\n", | |
"383\n", | |
"384\n", | |
"385\n", | |
"386\n", | |
"387\n", | |
"388\n", | |
"389\n", | |
"391\n", | |
"392\n", | |
"393\n", | |
"394\n", | |
"395\n", | |
"396\n", | |
"397\n", | |
"398\n", | |
"399\n", | |
"400\n", | |
"401\n", | |
"402\n", | |
"403\n", | |
"404\n", | |
"405\n", | |
"406\n", | |
"407\n", | |
"408\n", | |
"409\n", | |
"410\n", | |
"411\n", | |
"412\n", | |
"413\n", | |
"414\n", | |
"415\n", | |
"416\n", | |
"417\n", | |
"418\n", | |
"419\n", | |
"420\n", | |
"421\n", | |
"422\n", | |
"423\n", | |
"424\n", | |
"425\n", | |
"426\n", | |
"427\n", | |
"428\n", | |
"429\n", | |
"430\n", | |
"431\n", | |
"432\n", | |
"433\n", | |
"434\n", | |
"435\n", | |
"436\n", | |
"437\n", | |
"438\n", | |
"439\n", | |
"440\n", | |
"441\n", | |
"442\n", | |
"443\n", | |
"444\n", | |
"445\n", | |
"447\n", | |
"448\n", | |
"449\n", | |
"450\n", | |
"451\n", | |
"452\n", | |
"453\n", | |
"454\n", | |
"455\n", | |
"456\n", | |
"457\n", | |
"458\n", | |
"459\n", | |
"460\n", | |
"461\n", | |
"462\n", | |
"463\n", | |
"464\n", | |
"465\n", | |
"466\n", | |
"467\n", | |
"468\n", | |
"469\n", | |
"470\n", | |
"471\n", | |
"472\n", | |
"473\n", | |
"474\n", | |
"475\n", | |
"476\n", | |
"477\n", | |
"478\n", | |
"479\n", | |
"480\n", | |
"481\n", | |
"482\n", | |
"483\n", | |
"484\n", | |
"485\n", | |
"486\n", | |
"487\n", | |
"488\n", | |
"489\n", | |
"490\n", | |
"491\n", | |
"492\n", | |
"493\n", | |
"494\n", | |
"495\n", | |
"496\n", | |
"497\n", | |
"498\n", | |
"499\n", | |
"500\n" | |
] | |
} | |
], | |
"source": [ | |
"output_guanajuato = []\n", | |
"for i in range(1,501,1):\n", | |
" try:\n", | |
" data =requests.get(\"https://portal.pgjguanajuato.gob.mx:8443/ProcurApp/api/PortalAMBER/\" + str(i)).json()[\"ficha\"]\n", | |
" output_guanajuato.append(data)\n", | |
" print(i)\n", | |
" except:\n", | |
" next" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"pd.DataFrame([data[0] for data in output_guanajuato]).to_csv(\"AMBER_Guanajuato.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "NameError", | |
"evalue": "name 'output' is not defined", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m-------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-9-838337db0b65>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0moutput\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;31mNameError\u001b[0m: name 'output' is not defined" | |
] | |
} | |
], | |
"source": [ | |
"output" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment