Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save ricalanis/b663c80889746cc29cbee57e28419ded to your computer and use it in GitHub Desktop.
Save ricalanis/b663c80889746cc29cbee57e28419ded to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"curl 'http://pgjesin.gob.mx:8090/desaparecidos/Lista_Desaparecidos.asp' -H 'Pragma: no-cache' -H 'Origin: http://pgjesin.gob.mx:8090' -H 'Accept-Encoding: gzip, deflate' -H 'Accept-Language: es-ES,es;q=0.9,en;q=0.8' -H 'Upgrade-Insecure-Requests: 1' -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36' -H 'Content-Type: application/x-www-form-urlencoded' -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8' -H 'Cache-Control: no-cache' -H 'Referer: http://pgjesin.gob.mx:8090/desaparecidos/Busqueda_Desaparecidos.asp?estado=8&nomestado=Chihuahua' -H 'Cookie: ASPSESSIONIDQQQTCSBT=EDHJEPHBGABFNIGJDGLJLAHP' -H 'Connection: keep-alive' --data 'Estados=8&Tipos=0&Sexos=0&Edad1=0&Edad2=100&txtpaterno=&txtmaterno=&txtNombre=&rgfotos=1&rgordenado=1&Complexion=0&Tez=0&Pelo=0&Color=0&Labios=0&Cara=0&Ojos=0&Ojoscolor=0&Boca=0&Cejas=0&Nariz=0&Frente=0&txtropa=&txtse%F1as=&Submit1=Buscar' --compressed"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from selenium import webdriver\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.webdriver.common.keys import Keys\n",
"from bs4 import BeautifulSoup\n",
"import requests\n",
"import pandas\n",
"import simplejson"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Search form of the \"desaparecidos\" registry; the query string requests estado=0 / nomestado=Todos.\n",
"SEARCH_URL = \"http://pgjesin.gob.mx:8090/desaparecidos/Busqueda_Desaparecidos.asp?estado=0&nomestado=Todos\"\n",
"\n",
"# Launch a Chrome session and load the search form.\n",
"driver = webdriver.Chrome()\n",
"driver.get(SEARCH_URL)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Submit the search form. find_element(By.ID, ...) replaces find_element_by_id,\n",
"# which was removed from Selenium 4.3 onward.\n",
"button_start = driver.find_element(By.ID, \"Submit1\")\n",
"button_start.click()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting PyPDF\n",
" Downloading pyPdf-1.13.tar.gz\n",
"Building wheels for collected packages: PyPDF\n",
" Running setup.py bdist_wheel for PyPDF ... \u001b[?25ldone\n",
"\u001b[?25h Stored in directory: /Users/rdalanist/Library/Caches/pip/wheels/44/8b/98/27e1f4566acef44a7fbc2b0db6a10e2a493833e2d2e34fa110\n",
"Successfully built PyPDF\n",
"Installing collected packages: PyPDF\n",
"Successfully installed PyPDF-1.13\n"
]
}
],
"source": [
"!pip3 install PyPDF"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'pypdf'",
"output_type": "error",
"traceback": [
"\u001b[0;31m-------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-34-fc75eae59478>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpypdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pypdf'"
]
}
],
"source": [
"import pypdf"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"3\n",
"4\n",
"5\n",
"6\n",
"7\n",
"8\n",
"9\n",
"10\n",
"11\n",
"12\n",
"13\n",
"14\n",
"15\n",
"16\n",
"17\n",
"18\n",
"19\n",
"20\n",
"21\n",
"22\n",
"23\n",
"24\n",
"25\n",
"26\n",
"27\n",
"28\n",
"29\n",
"30\n",
"31\n",
"32\n",
"34\n",
"36\n",
"37\n",
"38\n",
"39\n",
"40\n",
"41\n",
"42\n",
"43\n",
"44\n",
"45\n",
"46\n",
"47\n",
"48\n",
"49\n",
"50\n",
"51\n",
"52\n",
"53\n",
"54\n",
"55\n",
"56\n",
"58\n",
"59\n",
"60\n",
"61\n",
"62\n",
"63\n",
"64\n",
"65\n",
"66\n",
"67\n",
"69\n",
"70\n",
"71\n",
"72\n",
"73\n",
"74\n",
"75\n",
"76\n",
"77\n",
"78\n",
"79\n",
"80\n",
"81\n",
"82\n",
"83\n",
"84\n",
"85\n",
"86\n",
"87\n",
"88\n",
"89\n",
"90\n",
"91\n",
"92\n",
"93\n",
"94\n",
"95\n",
"96\n",
"97\n",
"98\n",
"99\n",
"100\n",
"101\n",
"102\n",
"103\n",
"104\n",
"105\n",
"106\n",
"107\n",
"108\n",
"109\n",
"110\n",
"111\n",
"112\n",
"113\n",
"114\n",
"115\n",
"116\n",
"117\n",
"118\n",
"119\n",
"120\n",
"121\n",
"122\n",
"123\n",
"124\n",
"125\n",
"126\n",
"127\n",
"128\n",
"130\n",
"131\n",
"132\n",
"133\n",
"134\n",
"135\n",
"136\n",
"137\n",
"138\n",
"139\n",
"140\n",
"141\n",
"142\n",
"143\n",
"144\n",
"145\n",
"146\n",
"147\n",
"148\n",
"149\n",
"150\n",
"151\n",
"152\n",
"153\n",
"154\n",
"155\n",
"156\n",
"157\n",
"158\n",
"159\n",
"160\n",
"161\n",
"162\n",
"163\n",
"164\n",
"165\n",
"166\n",
"167\n",
"168\n",
"169\n",
"170\n",
"171\n",
"172\n",
"173\n",
"174\n",
"175\n",
"176\n",
"177\n",
"178\n",
"179\n",
"180\n",
"181\n",
"182\n",
"183\n",
"184\n",
"186\n",
"187\n",
"188\n",
"189\n",
"190\n",
"191\n",
"192\n",
"193\n",
"194\n",
"195\n",
"196\n",
"197\n",
"198\n",
"199\n",
"200\n",
"201\n",
"202\n",
"203\n",
"204\n",
"205\n",
"209\n",
"210\n",
"211\n",
"212\n",
"213\n",
"214\n",
"215\n",
"216\n",
"217\n",
"218\n",
"219\n",
"220\n",
"221\n",
"222\n",
"223\n",
"224\n",
"225\n",
"226\n",
"227\n",
"228\n",
"229\n",
"230\n",
"231\n",
"232\n",
"233\n",
"234\n",
"235\n",
"236\n",
"237\n",
"238\n",
"239\n",
"240\n",
"241\n",
"242\n",
"243\n",
"244\n",
"245\n",
"246\n",
"247\n",
"248\n",
"249\n",
"250\n",
"251\n",
"252\n",
"253\n",
"254\n",
"255\n",
"256\n",
"257\n",
"258\n",
"259\n",
"260\n",
"261\n",
"262\n",
"263\n",
"264\n",
"265\n",
"266\n",
"267\n",
"268\n",
"269\n",
"270\n",
"272\n",
"273\n",
"274\n",
"275\n",
"276\n",
"277\n",
"278\n",
"280\n",
"281\n",
"282\n",
"283\n",
"284\n",
"285\n",
"286\n",
"287\n",
"288\n",
"289\n",
"290\n",
"291\n",
"292\n",
"293\n",
"294\n",
"296\n",
"298\n",
"299\n",
"300\n",
"301\n",
"302\n",
"303\n",
"304\n",
"305\n",
"306\n",
"307\n",
"308\n",
"309\n",
"310\n",
"311\n",
"312\n",
"313\n",
"314\n",
"315\n",
"316\n",
"317\n",
"318\n",
"319\n",
"320\n",
"321\n",
"322\n",
"323\n",
"324\n",
"326\n",
"327\n",
"328\n",
"329\n",
"330\n",
"331\n",
"332\n",
"333\n",
"334\n",
"335\n",
"336\n",
"338\n",
"339\n",
"340\n",
"341\n",
"342\n",
"343\n",
"344\n",
"345\n",
"346\n",
"347\n",
"348\n",
"349\n",
"350\n",
"351\n",
"352\n",
"353\n",
"354\n",
"355\n",
"356\n",
"357\n",
"358\n",
"359\n",
"360\n",
"361\n",
"362\n",
"363\n",
"364\n",
"365\n",
"366\n",
"367\n",
"368\n",
"369\n",
"370\n",
"371\n",
"372\n",
"373\n",
"374\n",
"375\n",
"376\n",
"377\n",
"378\n",
"379\n",
"380\n",
"381\n",
"382\n",
"383\n",
"384\n",
"385\n",
"386\n",
"387\n",
"388\n",
"389\n",
"391\n",
"392\n",
"393\n",
"394\n",
"395\n",
"396\n",
"397\n",
"398\n",
"399\n",
"400\n",
"401\n",
"402\n",
"403\n",
"404\n",
"405\n",
"406\n",
"407\n",
"408\n",
"409\n",
"410\n",
"411\n",
"412\n",
"413\n",
"414\n",
"415\n",
"416\n",
"417\n",
"418\n",
"419\n",
"420\n",
"421\n",
"422\n",
"423\n",
"424\n",
"425\n",
"426\n",
"427\n",
"428\n",
"429\n",
"430\n",
"431\n",
"432\n",
"433\n",
"434\n",
"435\n",
"436\n",
"437\n",
"438\n",
"439\n",
"440\n",
"441\n",
"442\n",
"443\n",
"444\n",
"445\n",
"447\n",
"448\n",
"449\n",
"450\n",
"451\n",
"452\n",
"453\n",
"454\n",
"455\n",
"456\n",
"457\n",
"458\n",
"459\n",
"460\n",
"461\n",
"462\n",
"463\n",
"464\n",
"465\n",
"466\n",
"467\n",
"468\n",
"469\n",
"470\n",
"471\n",
"472\n",
"473\n",
"474\n",
"475\n",
"476\n",
"477\n",
"478\n",
"479\n",
"480\n",
"481\n",
"482\n",
"483\n",
"484\n",
"485\n",
"486\n",
"487\n",
"488\n",
"489\n",
"490\n",
"491\n",
"492\n",
"493\n",
"494\n",
"495\n",
"496\n",
"497\n",
"498\n",
"499\n",
"500\n"
]
}
],
"source": [
"# Collect the \"ficha\" payload for record ids 1..500 from the PortalAMBER endpoint.\n",
"# The scrape is best-effort: an id whose request fails, whose body is not JSON, or\n",
"# whose JSON lacks a \"ficha\" entry is skipped. Only those expected failures are\n",
"# caught, so Ctrl-C / kernel interrupt still stops the loop.\n",
"output_guanajuato = []\n",
"for i in range(1, 501):\n",
"    try:\n",
"        response = requests.get(\n",
"            \"https://portal.pgjguanajuato.gob.mx:8443/ProcurApp/api/PortalAMBER/\" + str(i),\n",
"            timeout=30,  # without a timeout a stalled connection blocks the loop forever\n",
"        )\n",
"        data = response.json()[\"ficha\"]\n",
"    except (requests.RequestException, ValueError, KeyError, TypeError):\n",
"        continue\n",
"    output_guanajuato.append(data)\n",
"    print(i)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Export the first entry of every collected payload to CSV. Empty payloads are\n",
"# skipped because data[0] would raise IndexError and abort the whole export.\n",
"# NOTE(review): assumes each payload is a list of record dicts - confirm against the API.\n",
"pd.DataFrame([data[0] for data in output_guanajuato if data]).to_csv(\"AMBER_Guanajuato.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview a few collected records (the list is named output_guanajuato, not output).\n",
"output_guanajuato[:5]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment