Skip to content

Instantly share code, notes, and snippets.

@ricalanis
Created March 11, 2018 08:49
Show Gist options
  • Save ricalanis/8291dface62c4ca9ecad56898cc5a0c9 to your computer and use it in GitHub Desktop.
Save ricalanis/8291dface62c4ca9ecad56898cc5a0c9 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import langdetect"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# Sample code-like text used as input for the language-detection experiment.\n",
"# It is only tokenized as text (see `code_tokens`); it is never executed.\n",
"string_test = \"\"\"import sudomin\n",
"\n",
"def funcionAProbar:\n",
" a = 2313\n",
" return a*5\n",
"\n",
"thisIsAFunction(2)\n",
"draw_me_a_sheep\n",
"dibujame_una_cabra\n",
"\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"import nltk"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# Split the sample text into tokens; each token is later filtered and\n",
"# passed to language detection individually.\n",
"code_tokens = nltk.tokenize.word_tokenize(string_test)"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [],
"source": [
"def try_detection(token):\n",
"    \"\"\"Return langdetect's language code for `token`, or None if detection fails.\n",
"\n",
"    Args:\n",
"        token: Text to classify (a single token from the tokenizer).\n",
"\n",
"    Returns:\n",
"        The value returned by `langdetect.detect(token)`, or None when\n",
"        langdetect raises `LangDetectException` (typically because the\n",
"        text has no usable letters to extract features from).\n",
"    \"\"\"\n",
"    try:\n",
"        return langdetect.detect(token)\n",
"    except langdetect.LangDetectException:\n",
"        # Only langdetect's own failure is treated as \"no language\". A bare\n",
"        # `except:` would also swallow KeyboardInterrupt and unrelated bugs.\n",
"        return None"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Filter lists used by `consider_token`. They are defined before the cells\n",
"# that use them so the notebook works on a fresh kernel (Restart & Run All).\n",
"reserved_words = [\"False\",\"class\",\"finally\",\"is\",\"return\",\"None\",\"continue\",\"for\",\"lambda\",\"try\",\"True\",\"def\",\"from\",\"nonlocal\",\"while\",\"and\",\"del\",\"global\",\"not\",\"with\",\"as\",\"elif\",\"if\",\"or\",\"yield\",\"assert\",\"else\",\"import\",\"pass\",\"break\",\"except\",\"in\",\"raise\"]\n",
"mathematical_ops = [\"+\", \"-\", \"*\", \"/\", \"=\"]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: substring membership finds an operator embedded in a token.\n",
"\"*\" in \"a*2\""
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [],
"source": [
"def consider_token(token):\n",
"    \"\"\"Return True if `token` should be sent to language detection.\n",
"\n",
"    A token is rejected when it contains any entry of `mathematical_ops`\n",
"    as a substring, when it is listed in `reserved_words`, or when it is\n",
"    exactly one character long.\n",
"    \"\"\"\n",
"    if any(op in token for op in mathematical_ops):\n",
"        return False\n",
"    if token in reserved_words:\n",
"        return False\n",
"    return len(token) != 1"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('sudomin', 'lt'),\n",
" ('funcionAProbar', 'es'),\n",
" ('thisIsAFunction', 'en'),\n",
" ('draw_me_a_sheep', 'en'),\n",
" ('dibujame_una_cabra', 'es')]"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run detection once per candidate token and keep only tokens that got a\n",
"# result. The filter and the displayed value now come from the same call.\n",
"[\n",
"    (token, lang)\n",
"    for token, lang in (\n",
"        (token, try_detection(token)) for token in code_tokens if consider_token(token)\n",
"    )\n",
"    if lang\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [],
"source": [
"# The tricky part is detecting which tokens actually need to be checked.\n",
"# Knowing a bit of context can help (such as an import or the file the expression comes from)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment