Created
September 14, 2017 15:21
-
-
Save nzw0301/b569c5f73616c58220176fe8576dd2fd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from gensim.models import word2vec\n", | |
"from gensim.models.keyedvectors import KeyedVectors\n", | |
"import numpy as np" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Load the text8 vectors file into word_vectors (binary=False is passed to the loader).\n", | |
"word_vectors = KeyedVectors.load_word2vec_format(\n", | |
"    './src/main/resources/text8.vec',\n", | |
"    binary=False,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Alternative input: uncomment to load the enwiki9 vectors file into word_vectors instead of text8.\n", | |
"# word_vectors = KeyedVectors.load_word2vec_format('./src/main/resources/enwiki9.txt.vec', binary=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('daughter', 0.5485425591468811),\n", | |
" ('empress', 0.5350010991096497),\n", | |
" ('prince', 0.5310357809066772),\n", | |
" ('throne', 0.5217003226280212),\n", | |
" ('son', 0.517214298248291),\n", | |
" ('queen', 0.5135539770126343),\n", | |
" ('minamoto', 0.506763756275177),\n", | |
" ('viii', 0.5048807859420776),\n", | |
" ('mary', 0.498077392578125),\n", | |
" ('heir', 0.49674057960510254)]" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Analogy query: 'woman' and 'king' as positive terms, 'man' as the negative term; keep the top 10.\n", | |
"# (This query was previously run in two identical back-to-back cells with identical output.)\n", | |
"word_vectors.most_similar(positive=['woman', 'king'], negative=['man'], topn=10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('file', 0.67220139503479),\n", | |
" ('compression', 0.6693793535232544),\n", | |
" ('stored', 0.6680015921592712),\n", | |
" ('files', 0.6367778778076172),\n", | |
" ('routing', 0.6301351189613342),\n", | |
" ('ip', 0.6274353265762329),\n", | |
" ('format', 0.6236443519592285),\n", | |
" ('packet', 0.6220601797103882),\n", | |
" ('bits', 0.619074821472168),\n", | |
" ('ipv', 0.6162351965904236)]" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Words most similar to 'data'; topn is not passed, so the library default applies.\n", | |
"word_vectors.most_similar(\n", | |
"    positive=['data'],\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('elizabeth', 0.7907043695449829),\n", | |
" ('prince', 0.6521976590156555),\n", | |
" ('king', 0.636025607585907),\n", | |
" ('princess', 0.6347065567970276),\n", | |
" ('anne', 0.6185387969017029),\n", | |
" ('crown', 0.6120067834854126),\n", | |
" ('monarch', 0.6079266667366028),\n", | |
" ('victoria', 0.6078702211380005),\n", | |
" ('isabella', 0.6061561107635498),\n", | |
" ('majesty', 0.6058646440505981)]" | |
] | |
}, | |
"execution_count": 30, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Words most similar to 'queen'; topn is not passed, so the library default applies.\n", | |
"word_vectors.most_similar(\n", | |
"    positive=['queen'],\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('france', 0.9623157382011414),\n", | |
" ('hirohito', 0.9062392115592957),\n", | |
" ('imposition', 0.8845174312591553),\n", | |
" ('mountbatten', 0.8710086941719055),\n", | |
" ('feudal', 0.8627457022666931),\n", | |
" ('formalised', 0.8624369502067566),\n", | |
" ('discuss', 0.8617070913314819),\n", | |
" ('revolutionary', 0.8540736436843872),\n", | |
" ('overturn', 0.8520448207855225),\n", | |
" ('carthaginian', 0.8508167266845703),\n", | |
" ('sanction', 0.8474399447441101),\n", | |
" ('germany', 0.8420551419258118),\n", | |
" ('declared', 0.8412027359008789),\n", | |
" ('italy', 0.8404319882392883),\n", | |
" ('stalin', 0.837766170501709),\n", | |
" ('tried', 0.83773273229599),\n", | |
" ('napoleon', 0.8374594449996948),\n", | |
" ('portugal', 0.8338634371757507),\n", | |
" ('intervene', 0.8285016417503357),\n", | |
" ('unification', 0.8258130550384521),\n", | |
" ('informed', 0.8245867490768433),\n", | |
" ('impetus', 0.8214332461357117),\n", | |
" ('ambitions', 0.8208622336387634),\n", | |
" ('miko', 0.8205797076225281),\n", | |
" ('undertake', 0.8186885118484497),\n", | |
" ('possessions', 0.8185335397720337),\n", | |
" ('kimmei', 0.8183091878890991),\n", | |
" ('warrant', 0.8182796239852905),\n", | |
" ('nihonshoki', 0.8158888816833496),\n", | |
" ('accession', 0.813630998134613)]" | |
] | |
}, | |
"execution_count": 26, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Analogy query ('paris' and 'japan' positive, 'tokyo' negative) scored with most_similar_cosmul; keep the top 30.\n", | |
"word_vectors.most_similar_cosmul(\n", | |
"    positive=['paris', 'japan'],\n", | |
"    negative=['tokyo'],\n", | |
"    topn=30,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 29, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[('france', 0.5960692167282104),\n", | |
" ('italy', 0.4693019390106201),\n", | |
" ('germany', 0.4654536843299866),\n", | |
" ('portugal', 0.4551469683647156),\n", | |
" ('hirohito', 0.4550001323223114),\n", | |
" ('napoleon', 0.4326018989086151),\n", | |
" ('mountbatten', 0.42282170057296753),\n", | |
" ('china', 0.41148364543914795),\n", | |
" ('ambitions', 0.40866583585739136),\n", | |
" ('revolutionary', 0.40459465980529785),\n", | |
" ('kimmei', 0.40294909477233887),\n", | |
" ('vienna', 0.40291261672973633),\n", | |
" ('ferdinand', 0.3996574580669403),\n", | |
" ('feudal', 0.39800015091896057),\n", | |
" ('miko', 0.3969685137271881),\n", | |
" ('korea', 0.3895190358161926),\n", | |
" ('kojiki', 0.3892173171043396),\n", | |
" ('declared', 0.3865616023540497),\n", | |
" ('nihonshoki', 0.3864176273345947),\n", | |
" ('kun', 0.38556843996047974),\n", | |
" ('discuss', 0.3840706944465637),\n", | |
" ('netherlands', 0.3836057186126709),\n", | |
" ('possessions', 0.38359445333480835),\n", | |
" ('unification', 0.38304412364959717),\n", | |
" ('stalin', 0.37952858209609985),\n", | |
" ('jing', 0.37798911333084106),\n", | |
" ('isabella', 0.37629127502441406),\n", | |
" ('britain', 0.37476783990859985),\n", | |
" ('habsburgs', 0.37454771995544434),\n", | |
" ('treaty', 0.37411004304885864)]" | |
] | |
}, | |
"execution_count": 29, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Analogy query ('paris' and 'japan' positive, 'tokyo' negative) scored with most_similar; keep the top 30.\n", | |
"word_vectors.most_similar(\n", | |
"    positive=['paris', 'japan'],\n", | |
"    negative=['tokyo'],\n", | |
"    topn=30,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment