Skip to content

Instantly share code, notes, and snippets.

@nzw0301
Created September 14, 2017 15:21
Show Gist options
  • Save nzw0301/b569c5f73616c58220176fe8576dd2fd to your computer and use it in GitHub Desktop.
Save nzw0301/b569c5f73616c58220176fe8576dd2fd to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"from gensim.models import word2vec\n",
"from gensim.models.keyedvectors import KeyedVectors\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the text8 embeddings; binary=False selects the plain-text word2vec format.\n",
"word_vectors = KeyedVectors.load_word2vec_format(\n",
"    './src/main/resources/text8.vec',\n",
"    binary=False,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Disabled alternative: uncomment to load the enwiki9 vectors in place of text8.\n",
"# word_vectors = KeyedVectors.load_word2vec_format(\n",
"#     './src/main/resources/enwiki9.txt.vec',\n",
"#     binary=False,\n",
"# )"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('daughter', 0.5485425591468811),\n",
" ('empress', 0.5350010991096497),\n",
" ('prince', 0.5310357809066772),\n",
" ('throne', 0.5217003226280212),\n",
" ('son', 0.517214298248291),\n",
" ('queen', 0.5135539770126343),\n",
" ('minamoto', 0.506763756275177),\n",
" ('viii', 0.5048807859420776),\n",
" ('mary', 0.498077392578125),\n",
" ('heir', 0.49674057960510254)]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Analogy query: 'king' - 'man' + 'woman', keeping the 10 best-scoring words.\n",
"word_vectors.most_similar(\n",
"    positive=['woman', 'king'],\n",
"    negative=['man'],\n",
"    topn=10,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('daughter', 0.5485425591468811),\n",
" ('empress', 0.5350010991096497),\n",
" ('prince', 0.5310357809066772),\n",
" ('throne', 0.5217003226280212),\n",
" ('son', 0.517214298248291),\n",
" ('queen', 0.5135539770126343),\n",
" ('minamoto', 0.506763756275177),\n",
" ('viii', 0.5048807859420776),\n",
" ('mary', 0.498077392578125),\n",
" ('heir', 0.49674057960510254)]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Repeat of the 'king' - 'man' + 'woman' query (top 10); the stored result is unchanged.\n",
"word_vectors.most_similar(\n",
"    positive=['woman', 'king'],\n",
"    negative=['man'],\n",
"    topn=10,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('file', 0.67220139503479),\n",
" ('compression', 0.6693793535232544),\n",
" ('stored', 0.6680015921592712),\n",
" ('files', 0.6367778778076172),\n",
" ('routing', 0.6301351189613342),\n",
" ('ip', 0.6274353265762329),\n",
" ('format', 0.6236443519592285),\n",
" ('packet', 0.6220601797103882),\n",
" ('bits', 0.619074821472168),\n",
" ('ipv', 0.6162351965904236)]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Nearest neighbours of the single word 'data' (library-default result count).\n",
"word_vectors.most_similar(\n",
"    positive=['data'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('elizabeth', 0.7907043695449829),\n",
" ('prince', 0.6521976590156555),\n",
" ('king', 0.636025607585907),\n",
" ('princess', 0.6347065567970276),\n",
" ('anne', 0.6185387969017029),\n",
" ('crown', 0.6120067834854126),\n",
" ('monarch', 0.6079266667366028),\n",
" ('victoria', 0.6078702211380005),\n",
" ('isabella', 0.6061561107635498),\n",
" ('majesty', 0.6058646440505981)]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Nearest neighbours of the single word 'queen' (library-default result count).\n",
"word_vectors.most_similar(\n",
"    positive=['queen'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('france', 0.9623157382011414),\n",
" ('hirohito', 0.9062392115592957),\n",
" ('imposition', 0.8845174312591553),\n",
" ('mountbatten', 0.8710086941719055),\n",
" ('feudal', 0.8627457022666931),\n",
" ('formalised', 0.8624369502067566),\n",
" ('discuss', 0.8617070913314819),\n",
" ('revolutionary', 0.8540736436843872),\n",
" ('overturn', 0.8520448207855225),\n",
" ('carthaginian', 0.8508167266845703),\n",
" ('sanction', 0.8474399447441101),\n",
" ('germany', 0.8420551419258118),\n",
" ('declared', 0.8412027359008789),\n",
" ('italy', 0.8404319882392883),\n",
" ('stalin', 0.837766170501709),\n",
" ('tried', 0.83773273229599),\n",
" ('napoleon', 0.8374594449996948),\n",
" ('portugal', 0.8338634371757507),\n",
" ('intervene', 0.8285016417503357),\n",
" ('unification', 0.8258130550384521),\n",
" ('informed', 0.8245867490768433),\n",
" ('impetus', 0.8214332461357117),\n",
" ('ambitions', 0.8208622336387634),\n",
" ('miko', 0.8205797076225281),\n",
" ('undertake', 0.8186885118484497),\n",
" ('possessions', 0.8185335397720337),\n",
" ('kimmei', 0.8183091878890991),\n",
" ('warrant', 0.8182796239852905),\n",
" ('nihonshoki', 0.8158888816833496),\n",
" ('accession', 0.813630998134613)]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Analogy query 'paris' - 'tokyo' + 'japan' scored by most_similar_cosmul; top 30 words.\n",
"word_vectors.most_similar_cosmul(\n",
"    positive=['paris', 'japan'],\n",
"    negative=['tokyo'],\n",
"    topn=30,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('france', 0.5960692167282104),\n",
" ('italy', 0.4693019390106201),\n",
" ('germany', 0.4654536843299866),\n",
" ('portugal', 0.4551469683647156),\n",
" ('hirohito', 0.4550001323223114),\n",
" ('napoleon', 0.4326018989086151),\n",
" ('mountbatten', 0.42282170057296753),\n",
" ('china', 0.41148364543914795),\n",
" ('ambitions', 0.40866583585739136),\n",
" ('revolutionary', 0.40459465980529785),\n",
" ('kimmei', 0.40294909477233887),\n",
" ('vienna', 0.40291261672973633),\n",
" ('ferdinand', 0.3996574580669403),\n",
" ('feudal', 0.39800015091896057),\n",
" ('miko', 0.3969685137271881),\n",
" ('korea', 0.3895190358161926),\n",
" ('kojiki', 0.3892173171043396),\n",
" ('declared', 0.3865616023540497),\n",
" ('nihonshoki', 0.3864176273345947),\n",
" ('kun', 0.38556843996047974),\n",
" ('discuss', 0.3840706944465637),\n",
" ('netherlands', 0.3836057186126709),\n",
" ('possessions', 0.38359445333480835),\n",
" ('unification', 0.38304412364959717),\n",
" ('stalin', 0.37952858209609985),\n",
" ('jing', 0.37798911333084106),\n",
" ('isabella', 0.37629127502441406),\n",
" ('britain', 0.37476783990859985),\n",
" ('habsburgs', 0.37454771995544434),\n",
" ('treaty', 0.37411004304885864)]"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Analogy query 'paris' - 'tokyo' + 'japan' scored by most_similar; top 30 words.\n",
"word_vectors.most_similar(\n",
"    positive=['paris', 'japan'],\n",
"    negative=['tokyo'],\n",
"    topn=30,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment