Created
August 5, 2024 12:09
-
-
Save mikk-c/163b515067a1da624c53adb28fdd16f2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "690e3648-8352-4576-a76a-885edac5f8ef", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import networkx as nx\n", | |
"from gensim.models import Word2Vec\n", | |
"from sklearn.manifold import TSNE\n", | |
"from sklearn.cluster import KMeans\n", | |
"from sklearn.metrics import normalized_mutual_info_score" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "0405b4a3-e629-483c-b912-15954165156a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"H = nx.read_edgelist(\"1/data.txt\", create_using = nx.Graph(), delimiter = \"\\t\", nodetype = int)\n", | |
"G = nx.Graph()\n", | |
"G.add_nodes_from(sorted(H.nodes))\n", | |
"G.add_edges_from(H.edges)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "8623084e-dba5-49db-aa23-3582ec362681", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"array([2, 2, 2, 3, 1, 2, 0, 2, 3, 0, 2, 1, 1, 1, 2, 2, 1, 2, 2, 3, 1, 1,\n", | |
" 0, 1, 1, 2, 3, 1, 2, 1, 2, 0, 2, 2, 0, 2, 2, 0, 3, 2, 1, 0, 0, 1,\n", | |
" 3, 0, 0, 1, 2, 1, 3, 0, 3, 2, 3, 0, 3, 3, 0, 0, 3, 1, 3, 3, 2, 1,\n", | |
" 1, 0, 3, 2, 3, 3, 3, 0, 1, 2, 2, 2, 3, 0, 0, 3, 2, 3, 1, 0, 1, 1,\n", | |
" 0, 0, 0, 1, 2, 3, 0, 0, 2, 3, 2, 1, 2, 1, 3, 1, 2, 1, 1, 1, 0, 0,\n", | |
" 2, 1, 1, 3, 2, 0, 3, 3, 3, 0, 2, 0, 2, 3, 2, 0, 1, 3, 3, 3, 2, 2,\n", | |
" 1, 3, 3, 1, 1, 1, 1, 2, 1, 2, 3, 2, 2, 3, 1, 1, 0, 3, 0, 2, 0, 2,\n", | |
" 3, 1, 1, 0, 3, 1, 2, 0, 0, 1, 0, 2, 2, 1, 0, 0, 2, 0, 0, 2, 0, 2,\n", | |
" 2, 1, 1, 2, 1, 2, 1, 0, 0, 2, 2, 2, 0, 2, 0, 1, 0, 3, 3, 1, 0, 3,\n", | |
" 2, 1, 0, 3, 1, 2, 3, 1, 2, 2, 3, 0, 2, 1, 1, 2, 2, 1, 0, 0, 2, 0,\n", | |
" 2, 1, 3, 2, 1, 1, 1, 3, 1, 3, 3, 1, 2, 1, 2, 1, 1, 0, 3, 1, 0, 1,\n", | |
" 1, 2, 0, 0, 1, 2, 2, 2, 3, 2, 3, 0, 3, 3, 3, 3, 3, 0, 3, 2, 1, 1,\n", | |
" 0, 0, 1, 1, 1, 3, 3, 0, 3, 1, 1, 1, 2, 1, 1, 1, 2, 0, 2, 1, 3, 0,\n", | |
" 0, 0, 0, 0, 3, 3, 3, 1, 3, 0, 2, 3, 0, 3, 1, 2, 0, 0, 1, 2, 2, 3,\n", | |
" 0, 3, 1, 3, 1, 0, 3, 3, 1, 0, 1, 3, 0, 2, 2, 2, 1, 1, 1, 3, 3, 2,\n", | |
" 1, 1, 2, 1, 0, 3, 1, 2, 1, 3, 1, 1, 3, 1, 2, 1, 1, 1, 3, 0, 2, 3,\n", | |
" 0, 2, 0, 2, 1, 1, 2, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 2, 2, 2, 3, 2,\n", | |
" 1, 1, 0, 0, 3, 0, 2, 3, 0, 1, 2, 1, 2, 2, 2, 1, 2, 0, 3, 2, 0, 2,\n", | |
" 1, 3, 0, 3, 1, 0, 1, 1, 0, 3, 2, 3, 3, 2, 3, 2, 0, 3, 3, 1, 0, 0,\n", | |
" 3, 1, 1, 0, 3, 2, 3, 2, 3, 3, 0, 1, 3, 2, 0, 1, 1, 0, 3, 3, 1, 0,\n", | |
" 3, 3, 2, 3, 1, 1, 2, 0, 0, 2, 2, 3, 2, 1, 0, 0, 3, 0, 1, 2, 0, 0,\n", | |
" 3, 1, 3, 2, 3, 3, 0, 1, 0, 2, 1, 1, 1, 3, 1, 2, 2, 3, 3, 3, 0, 3,\n", | |
" 1, 1, 2, 3, 2, 3, 1, 0, 1, 0, 1, 0, 3, 3, 3, 0, 3, 1, 2, 1, 3, 0,\n", | |
" 2, 3, 2, 3, 1, 2, 1, 0, 0, 2, 2, 3, 3, 0, 2, 2, 1, 2, 1, 0, 2, 3,\n", | |
" 1, 0, 1, 1, 3, 2, 2, 1, 2, 1, 0, 3, 1, 2, 0, 1, 3, 0, 3, 0, 0, 2,\n", | |
" 0, 3, 3, 1, 0, 0, 3, 2, 3, 1, 3, 3, 1, 1, 2, 1, 0, 3, 1, 1, 2, 2,\n", | |
" 2, 1, 3, 2, 3, 0, 2, 1, 3, 0, 1, 1, 2, 0, 3, 3, 3, 0, 3, 0, 3, 2,\n", | |
" 1, 2, 2, 2, 1, 0, 1, 0, 3, 0, 1, 0, 2, 2, 3, 3, 0, 1, 1, 1, 1, 2,\n", | |
" 3, 2, 0, 3, 1, 1, 3, 3, 2, 1, 2, 2, 2, 3, 0, 3, 2, 1, 1, 0, 3, 3,\n", | |
" 0, 0, 1, 0, 1, 1, 1, 0, 0, 2, 1, 3, 3, 1, 1, 3, 1, 0, 3, 2, 0, 1,\n", | |
" 2, 3, 1, 0, 3, 3, 3, 1, 2, 0, 3, 1, 1, 1, 2, 2, 1, 2, 1, 0, 3, 0,\n", | |
" 1, 2, 3, 3, 3, 0, 2, 2, 0, 3, 1, 1, 2, 1, 1, 3, 3, 1, 1, 2, 3, 3,\n", | |
" 2, 2, 2, 0, 2, 3, 2, 1, 2, 1, 0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 0, 2,\n", | |
" 1, 2, 3, 2, 0, 1, 1, 0, 3, 0, 1, 2, 1, 0, 2, 0, 3, 3, 3, 0, 2, 2,\n", | |
" 2, 1, 1, 3, 1, 1, 2, 3, 1, 2, 1, 1, 3, 3, 2, 1, 2, 0, 3, 0, 1, 0,\n", | |
" 1, 2, 0, 2, 1, 1, 2, 0, 0, 2, 3, 0, 1, 3, 3, 0, 1, 2, 1, 1, 0, 1,\n", | |
" 1, 0, 1, 1, 2, 1, 0, 3, 1, 0, 0, 3, 2, 0, 3, 1, 3, 0, 3, 1, 0, 3,\n", | |
" 0, 2, 1, 2, 2, 1, 2, 0, 2, 3, 2, 0, 1, 0, 1, 2, 3, 1, 3, 0, 0, 1,\n", | |
" 3, 1, 0, 1, 2, 1, 0, 2, 1, 1, 3, 2, 1, 1, 0, 2, 0, 1, 3, 2, 0, 0,\n", | |
" 1, 2, 1, 1, 2, 0, 1, 1, 0, 3, 0, 3, 0, 1, 1, 1, 2, 3, 1, 3, 3, 0,\n", | |
" 1, 1, 1, 1, 1, 2, 1, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 1, 1, 3, 3, 2,\n", | |
" 2, 3, 1, 3, 3, 1, 1, 2, 3, 0, 1, 3, 3, 1, 2, 2, 0, 3, 1, 1, 3, 1,\n", | |
" 0, 0, 1, 1, 3, 0, 0, 1, 1, 2, 2, 0, 2, 3, 2, 0, 3, 2, 2, 1, 0, 2,\n", | |
" 3, 0, 0, 0, 1, 3, 1, 2, 0, 2, 2, 2, 1, 2, 1, 2, 1, 2, 3, 0, 0, 0,\n", | |
" 3, 1, 1, 2, 1, 1, 2, 2, 1, 0, 2, 3, 1, 3, 0, 3, 0, 2, 1, 1, 3, 1,\n", | |
" 3, 1, 1, 1, 0, 3, 3, 3, 2, 1], dtype=int32)" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"rndwalks = list(nx.generate_random_paths(G, 10000, path_length = 6))\n", | |
"model = Word2Vec(sentences = rndwalks, vector_size = 32, min_count = 1, workers = 8)\n", | |
"\n", | |
"nodemap = [None] * len(G.nodes)\n", | |
"for k in model.wv.key_to_index:\n", | |
" nodemap[k] = model.wv.key_to_index[k]\n", | |
"\n", | |
"reducer = TSNE(n_components = 2, init = \"pca\")\n", | |
"embeddings = reducer.fit_transform(model.wv.vectors[nodemap])\n", | |
"reducer = KMeans(n_clusters = 4)\n", | |
"clusters = reducer.fit(embeddings).labels_\n", | |
"\n", | |
"clusters" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "f00c1779-b5c2-4f40-8ca7-dbf9ed52464c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"nodes = list(G.nodes)\n", | |
"\n", | |
"ground_truth = {}\n", | |
"with open(\"1/nodes.txt\", 'r') as f:\n", | |
" for line in f:\n", | |
" fields = line.strip().split('\\t')\n", | |
" ground_truth[int(fields[0])] = int(fields[1])\n", | |
"\n", | |
"ground_truth = [ground_truth[i] for i in nodes]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "f4cc816f-e8fb-4186-b82e-e20edcb23d2c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[0, 0, 0, 1, 0, 0, 2, 0, 1, 2]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"lp = list(nx.algorithms.community.asyn_lpa_communities(G))\n", | |
"lp = {n: c for c in range(len(lp)) for n in lp[c]}\n", | |
"lp_array = []\n", | |
"for n in G.nodes:\n", | |
" lp_array.append(lp[n])\n", | |
"\n", | |
"lp_array[:10]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "36cda6a2-2c92-497a-b237-59b47224fae0", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.9439976142363943\n", | |
"0.8323939993712498\n" | |
] | |
} | |
], | |
"source": [ | |
"print(normalized_mutual_info_score(clusters, ground_truth))\n", | |
"print(normalized_mutual_info_score(lp_array, ground_truth))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.12.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment