Skip to content

Instantly share code, notes, and snippets.

@mikk-c
Created August 5, 2024 12:09
Show Gist options
  • Save mikk-c/163b515067a1da624c53adb28fdd16f2 to your computer and use it in GitHub Desktop.
Save mikk-c/163b515067a1da624c53adb28fdd16f2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "690e3648-8352-4576-a76a-885edac5f8ef",
"metadata": {},
"outputs": [],
"source": [
"import networkx as nx\n",
"from gensim.models import Word2Vec\n",
"from sklearn.manifold import TSNE\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.metrics import normalized_mutual_info_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "0405b4a3-e629-483c-b912-15954165156a",
"metadata": {},
"outputs": [],
"source": [
"# Read the tab-separated edge list; node ids are parsed as integers.\n",
"H = nx.read_edgelist(\n",
"    \"1/data.txt\",\n",
"    create_using = nx.Graph(),\n",
"    delimiter = \"\\t\",\n",
"    nodetype = int,\n",
")\n",
"\n",
"# Rebuild the graph with nodes inserted in ascending id order, so that\n",
"# iterating G.nodes yields sorted ids regardless of the edge order in the file.\n",
"G = nx.Graph()\n",
"G.add_nodes_from(sorted(H.nodes()))\n",
"G.add_edges_from(H.edges())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8623084e-dba5-49db-aa23-3582ec362681",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 2, 2, 3, 1, 2, 0, 2, 3, 0, 2, 1, 1, 1, 2, 2, 1, 2, 2, 3, 1, 1,\n",
" 0, 1, 1, 2, 3, 1, 2, 1, 2, 0, 2, 2, 0, 2, 2, 0, 3, 2, 1, 0, 0, 1,\n",
" 3, 0, 0, 1, 2, 1, 3, 0, 3, 2, 3, 0, 3, 3, 0, 0, 3, 1, 3, 3, 2, 1,\n",
" 1, 0, 3, 2, 3, 3, 3, 0, 1, 2, 2, 2, 3, 0, 0, 3, 2, 3, 1, 0, 1, 1,\n",
" 0, 0, 0, 1, 2, 3, 0, 0, 2, 3, 2, 1, 2, 1, 3, 1, 2, 1, 1, 1, 0, 0,\n",
" 2, 1, 1, 3, 2, 0, 3, 3, 3, 0, 2, 0, 2, 3, 2, 0, 1, 3, 3, 3, 2, 2,\n",
" 1, 3, 3, 1, 1, 1, 1, 2, 1, 2, 3, 2, 2, 3, 1, 1, 0, 3, 0, 2, 0, 2,\n",
" 3, 1, 1, 0, 3, 1, 2, 0, 0, 1, 0, 2, 2, 1, 0, 0, 2, 0, 0, 2, 0, 2,\n",
" 2, 1, 1, 2, 1, 2, 1, 0, 0, 2, 2, 2, 0, 2, 0, 1, 0, 3, 3, 1, 0, 3,\n",
" 2, 1, 0, 3, 1, 2, 3, 1, 2, 2, 3, 0, 2, 1, 1, 2, 2, 1, 0, 0, 2, 0,\n",
" 2, 1, 3, 2, 1, 1, 1, 3, 1, 3, 3, 1, 2, 1, 2, 1, 1, 0, 3, 1, 0, 1,\n",
" 1, 2, 0, 0, 1, 2, 2, 2, 3, 2, 3, 0, 3, 3, 3, 3, 3, 0, 3, 2, 1, 1,\n",
" 0, 0, 1, 1, 1, 3, 3, 0, 3, 1, 1, 1, 2, 1, 1, 1, 2, 0, 2, 1, 3, 0,\n",
" 0, 0, 0, 0, 3, 3, 3, 1, 3, 0, 2, 3, 0, 3, 1, 2, 0, 0, 1, 2, 2, 3,\n",
" 0, 3, 1, 3, 1, 0, 3, 3, 1, 0, 1, 3, 0, 2, 2, 2, 1, 1, 1, 3, 3, 2,\n",
" 1, 1, 2, 1, 0, 3, 1, 2, 1, 3, 1, 1, 3, 1, 2, 1, 1, 1, 3, 0, 2, 3,\n",
" 0, 2, 0, 2, 1, 1, 2, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 2, 2, 2, 3, 2,\n",
" 1, 1, 0, 0, 3, 0, 2, 3, 0, 1, 2, 1, 2, 2, 2, 1, 2, 0, 3, 2, 0, 2,\n",
" 1, 3, 0, 3, 1, 0, 1, 1, 0, 3, 2, 3, 3, 2, 3, 2, 0, 3, 3, 1, 0, 0,\n",
" 3, 1, 1, 0, 3, 2, 3, 2, 3, 3, 0, 1, 3, 2, 0, 1, 1, 0, 3, 3, 1, 0,\n",
" 3, 3, 2, 3, 1, 1, 2, 0, 0, 2, 2, 3, 2, 1, 0, 0, 3, 0, 1, 2, 0, 0,\n",
" 3, 1, 3, 2, 3, 3, 0, 1, 0, 2, 1, 1, 1, 3, 1, 2, 2, 3, 3, 3, 0, 3,\n",
" 1, 1, 2, 3, 2, 3, 1, 0, 1, 0, 1, 0, 3, 3, 3, 0, 3, 1, 2, 1, 3, 0,\n",
" 2, 3, 2, 3, 1, 2, 1, 0, 0, 2, 2, 3, 3, 0, 2, 2, 1, 2, 1, 0, 2, 3,\n",
" 1, 0, 1, 1, 3, 2, 2, 1, 2, 1, 0, 3, 1, 2, 0, 1, 3, 0, 3, 0, 0, 2,\n",
" 0, 3, 3, 1, 0, 0, 3, 2, 3, 1, 3, 3, 1, 1, 2, 1, 0, 3, 1, 1, 2, 2,\n",
" 2, 1, 3, 2, 3, 0, 2, 1, 3, 0, 1, 1, 2, 0, 3, 3, 3, 0, 3, 0, 3, 2,\n",
" 1, 2, 2, 2, 1, 0, 1, 0, 3, 0, 1, 0, 2, 2, 3, 3, 0, 1, 1, 1, 1, 2,\n",
" 3, 2, 0, 3, 1, 1, 3, 3, 2, 1, 2, 2, 2, 3, 0, 3, 2, 1, 1, 0, 3, 3,\n",
" 0, 0, 1, 0, 1, 1, 1, 0, 0, 2, 1, 3, 3, 1, 1, 3, 1, 0, 3, 2, 0, 1,\n",
" 2, 3, 1, 0, 3, 3, 3, 1, 2, 0, 3, 1, 1, 1, 2, 2, 1, 2, 1, 0, 3, 0,\n",
" 1, 2, 3, 3, 3, 0, 2, 2, 0, 3, 1, 1, 2, 1, 1, 3, 3, 1, 1, 2, 3, 3,\n",
" 2, 2, 2, 0, 2, 3, 2, 1, 2, 1, 0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 0, 2,\n",
" 1, 2, 3, 2, 0, 1, 1, 0, 3, 0, 1, 2, 1, 0, 2, 0, 3, 3, 3, 0, 2, 2,\n",
" 2, 1, 1, 3, 1, 1, 2, 3, 1, 2, 1, 1, 3, 3, 2, 1, 2, 0, 3, 0, 1, 0,\n",
" 1, 2, 0, 2, 1, 1, 2, 0, 0, 2, 3, 0, 1, 3, 3, 0, 1, 2, 1, 1, 0, 1,\n",
" 1, 0, 1, 1, 2, 1, 0, 3, 1, 0, 0, 3, 2, 0, 3, 1, 3, 0, 3, 1, 0, 3,\n",
" 0, 2, 1, 2, 2, 1, 2, 0, 2, 3, 2, 0, 1, 0, 1, 2, 3, 1, 3, 0, 0, 1,\n",
" 3, 1, 0, 1, 2, 1, 0, 2, 1, 1, 3, 2, 1, 1, 0, 2, 0, 1, 3, 2, 0, 0,\n",
" 1, 2, 1, 1, 2, 0, 1, 1, 0, 3, 0, 3, 0, 1, 1, 1, 2, 3, 1, 3, 3, 0,\n",
" 1, 1, 1, 1, 1, 2, 1, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 1, 1, 3, 3, 2,\n",
" 2, 3, 1, 3, 3, 1, 1, 2, 3, 0, 1, 3, 3, 1, 2, 2, 0, 3, 1, 1, 3, 1,\n",
" 0, 0, 1, 1, 3, 0, 0, 1, 1, 2, 2, 0, 2, 3, 2, 0, 3, 2, 2, 1, 0, 2,\n",
" 3, 0, 0, 0, 1, 3, 1, 2, 0, 2, 2, 2, 1, 2, 1, 2, 1, 2, 3, 0, 0, 0,\n",
" 3, 1, 1, 2, 1, 1, 2, 2, 1, 0, 2, 3, 1, 3, 0, 3, 0, 2, 1, 1, 3, 1,\n",
" 3, 1, 1, 1, 0, 3, 3, 3, 2, 1], dtype=int32)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sample 10,000 random walks on G and train Word2Vec on them, treating each\n",
"# walk as a sentence whose tokens are node ids. No seed is passed to any step\n",
"# in this cell, so the exact labels can differ from run to run.\n",
"rndwalks = list(nx.generate_random_paths(G, 10000, path_length = 6))\n",
"model = Word2Vec(sentences = rndwalks, vector_size = 32, min_count = 1, workers = 8)\n",
"\n",
"# Fail with a clear message if some node was never visited by a walk: it has no\n",
"# embedding, and it would otherwise surface as an obscure indexing error.\n",
"missing = [n for n in G.nodes if n not in model.wv.key_to_index]\n",
"if missing:\n",
"    raise ValueError(f\"{len(missing)} node(s) have no embedding, e.g. {missing[:5]}\")\n",
"\n",
"# nodemap[i] is the row of model.wv.vectors that holds the embedding of the\n",
"# i-th node of G.nodes. Looking nodes up by id (rather than using the id as a\n",
"# list position) also works when ids are not exactly 0..n-1.\n",
"nodemap = [model.wv.key_to_index[n] for n in G.nodes]\n",
"\n",
"# Project the 32-d embeddings to 2-d with t-SNE, then cluster with k-means.\n",
"reducer = TSNE(n_components = 2, init = \"pca\")\n",
"embeddings = reducer.fit_transform(model.wv.vectors[nodemap])\n",
"reducer = KMeans(n_clusters = 4)\n",
"clusters = reducer.fit(embeddings).labels_\n",
"\n",
"clusters"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f00c1779-b5c2-4f40-8ca7-dbf9ed52464c",
"metadata": {},
"outputs": [],
"source": [
"# Node ids in graph order; ground_truth is emitted in this same order.\n",
"nodes = list(G.nodes)\n",
"\n",
"# The first two tab-separated fields of each line are read as an integer\n",
"# node id and an integer label.\n",
"with open(\"1/nodes.txt\", 'r') as f:\n",
"    rows = [line.strip().split('\\t') for line in f]\n",
"labels_by_node = {int(row[0]): int(row[1]) for row in rows}\n",
"\n",
"# Flatten the mapping into a list aligned with `nodes`.\n",
"ground_truth = [labels_by_node[i] for i in nodes]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f4cc816f-e8fb-4186-b82e-e20edcb23d2c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0, 0, 0, 1, 0, 0, 2, 0, 1, 2]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Run asynchronous label propagation; each element is one community's nodes.\n",
"communities = nx.algorithms.community.asyn_lpa_communities(G)\n",
"\n",
"# Node -> index of the community it belongs to.\n",
"lp = {n: c for c, members in enumerate(communities) for n in members}\n",
"\n",
"# Community index per node, listed in G.nodes order.\n",
"lp_array = [lp[n] for n in G.nodes]\n",
"\n",
"lp_array[:10]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "36cda6a2-2c92-497a-b237-59b47224fae0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.9439976142363943\n",
"0.8323939993712498\n"
]
}
],
"source": [
"# NMI against the ground truth: embedding clusters first, label propagation second.\n",
"for predicted in (clusters, lp_array):\n",
"    print(normalized_mutual_info_score(predicted, ground_truth))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment