Skip to content

Instantly share code, notes, and snippets.

@simplymathematics
Last active October 9, 2024 16:01
Show Gist options
  • Save simplymathematics/65f3e77f8ed5024bcf9a3df7afa9205e to your computer and use it in GitHub Desktop.
Save simplymathematics/65f3e77f8ed5024bcf9a3df7afa9205e to your computer and use it in GitHub Desktop.
Use matrix centrality to find the most central node according to two criteria
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Trying n=10\n",
"Found 4 nodes that are in the top 10 for both matrices: {8, 2, 3, 5}.\n",
"Trying n=9\n",
"Found 3 nodes that are in the top 9 for both matrices: {2, 3, 5}.\n",
"Trying n=8\n",
"Found 2 nodes that are in the top 8 for both matrices: {2, 3}.\n",
"Trying n=7\n",
"Found a single node that is in the top 7 for both matrices: {3}.\n"
]
},
{
"data": {
"text/plain": [
"{3}"
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# You might need to install the following packages:\n",
"# python -m pip install networkx pandas numpy\n",
"import numpy as np\n",
"import pandas as pd\n",
"import networkx as nx\n",
"\n",
"\n",
"# Generate 99 random uptimes between 0 and 1\n",
"uptimes = np.random.rand(99)\n",
"\n",
"# Some random latency matrix I found online\n",
"latency_matrix = pd.read_csv(\"https://raw.githubusercontent.com/uofa-rzhu3/NetLatency-Data/refs/heads/master/Seattle/SeattleData_1\", delimiter=\"\\t\", header=None)\n",
"latency_matrix.head()\n",
"\n",
"\n",
"# Initialize the adjacency matrix\n",
"uptime_matrix = np.ones((99, 99))\n",
"\n",
"# Fill in the adjacency matrix\n",
"for i in range(99):\n",
" for j in range(99):\n",
" if i != j: # If i and j are not the same, then the joint probability of both being up is the product of their individual probabilities (assuming independence-- otherwise, use Bayes Theorem).\n",
" uptime_matrix[i, j] = 1 - uptimes[i] * uptimes[j]\n",
" else: # otherwise, the uptime is just the uptime of the node itself\n",
" uptime_matrix[i, j] = uptimes[i]\n",
"\n",
"# Convert to numpy arrays\n",
"latency_matrix = latency_matrix.values\n",
"# uptime_matrix is already a numpy array\n",
"assert isinstance(latency_matrix, np.ndarray)\n",
"assert isinstance(uptime_matrix, np.ndarray)\n",
"\n",
"\n",
"\n",
"A1 = nx.from_numpy_array(latency_matrix)\n",
"A2 = nx.from_numpy_array(uptime_matrix)\n",
"\n",
"degree_centrality1 = nx.degree_centrality(A1)\n",
"degree_centrality2 = nx.degree_centrality(A2)\n",
"\n",
"N_MOST_CENTRAL = 10\n",
"\n",
"def find_best_for_both(mat1, mat2, n_most_central = 10):\n",
" sorted_mat1 = sorted(mat1.items(), key=lambda x: x[1], reverse=True)\n",
" sorted_mat2 = sorted(mat2.items(), key=lambda x: x[1], reverse=True)\n",
" # Compute the set of nodes that are in the top n_most_central for both matrices\n",
" top_nodes1 = set([node for node, _ in sorted_mat1[:n_most_central]])\n",
" top_nodes2 = set([node for node, _ in sorted_mat2[:n_most_central]])\n",
" top_nodes = top_nodes1.intersection(top_nodes2)\n",
" length = len(top_nodes)\n",
" if length == 0:\n",
" print(f\"Could not find any nodes that are in the top {n_most_central} for both matrices. Try increasing n_most_central.\")\n",
" elif length > 1:\n",
" print(f\"Found {length} nodes that are in the top {n_most_central} for both matrices: {top_nodes}.\")\n",
" else:\n",
" print(f\"Found a single node that is in the top {n_most_central} for both matrices: {top_nodes}.\")\n",
" return top_nodes\n",
"\n",
"\n",
"def find_top_node(mat1, mat2, n_most_central = 10, max_loops = 100):\n",
" top_nodes = []\n",
" i = 1\n",
" add_flag = False\n",
" subtract_flag = False\n",
" while len(top_nodes) != 1 and i <= max_loops:\n",
" print(f\"Trying n={n_most_central}\")\n",
" if add_flag is True and subtract_flag is True:\n",
" print(f\"Could not find a single node that is in the top {n_most_central} for both matrices. Found {len(top_nodes)} equivalent nodes.\")\n",
" break\n",
" top_nodes = find_best_for_both(mat1, mat2, n_most_central=n_most_central)\n",
" if len(top_nodes) < 1:\n",
" n_most_central = n_most_central + 1\n",
" add_flag = True\n",
" elif len(top_nodes) > 1:\n",
" n_most_central = n_most_central - 1\n",
" subtract_flag = True\n",
" else:\n",
" if i +1 > max_loops:\n",
" print(f\"Completed {max_loops} loops without finding a single node that is in the top {n_most_central} for both matrices. Found {len(top_nodes)} equivalent nodes. Try increasing max_loops or decreasing the value of n_most_central.\")\n",
" i = i + 1\n",
" return top_nodes\n",
"\n",
"find_top_node(degree_centrality1, degree_centrality2, N_MOST_CENTRAL, 10)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'gzip_classifer'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgzip_classifer\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GzipKNNClassifier\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'gzip_classifer'"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment