simplymathematics · October 9, 2024 16:01
diff --git a/adjacency.ipynb b/adjacency.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Trying n=10\n",
      "Found 4 nodes that are in the top 10 for both matrices: {8, 2, 3, 5}.\n",
      "Trying n=9\n",
      "Found 3 nodes that are in the top 9 for both matrices: {2, 3, 5}.\n",
      "Trying n=8\n",
      "Found 2 nodes that are in the top 8 for both matrices: {2, 3}.\n",
      "Trying n=7\n",
      "Found a single node that is in the top 7 for both matrices: {3}.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{3}"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# You might need to install the following packages:\n",
    "# python -m pip install networkx pandas numpy\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import networkx as nx\n",
    "\n",
    "\n",
    "# Generate 99 random uptimes between 0 and 1\n",
    "uptimes = np.random.rand(99)\n",
    "\n",
    "# Some random latency matrix I found online\n",
    "latency_matrix = pd.read_csv(\"https://raw.githubusercontent.com/uofa-rzhu3/NetLatency-Data/refs/heads/master/Seattle/SeattleData_1\", delimiter=\"\\t\", header=None)\n",
    "latency_matrix.head()\n",
    "\n",
    "\n",
    "# Initialize the adjacency matrix\n",
    "uptime_matrix = np.ones((99, 99))\n",
    "\n",
    "# Fill in the adjacency matrix\n",
    "for i in range(99):\n",
    "    for j in range(99):\n",
    "        if i != j: # If i and j are not the same, then the joint probability of both being up is the product of their individual probabilities (assuming independence-- otherwise, use Bayes Theorem).\n",
    "            uptime_matrix[i, j] = 1 - uptimes[i] * uptimes[j]\n",
    "        else: # otherwise, the uptime is just the uptime of the node itself\n",
    "            uptime_matrix[i, j] = uptimes[i]\n",
    "\n",
    "# Convert to numpy arrays\n",
    "latency_matrix = latency_matrix.values\n",
    "# uptime_matrix is already a numpy array\n",
    "assert isinstance(latency_matrix, np.ndarray)\n",
    "assert isinstance(uptime_matrix, np.ndarray)\n",
    "\n",
    "\n",
    "\n",
    "A1 = nx.from_numpy_array(latency_matrix)\n",
    "A2 = nx.from_numpy_array(uptime_matrix)\n",
    "\n",
    "degree_centrality1 = nx.degree_centrality(A1)\n",
    "degree_centrality2 = nx.degree_centrality(A2)\n",
    "\n",
    "N_MOST_CENTRAL = 10\n",
    "\n",
    "def find_best_for_both(mat1, mat2, n_most_central = 10):\n",
    "    sorted_mat1 = sorted(mat1.items(), key=lambda x: x[1], reverse=True)\n",
    "    sorted_mat2 = sorted(mat2.items(), key=lambda x: x[1], reverse=True)\n",
    "    # Compute the set of nodes that are in the top n_most_central for both matrices\n",
    "    top_nodes1 = set([node for node, _ in sorted_mat1[:n_most_central]])\n",
    "    top_nodes2 = set([node for node, _ in sorted_mat2[:n_most_central]])\n",
    "    top_nodes = top_nodes1.intersection(top_nodes2)\n",
    "    length = len(top_nodes)\n",
    "    if length == 0:\n",
    "        print(f\"Could not find any nodes that are in the top {n_most_central} for both matrices. Try increasing n_most_central.\")\n",
    "    elif length > 1:\n",
    "        print(f\"Found {length} nodes that are in the top {n_most_central} for both matrices: {top_nodes}.\")\n",
    "    else:\n",
    "        print(f\"Found a single node that is in the top {n_most_central} for both matrices: {top_nodes}.\")\n",
    "    return top_nodes\n",
    "\n",
    "\n",
    "def find_top_node(mat1, mat2, n_most_central = 10, max_loops = 100):\n",
    "    top_nodes = []\n",
    "    i = 1\n",
    "    add_flag = False\n",
    "    subtract_flag = False\n",
    "    while len(top_nodes) != 1 and i <= max_loops:\n",
    "        print(f\"Trying n={n_most_central}\")\n",
    "        if add_flag is True and subtract_flag is True:\n",
    "            print(f\"Could not find a single node that is in the top {n_most_central} for both matrices. Found {len(top_nodes)} equivalent nodes.\")\n",
    "            break\n",
    "        top_nodes = find_best_for_both(mat1, mat2, n_most_central=n_most_central)\n",
    "        if len(top_nodes) < 1:\n",
    "            n_most_central = n_most_central + 1\n",
    "            add_flag = True\n",
    "        elif len(top_nodes) > 1:\n",
    "            n_most_central = n_most_central - 1\n",
    "            subtract_flag = True\n",
    "        else:\n",
    "            if i +1 > max_loops:\n",
    "                print(f\"Completed {max_loops} loops without finding a single node that is in the top {n_most_central} for both matrices. Found {len(top_nodes)} equivalent nodes. Try increasing max_loops or decreasing the value of n_most_central.\")\n",
    "        i = i + 1\n",
    "    return top_nodes\n",
    "\n",
    "find_top_node(degree_centrality1, degree_centrality2, N_MOST_CENTRAL, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'gzip_classifer'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgzip_classifer\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GzipKNNClassifier\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'gzip_classifer'"
     ]
    }
   ],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": 70,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Trying n=10\n",
	"Found 4 nodes that are in the top 10 for both matrices: {8, 2, 3, 5}.\n",
	"Trying n=9\n",
	"Found 3 nodes that are in the top 9 for both matrices: {2, 3, 5}.\n",
	"Trying n=8\n",
	"Found 2 nodes that are in the top 8 for both matrices: {2, 3}.\n",
	"Trying n=7\n",
	"Found a single node that is in the top 7 for both matrices: {3}.\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"{3}"
	]
	},
	"execution_count": 70,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# You might need to install the following packages:\n",
	"# python -m pip install networkx pandas numpy\n",
	"import numpy as np\n",
	"import pandas as pd\n",
	"import networkx as nx\n",
	"\n",
	"\n",
	"# Generate 99 random uptimes between 0 and 1\n",
	"uptimes = np.random.rand(99)\n",
	"\n",
	"# Some random latency matrix I found online\n",
	"latency_matrix = pd.read_csv(\"https://raw.githubusercontent.com/uofa-rzhu3/NetLatency-Data/refs/heads/master/Seattle/SeattleData_1\", delimiter=\"\\t\", header=None)\n",
	"latency_matrix.head()\n",
	"\n",
	"\n",
	"# Initialize the adjacency matrix\n",
	"uptime_matrix = np.ones((99, 99))\n",
	"\n",
	"# Fill in the adjacency matrix\n",
	"for i in range(99):\n",
	" for j in range(99):\n",
	" if i != j: # If i and j are not the same, then the joint probability of both being up is the product of their individual probabilities (assuming independence-- otherwise, use Bayes Theorem).\n",
	" uptime_matrix[i, j] = 1 - uptimes[i] * uptimes[j]\n",
	" else: # otherwise, the uptime is just the uptime of the node itself\n",
	" uptime_matrix[i, j] = uptimes[i]\n",
	"\n",
	"# Convert to numpy arrays\n",
	"latency_matrix = latency_matrix.values\n",
	"# uptime_matrix is already a numpy array\n",
	"assert isinstance(latency_matrix, np.ndarray)\n",
	"assert isinstance(uptime_matrix, np.ndarray)\n",
	"\n",
	"\n",
	"\n",
	"A1 = nx.from_numpy_array(latency_matrix)\n",
	"A2 = nx.from_numpy_array(uptime_matrix)\n",
	"\n",
	"degree_centrality1 = nx.degree_centrality(A1)\n",
	"degree_centrality2 = nx.degree_centrality(A2)\n",
	"\n",
	"N_MOST_CENTRAL = 10\n",
	"\n",
	"def find_best_for_both(mat1, mat2, n_most_central = 10):\n",
	" sorted_mat1 = sorted(mat1.items(), key=lambda x: x[1], reverse=True)\n",
	" sorted_mat2 = sorted(mat2.items(), key=lambda x: x[1], reverse=True)\n",
	" # Compute the set of nodes that are in the top n_most_central for both matrices\n",
	" top_nodes1 = set([node for node, _ in sorted_mat1[:n_most_central]])\n",
	" top_nodes2 = set([node for node, _ in sorted_mat2[:n_most_central]])\n",
	" top_nodes = top_nodes1.intersection(top_nodes2)\n",
	" length = len(top_nodes)\n",
	" if length == 0:\n",
	" print(f\"Could not find any nodes that are in the top {n_most_central} for both matrices. Try increasing n_most_central.\")\n",
	" elif length > 1:\n",
	" print(f\"Found {length} nodes that are in the top {n_most_central} for both matrices: {top_nodes}.\")\n",
	" else:\n",
	" print(f\"Found a single node that is in the top {n_most_central} for both matrices: {top_nodes}.\")\n",
	" return top_nodes\n",
	"\n",
	"\n",
	"def find_top_node(mat1, mat2, n_most_central = 10, max_loops = 100):\n",
	" top_nodes = []\n",
	" i = 1\n",
	" add_flag = False\n",
	" subtract_flag = False\n",
	" while len(top_nodes) != 1 and i <= max_loops:\n",
	" print(f\"Trying n={n_most_central}\")\n",
	" if add_flag is True and subtract_flag is True:\n",
	" print(f\"Could not find a single node that is in the top {n_most_central} for both matrices. Found {len(top_nodes)} equivalent nodes.\")\n",
	" break\n",
	" top_nodes = find_best_for_both(mat1, mat2, n_most_central=n_most_central)\n",
	" if len(top_nodes) < 1:\n",
	" n_most_central = n_most_central + 1\n",
	" add_flag = True\n",
	" elif len(top_nodes) > 1:\n",
	" n_most_central = n_most_central - 1\n",
	" subtract_flag = True\n",
	" else:\n",
	" if i +1 > max_loops:\n",
	" print(f\"Completed {max_loops} loops without finding a single node that is in the top {n_most_central} for both matrices. Found {len(top_nodes)} equivalent nodes. Try increasing max_loops or decreasing the value of n_most_central.\")\n",
	" i = i + 1\n",
	" return top_nodes\n",
	"\n",
	"find_top_node(degree_centrality1, degree_centrality2, N_MOST_CENTRAL, 10)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [
	{
	"ename": "ModuleNotFoundError",
	"evalue": "No module named 'gzip_classifer'",
	"output_type": "error",
	"traceback": [
	"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
	"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
	"Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mgzip_classifer\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GzipKNNClassifier\n",
	"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'gzip_classifer'"
	]
	}
	],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "env",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.11.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}