{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import glob\n", "import os\n", "import glob\n", "import subprocess\n", "\n", "from os import path\n", "\n", "np.seterr(all='raise')\n", "\n", "def metis_partition_file_converter(inp, out):\n", " import sys\n", "\n", " partitions = {}\n", "\n", " def add(el, idx):\n", " if idx not in partitions:\n", " partitions[idx] = [el]\n", " else:\n", " partitions[idx].append(el)\n", "\n", "\n", " with open(inp, \"r+\") as f:\n", " el = 0\n", " for line in f:\n", " idx = int(line.strip())\n", " add(el, idx)\n", " el += 1\n", "\n", " with open(out, \"w+\") as f:\n", " idxs = list(partitions.keys())\n", " idxs.sort()\n", " assert(idxs[0] <= idxs[-1])\n", " for ix in idxs:\n", " f.write(\" \".join([str(e) for e in partitions[ix]]) + \"\\n\")\n", "\n", "\n", "import numpy as np\n", "\n", "\n", "def graph_and_cut_to_numpy(gf, cf):\n", "\n", " graph = {}\n", " with open(gf, \"r\") as f:\n", " first_row = f.readline().strip(\"\\n\").split(\" \")\n", " while [] in first_row:\n", " first_row.remove([])\n", " print(\"n, e\", first_row)\n", " n, e = [int(e) for e in first_row]\n", " for i, row in enumerate(f.readlines()):\n", " graph[i + 1] = []\n", " for v in row.strip(\"\\n\").split(\" \"):\n", " if v == \"\":\n", " continue\n", " assert(int(v))\n", " graph[i + 1].append(int(v))\n", " try:\n", " graph[int(v)].append(int(i + 1))\n", " except KeyError:\n", " graph[int(v)] = [int(i + 1)]\n", "\n", " clusters = []\n", " #TOFIX: clusters are 0 index\n", " if cf:\n", " with open(cf, \"r\") as f:\n", " n_counter = 0\n", " e_counter = 0\n", " for i, row in enumerate(f.readlines()):\n", " clusters.append([int(e) + 1 for e in row.strip(\"\\n\").split(\" \") if e != \"\"])\n", " #print(clusters[-1])\n", " n_counter += 1\n", " e_counter += len(clusters[-1])\n", " else:\n", " clusters = [[i for i in range(1, len(graph) + 1)]]\n", "\n", " return graph, clusters\n", "\n", "\n", "def test(graph_f, cut_f=None, also_test_full_graph=False):\n", " graph, cuts = graph_and_cut_to_numpy(graph_f, cut_f)\n", " #graph, cuts = graph_and_cut_to_numpy(\"graphs/whitaker3.graph\", \"old_results/whitaker3.graph.part.94.chaco\")\n", " subgs = []\n", "\n", " for clidx, cut in enumerate(cuts):\n", "\n", " print(\"Start working on cluster;\", clidx)\n", " if len(cut) == 1:\n", " print(\"Cut len is 1, continue\")\n", " continue\n", "\n", " v_set = set()\n", " for el in cut:\n", " v_set.add(int(el))\n", "\n", " #print(v_set)\n", " n = len(v_set)\n", "\n", " subg = {k:[] for k in cut}\n", " #print(\"first element in cut\", cut[0])\n", " for k in graph:\n", " for v in graph[k]:\n", " if v in v_set and k in v_set:\n", " subg[k].append(v)\n", "\n", "\n", " rekey_dict = {v:i for i, v in enumerate(sorted(list(subg.keys())))}\n", " rekey_reverse = {i:v for i, v in enumerate(sorted(list(subg.keys())))}\n", "\n", " success = True\n", " #new_subg = [[0 for _ in range(n)] for _ in range(n)]\n", " new_subg = sparse.dok_matrix((n, n))\n", "\n", " for k in subg: \n", "\n", " if len(subg[k]) == 0:\n", " print(\"WARNING - disconnected cluster;\", clidx, \"number of nodes in subgraph is;\", len(cut))\n", " success = False\n", " break\n", "\n", " for v in subg[k]:\n", " new_subg[rekey_dict[k], rekey_dict[v]] = 1\n", " new_subg[rekey_dict[v], rekey_dict[k]] = 1\n", "\n", " #new_subg[rekey_dict[k], rekey_dict[v]] += 1\n", " #new_subg[rekey_dict[v], rekey_dict[k]] += 1\n", "\n", " if not success:\n", " continue\n", "\n", "\n", " steps = test_convergence(new_subg, 0.01)\n", "\n", " print(\"Cluster\", clidx, \"of size: \", n, \"took n steps to converge;\", steps)\n", "\n", " import math\n", " print(\"steps/ log2 nodes;\", steps/math.log2(n))\n", "\n", " #Scipy and numpy can't calculate eigenvalues quick\n", " \n", " n = len(graph)\n", " if also_test_full_graph: #n <= 5000\n", " trivial_subg = sparse.dok_matrix((n, n))\n", " for i, k in enumerate(graph):\n", " for j, _ in enumerate(graph[k]):\n", " #trivial_subg[i][j] = 1\n", " trivial_subg[i, j] = 1\n", "\n", " steps = test_convergence(trivial_subg, 0.01)\n", " print(\"Whole graph took n steps to converge:\", steps)\n", " import math\n", " print(\"steps/ log2 nodes\", steps/math.log2(n))\n", " \n", "\n", "\n", "import glob\n", "\n", "\n", "def run_decomp(graph_files, phi=0.01):\n", " for graph in graph_files:\n", " print(\"decomp on;\", graph)\n", "\n", " #run decomposition on all graphs with time limit\n", " out_path = graph + \".out\"\n", " #os.system\n", " #subprocess.check_call('time -p timeout 15m ./a.out -S --G_phi=0.01 --H_phi=0.4 --vol=1 --h_ratio=0. -f $graph > \"$out_path\"', shell=True)\n", " try:\n", " subprocess.check_call(\"time -p timeout 15m ./a.out -S --G_phi=\" + str(phi) + \" --H_phi=0.4 --vol=1 --h_ratio=0. -f \" + graph + \" > \" + out_path, shell=True)\n", " except Exception as e:\n", " print(\"Decomp failed with\", e)\n", " #Too long time?\n", " continue\n", "\n", " #TODO return code\n", " if not glob.glob(graph + \"cut.txt\"):\n", " print(\"decomp on\", graph, \"did not finish in time\")\n", " continue\n", "\n", " #how many clusters, k, did we get?\n", " with open(out_path) as f:\n", " lns = f.readlines()\n", " cluster_line = next(l for l in lns if \"n clusters\" in l)\n", " cluster_line.strip(\"\\n\")\n", " n_clusters = int(cluster_line.split(\";\")[1])\n", "\n", " if n_clusters <= 1:\n", " print(\"No cut found, continue\")\n", " continue\n", "\n", " print(\"n clusters found\", n_clusters)\n", " #run metis on k\n", " metis_stdio_path = graph + \".out.metis\"\n", "\n", "\n", " try:\n", " subprocess.check_call(\"/usr/bin/gpmetis -ufactor=1000 \" + graph + \" \" + str(n_clusters) + \" -contig > \" + metis_stdio_path, shell=True)\n", " metis_file = graph + \".part.\" + str(n_clusters)\n", " decomp_file = graph + \"cut.txt\"\n", " \n", " metis_partition_file_converter(metis_file, metis_file)\n", "\n", "\n", " except Exception as e:\n", " print(\"METIS failed\", e)\n", "\n", " rw_graph = graph + \".row_whole\"\n", " rw_file_ours = graph + \".rw_ours\"\n", " rw_file_metis = graph + \".rw_metis\"\n", "\n", " #TOFIX This should be saving to file!\n", " try:\n", " print(\"--- DECOMP OURS ---\")\n", " test(graph, decomp_file)\n", " print(\"--- DECOMP METIS ---\")\n", " test(graph, metis_file) \n", " except Exception as e:\n", " print(\"Failed rw?\", e) \n", "\n", " bname = os.path.basename(graph).split(\".\")[0]\n", " os.system('mkdir -p results/\"$bname\"')\n", " for f in glob.glob(\"\".join(graph.split(\".\")[:-1]) + \"*\"):\n", " if f != graph:\n", " os.system('mv $f results/\"$bname\"/')\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from sklearn.preprocessing import normalize\n", "from sklearn.metrics import pairwise_distances\n", "import scipy.sparse as sparse\n", "import torch\n", "import scipy\n", "import time\n", "import datetime\n", "import threading\n", "\n", "#TODO check this for double links, self-loops\n", "def test_convergence(matrix, threshold):\n", "\n", " n = matrix.shape[0]\n", " e = matrix.sum()\n", "\n", " print(\"n;\", n)\n", " print(\"e;\", e)\n", "\n", " #Number of in-links\n", " #Axii are inverted for some reason\n", " colsums = [i.item() for i in scipy.array((matrix.sum(axis=0))).T]\n", " uniform = np.array([0 for _ in range(n)], dtype=np.float64)\n", "\n", " #Stationary distribution is uniform normalized by in-links\n", " for i in range(n):\n", " uniform[i] = colsums[i]/e\n", " \n", " assert(0.99 <= uniform.sum() <= 1.01)\n", " assert(-0.01 < sum(colsums) - e < 0.01)\n", "\n", " #TODO seed the random start\n", " walk = np.array([1.] + [0 for _ in range(n - 1)], dtype=np.float64)\n", " walk = torch.DoubleTensor(walk)\n", "\n", " #Every node has positive in and out link\n", " sums = matrix.sum(axis=1)\n", " assert((sums != 0).any())\n", " sums = matrix.sum(axis=0)\n", " assert((sums != 0).any())\n", "\n", " rowsums = matrix.sum(axis=1)\n", " for i in range(n):\n", " matrix[i, i] = rowsums[i].item()\n", " cnt = rowsums[i] * 2\n", " matrix[i] /= cnt\n", " #assert(0.999 <= matrix[i].sum() <= 1.001)\n", "\n", " matrix = matrix.tocoo() \n", " values = matrix.data\n", " indices = np.vstack((matrix.row, matrix.col))\n", "\n", " #Number of edges and self-loops\n", " assert(len(matrix.row) == len(matrix.col) == e + n)\n", "\n", " matrix = torch.sparse.DoubleTensor(torch.LongTensor(indices), torch.DoubleTensor(values), torch.Size(matrix.shape))\n", "\n", " all_ones = torch.DoubleTensor([1. for n in range(n)])\n", "\n", " assert(torch.allclose(all_ones, torch.sparse.sum(matrix, dim=1).to_dense()))\n", "\n", " uniform = torch.from_numpy(uniform)\n", " dist = torch.norm(walk - uniform, 1)\n", "\n", " steps = 0\n", "\n", " if torch.cuda.is_available(): \n", " matrix = matrix.to(\"cuda\")\n", " walk = walk.to(\"cuda\")\n", " uniform = uniform.to(\"cuda\")\n", " all_ones = all_ones.to(\"cuda\")\n", "\n", " print(\"Start walk\")\n", "\n", " step_lim = 10*n**3\n", " print_max = 15\n", " timeout_minutes = 10\n", "\n", " time_start = time.time()\n", " \n", " TIMER = 15.0\n", "\n", " def progress():\n", " p = threading.Timer(TIMER, lambda: progress())\n", " p.daemon=True\n", " p.start()\n", " print(datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"), \" - \", dist)\n", "\n", " #p = threading.Timer(TIMER, lambda: progress())\n", " #p.daemon=True\n", " #p.start()\n", "\n", " save_sum = dist.item()\n", " mm_transposed = matrix.transpose(1, 0)\n", " while dist > threshold and steps < step_lim: \n", " walk = torch.sparse.mm(mm_transposed, walk.unsqueeze(-1)).squeeze(-1)\n", " #walk = torch.sparse.mm(matrix, walk.unsqueeze(-1)).squeeze(-1)\n", "\n", " assert(0.99 < torch.sum(walk) < 1.01)\n", " assert(0.99 < torch.sum(uniform) < 1.01)\n", " assert(-0.02 < torch.sum(dist) < 2.02)\n", " assert(-0.01 < torch.sum(walk - uniform) < 0.01)\n", "\n", " steps += 1\n", " assert(walk.shape == uniform.shape)\n", " dist = torch.norm(walk - uniform, 1)\n", " #Naive summation for higher precision\n", " #dist = 0.\n", " #np_walk = walk.tonumpy()\n", " #can be created just once\n", " #np_unif = unif.tonumpy()\n", " #dist = np.linalg.norm(np_walk - np_unif, 1)\n", "\n", " #for i in range(n):\n", " # dist += (abs(walk[i] - uniform[i]))\n", "\n", " if steps % 5000 == 0:\n", " print(\"Dist\", dist.item())\n", "\n", " \"\"\"\n", " if (time.time() - time_start) > timeout_minutes:\n", " print(\"No convergence! \")\n", " return -1\n", " \"\"\"\n", "\n", " assert(dist.item() <= save_sum)\n", " save_sum = dist\n", "\n", " #p.cancel()\n", " print(\"dist;\", dist)\n", "\n", " return steps" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "finan512_metis=!awk '{print NF}' results/finan512/finan512.graph.part.32\n", "finan512_metis=[int(e) for e in finan512_metis]\n", "finan512=!awk '{print NF}' results/finan512/finan512.graphcut.txt\n", "finan512=[int(e) for e in finan512]\n", "\n", "plt.hist([finan512, finan512_metis], bins=3, label=['Ours', 'METIS'])\n", "plt.xlabel(\"Cluster volume\")\n", "plt.ylabel(\"Number of clusters\")\n", "plt.legend(loc='upper right')\n", "plt.title(\"finan512\")\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "t60k_metis=!awk '{print NF}' results/t60k/t60k.graph.part.60\n", "t60k_metis=[int(e) for e in t60k_metis]\n", "t60k=!awk '{print NF}' results/t60k/t60k.graphcut.txt\n", "t60k=[int(e) for e in t60k]\n", "plt.xlabel(\"Cluster volume\")\n", "plt.ylabel(\"Number of clusters\")\n", "\n", "plt.hist([t60k, t60k_metis], bins=20, label=['Ours', 'METIS'])\n", "plt.legend(loc='upper right')\n", "plt.title(\"t60k\")\n", "#plt.text(0.99,-0.1,'Volume of induced subgraphs', horizontalalignment='right')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import glob\n", "import os\n", "\n", "our = []\n", "metis = []\n", "\n", "graph_paths = sorted(glob.glob(\"results/*\"))\n", "for graph in [os.path.basename(graph_path) for graph_path in graph_paths]:\n", "\n", " our_path = glob.glob(os.path.join(\"results\", graph) + \"/*graphcut*\")\n", " our_path = our_path[0]\n", "\n", " metis_path = glob.glob(os.path.join(\"results\", graph, \"*.part.*\"))[0]\n", " our_=!awk '{{print NF}}' $our_path\n", " #!awk '{{print NF}}' \"$our_path\"\n", "\n", " our_=[int(e) for e in our_]\n", " metis_=!awk '{{print NF}}' $metis_path\n", "\n", " metis_=[int(e) for e in metis_]\n", " our.append(our_)\n", " metis.append(metis_)\n", "\n", "\n", "from math import log10\n", "\n", "plt.figure()\n", "\n", "our_box = plt.boxplot([[e for e in lst] for lst in our], positions=np.array(range(len(our))) *2.0 -0.5, sym=\"\", widths=0.5)\n", "met_box = plt.boxplot([[e for e in lst] for lst in metis], positions=np.array(range(len(metis)))*2.0 +0.5, sym=\"\", widths=0.5)\n", "\n", "plt.setp(our_box['caps'], color=\"C0\")\n", "plt.setp(our_box['medians'], color=\"C0\")\n", "plt.setp(our_box['boxes'], color=\"C0\")\n", "plt.setp(our_box['whiskers'],color=\"C0\")\n", "\n", "plt.setp(met_box['caps'], color=\"C1\")\n", "plt.setp(met_box['medians'], color=\"C1\")\n", "plt.setp(met_box['boxes'], color=\"C1\")\n", "plt.setp(met_box['whiskers'],color=\"C1\")\n", "\n", "plt.yscale('log')\n", "\n", "plt.plot([], c='C0', label='Ours')\n", "plt.plot([], c='C1', label='METIS')\n", "\n", "plt.legend(loc=\"lower left\")\n", "\n", "plt.xticks(range(0, 24, 2), [\"3elt\", \"4elt\", \"add32\", \"crack\", \"cs4\", \"cti\", \"data\", \"fe_4elt2\", \"finan512\", \"t60k\", \"uk\", \"whitaker3\"], rotation=45, ha=\"right\")\n", "\n", "plt.ylabel(\"cluster size\")\n", "plt.xlim(-2, 24)\n", "plt.ylim(0, 25000)\n", "\n", "plt.tight_layout()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "from matplotlib.ticker import PercentFormatter\n", "\n", "balances = !cat results/*/*.out | grep \"vol graph\"\n", "balances = [float(e.split(\";\")[1]) for e in balances]\n", "balances = [b for b in balances if b != -1]\n", "print(balances)\n", "print(len(balances))\n", "print(max(balances))\n", "\n", "plt.xlabel(\"Cut balance \")\n", "plt.ylabel(\"Proportion of clusters\")\n", "\n", "plt.hist(balances, bins=10, label=['Vol(cut) / Vol(graph)'], weights=np.ones(len(balances)) / len(balances))\n", "plt.legend(loc='upper right')\n", "plt.title(\"Cut balance (all graphs)\")\n", "plt.text(0.99,-0.1,'Here cut refers to the side with higher volume.\\n There were 171 cuts in total.', horizontalalignment='right')\n", "plt.gca().yaxis.set_major_formatter(PercentFormatter(1))\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sys\n", "import networkx as nx\n", "import matplotlib.pyplot as plt\n", "\n", "graph_file = \"synthetic/margulis_25.graph\"\n", "\n", "G = nx.Graph()\n", "with open(graph_file) as f_g:\n", " first_line = f_g.readline().split()\n", " n_nodes, n_edges = int(first_line[0]), int(first_line[1])\n", " iter = 0\n", " G.add_nodes_from([i for i in range(n_nodes)])\n", " for line in f_g:\n", " line_split = line.split()\n", " if len(line_split) <= 0:\n", " iter += 1\n", " continue\n", " base_node = int(line_split[0])\n", " for neighbor in line_split:\n", " G.add_edge(iter, int(neighbor) - 1)\n", " iter += 1\n", "\n", "colors = [10 for _ in range(n_nodes)]\n", "\n", "nx.draw_networkx(G, with_labels=False, node_color=colors, cmap=plt.cm.Blues, pos=nx.spring_layout(G))\n", "plt.show()\n", "nx.draw_circular(G, with_labels=False, node_color=colors, cmap=plt.cm.Blues)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "gls = list(glob.glob(\"results/*t60k*/*graphcut*\"))\n", "gls.sort()\n", "print(gls)\n", "#gls = gls[:2]\n", "\n", "for gcut in gls:\n", " print(gcut)\n", " bname = os.path.basename(gcut)\n", " graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph \n", " test(graph, gcut)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "gls = list(glob.glob(\"results/*add32*/*graphcut*\"))\n", "\n", "gls.sort()\n", "print(gls)\n", "#gls = gls[:2]\n", "\n", "for gcut in gls:\n", " print(gcut)\n", " bname = os.path.basename(gcut)\n", " #graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph \n", " graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n", " #test(graph, None) #Use whole graph as cut\n", " test(graph, gcut) #Use whole graph as cut\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Only test graph\n", "\n", "test(\"synthetic/barbell200-200.graph\", None)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Only test graph\n", "\n", "whitelist = [\"3elt\",\"4elt\",\"add20\",\"add32\",\"bcsstk33\",\"crack\",\"cs4\",\"cti\",\"data\",\"fe_4elt2\",\"fe_pwt\",\"fe_sphere\",\"finan512\",\"memplus\",\"t60k\",\"uk\",\"whitaker3\",\"wing_nodal\"]\n", "whitelist = [\"t60k\",\"uk\",\"whitaker3\",\"wing_nodal\"]\n", "\n", "files = []\n", "\n", "for s in whitelist:\n", " files += glob.glob(\"graphs/\" + s + \".graph\")\n", "\n", "files.sort()\n", "for f in files:\n", " print(f)\n", " try:\n", " test(f, None)\n", " except AssertionError:\n", " print(\"walk failed\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "gls = list(glob.glob(\"graphs/*\"))\n", "\n", "gls = list(glob.glob(\"results/*/*graphcut*\"))\n", "gls2 = list(glob.glob(\"results/*/*.part.*\"))\n", "print(gls)\n", "print(gls2)\n", "\n", "print(\"START; OUR\")\n", "for g in gls:\n", " cont = False\n", "\n", " gp = \"graphs/\" + os.path.basename(g.split(\".\")[0]) + \".graph\"\n", " #print(g, gp)\n", " #graph, clusters = graph_and_cut_to_numpy(gp, g)\n", " try:\n", " test(gp, g)\n", " except:\n", " print(\"???\", \"FAILED ON;\", g)\n", "\n", "print(\"DONE OUR; METIS\")\n", "for g in gls2:\n", " gp =\"graphs/\" + os.path.basename(g.split(\".\")[0]) + \".graph\"\n", " print(g, gp)\n", " #graph, clusters = graph_and_cut_to_numpy(gp, g)\n", " try:\n", " test(gp, g)\n", " except:\n", " print(\"???\", \"FAILED ON;\", g)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "gls = list(glob.glob(\"results/*/add32*part*\"))\n", "gls.sort()\n", "#gls = gls[:2]\n", "for gcut in gls:\n", " print(gcut)\n", " bname = os.path.basename(gcut)\n", " graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n", " test(graph, gcut)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "gls = list(glob.glob(\"results/add32/*graphcut*\"))\n", "gls.sort()\n", "#gls = gls[:2]\n", "for gcut in gls:\n", " print(gcut)\n", " bname = os.path.basename(gcut)\n", " graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n", " test(graph, gcut)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import glob\n", "gls = list(glob.glob(\"results/*/add32*part*\"))\n", "gls.sort()\n", "#gls = gls[:2]\n", "for gcut in gls:\n", " print(gcut)\n", " bname = os.path.basename(gcut)\n", " graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n", " test(graph, gcut)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "import glob\n", "import subprocess\n", "\n", "for phi in [0.01, 0.025, 0.05]:\n", " gls = list(glob.glob(\"graphs/whitaker3.graph\"))\n", " run_decomp(gls, phi)\n", " print(\"phi=\", phi)\n", " gls = glob.glob(\"graphs/whitaker3.graphcut.txt\")\n", " for gcut in gls:\n", " print(gcut)\n", " bname = os.path.basename(gcut)\n", " graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph (only works if k is single digit)\n", " test(graph, gcut)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "size_steps_tuples = []\n", "size_steps_tuples_metis = []\n", "with open(\"walks_clusters.txt\") as f:\n", " lines = f.read()\n", " _, lines_our, lines_metis= lines.split(\"OUR\")\n", " print(lines_our[:100])\n", " print(\"###\")\n", " print(lines_metis[:100])\n", " print(\"@@@\")\n", " lines_our = lines_our.split(\"n, e \")\n", " lines_our = [line.split(\"\\n\") for line in lines_our]\n", " lines_metis = lines_metis.split(\"n, e \")\n", " lines_metis = [line.split(\"\\n\") for line in lines_metis]\n", "\n", "\n", " for i, graph_separated_output in enumerate(lines_our):\n", " #print(\"graph\", i)\n", "\n", " counts = [l for l in graph_separated_output if \"to converge\" in l]\n", " #print(counts)\n", " if len(counts) <= 0:\n", " continue\n", " #print([line.split(\" \") for line in counts])\n", " counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n", "\n", " #print(\"Len counts\", len(counts), counts)\n", " \n", " size_steps_tuples += counts\n", " for i, graph_separated_output in enumerate(lines_metis):\n", " #print(\"graph\", i)\n", "\n", " counts = [l for l in graph_separated_output if \"to converge\" in l]\n", " #print(counts)\n", " if len(counts) <= 0:\n", " continue\n", " #print([line.split(\" \") for line in counts])\n", " counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n", "\n", " #print(\"Len counts\", len(counts), counts)\n", " \n", " size_steps_tuples_metis += counts\n", "\n", "\n", "from math import log2\n", "#print(size_steps_tuples)\n", "#print(\"n clusters\", len(size_steps_tuples))\n", "\n", "plt.xlim(5, 15) # = 15\n", "plt.ylim(0, 30000) # = 30000\n", "a, b = np.polyfit(sorted([log2(v[0]) for v in size_steps_tuples], reverse=True), [v[1] for v in size_steps_tuples], 1)\n", "plt.scatter(sorted([log2(v[0]) for v in size_steps_tuples], reverse=True), [v[1] for v in size_steps_tuples], label=\"slope: \" + str(int(round(a))))\n", "plt.xlabel(\"cluster size (log2)\")\n", "plt.ylabel(\"steps until convergence\")\n", "plt.legend(loc='upper right')\n", "plt.title(\"Mixing rate on output clusters (ours)\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "size_steps_tuples = []\n", "size_steps_tuples_metis = []\n", "with open(\"walks_clusters.txt\") as f:\n", " lines = f.read()\n", " _, lines_our, lines_metis= lines.split(\"OUR\")\n", " print(lines_our[:100])\n", " print(\"###\")\n", " print(lines_metis[:100])\n", " print(\"@@@\")\n", " lines_our = lines_our.split(\"n, e \")\n", " lines_our = [line.split(\"\\n\") for line in lines_our]\n", " lines_metis = lines_metis.split(\"n, e \")\n", " lines_metis = [line.split(\"\\n\") for line in lines_metis]\n", "\n", "\n", " for i, graph_separated_output in enumerate(lines_our):\n", " #print(\"graph\", i)\n", "\n", " counts = [l for l in graph_separated_output if \"to converge\" in l]\n", " #print(counts)\n", " if len(counts) <= 0:\n", " continue\n", " #print([line.split(\" \") for line in counts])\n", " counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n", "\n", " #print(\"Len counts\", len(counts), counts)\n", " \n", " size_steps_tuples += counts\n", " for i, graph_separated_output in enumerate(lines_metis):\n", " #print(\"graph\", i)\n", "\n", " counts = [l for l in graph_separated_output if \"to converge\" in l]\n", " #print(counts)\n", " if len(counts) <= 0:\n", " continue\n", " #print([line.split(\" \") for line in counts])\n", " counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n", "\n", " #print(\"Len counts\", len(counts), counts)\n", " \n", " size_steps_tuples_metis += counts\n", "\n", "from math import log2\n", "import numpy as np\n", "\n", "a, b = np.polyfit(sorted([log2(v[0]) for v in size_steps_tuples_metis], reverse=True), [v[1] for v in size_steps_tuples_metis], 1)\n", "print(a, b)\n", "plt.scatter(sorted([log2(v[0]) for v in size_steps_tuples_metis], reverse=True), [v[1] for v in size_steps_tuples_metis], label=\"slope: \" + str(int(round(a))), color='C1')\n", "plt.xlabel(\"cluster size (log2)\")\n", "plt.ylabel(\"steps until convergence\")\n", "plt.legend(loc='upper right')\n", "plt.title(\"Mixing rate on output clusters (METIS)\")\n", "plt.xlim(5, 15)\n", "plt.ylim(0, 31000)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with open(\"walks_graphs.txt\") as f:\n", " lines = f.read()\n", " lines = lines.split(\".graph\\n\")\n", " lines = [line.split(\"\\n\") for line in lines]\n", " print(lines[:10])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython" }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python" } }, "nbformat": 4, "nbformat_minor": 2 }