{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import glob\n",
    "import os\n",
    "import glob\n",
    "import subprocess\n",
    "\n",
    "from os import path\n",
    "\n",
    "np.seterr(all='raise')\n",
    "\n",
    "def metis_partition_file_converter(inp, out):\n",
    "    import sys\n",
    "\n",
    "    partitions = {}\n",
    "\n",
    "    def add(el, idx):\n",
    "        if idx not in partitions:\n",
    "            partitions[idx] = [el]\n",
    "        else:\n",
    "            partitions[idx].append(el)\n",
    "\n",
    "\n",
    "    with open(inp, \"r+\") as f:\n",
    "        el = 0\n",
    "        for line in f:\n",
    "            idx = int(line.strip())\n",
    "            add(el, idx)\n",
    "            el += 1\n",
    "\n",
    "    with open(out, \"w+\") as f:\n",
    "        idxs = list(partitions.keys())\n",
    "        idxs.sort()\n",
    "        assert(idxs[0] <= idxs[-1])\n",
    "        for ix in idxs:\n",
    "            f.write(\" \".join([str(e) for e in partitions[ix]]) + \"\\n\")\n",
    "\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def graph_and_cut_to_numpy(gf, cf):\n",
    "\n",
    "    graph = {}\n",
    "    with open(gf, \"r\") as f:\n",
    "        first_row = f.readline().strip(\"\\n\").split(\" \")\n",
    "        while [] in first_row:\n",
    "            first_row.remove([])\n",
    "        print(\"n, e\", first_row)\n",
    "        n, e = [int(e) for e in first_row]\n",
    "        for i, row in enumerate(f.readlines()):\n",
    "            graph[i + 1] = []\n",
    "            for v in row.strip(\"\\n\").split(\" \"):\n",
    "                if v == \"\":\n",
    "                    continue\n",
    "                assert(int(v))\n",
    "                graph[i + 1].append(int(v))\n",
    "                try:\n",
    "                    graph[int(v)].append(int(i + 1))\n",
    "                except KeyError:\n",
    "                    graph[int(v)] = [int(i + 1)]\n",
    "\n",
    "    clusters = []\n",
    "    #TOFIX: clusters are 0 index\n",
    "    if cf:\n",
    "        with open(cf, \"r\") as f:\n",
    "            n_counter = 0\n",
    "            e_counter = 0\n",
    "            for i, row in enumerate(f.readlines()):\n",
    "                clusters.append([int(e) + 1 for e in row.strip(\"\\n\").split(\" \") if e != \"\"])\n",
    "                #print(clusters[-1])\n",
    "                n_counter += 1\n",
    "            e_counter += len(clusters[-1])\n",
    "    else:\n",
    "        clusters = [[i for i in range(1, len(graph) + 1)]]\n",
    "\n",
    "    return graph, clusters\n",
    "\n",
    "\n",
    "def test(graph_f, cut_f=None, also_test_full_graph=False):\n",
    "    graph, cuts = graph_and_cut_to_numpy(graph_f, cut_f)\n",
    "    #graph, cuts = graph_and_cut_to_numpy(\"graphs/whitaker3.graph\", \"old_results/whitaker3.graph.part.94.chaco\")\n",
    "    subgs = []\n",
    "\n",
    "    for clidx, cut in enumerate(cuts):\n",
    "\n",
    "        print(\"Start working on cluster;\", clidx)\n",
    "        if len(cut) == 1:\n",
    "            print(\"Cut len is 1, continue\")\n",
    "            continue\n",
    "\n",
    "        v_set = set()\n",
    "        for el in cut:\n",
    "            v_set.add(int(el))\n",
    "\n",
    "        #print(v_set)\n",
    "        n = len(v_set)\n",
    "\n",
    "        subg = {k:[] for k in cut}\n",
    "        #print(\"first element in cut\", cut[0])\n",
    "        for k in graph:\n",
    "            for v in graph[k]:\n",
    "                if v in v_set and k in v_set:\n",
    "                    subg[k].append(v)\n",
    "\n",
    "\n",
    "        rekey_dict    = {v:i for i, v in enumerate(sorted(list(subg.keys())))}\n",
    "        rekey_reverse = {i:v for i, v in enumerate(sorted(list(subg.keys())))}\n",
    "\n",
    "        success = True\n",
    "        #new_subg = [[0 for _ in range(n)] for _ in range(n)]\n",
    "        new_subg = sparse.dok_matrix((n, n))\n",
    "\n",
    "        for k in subg: \n",
    "\n",
    "            if len(subg[k]) == 0:\n",
    "                print(\"WARNING - disconnected cluster;\", clidx, \"number of nodes in subgraph is;\", len(cut))\n",
    "                success = False\n",
    "                break\n",
    "\n",
    "            for v in subg[k]:\n",
    "                new_subg[rekey_dict[k], rekey_dict[v]] = 1\n",
    "                new_subg[rekey_dict[v], rekey_dict[k]] = 1\n",
    "\n",
    "                #new_subg[rekey_dict[k], rekey_dict[v]] += 1\n",
    "                #new_subg[rekey_dict[v], rekey_dict[k]] += 1\n",
    "\n",
    "        if not success:\n",
    "            continue\n",
    "\n",
    "\n",
    "        steps = test_convergence(new_subg, 0.01)\n",
    "\n",
    "        print(\"Cluster\", clidx, \"of size: \", n, \"took n steps to converge;\", steps)\n",
    "\n",
    "        import math\n",
    "        print(\"steps/ log2 nodes;\", steps/math.log2(n))\n",
    "\n",
    "        #Scipy and numpy can't calculate eigenvalues quick\n",
    "        \n",
    "        n = len(graph)\n",
    "    if also_test_full_graph: #n <= 5000\n",
    "        trivial_subg = sparse.dok_matrix((n, n))\n",
    "        for i, k in enumerate(graph):\n",
    "            for j, _ in enumerate(graph[k]):\n",
    "                #trivial_subg[i][j] = 1\n",
    "                trivial_subg[i, j] = 1\n",
    "\n",
    "        steps = test_convergence(trivial_subg, 0.01)\n",
    "        print(\"Whole graph took n steps to converge:\", steps)\n",
    "        import math\n",
    "        print(\"steps/ log2 nodes\", steps/math.log2(n))\n",
    "        \n",
    "\n",
    "\n",
    "import glob\n",
    "\n",
    "\n",
    "def run_decomp(graph_files, phi=0.01):\n",
    "    for graph in graph_files:\n",
    "        print(\"decomp on;\", graph)\n",
    "\n",
    "        #run decomposition on all graphs with time limit\n",
    "        out_path = graph + \".out\"\n",
    "        #os.system\n",
    "        #subprocess.check_call('time -p timeout 15m ./a.out  -S --G_phi=0.01 --H_phi=0.4 --vol=1 --h_ratio=0. -f $graph > \"$out_path\"', shell=True)\n",
    "        try:\n",
    "            subprocess.check_call(\"time -p timeout 15m ./a.out  -S --G_phi=\" + str(phi) + \" --H_phi=0.4 --vol=1 --h_ratio=0. -f \" +  graph + \" > \" + out_path, shell=True)\n",
    "        except Exception as e:\n",
    "            print(\"Decomp failed with\", e)\n",
    "            #Too long time?\n",
    "            continue\n",
    "\n",
    "        #TODO return code\n",
    "        if not glob.glob(graph + \"cut.txt\"):\n",
    "            print(\"decomp on\", graph, \"did not finish in time\")\n",
    "            continue\n",
    "\n",
    "        #how many clusters, k, did we get?\n",
    "        with open(out_path) as f:\n",
    "            lns = f.readlines()\n",
    "            cluster_line = next(l for l in lns if \"n clusters\" in l)\n",
    "            cluster_line.strip(\"\\n\")\n",
    "            n_clusters = int(cluster_line.split(\";\")[1])\n",
    "\n",
    "        if n_clusters <= 1:\n",
    "            print(\"No cut found, continue\")\n",
    "            continue\n",
    "\n",
    "        print(\"n clusters found\", n_clusters)\n",
    "        #run metis on k\n",
    "        metis_stdio_path = graph + \".out.metis\"\n",
    "\n",
    "\n",
    "        try:\n",
    "            subprocess.check_call(\"/usr/bin/gpmetis -ufactor=1000 \" +  graph + \" \" + str(n_clusters) + \" -contig >  \" + metis_stdio_path, shell=True)\n",
    "            metis_file = graph + \".part.\" + str(n_clusters)\n",
    "            decomp_file = graph + \"cut.txt\"\n",
    "            \n",
    "            metis_partition_file_converter(metis_file, metis_file)\n",
    "\n",
    "\n",
    "        except Exception as e:\n",
    "            print(\"METIS failed\", e)\n",
    "\n",
    "        rw_graph      = graph + \".row_whole\"\n",
    "        rw_file_ours  = graph + \".rw_ours\"\n",
    "        rw_file_metis = graph + \".rw_metis\"\n",
    "\n",
    "        #TOFIX This should be saving to file!\n",
    "        try:\n",
    "            print(\"--- DECOMP OURS ---\")\n",
    "            test(graph, decomp_file)\n",
    "            print(\"--- DECOMP METIS ---\")\n",
    "            test(graph, metis_file)  \n",
    "        except Exception as e:\n",
    "            print(\"Failed rw?\", e)      \n",
    "\n",
    "        bname = os.path.basename(graph).split(\".\")[0]\n",
    "        os.system('mkdir -p results/\"$bname\"')\n",
    "        for f in glob.glob(\"\".join(graph.split(\".\")[:-1]) + \"*\"):\n",
    "            if f != graph:\n",
    "                os.system('mv $f results/\"$bname\"/')\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn.preprocessing import normalize\n",
    "from sklearn.metrics import pairwise_distances\n",
    "import scipy.sparse as sparse\n",
    "import torch\n",
    "import scipy\n",
    "import time\n",
    "import datetime\n",
    "import threading\n",
    "\n",
    "#TODO check this for double links, self-loops\n",
    "def test_convergence(matrix, threshold):\n",
    "\n",
    "    n = matrix.shape[0]\n",
    "    e = matrix.sum()\n",
    "\n",
    "    print(\"n;\", n)\n",
    "    print(\"e;\", e)\n",
    "\n",
    "    #Number of in-links\n",
    "    #Axii are inverted for some reason\n",
    "    colsums = [i.item() for i in scipy.array((matrix.sum(axis=0))).T]\n",
    "    uniform = np.array([0 for _ in range(n)], dtype=np.float64)\n",
    "\n",
    "    #Stationary distribution is uniform normalized by in-links\n",
    "    for i in range(n):\n",
    "        uniform[i] = colsums[i]/e\n",
    "    \n",
    "    assert(0.99 <= uniform.sum() <= 1.01)\n",
    "    assert(-0.01 < sum(colsums) - e < 0.01)\n",
    "\n",
    "    #TODO seed the random start\n",
    "    walk = np.array([1.] + [0 for _ in range(n - 1)], dtype=np.float64)\n",
    "    walk = torch.DoubleTensor(walk)\n",
    "\n",
    "    #Every node has positive in and out link\n",
    "    sums = matrix.sum(axis=1)\n",
    "    assert((sums != 0).any())\n",
    "    sums = matrix.sum(axis=0)\n",
    "    assert((sums != 0).any())\n",
    "\n",
    "    rowsums = matrix.sum(axis=1)\n",
    "    for i in range(n):\n",
    "        matrix[i, i] = rowsums[i].item()\n",
    "        cnt = rowsums[i] * 2\n",
    "        matrix[i] /= cnt\n",
    "        #assert(0.999 <= matrix[i].sum() <= 1.001)\n",
    "\n",
    "    matrix = matrix.tocoo()   \n",
    "    values = matrix.data\n",
    "    indices = np.vstack((matrix.row, matrix.col))\n",
    "\n",
    "    #Number of edges and self-loops\n",
    "    assert(len(matrix.row) == len(matrix.col) == e + n)\n",
    "\n",
    "    matrix = torch.sparse.DoubleTensor(torch.LongTensor(indices), torch.DoubleTensor(values), torch.Size(matrix.shape))\n",
    "\n",
    "    all_ones = torch.DoubleTensor([1. for n in range(n)])\n",
    "\n",
    "    assert(torch.allclose(all_ones, torch.sparse.sum(matrix, dim=1).to_dense()))\n",
    "\n",
    "    uniform = torch.from_numpy(uniform)\n",
    "    dist = torch.norm(walk - uniform, 1)\n",
    "\n",
    "    steps = 0\n",
    "\n",
    "    if torch.cuda.is_available():        \n",
    "        matrix   = matrix.to(\"cuda\")\n",
    "        walk     = walk.to(\"cuda\")\n",
    "        uniform  = uniform.to(\"cuda\")\n",
    "        all_ones = all_ones.to(\"cuda\")\n",
    "\n",
    "    print(\"Start walk\")\n",
    "\n",
    "    step_lim = 10*n**3\n",
    "    print_max = 15\n",
    "    timeout_minutes = 10\n",
    "\n",
    "    time_start = time.time()\n",
    "    \n",
    "    TIMER = 15.0\n",
    "\n",
    "    def progress():\n",
    "        p = threading.Timer(TIMER, lambda: progress())\n",
    "        p.daemon=True\n",
    "        p.start()\n",
    "        print(datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\"), \" - \", dist)\n",
    "\n",
    "    #p = threading.Timer(TIMER, lambda: progress())\n",
    "    #p.daemon=True\n",
    "    #p.start()\n",
    "\n",
    "    save_sum = dist.item()\n",
    "    mm_transposed = matrix.transpose(1, 0)\n",
    "    while dist > threshold and steps < step_lim: \n",
    "        walk = torch.sparse.mm(mm_transposed, walk.unsqueeze(-1)).squeeze(-1)\n",
    "        #walk = torch.sparse.mm(matrix, walk.unsqueeze(-1)).squeeze(-1)\n",
    "\n",
    "        assert(0.99  < torch.sum(walk)    < 1.01)\n",
    "        assert(0.99  < torch.sum(uniform) < 1.01)\n",
    "        assert(-0.02 < torch.sum(dist)    < 2.02)\n",
    "        assert(-0.01 < torch.sum(walk - uniform) < 0.01)\n",
    "\n",
    "        steps += 1\n",
    "        assert(walk.shape == uniform.shape)\n",
    "        dist = torch.norm(walk - uniform, 1)\n",
    "        #Naive summation for higher precision\n",
    "        #dist = 0.\n",
    "        #np_walk = walk.tonumpy()\n",
    "        #can be created just once\n",
    "        #np_unif = unif.tonumpy()\n",
    "        #dist = np.linalg.norm(np_walk - np_unif, 1)\n",
    "\n",
    "        #for i in range(n):\n",
    "        #    dist += (abs(walk[i] - uniform[i]))\n",
    "\n",
    "        if steps % 5000 == 0:\n",
    "            print(\"Dist\", dist.item())\n",
    "\n",
    "        \"\"\"\n",
    "        if (time.time() - time_start) > timeout_minutes:\n",
    "            print(\"No convergence! \")\n",
    "            return -1\n",
    "        \"\"\"\n",
    "\n",
    "        assert(dist.item() <= save_sum)\n",
    "        save_sum  = dist\n",
    "\n",
    "    #p.cancel()\n",
    "    print(\"dist;\", dist)\n",
    "\n",
    "    return steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "finan512_metis=!awk '{print NF}' results/finan512/finan512.graph.part.32\n",
    "finan512_metis=[int(e) for e in finan512_metis]\n",
    "finan512=!awk '{print NF}' results/finan512/finan512.graphcut.txt\n",
    "finan512=[int(e) for e in finan512]\n",
    "\n",
    "plt.hist([finan512, finan512_metis], bins=3, label=['Ours', 'METIS'])\n",
    "plt.xlabel(\"Cluster volume\")\n",
    "plt.ylabel(\"Number of clusters\")\n",
    "plt.legend(loc='upper right')\n",
    "plt.title(\"finan512\")\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "t60k_metis=!awk '{print NF}' results/t60k/t60k.graph.part.60\n",
    "t60k_metis=[int(e) for e in t60k_metis]\n",
    "t60k=!awk '{print NF}' results/t60k/t60k.graphcut.txt\n",
    "t60k=[int(e) for e in t60k]\n",
    "plt.xlabel(\"Cluster volume\")\n",
    "plt.ylabel(\"Number of clusters\")\n",
    "\n",
    "plt.hist([t60k, t60k_metis], bins=20, label=['Ours', 'METIS'])\n",
    "plt.legend(loc='upper right')\n",
    "plt.title(\"t60k\")\n",
    "#plt.text(0.99,-0.1,'Volume of induced subgraphs', horizontalalignment='right')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import glob\n",
    "import os\n",
    "\n",
    "our   = []\n",
    "metis = []\n",
    "\n",
    "graph_paths = sorted(glob.glob(\"results/*\"))\n",
    "for graph in [os.path.basename(graph_path) for graph_path in graph_paths]:\n",
    "\n",
    "    our_path  = glob.glob(os.path.join(\"results\", graph) +  \"/*graphcut*\")\n",
    "    our_path = our_path[0]\n",
    "\n",
    "    metis_path = glob.glob(os.path.join(\"results\", graph, \"*.part.*\"))[0]\n",
    "    our_=!awk '{{print NF}}' $our_path\n",
    "    #!awk '{{print NF}}' \"$our_path\"\n",
    "\n",
    "    our_=[int(e) for e in our_]\n",
    "    metis_=!awk '{{print NF}}' $metis_path\n",
    "\n",
    "    metis_=[int(e) for e in metis_]\n",
    "    our.append(our_)\n",
    "    metis.append(metis_)\n",
    "\n",
    "\n",
    "from math import log10\n",
    "\n",
    "plt.figure()\n",
    "\n",
    "our_box = plt.boxplot([[e  for e in lst] for lst in our],   positions=np.array(range(len(our)))  *2.0 -0.5, sym=\"\", widths=0.5)\n",
    "met_box = plt.boxplot([[e for e in lst] for lst in metis],  positions=np.array(range(len(metis)))*2.0 +0.5,  sym=\"\", widths=0.5)\n",
    "\n",
    "plt.setp(our_box['caps'],    color=\"C0\")\n",
    "plt.setp(our_box['medians'], color=\"C0\")\n",
    "plt.setp(our_box['boxes'],   color=\"C0\")\n",
    "plt.setp(our_box['whiskers'],color=\"C0\")\n",
    "\n",
    "plt.setp(met_box['caps'],    color=\"C1\")\n",
    "plt.setp(met_box['medians'], color=\"C1\")\n",
    "plt.setp(met_box['boxes'],   color=\"C1\")\n",
    "plt.setp(met_box['whiskers'],color=\"C1\")\n",
    "\n",
    "plt.yscale('log')\n",
    "\n",
    "plt.plot([], c='C0', label='Ours')\n",
    "plt.plot([], c='C1', label='METIS')\n",
    "\n",
    "plt.legend(loc=\"lower left\")\n",
    "\n",
    "plt.xticks(range(0, 24, 2), [\"3elt\",  \"4elt\",  \"add32\",  \"crack\",  \"cs4\",  \"cti\",  \"data\",  \"fe_4elt2\",  \"finan512\",  \"t60k\",  \"uk\",  \"whitaker3\"], rotation=45, ha=\"right\")\n",
    "\n",
    "plt.ylabel(\"cluster size\")\n",
    "plt.xlim(-2, 24)\n",
    "plt.ylim(0, 25000)\n",
    "\n",
    "plt.tight_layout()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from matplotlib.ticker import PercentFormatter\n",
    "\n",
    "balances = !cat results/*/*.out | grep \"vol graph\"\n",
    "balances = [float(e.split(\";\")[1]) for e in balances]\n",
    "balances = [b for b in balances if b != -1]\n",
    "print(balances)\n",
    "print(len(balances))\n",
    "print(max(balances))\n",
    "\n",
    "plt.xlabel(\"Cut balance \")\n",
    "plt.ylabel(\"Proportion of clusters\")\n",
    "\n",
    "plt.hist(balances, bins=10, label=['Vol(cut) / Vol(graph)'], weights=np.ones(len(balances)) / len(balances))\n",
    "plt.legend(loc='upper right')\n",
    "plt.title(\"Cut balance (all graphs)\")\n",
    "plt.text(0.99,-0.1,'Here cut refers to the side with higher volume.\\n There were 171 cuts in total.', horizontalalignment='right')\n",
    "plt.gca().yaxis.set_major_formatter(PercentFormatter(1))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import networkx as nx\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "graph_file = \"synthetic/margulis_25.graph\"\n",
    "\n",
    "G = nx.Graph()\n",
    "with open(graph_file) as f_g:\n",
    "    first_line = f_g.readline().split()\n",
    "    n_nodes, n_edges = int(first_line[0]), int(first_line[1])\n",
    "    iter = 0\n",
    "    G.add_nodes_from([i for i in range(n_nodes)])\n",
    "    for line in f_g:\n",
    "        line_split = line.split()\n",
    "        if len(line_split) <= 0:\n",
    "            iter += 1\n",
    "            continue\n",
    "        base_node = int(line_split[0])\n",
    "        for neighbor in line_split:\n",
    "            G.add_edge(iter, int(neighbor) - 1)\n",
    "        iter += 1\n",
    "\n",
    "colors = [10 for _ in range(n_nodes)]\n",
    "\n",
    "nx.draw_networkx(G, with_labels=False, node_color=colors, cmap=plt.cm.Blues, pos=nx.spring_layout(G))\n",
    "plt.show()\n",
    "nx.draw_circular(G, with_labels=False, node_color=colors, cmap=plt.cm.Blues)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "gls = list(glob.glob(\"results/*t60k*/*graphcut*\"))\n",
    "gls.sort()\n",
    "print(gls)\n",
    "#gls = gls[:2]\n",
    "\n",
    "for gcut in gls:\n",
    "    print(gcut)\n",
    "    bname = os.path.basename(gcut)\n",
    "    graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph  \n",
    "    test(graph, gcut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "gls = list(glob.glob(\"results/*add32*/*graphcut*\"))\n",
    "\n",
    "gls.sort()\n",
    "print(gls)\n",
    "#gls = gls[:2]\n",
    "\n",
    "for gcut in gls:\n",
    "    print(gcut)\n",
    "    bname = os.path.basename(gcut)\n",
    "    #graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph  \n",
    "    graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n",
    "    #test(graph, None) #Use whole graph as cut\n",
    "    test(graph, gcut) #Use whole graph as cut\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Only test graph\n",
    "\n",
    "test(\"synthetic/barbell200-200.graph\", None)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Only test graph\n",
    "\n",
    "whitelist = [\"3elt\",\"4elt\",\"add20\",\"add32\",\"bcsstk33\",\"crack\",\"cs4\",\"cti\",\"data\",\"fe_4elt2\",\"fe_pwt\",\"fe_sphere\",\"finan512\",\"memplus\",\"t60k\",\"uk\",\"whitaker3\",\"wing_nodal\"]\n",
    "whitelist = [\"t60k\",\"uk\",\"whitaker3\",\"wing_nodal\"]\n",
    "\n",
    "files = []\n",
    "\n",
    "for s in whitelist:\n",
    "    files += glob.glob(\"graphs/\" + s + \".graph\")\n",
    "\n",
    "files.sort()\n",
    "for f in files:\n",
    "    print(f)\n",
    "    try:\n",
    "        test(f, None)\n",
    "    except AssertionError:\n",
    "        print(\"walk failed\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "gls = list(glob.glob(\"graphs/*\"))\n",
    "\n",
    "gls = list(glob.glob(\"results/*/*graphcut*\"))\n",
    "gls2 = list(glob.glob(\"results/*/*.part.*\"))\n",
    "print(gls)\n",
    "print(gls2)\n",
    "\n",
    "print(\"START; OUR\")\n",
    "for g in gls:\n",
    "    cont = False\n",
    "\n",
    "    gp = \"graphs/\" + os.path.basename(g.split(\".\")[0]) + \".graph\"\n",
    "    #print(g, gp)\n",
    "    #graph, clusters = graph_and_cut_to_numpy(gp, g)\n",
    "    try:\n",
    "        test(gp, g)\n",
    "    except:\n",
    "        print(\"???\", \"FAILED ON;\", g)\n",
    "\n",
    "print(\"DONE OUR; METIS\")\n",
    "for g in gls2:\n",
    "    gp =\"graphs/\" + os.path.basename(g.split(\".\")[0]) + \".graph\"\n",
    "    print(g, gp)\n",
    "    #graph, clusters = graph_and_cut_to_numpy(gp, g)\n",
    "    try:\n",
    "        test(gp, g)\n",
    "    except:\n",
    "        print(\"???\", \"FAILED ON;\", g)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "gls = list(glob.glob(\"results/*/add32*part*\"))\n",
    "gls.sort()\n",
    "#gls = gls[:2]\n",
    "for gcut in gls:\n",
    "    print(gcut)\n",
    "    bname = os.path.basename(gcut)\n",
    "    graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n",
    "    test(graph, gcut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "gls = list(glob.glob(\"results/add32/*graphcut*\"))\n",
    "gls.sort()\n",
    "#gls = gls[:2]\n",
    "for gcut in gls:\n",
    "    print(gcut)\n",
    "    bname = os.path.basename(gcut)\n",
    "    graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n",
    "    test(graph, gcut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "gls = list(glob.glob(\"results/*/add32*part*\"))\n",
    "gls.sort()\n",
    "#gls = gls[:2]\n",
    "for gcut in gls:\n",
    "    print(gcut)\n",
    "    bname = os.path.basename(gcut)\n",
    "    graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph\n",
    "    test(graph, gcut)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import glob\n",
    "import subprocess\n",
    "\n",
    "for phi in [0.01, 0.025, 0.05]:\n",
    "    gls = list(glob.glob(\"graphs/whitaker3.graph\"))\n",
    "    run_decomp(gls, phi)\n",
    "    print(\"phi=\", phi)\n",
    "    gls = glob.glob(\"graphs/whitaker3.graphcut.txt\")\n",
    "    for gcut in gls:\n",
    "        print(gcut)\n",
    "        bname = os.path.basename(gcut)\n",
    "        graph = os.path.join(\"graphs/\", bname[:-7]) #x.graphcut.txt -> x.graph  (only works if k is single digit)\n",
    "        test(graph, gcut)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "size_steps_tuples = []\n",
    "size_steps_tuples_metis = []\n",
    "with open(\"walks_clusters.txt\") as f:\n",
    "    lines = f.read()\n",
    "    _, lines_our, lines_metis= lines.split(\"OUR\")\n",
    "    print(lines_our[:100])\n",
    "    print(\"###\")\n",
    "    print(lines_metis[:100])\n",
    "    print(\"@@@\")\n",
    "    lines_our = lines_our.split(\"n, e \")\n",
    "    lines_our = [line.split(\"\\n\") for line in lines_our]\n",
    "    lines_metis = lines_metis.split(\"n, e \")\n",
    "    lines_metis = [line.split(\"\\n\") for line in lines_metis]\n",
    "\n",
    "\n",
    "    for i, graph_separated_output in enumerate(lines_our):\n",
    "        #print(\"graph\", i)\n",
    "\n",
    "        counts = [l for l in graph_separated_output if \"to converge\" in l]\n",
    "        #print(counts)\n",
    "        if len(counts) <= 0:\n",
    "            continue\n",
    "        #print([line.split(\" \") for line in counts])\n",
    "        counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n",
    "\n",
    "        #print(\"Len counts\", len(counts), counts)\n",
    "    \n",
    "        size_steps_tuples += counts\n",
    "    for i, graph_separated_output in enumerate(lines_metis):\n",
    "        #print(\"graph\", i)\n",
    "\n",
    "        counts = [l for l in graph_separated_output if \"to converge\" in l]\n",
    "        #print(counts)\n",
    "        if len(counts) <= 0:\n",
    "            continue\n",
    "        #print([line.split(\" \") for line in counts])\n",
    "        counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n",
    "\n",
    "        #print(\"Len counts\", len(counts), counts)\n",
    "    \n",
    "        size_steps_tuples_metis += counts\n",
    "\n",
    "\n",
    "from math import log2\n",
    "#print(size_steps_tuples)\n",
    "#print(\"n clusters\", len(size_steps_tuples))\n",
    "\n",
    "plt.xlim(5, 15) # = 15\n",
    "plt.ylim(0, 30000) #  = 30000\n",
    "a, b = np.polyfit(sorted([log2(v[0]) for v in size_steps_tuples], reverse=True), [v[1] for v in size_steps_tuples], 1)\n",
    "plt.scatter(sorted([log2(v[0]) for v in size_steps_tuples], reverse=True), [v[1] for v in size_steps_tuples], label=\"slope: \" + str(int(round(a))))\n",
    "plt.xlabel(\"cluster size (log2)\")\n",
    "plt.ylabel(\"steps until convergence\")\n",
    "plt.legend(loc='upper right')\n",
    "plt.title(\"Mixing rate on output clusters (ours)\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "size_steps_tuples = []\n",
    "size_steps_tuples_metis = []\n",
    "with open(\"walks_clusters.txt\") as f:\n",
    "    lines = f.read()\n",
    "    _, lines_our, lines_metis= lines.split(\"OUR\")\n",
    "    print(lines_our[:100])\n",
    "    print(\"###\")\n",
    "    print(lines_metis[:100])\n",
    "    print(\"@@@\")\n",
    "    lines_our = lines_our.split(\"n, e \")\n",
    "    lines_our = [line.split(\"\\n\") for line in lines_our]\n",
    "    lines_metis = lines_metis.split(\"n, e \")\n",
    "    lines_metis = [line.split(\"\\n\") for line in lines_metis]\n",
    "\n",
    "\n",
    "    for i, graph_separated_output in enumerate(lines_our):\n",
    "        #print(\"graph\", i)\n",
    "\n",
    "        counts = [l for l in graph_separated_output if \"to converge\" in l]\n",
    "        #print(counts)\n",
    "        if len(counts) <= 0:\n",
    "            continue\n",
    "        #print([line.split(\" \") for line in counts])\n",
    "        counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n",
    "\n",
    "        #print(\"Len counts\", len(counts), counts)\n",
    "    \n",
    "        size_steps_tuples += counts\n",
    "    for i, graph_separated_output in enumerate(lines_metis):\n",
    "        #print(\"graph\", i)\n",
    "\n",
    "        counts = [l for l in graph_separated_output if \"to converge\" in l]\n",
    "        #print(counts)\n",
    "        if len(counts) <= 0:\n",
    "            continue\n",
    "        #print([line.split(\" \") for line in counts])\n",
    "        counts = [(int(l.split(\" \")[5]), int(l.split(\" \")[11])) for l in counts]\n",
    "\n",
    "        #print(\"Len counts\", len(counts), counts)\n",
    "    \n",
    "        size_steps_tuples_metis += counts\n",
    "\n",
    "from math import log2\n",
    "import numpy as np\n",
    "\n",
    "a, b = np.polyfit(sorted([log2(v[0]) for v in size_steps_tuples_metis], reverse=True), [v[1] for v in size_steps_tuples_metis], 1)\n",
    "print(a, b)\n",
    "plt.scatter(sorted([log2(v[0]) for v in size_steps_tuples_metis], reverse=True), [v[1] for v in size_steps_tuples_metis], label=\"slope: \" + str(int(round(a))), color='C1')\n",
    "plt.xlabel(\"cluster size (log2)\")\n",
    "plt.ylabel(\"steps until convergence\")\n",
    "plt.legend(loc='upper right')\n",
    "plt.title(\"Mixing rate on output clusters (METIS)\")\n",
    "plt.xlim(5, 15)\n",
    "plt.ylim(0, 31000)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"walks_graphs.txt\") as f:\n",
    "    lines = f.read()\n",
    "    lines = lines.split(\".graph\\n\")\n",
    "    lines = [line.split(\"\\n\") for line in lines]\n",
    "    print(lines[:10])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython"
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}