Skip to content

Commit

Permalink
implemented greedy clustering
Browse files Browse the repository at this point in the history
  • Loading branch information
sohompaul committed Jun 17, 2020
1 parent 3fdeabd commit cff0f8d
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions multitask/Cluster.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,28 @@
" return clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def greedy_clustering(df, bucket_size, clusters=None, symmetric=False):\n",
"    \"\"\"Greedily partition the columns of df into buckets of bucket_size.\n",
"\n",
"    Repeatedly picks, among the labels not yet clustered, the bucket whose\n",
"    summed pairwise df.loc[a, b] score is smallest, until at most\n",
"    bucket_size labels remain; the leftovers form the final cluster.\n",
"\n",
"    Args:\n",
"        df: square pandas DataFrame of pairwise scores; rows and columns are\n",
"            assumed to carry the same labels in the same order.\n",
"        bucket_size: number of labels per cluster (must be >= 1).\n",
"        clusters: optional list of existing clusters to extend in place;\n",
"            their members are excluded from the search. Defaults to a\n",
"            fresh list on every call.\n",
"        symmetric: pass True when df is already symmetric; otherwise a\n",
"            symmetrized copy (df + df.T) / 2 is used and the caller's\n",
"            frame is left untouched.\n",
"\n",
"    Returns:\n",
"        The list of clusters, each a list of column labels.\n",
"\n",
"    Raises:\n",
"        ValueError: if bucket_size is smaller than 1.\n",
"    \"\"\"\n",
"    if bucket_size < 1:\n",
"        # A zero-sized bucket never consumes a label, so the search would not end.\n",
"        raise ValueError('bucket_size must be at least 1')\n",
"    if clusters is None:\n",
"        # A literal [] default is shared between calls and would leak clusters.\n",
"        clusters = []\n",
"    if not symmetric:\n",
"        # Build a new frame instead of assigning into the caller's df.\n",
"        df = (df + df.to_numpy().T) / 2\n",
"\n",
"    def score(bucket):\n",
"        return sum(df.loc[a, b] for a, b in itertools.combinations(bucket, 2))\n",
"\n",
"    used = {t for c in clusters for t in c}\n",
"    available = [t for t in df.columns if t not in used]\n",
"    while len(available) > bucket_size:\n",
"        # Exhaustive search; ties on score fall back to comparing the label tuples.\n",
"        _, bucket = min((score(b), b) for b in itertools.combinations(available, bucket_size))\n",
"        clusters.append(list(bucket))\n",
"        chosen = set(bucket)\n",
"        available = [t for t in available if t not in chosen]\n",
"    if available:\n",
"        # Skip the trailing cluster when nothing is left over.\n",
"        clusters.append(available)\n",
"    return clusters"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit cff0f8d

Please sign in to comment.