Deprecate the common.plot_cytobands method (#71)

sbslee · sbslee · Jun 15, 2024 · Sep 9, 2023 · Jun 6, 2024 · Jun 15, 2024
commit 0f4c601eb8c9612260a01bcbd02e62513b852bfd
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -5,6 +5,7 @@ Changelog
 -----------------------
 
 * Update :meth:`pyvcf.has_chr_prefix` method to ignore the HLA contigs for GRCh38.
+* :issue:`71`: Deprecate :meth:`common.plot_cytobands` method.
 
 0.37.0 (2023-09-09)
 -------------------

diff --git a/fuc/api/common.py b/fuc/api/common.py
@@ -22,7 +22,6 @@
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
-from matplotlib.collections import BrokenBarHCollection
 import matplotlib.patches as mpatches
 import seaborn as sns
 
@@ -841,138 +840,6 @@ def extract_sequence(fasta, region):
  sequence = ''
  return sequence
 
-def plot_cytobands(cytoband, bed, ax=None, figsize=None):
- """
- Create chromosome ideograms along with BED data.
-
- The method's source code is derived from a Python script (ideograms.py)
- written by Ryan Dale. The original script can be found at:
- https://gist.github.com/daler/c98fc410282d7570efc3#file-ideograms-py
-
- Parameters
- ----------
- cytoband : str
- Text file containing cytoband ideogram information.
- bed : str
- BED file to be displayed.
- ax : matplotlib.axes.Axes, optional
- Pre-existing axes for the plot. Otherwise, crete a new one.
- figsize : tuple, optional
- Width, height in inches. Format: (float, float).
-
- Examples
- --------
-
- .. plot::
- :context: close-figs
-
- >>> import matplotlib.pyplot as plt
- >>> from fuc import common
- >>> common.load_dataset('cytoband')
- >>> cytoband_file = '~/fuc-data/cytoband/cytoBandIdeo.txt.gz'
- >>> bed_file = '~/fuc-data/cytoband/ucsc_genes.bed.gz'
- >>> common.plot_cytobands(cytoband_file, bed_file, figsize=(10, 8))
- """
- def chromosome_collections(df, y_positions, height, **kwargs):
- del_width = False
- if 'width' not in df.columns:
- del_width = True
- df['width'] = df['end'] - df['start']
- for chrom, group in df.groupby('chrom'):
- yrange = (y_positions[chrom], height)
- xranges = group[['start', 'width']].values
- yield BrokenBarHCollection(
- xranges, yrange, edgecolors=("black",), facecolors=group['colors'], **kwargs)
- if del_width:
- del df['width']
-
- # Height of each ideogram
- chrom_height = 1
-
- # Spacing between consecutive ideograms
- chrom_spacing = 1
-
- # Height of the gene track. Should be smaller than `chrom_spacing` in order to
- # fit correctly
- gene_height = 0.4
-
- # Padding between the top of a gene track and its corresponding ideogram
- gene_padding = 0.1
-
- # Decide which chromosomes to use
- chromosome_list = [f'chr{i}' for i in list(range(1, 23)) + ['M', 'X', 'Y']]
-
- # Keep track of the y positions for ideograms and genes for each chromosome,
- # and the center of each ideogram (which is where we'll put the ytick labels)
- ybase = 0
- chrom_ybase = {}
- gene_ybase = {}
- chrom_centers = {}
-
- # Iterate in reverse so that items in the beginning of `chromosome_list` will
- # appear at the top of the plot
- for chrom in chromosome_list[::-1]:
- chrom_ybase[chrom] = ybase
- chrom_centers[chrom] = ybase + chrom_height / 2.
- gene_ybase[chrom] = ybase - gene_height - gene_padding
- ybase += chrom_height + chrom_spacing
-
- # Read in ideogram.txt, downloaded from UCSC Table Browser
- ideo = pd.read_table(
- cytoband,
- names=['chrom', 'start', 'end', 'name', 'gieStain']
- )
-
- # Filter out chromosomes not in our list
- ideo = ideo[ideo.chrom.apply(lambda x: x in chromosome_list)]
-
- # Add a new column for width
- ideo['width'] = ideo.end - ideo.start
-
- # Colors for different chromosome stains
- color_lookup = {
- 'gneg': (1., 1., 1.),
- 'gpos25': (.6, .6, .6),
- 'gpos50': (.4, .4, .4),
- 'gpos75': (.2, .2, .2),
- 'gpos100': (0., 0., 0.),
- 'acen': (.8, .4, .4),
- 'gvar': (.8, .8, .8),
- 'stalk': (.9, .9, .9),
- }
-
- # Add a new column for colors
- ideo['colors'] = ideo['gieStain'].apply(lambda x: color_lookup[x])
-
- # Same thing for genes
- genes = pd.read_table(
- bed,
- names=['chrom', 'start', 'end', 'name'],
- usecols=range(4))
- genes = genes[genes.chrom.apply(lambda x: x in chromosome_list)]
- genes['width'] = genes.end - genes.start
- genes['colors'] = '#2243a8'
-
- if ax is None:
- fig, ax = plt.subplots(figsize=figsize)
-
- # Now all we have to do is call our function for the ideogram data...
- for collection in chromosome_collections(ideo, chrom_ybase, chrom_height):
- ax.add_collection(collection)
-
- # ...and the gene data
- for collection in chromosome_collections(
- genes, gene_ybase, gene_height, alpha=0.5, linewidths=0
- ):
- ax.add_collection(collection)
-
- # Axes tweaking
- ax.set_yticks([chrom_centers[i] for i in chromosome_list])
- ax.set_yticklabels(chromosome_list)
- ax.axis('tight')
-
- return ax
-
 def convert_file2list(fn):
  """
  Convert a text file to a list of filenames.