Skip to content
This repository has been archived by the owner on Dec 7, 2021. It is now read-only.

Commit

Permalink
add adhoc analyses of distance to centromeres, fix indexing syntax in…
Browse files Browse the repository at this point in the history
… process_genomic and update conda env to be less stringent on versions
  • Loading branch information
cmdoret committed Jul 30, 2019
1 parent 2ec82b6 commit d487439
Show file tree
Hide file tree
Showing 4 changed files with 189 additions and 165 deletions.
308 changes: 147 additions & 161 deletions docs/csd_env_conda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,167 +3,153 @@ channels:
- r
- defaults
dependencies:
- binutils_impl_linux-64=2.28.1=had2808c_3
- binutils_linux-64=7.2.0=had2808c_27
- biopython=1.72=py27h04863e7_0
- blas=1.0=mkl
- bwidget=1.9.11=0
- bzip2=1.0.6=h14c3975_5
- ca-certificates=2018.03.07=0
- cairo=1.14.12=h8948797_3
- certifi=2018.4.16=py27_0
- curl=7.61.0=h84994c4_0
- cycler=0.10.0=py27_0
- dbus=1.13.2=h714fa37_1
- expat=2.2.5=he0dffb1_0
- fontconfig=2.13.0=h9420a91_0
- freetype=2.9.1=h8a8886c_0
- fribidi=1.0.4=h14c3975_0
- gcc_impl_linux-64=7.2.0=habb00fd_3
- gcc_linux-64=7.2.0=h550dcbe_27
- gfortran_impl_linux-64=7.2.0=hdf63c60_3
- gfortran_linux-64=7.2.0=h550dcbe_27
- glib=2.56.1=h000015b_0
- graphite2=1.3.11=h16798f4_2
- gsl=2.4=h14c3975_4
- gst-plugins-base=1.14.0=hbbd80ab_1
- gstreamer=1.14.0=hb453b48_1
- gxx_impl_linux-64=7.2.0=hdf63c60_3
- gxx_linux-64=7.2.0=h550dcbe_27
- harfbuzz=1.8.4=hec2c2bc_0
- icu=58.2=h9c2bf20_1
- intel-openmp=2018.0.3=0
- jpeg=9b=h024ee3a_2
- krb5=1.14.2=hcdc1b81_6
- libcurl=7.61.0=h1ad7b7a_0
- libedit=3.1=heed3624_0
- libffi=3.2.1=hd88cf55_4
- libgcc=7.2.0=h69d50b8_2
- libgcc-ng=7.2.0=hdf63c60_3
- libgfortran-ng=7.2.0=hdf63c60_3
- libiconv=1.14=0
- libopenblas=0.2.20=h9ac9557_7
- libpng=1.6.34=hb9fc6fc_0
- libssh2=1.8.0=h9cfc8f7_4
- libstdcxx-ng=7.2.0=hdf63c60_3
- libtiff=4.0.9=he85c1e1_1
- libuuid=1.0.3=h1bed415_2
- libxcb=1.13=h1bed415_1
- libxml2=2.9.8=h26e45fe_1
- mkl=2018.0.3=1
- ncurses=6.0=h9df7e31_2
- numpy=1.11.3=py27h1b885b7_9
- numpy-base=1.11.3=py27h3dfced4_9
- openssl=1.0.2o=h20670df_0
- pandas=0.19.2=np111py27_1
- pango=1.42.2=h8589676_0
- pcre=8.42=h439df22_0
- pip=10.0.1=py27_0
- pixman=0.34.0=hceecf20_3
- pycairo=1.13.3=py27hea6d626_0
- pyparsing=2.2.0=py27_1
- pyqt=5.9.2=py27h22d08a2_0
- python=2.7.15=h1571d57_0
- python-dateutil=2.7.3=py27_0
- pytz=2018.5=py27_0
- qt=5.9.6=h52aff34_0
- readline=7.0=ha6073c6_4
- setuptools=39.2.0=py27_0
- sip=4.19.8=py27hf484d3e_0
- six=1.11.0=py27_1
- sqlite=3.23.1=he433501_0
- tk=8.6.7=hc745277_3
- tktable=2.10=h14c3975_0
- wheel=0.31.1=py27_0
- xz=5.2.4=h14c3975_4
- zlib=1.2.11=ha838bed_2
- _r-mutex=1.0.0=anacondar_1
- r-argparse=1.1.0=r343h889e2dd_0
- r-assertthat=0.2.0=r343h889e2dd_0
- r-backports=1.1.2=r343h086d26f_0
- r-base=3.4.3=h290ecf8_1
- r-base64enc=0.1_3=r343h086d26f_4
- r-bh=1.65.0_1=r343h889e2dd_0
- r-bindr=0.1=r343h889e2dd_0
- r-bindrcpp=0.2=r343h599a50d_0
- r-broom=0.4.3=r343h889e2dd_0
- r-callr=1.0.0=r343h889e2dd_0
- r-cellranger=1.1.0=r343h889e2dd_0
- r-cli=1.0.0=r343h889e2dd_0
- r-clipr=0.4.0=r343h889e2dd_0
- r-colorspace=1.3_2=r343h086d26f_0
- r-crayon=1.3.4=r343h889e2dd_0
- r-curl=3.1=r343h086d26f_0
- r-dbi=0.7=r343h889e2dd_0
- r-dbplyr=1.1.0=r343h889e2dd_0
- r-dichromat=2.0_0=r343h889e2dd_4
- r-digest=0.6.13=r343h086d26f_0
- r-dplyr=0.7.4=r343h599a50d_0
- r-evaluate=0.10.1=r343h889e2dd_0
- r-findpython=1.0.3=r343h889e2dd_0
- r-forcats=0.2.0=r343h889e2dd_0
- r-foreign=0.8_69=r343h086d26f_0
- r-getopt=1.20.1=r343h889e2dd_0
- r-ggplot2=2.2.1=r343h889e2dd_0
- r-glue=1.2.0=r343h086d26f_0
- r-gridextra=2.3=r343h889e2dd_0
- r-gtable=0.2.0=r343h889e2dd_0
- r-haven=1.1.0=r343h599a50d_0
- r-highr=0.6=r343h889e2dd_0
- r-hms=0.4.0=r343h889e2dd_0
- r-htmltools=0.3.6=r343h599a50d_0
- r-httr=1.3.1=r343h889e2dd_0
- r-jsonlite=1.5=r343h086d26f_0
- r-knitr=1.18=r343h889e2dd_0
- r-labeling=0.3=r343h889e2dd_4
- r-lattice=0.20_35=r343h086d26f_0
- r-lazyeval=0.2.1=r343h086d26f_0
- r-lubridate=1.7.1=r343h599a50d_0
- r-magrittr=1.5=r343h889e2dd_4
- r-markdown=0.8=r343h086d26f_0
- r-mass=7.3_48=r343h086d26f_0
- r-mime=0.5=r343h086d26f_0
- r-mnormt=1.5_5=r343h4829c52_0
- r-modelr=0.1.1=r343h889e2dd_0
- r-munsell=0.4.3=r343h889e2dd_0
- r-nlme=3.1_131=r343h4829c52_0
- r-openssl=0.9.9=r343h086d26f_0
- r-pillar=1.0.1=r343h889e2dd_0
- r-pkgconfig=2.0.1=r343h889e2dd_0
- r-plogr=0.1_1=r343h889e2dd_0
- r-plyr=1.8.4=r343h599a50d_0
- r-proto=1.0.0=r343h889e2dd_0
- r-psych=1.7.8=r343h889e2dd_0
- r-purrr=0.2.4=r343h086d26f_0
- r-r6=2.2.2=r343h889e2dd_0
- r-rcolorbrewer=1.1_2=r343h889e2dd_0
- r-rcpp=0.12.14=r343h599a50d_0
- r-rcpproll=0.2.2=r343h599a50d_0
- r-readr=1.1.1=r343h599a50d_0
- r-readxl=1.0.0=r343h599a50d_0
- r-rematch=1.0.1=r343h889e2dd_0
- r-reprex=0.1.1=r343h889e2dd_0
- r-reshape2=1.4.3=r343h599a50d_0
- r-rlang=0.1.6=r343h086d26f_0
- r-rmarkdown=1.8=r343h889e2dd_0
- r-rprojroot=1.3_1=r343h889e2dd_0
- r-rstudioapi=0.7=r343h889e2dd_0
- r-rvest=0.3.2=r343h889e2dd_0
- r-scales=0.5.0=r343h599a50d_0
- r-selectr=0.3_1=r343h889e2dd_0
- r-stringi=1.1.6=r343h599a50d_0
- r-stringr=1.2.0=r343h889e2dd_0
- r-tibble=1.4.1=r343h086d26f_0
- r-tidyr=0.7.2=r343h599a50d_0
- r-tidyselect=0.2.3=r343h599a50d_0
- r-tidyverse=1.2.1=r343h889e2dd_0
- r-utf8=1.1.2=r343h086d26f_0
- r-viridis=0.4.0=r343h889e2dd_0
- r-viridislite=0.2.0=r343h889e2dd_0
- r-whisker=0.3_2=r343h889e2dd_4
- r-xml2=1.1.1=r343h599a50d_0
- r-yaml=2.1.16=r343h086d26f_0
- r-zoo=1.8_0=r343h086d26f_0
- binutils_impl_linux-64
- binutils_linux-64
- biopython
- blas
- bwidget
- bzip2
- ca-certificates
- cairo
- certifi
- curl
- cycler
- dbus
- expat
- fontconfig
- freetype
- fribidi
- gcc_impl_linux-64
- gcc_linux-64
- gfortran_impl_linux-64
- gfortran_linux-64
- glib
- graphite2
- gsl
- gst-plugins-base
- gstreamer
- gxx_impl_linux-64
- gxx_linux-64
- harfbuzz
- icu
- intel-openmp
- jpeg
- krb5
- libcurl
- libedit
- libffi
- libgcc
- libgcc-ng
- libgfortran-ng
- libopenblas
- libpng
- libssh2
- libstdcxx-ng
- libtiff
- libuuid
- libxcb
- libxml2
- mkl
- ncurses
- numpy
- numpy-base
- openssl
- pandas
- pango
- pcre
- pip
- pixman
- pycairo
- pyparsing
- pyqt
- python=2.7
- python-dateutil
- pytz
- qt
- readline
- setuptools
- sip
- six
- sqlite
- tk
- tktable
- wheel
- xz
- zlib
- _r-mutex
- r-argparse
- r-assertthat
- r-backports
- r-base=3.5.1
- r-base64enc
- r-bh
- r-bindr
- r-bindrcpp
- r-broom
- r-callr
- r-cellranger
- r-cli
- r-clipr
- r-colorspace
- r-crayon
- r-curl
- r-dbi
- r-dbplyr
- r-dichromat
- r-digest
- r-dplyr
- r-evaluate
- r-findpython
- r-forcats
- r-foreign
- r-getopt
- r-ggplot2
- r-glue
- r-gridextra
- r-gtable
- r-haven
- r-highr
- r-hms
- r-htmltools
- r-httr
- r-jsonlite
- r-knitr
- r-labeling
- r-lattice
- r-lazyeval
- r-lubridate
- r-magrittr
- r-markdown
- r-mass
- r-mime
- r-mnormt
- r-modelr
- r-munsell
- r-nlme
- r-openssl
- r-pillar
- r-pkgconfig
- r-plogr
- r-plyr
- r-psych
- r-purrr
- r-rcolorbrewer
- r-rcpp
- r-rcpproll
- r-readr
- r-reshape2
- r-rlang
- r-rmarkdown
- r-scales
- r-stringr
- r-tibble
- r-tidyr
- r-tidyselect
- r-tidyverse
- r-utf8
- r-viridis
- r-viridislite
- r-zoo
- pip:
- backports.shutil-get-terminal-size==1.0.0
- enum34==1.1.6
Expand Down
2 changes: 1 addition & 1 deletion src/assoc_mapping/chrom_types.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
# Cyril Matthey-Doret

#==== SELECT PARAMETERS ====#
wsize_range <- 50 # Size of the moving average window
wsize_range <- 30 # Size of the moving average window
sp_range <- 0.40 # Proportion of SNPs to be included in each local regression
# wsize_range <- seq(5, 40, 1)
# sp_range <- seq(0.15, 1, 0.01)
Expand Down
6 changes: 3 additions & 3 deletions src/assoc_mapping/process_genomic.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,7 @@ def gen_decode(encoded):
else:
genodict[code] = 'E' # All others are heterozygous
# Rarely, rows are filled with this value. I assume this is a STACKS issue.
decoded = encoded.apply(lambda r: np.array([genodict.get(i, 'M')
for i in r]), axis=1)
decoded = encoded.applymap(lambda r: genodict.get(r, 'M'))
return decoded


Expand Down Expand Up @@ -204,7 +203,7 @@ def prop_hom(pop, geno):
# Looping over sexes
dff = {}
for t in ['O', 'M', 'E']:
dff[t] = (geno.loc[:, sex_id[sex]] == t).T.sum().astype(float)
dff[t] = (geno.loc[:, sex_id[sex].values] == t).T.sum().astype(float)
sample_size[sex] = dff['E']+dff['O']
hom[sex] = np.divide(dff['O'], (dff['O'] + dff['E']))

Expand Down Expand Up @@ -275,6 +274,7 @@ def parallel_func(f, df, f_args=[], chunk_size=1000):
result = pool.map(func, chunked_df) # Mapping function to chunks.
# Concatenating into single df. Order is preserved
pool.terminate()

return pd.concat(result)

# ========== LOADING AND PROCESSING DATA ==========#
Expand Down
38 changes: 38 additions & 0 deletions src/misc/centro_dist_comparison.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Comparing distance from centromeres between candidate SNPs and non-candidate SNPs
library('tidyverse')
library('gridExtra')
library('ggpubr')
# ---- Parameters ----
# Minimum value of the `fisher` column for a SNP to be flagged as a candidate.
# NOTE(review): presumably a -log10 p-value from a Fisher test — confirm.
candidate_min_fisher <- 5
# Divisor used to express distances in Mbp on the plot axes.
zoomfactor <- 1000000
# Position (in bp, before rescaling) of the global test label on the
# distance axis of the boxplot. The original spelled this `8*10e5`, i.e. 8e6.
label_y_bp <- 8e6
# Output file for the combined figure.
out_pdf <- "data/assoc_mapping/centro/plots/centro_dist.pdf"

# ---- Load data ----
snps <- read_delim(
  "data/assoc_mapping/case_control/case_control_all.tsv",
  delim = "\t"
)
centro <- read_delim(
  "data/assoc_mapping/centro/centrolist.tsv",
  delim = "\t"
)
head(centro)

# ---- Distance of each SNP to the centromere of its chromosome ----
# Rows are matched on `Chr`; only rows with Mt > 0 and Ft > 0 are kept.
# NOTE(review): assumes `BP` is the SNP position and `pos` the centromere
# position (one of them coming from each table) — confirm against the inputs.
snps_centro <- snps %>%
  inner_join(centro, by = "Chr") %>%
  filter(Mt > 0 & Ft > 0) %>%
  mutate(
    centrodist = abs(BP - pos),
    # The comparison already yields a logical; no ifelse(..., T, F) needed.
    candidate = fisher >= candidate_min_fisher
  ) %>%
  select(Chr, centrodist, Tt, fisher, candidate, effect_str)

# filter() drops rows where the condition is NA, so SNPs with a missing
# `candidate` value end up in neither subset (same as `== T` / `== F`).
non_candidates <- snps_centro %>% filter(!candidate)
candidates <- snps_centro %>% filter(candidate)

# ---- Plot 1: per-chromosome histogram of non-candidate SNP distances ----
# Candidate SNPs are overlaid as red vertical lines.
p1 <- ggplot(snps_centro, aes(x = centrodist / zoomfactor)) +
  geom_histogram(data = non_candidates) +
  geom_vline(
    data = candidates,
    aes(xintercept = centrodist / zoomfactor),
    col = "red"
  ) +
  facet_grid(~Chr, space = "free_x", scales = "free_x") +
  theme_classic() +
  xlab("Absolute distance from centromere [Mbp]") +
  ylab("Number of SNPs")

# ---- Plot 2: candidate vs non-candidate distances, with test annotation ----
p2 <- ggplot(
  snps_centro,
  aes(x = as.character(candidate), y = centrodist / zoomfactor)
) +
  geom_boxplot() +
  geom_point() +
  # Pairwise comparison bracket labelled with significance levels
  stat_compare_means(
    comparisons = list(c("FALSE", "TRUE")),
    label = "p.signif"
  ) +
  # Global test label, placed at a fixed position on the distance axis
  stat_compare_means(label.x = 0.5, label.y = label_y_bp / zoomfactor) +
  theme_bw() +
  coord_flip() +
  xlab("CSD candidate") +
  ylab("Absolute distance to centromere [Mbp]")

# ---- Write both panels, stacked, to a single PDF ----
# pdf() cannot create missing directories, so make sure the target exists.
dir.create(dirname(out_pdf), recursive = TRUE, showWarnings = FALSE)
pdf(out_pdf, width = 16, height = 6)
grid.arrange(p1, p2, nrow = 2)
dev.off()

0 comments on commit d487439

Please sign in to comment.