-
Notifications
You must be signed in to change notification settings - Fork 0
/
reformat_for_msstats_large_data.R
66 lines (42 loc) · 1.88 KB
/
reformat_for_msstats_large_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/env Rscript
#
#$ -cwd
#$ -pe smp 6
#$ -l mem_free=90G
#
#
#$ -S /hpc/apps/R/4.0.2/bin/Rscript
#$ -e /common/dabkek/re-NewAcquisition/primary_imputation/scratch
#$ -o /common/dabkek/re-NewAcquisition/primary_imputation/scratch
#module load R
#library(MSstats)
library(dplyr)
library(tidyr)
library(tibble)
library(stringr)
library(bit64)
# Usage: Rscript <this script> <input.txt> <output.csv>
#   args[1]  path of the tab-separated input table (first row is the header)
#   args[2]  path of the CSV file to write
args <- commandArgs(trailingOnly = TRUE)
# Fail early with a readable message instead of letting read.table() choke
# on a missing (NA) file name.
if (length(args) < 2L) {
  stop(
    "Expected two arguments: <input tab-separated file> <output csv file>",
    call. = FALSE
  )
}
fn <- args[1]
fn_2 <- args[2]
# Load the wide table; strings stay character so they can be split below.
df <- read.table(fn, stringsAsFactors = FALSE, header = TRUE, sep = "\t")
# Alternative loaders kept for reference (currently disabled):
#df <- read.csv(fn, stringsAsFactors = F)
#df <- subset(df, select = -X)
#df[, 4:23] <- lapply(4:23, function(x) as.numeric(df[[x]]))
# Reshape wide -> long: every column except the three identifier columns
# becomes one (Run, Intensity) pair per row. gather() is kept on purpose:
# pivot_longer() would emit the rows in a different order.
df <- gather(
  df,
  key = "Run", value = "Intensity",
  -FragmentIon, -ProteinName, -PeptideSequence
)

# Precursor charge is taken as the last character of the fragment-ion label.
df$PrecursorCharge <- str_sub(df$FragmentIon, -1)

# Product charge is the third "_"-separated field of the fragment-ion label.
# NA entries in `into` discard the other four fields; a temporary copy is
# split so that FragmentIon itself stays in the table.
df$frag_fields <- df$FragmentIon
df <- separate(
  df,
  col = frag_fields,
  into = c(NA, NA, "ProductCharge", NA, NA),
  sep = "_"
)

# Condition is the first of three "_"-separated fields of the run name.
df$run_fields <- df$Run
df <- separate(
  df,
  col = run_fields,
  into = c("Condition", NA, NA),
  sep = "_"
)

# BioReplicate is the run name without its first four and last two characters.
df$BioReplicate <- substr(df$Run, 5L, nchar(df$Run) - 2L)
df$IsotopeLabelType <- "light"

# Convert the charge columns by name rather than by column position.
charge_cols <- c("PrecursorCharge", "ProductCharge")
df[charge_cols] <- lapply(df[charge_cols], as.integer)

# Turn every remaining character column into a factor; vapply() guarantees a
# logical mask whatever the column set.
is_chr <- vapply(df, is.character, logical(1))
df[is_chr] <- lapply(df[is_chr], as.factor)
# The reference CSV defines which columns are written and in what order.
# An optional third command-line argument overrides the default location.
ref_path <- "/common/dabkek/Dilution_re_analysis_swath/feat_align_all_replicates_3_norm_msstats.csv"
if (length(args) >= 3L) {
  ref_path <- args[3]
}
# Only the header is needed, so read a single data row instead of the
# whole reference file.
ref_df <- read.csv(ref_path, nrows = 1L)
ref_cols <- colnames(ref_df)

# Report exactly which columns are absent instead of the generic
# "undefined columns selected" error.
missing_cols <- setdiff(ref_cols, colnames(df))
if (length(missing_cols) > 0L) {
  stop(
    "Columns required by the reference file are missing: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# drop = FALSE keeps a data frame even if the reference has a single column.
df <- df[, ref_cols, drop = FALSE]
write.csv(df, file = fn_2, row.names = FALSE)