calculating af_or using function and cmd options now
This commit is contained in:
parent
7686aa39b4
commit
f88e2665e9
5 changed files with 156 additions and 11 deletions
141
scripts/af_or_calcs.R
Executable file
141
scripts/af_or_calcs.R
Executable file
|
@ -0,0 +1,141 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
#########################################################
|
||||||
|
# TASK: To calculate Allele Frequency and
|
||||||
|
# Odds Ratio from master data
|
||||||
|
#########################################################
|
||||||
|
# working dir
|
||||||
|
setwd("~/git/LSHTM_analysis/scripts")
|
||||||
|
getwd()
|
||||||
|
|
||||||
|
# load libraries
|
||||||
|
#source("Header_TT.R")
|
||||||
|
require("getopt", quietly = TRUE) # cmd parse arguments
|
||||||
|
|
||||||
|
# load functions
|
||||||
|
source("functions/plotting_globals.R")
|
||||||
|
source("functions/mychisq_or.R")
|
||||||
|
source("functions/myaf_or_calcs.R")
|
||||||
|
|
||||||
|
#############################################################
|
||||||
|
# command line args
|
||||||
|
#********************
|
||||||
|
spec = matrix(c(
|
||||||
|
"drug" ,"d" , 1, "character",
|
||||||
|
"gene" ,"g" , 1, "character",
|
||||||
|
"master_data" ,"m", 2, "character",
|
||||||
|
"gene_data" ,"G", 2, "character",
|
||||||
|
"outfile" ,"o" , 2, "character",
|
||||||
|
"idcol" ,"I", 2, "character",
|
||||||
|
"drmuts_col" ,"D", 2, "character",
|
||||||
|
"othermuts_col" ,"O", 2, "character"
|
||||||
|
|
||||||
|
), byrow = TRUE, ncol = 4)
|
||||||
|
|
||||||
|
opt = getopt(spec)
|
||||||
|
|
||||||
|
drug = opt$drug
|
||||||
|
gene = opt$gene
|
||||||
|
infile_master = opt$master_data
|
||||||
|
infile_metadata = opt$gene_data
|
||||||
|
outfile = opt$outfile
|
||||||
|
idcol = opt$idcol
|
||||||
|
dr_muts_col = opt$drmuts_col
|
||||||
|
other_muts_col = opt$othermuts_col
|
||||||
|
|
||||||
|
# --drug and --gene are mandatory: import_dirs() and every default
# below are derived from them, so abort early when either is missing.
# `||` is the scalar, short-circuiting operator intended for `if` conditions.
if (is.null(drug) || is.null(gene)) {
  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
}
|
||||||
|
|
||||||
|
# import_dirs()
|
||||||
|
import_dirs(drug, gene)
|
||||||
|
|
||||||
|
# setting sensible defaults for opt args
|
||||||
|
|
||||||
|
#----------------------------
|
||||||
|
# input file 1: master data
|
||||||
|
#----------------------------
|
||||||
|
#class(infile_master)
|
||||||
|
if (is.null(infile_master)){
|
||||||
|
#if (!is.character(infile) && exists("gene")){
|
||||||
|
#in_filename_master = 'original_tanushree_data_v2.csv' #19K
|
||||||
|
in_filename_master = 'mtb_gwas_meta_v6.csv' #35k
|
||||||
|
infile_master = paste0(datadir, in_filename_master)
|
||||||
|
cat("\nInput file 1 not specified, assuming filename: ", infile_master)
|
||||||
|
cat(paste0("\nReading infile 1 i.e raw data: ", infile_master) )
|
||||||
|
}
|
||||||
|
|
||||||
|
#---------------------------------------------------
|
||||||
|
# input file 2: gene associated meta
|
||||||
|
# data file to extract valid snps and add calcs to.
|
||||||
|
#---------------------------------------------------
|
||||||
|
if (is.null(infile_metadata)){
|
||||||
|
# This is outfile_metadata from data_extraction.py
|
||||||
|
in_filename_metadata = paste0(tolower(gene), '_metadata.csv')
|
||||||
|
infile_metadata = paste0(outdir, '/', in_filename_metadata)
|
||||||
|
cat("\nInput file 2 not specified, assuming filename: ", infile_metadata)
|
||||||
|
cat(paste0("\nReading infile 2 i.e gene associated metadata:", infile_metadata))
|
||||||
|
}
|
||||||
|
|
||||||
|
#-------------------------------------------
|
||||||
|
# outfile: csv file containing AF and OR
|
||||||
|
#-------------------------------------------
|
||||||
|
if (is.null(outfile)){
|
||||||
|
# out_filename_af_or = paste0(tolower(gene), '_meta_data_with_AF_OR.csv')
|
||||||
|
out_filename_af_or = paste0(tolower(gene), '_af_or.csv')
|
||||||
|
outfile = paste0(outdir, '/', out_filename_af_or)
|
||||||
|
cat("\nOutfile not specified, assuming filename: ", outfile)
|
||||||
|
cat(paste0('\nOutput file with full path:', outfile))
|
||||||
|
}
|
||||||
|
|
||||||
|
#-------------------------------------------
|
||||||
|
# idcol: column name "id"
|
||||||
|
#-------------------------------------------
|
||||||
|
if (is.null(idcol)){
|
||||||
|
idcol = "id"
|
||||||
|
}
|
||||||
|
|
||||||
|
#-------------------------------------------
|
||||||
|
# dr-and-others muts cols: comes from plotting_globals.R
|
||||||
|
# colnames that can be constructed using drug
|
||||||
|
# (dr_mutations_<drug>), (other_mutations_<drug>)
|
||||||
|
#-------------------------------------------
|
||||||
|
# Resolve the dr-mutation column name. Precedence:
#   1. --drmuts_col given on the command line
#   2. a non-NULL dr_muts_col already present in the session
#      NOTE(review): presumably set by import_dirs() in plotting_globals.R -- confirm
#   3. "dr_mutations_<drug>", the naming scheme stated in the header comment
# A missing option must end up with a real column name; otherwise NULL
# is handed on to my_afor().
if (is.null(opt$drmuts_col)){
  if (is.null(dr_muts_col)){
    dr_muts_col = paste0("dr_mutations_", drug)
  }
  cat("\ndrug and other mut colnames not specified, sourcing from globals: "
      , dr_muts_col, "\n")
} else {
  # keep the user's choice even if a sourced helper reassigned the variable
  dr_muts_col = opt$drmuts_col
}
|
||||||
|
|
||||||
|
# Resolve the other-mutation column name. Precedence:
#   1. --othermuts_col given on the command line
#   2. a non-NULL other_muts_col already present in the session
#      NOTE(review): presumably set by import_dirs() in plotting_globals.R -- confirm
#   3. "other_mutations_<drug>", the naming scheme stated in the header comment
# A missing option must end up with a real column name; otherwise NULL
# is handed on to my_afor().
if (is.null(opt$othermuts_col)){
  if (is.null(other_muts_col)){
    other_muts_col = paste0("other_mutations_", drug)
  }
  cat("\ndrug and other mut colnames not specified, sourcing from globals: "
      , other_muts_col, "\n")
} else {
  # keep the user's choice even if a sourced helper reassigned the variable
  other_muts_col = opt$othermuts_col
}
|
||||||
|
|
||||||
|
# Informing the user of the sensible defaults being used:
|
||||||
|
cat("======================"
|
||||||
|
, "\nParameters passed:"
|
||||||
|
, "\n======================"
|
||||||
|
, "\nDRUG name: ", drug, "\n"
|
||||||
|
, "\nGENE name: ", gene, "\n"
|
||||||
|
, "\nReading infile 1 i.e raw data: ", infile_master, "\n"
|
||||||
|
, "\nReading infile 2 i.e gene associated metadata:", infile_metadata, "\n"
|
||||||
|
, '\nOutput file with full path:', outfile, "\n"
|
||||||
|
, "\nColumn name of id:", idcol, "\n"
|
||||||
|
, "\ndr mutation colname:", dr_muts_col, "\n"
|
||||||
|
, "\nother mutation colname:", other_muts_col, "\n")
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
#============================
|
||||||
|
# call function: my_afor()
|
||||||
|
#=============================
|
||||||
|
my_afor( drug
|
||||||
|
, gene
|
||||||
|
, infile_master
|
||||||
|
, infile_metadata
|
||||||
|
, outfile
|
||||||
|
, idcol
|
||||||
|
, dr_muts_col
|
||||||
|
, other_muts_col
|
||||||
|
)
|
||||||
|
#=======================================================================
|
|
@ -1,15 +1,16 @@
|
||||||
my_afor <- function ( infile_master
|
my_afor <- function ( drug
|
||||||
|
, gene
|
||||||
|
, infile_master
|
||||||
, infile_metadata
|
, infile_metadata
|
||||||
, outfile
|
, outfile
|
||||||
, drug
|
, idcol
|
||||||
, gene
|
|
||||||
, idcol = "id"
|
|
||||||
, dr_muts_col
|
, dr_muts_col
|
||||||
, other_muts_col){
|
, other_muts_col){
|
||||||
|
|
||||||
#===========================================
|
#===========================================
|
||||||
# 1: Read master/raw data stored in Data/
|
# 1: Read master/raw data stored in Data/
|
||||||
#===========================================
|
#===========================================
|
||||||
|
cat(infile_master)
|
||||||
raw_data_all = read.csv(infile_master, stringsAsFactors = F)
|
raw_data_all = read.csv(infile_master, stringsAsFactors = F)
|
||||||
|
|
||||||
cat("\nExtracting columns based on variables:\n"
|
cat("\nExtracting columns based on variables:\n"
|
||||||
|
|
|
@ -69,7 +69,8 @@ if not outdir:
|
||||||
# input
|
# input
|
||||||
#=======
|
#=======
|
||||||
#in_filename = 'merged_df3.csv'
|
#in_filename = 'merged_df3.csv'
|
||||||
in_filename = gene.lower() + '_complex_mcsm_norm.csv'
|
#in_filename = gene.lower() + '_complex_mcsm_norm.csv'
|
||||||
|
in_filename = gene.lower() + '_complex_mcsm_norm_SRY.csv' # gid
|
||||||
infile_merged_df3 = outdir + '/' + in_filename
|
infile_merged_df3 = outdir + '/' + in_filename
|
||||||
print('Input file: ', infile_merged_df3
|
print('Input file: ', infile_merged_df3
|
||||||
, '\n============================================================')
|
, '\n============================================================')
|
||||||
|
|
|
@ -69,7 +69,6 @@ import_dirs(drug, gene)
|
||||||
# dup_muts
|
# dup_muts
|
||||||
#***********************************
|
#***********************************
|
||||||
#infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
#infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
||||||
#infile = ""
|
|
||||||
|
|
||||||
#if (!exists("infile") && exists("gene")){
|
#if (!exists("infile") && exists("gene")){
|
||||||
if (!is.character(infile) && exists("gene")){
|
if (!is.character(infile) && exists("gene")){
|
||||||
|
@ -104,7 +103,6 @@ cat(paste0("\nVariables imported:"
|
||||||
cat("plots will output to:", plotdir)
|
cat("plots will output to:", plotdir)
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
# begin plots
|
# begin plots
|
||||||
|
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
# barplot for mscm stability
|
# barplot for mscm stability
|
||||||
# ------------------------------
|
# ------------------------------
|
||||||
|
|
|
@ -34,9 +34,10 @@ In progress...
|
||||||
./rd_df.py -d <drug> -g <gene> # fixme: input tsv file is sourced manually from website!
|
./rd_df.py -d <drug> -g <gene> # fixme: input tsv file is sourced manually from website!
|
||||||
|
|
||||||
#==============================
|
#==============================
|
||||||
# af_or calcs: different types
|
# af_or_calcs.R: calculates allele frequency (AF) and odds ratio (OR)
|
||||||
|
# optional args: sensible defaults are used when omitted
|
||||||
#==============================
|
#==============================
|
||||||
./af_or_calcs.R -d <drug> -g <gene># fixme: No conditional dir structure
|
./af_or_calcs.R -d <drug> -g <gene>
|
||||||
|
|
||||||
#==============================
|
#==============================
|
||||||
# af_or calcs: kinship
|
# af_or calcs: kinship
|
||||||
|
@ -62,6 +63,9 @@ USE THE BELOW from within the or_kinship_link.py script or something?! as part o
|
||||||
# combining dfs: combining_dfs.py
|
# combining dfs: combining_dfs.py
|
||||||
#==============================
|
#==============================
|
||||||
# FIXME: combining_FIXME.py
|
# FIXME: combining_FIXME.py
|
||||||
./combining_dfs.py --d <drug> -g <gene>
|
./combining_dfs.py -d <drug> -g <gene>
|
||||||
|
|
||||||
|
|
||||||
|
#==============================
|
||||||
|
# mut_electrostatic_changes.py
|
||||||
|
#==============================
|
||||||
|
./mut_electrostatic_changes.py -d <drug> -g <gene>
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue