diff --git a/scripts/af_or_calcs.R b/scripts/af_or_calcs.R
new file mode 100755
index 0000000..99f7a00
--- /dev/null
+++ b/scripts/af_or_calcs.R
@@ -0,0 +1,167 @@
+#!/usr/bin/env Rscript
+#########################################################
+# TASK: To calculate Allele Frequency and
+# Odds Ratio from master data
+#
+# Usage: ./af_or_calcs.R -d DRUG -g GENE [-m MASTER_CSV] [-G GENE_CSV]
+#        [-o OUTFILE] [-I IDCOL] [-D DRMUTS_COL] [-O OTHERMUTS_COL]
+# --drug and --gene are mandatory; every other option falls back to a
+# default derived further down in this script.
+#########################################################
+# working dir: the source() calls below use paths relative to it
+setwd("~/git/LSHTM_analysis/scripts")
+getwd()
+
+# load libraries
+#source("Header_TT.R")
+# library() rather than require(): a missing package stops the script here
+library("getopt", quietly = TRUE) # cmd parse arguments
+
+# load functions
+source("functions/plotting_globals.R")
+source("functions/mychisq_or.R")
+source("functions/myaf_or_calcs.R")
+
+#############################################################
+# command line args
+# spec columns: long name, short flag, argument mask, type
+# (mask 1 = argument required, 2 = argument optional)
+#********************
+spec = matrix(c(
+  "drug"          ,"d" , 1, "character",
+  "gene"          ,"g" , 1, "character",
+  "master_data"   ,"m", 2, "character",
+  "gene_data"     ,"G", 2, "character",
+  "outfile"       ,"o" , 2, "character",
+  "idcol"         ,"I", 2, "character",
+  "drmuts_col"    ,"D", 2, "character",
+  "othermuts_col" ,"O", 2, "character"
+
+), byrow = TRUE, ncol = 4)
+
+opt = getopt(spec)
+
+drug = opt$drug
+gene = opt$gene
+infile_master = opt$master_data
+infile_metadata = opt$gene_data
+outfile = opt$outfile
+idcol = opt$idcol
+# opt$drmuts_col / opt$othermuts_col are resolved after import_dirs()
+
+# scalar test: use short-circuit || instead of elementwise |
+if (is.null(drug) || is.null(gene)) {
+  stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
+}
+
+# import_dirs(): defined in one of the files sourced above.
+# NOTE(review): presumably defines datadir and outdir (both are used below
+# but never assigned in this script) -- confirm.
+import_dirs(drug, gene)
+
+# setting sensible defaults for opt args
+
+#----------------------------
+# input file 1: master data
+#----------------------------
+#class(infile_master)
+if (is.null(infile_master)){
+#if (!is.character(infile) && exists("gene")){
+  #in_filename_master = 'original_tanushree_data_v2.csv' #19K
+  in_filename_master = 'mtb_gwas_meta_v6.csv' #35k
+  # NOTE(review): no separator is inserted here (unlike the outdir paths
+  # below); assumes datadir already ends with "/" -- confirm.
+  infile_master = paste0(datadir, in_filename_master)
+  cat("\nInput file 1 not specified, assuming filename: ", infile_master)
+  cat(paste0("\nReading infile 1 i.e raw data: ", infile_master) )
+}
+
+#---------------------------------------------------
+# input file 2: gene associated meta
+# data file to extract valid snps and add calcs to.
+#---------------------------------------------------
+if (is.null(infile_metadata)){
+  # This is outfile_metadata from data_extraction.py
+  in_filename_metadata = paste0(tolower(gene), '_metadata.csv')
+  infile_metadata = paste0(outdir, '/', in_filename_metadata)
+  cat("\nInput file 2 not specified, assuming filename: ", infile_metadata)
+  cat(paste0("\nReading infile 2 i.e gene associated metadata:", infile_metadata))
+}
+
+#-------------------------------------------
+# outfile: csv file containing AF and OR
+#-------------------------------------------
+if (is.null(outfile)){
+  # out_filename_af_or = paste0(tolower(gene), '_meta_data_with_AF_OR.csv')
+  out_filename_af_or = paste0(tolower(gene), '_af_or.csv')
+  outfile = paste0(outdir, '/', out_filename_af_or)
+  cat("\nOutfile not specified, assuming filename: ", outfile)
+  cat(paste0('\nOutput file with full path:', outfile))
+}
+
+#-------------------------------------------
+# idcol: column name "id"
+#-------------------------------------------
+if (is.null(idcol)){
+  idcol = "id"
+}
+
+#-------------------------------------------
+# dr-and-others muts cols: comes from plotting_globals.R
+# colnames that can be constructed using drug
+# (dr_mutations_), (other_mutations_)
+#
+# Precedence: command-line value, then any value already defined once
+# import_dirs() has run, then a name built from the drug.
+# NOTE(review): the "dr_mutations_" + drug / "other_mutations_" + drug
+# pattern is inferred from the note above -- confirm in plotting_globals.R.
+#-------------------------------------------
+if (!is.null(opt$drmuts_col)){
+  dr_muts_col = opt$drmuts_col
+} else {
+  if (!exists("dr_muts_col") || is.null(dr_muts_col)){
+    dr_muts_col = paste0("dr_mutations_", drug)
+  }
+  cat("\ndrug and other mut colnames not specified, sourcing from globals: "
+      , dr_muts_col, "\n")
+}
+
+if (!is.null(opt$othermuts_col)){
+  other_muts_col = opt$othermuts_col
+} else {
+  if (!exists("other_muts_col") || is.null(other_muts_col)){
+    other_muts_col = paste0("other_mutations_", drug)
+  }
+  cat("\ndrug and other mut colnames not specified, sourcing from globals: "
+      , other_muts_col, "\n")
+}
+
+# Informing the user of the sensible defaults being used:
+cat("======================"
+    , "\nParameters passed:"
+    , "\n======================"
+    , "\nDRUG name: ", drug, "\n"
+    , "\nGENE name: ", gene, "\n"
+    , "\nReading infile 1 i.e raw data: ", infile_master, "\n"
+    , "\nReading infile 2 i.e gene associated metadata:", infile_metadata, "\n"
+    , '\nOutput file with full path:', outfile, "\n"
+    , "\nColumn name of id:", idcol, "\n"
+    , "\ndr mutation colname:", dr_muts_col, "\n"
+    , "\nother mutation colname:", other_muts_col, "\n")
+
+#=======================================================================
+#============================
+# call function: my_afor()
+# arguments are passed by name so the call does not depend on the
+# parameter order of my_afor()
+#=============================
+my_afor( drug = drug
+         , gene = gene
+         , infile_master = infile_master
+         , infile_metadata = infile_metadata
+         , outfile = outfile
+         , idcol = idcol
+         , dr_muts_col = dr_muts_col
+         , other_muts_col = other_muts_col
+         )
+#=======================================================================
diff --git a/scripts/functions/myaf_or_calcs.R b/scripts/functions/myaf_or_calcs.R
index 2da3fc8..484c907 100644
--- a/scripts/functions/myaf_or_calcs.R
+++ b/scripts/functions/myaf_or_calcs.R
@@ -1,15 +1,16 @@
-my_afor <- function ( infile_master
+my_afor <- function ( drug
+                     , gene
+                     , infile_master
                      , infile_metadata
                      , outfile
-                     , drug
-                     , gene
-                     , idcol = "id"
+                     , idcol
                      , dr_muts_col
                      , other_muts_col){
 
   #===========================================
   # 1: Read master/raw data stored in Data/
   #===========================================
+  cat(infile_master)
   raw_data_all = read.csv(infile_master, stringsAsFactors = F)
 
   cat("\nExtracting columns based on variables:\n"
diff --git a/scripts/mut_electrostatic_changes.py b/scripts/mut_electrostatic_changes.py
index d25aaca..6620313 100755
--- a/scripts/mut_electrostatic_changes.py
+++ b/scripts/mut_electrostatic_changes.py
@@ -69,7 +69,8 @@ if not outdir:
 # input
 #=======
 #in_filename = 'merged_df3.csv'
-in_filename = gene.lower() + '_complex_mcsm_norm.csv'
+#in_filename = gene.lower() + '_complex_mcsm_norm.csv'
+in_filename = gene.lower() + '_complex_mcsm_norm_SRY.csv' # gid
 infile_merged_df3 = outdir + '/' + in_filename
 print('Input file: ', infile_merged_df3
       , '\n============================================================')
diff --git a/scripts/plotting/basic_barplots.R b/scripts/plotting/basic_barplots.R
index 2da7104..f81160b 100755
--- a/scripts/plotting/basic_barplots.R
+++ b/scripts/plotting/basic_barplots.R
@@ -69,7 +69,6 @@ import_dirs(drug, gene)
 # dup_muts
 #***********************************
 #infile = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
-#infile = ""
 
 #if (!exists("infile") && exists("gene")){
 if (!is.character(infile) && exists("gene")){
@@ -104,7 +103,6 @@ cat(paste0("\nVariables imported:"
 cat("plots will output to:", plotdir)
 #=======================================================================
 # begin plots
-
 # ------------------------------
 # barplot for mscm stability
 # ------------------------------
diff --git a/scripts/running_scripts b/scripts/running_scripts
index ee109db..6d633e9 100644
--- a/scripts/running_scripts
+++ b/scripts/running_scripts
@@ -34,9 +34,10 @@ In progress...
 ./rd_df.py -d -g # fixme: input tsv file is sourced manually from website!
 
 #==============================
-# af_or calcs: different types
+# af_or_calcs.R: calculates af and or
+# optional args fall back to sensible defaults
 #==============================
-./af_or_calcs.R -d -g # fixme: No conditional dir structure
+./af_or_calcs.R -d -g
 
 #==============================
 # af_or calcs: kinship
@@ -62,6 +63,9 @@ USE THE BELOW from within the or_kinship_link.py script or something?! as part o
 # combining dfs: combining_dfs.py
 #==============================
 # FIXME: combining_FIXME.py
-./combining_dfs.py --d -g
-
+./combining_dfs.py -d -g
 
+#==============================
+# mut_electrostatic_changes.py
+#==============================
+./mut_electrostatic_changes.py -d -g