Script to combine all ORs

This commit is contained in:
Tanushree Tunstall 2020-06-19 14:43:23 +01:00
parent 3497d1ef54
commit 0c3c6fd143
3 changed files with 14 additions and 19 deletions

View file

@@ -59,7 +59,7 @@ cat(paste0('Reading infile2: gene associated metadata:', infile_metadata))
# outdir = 'git/Data/pyrazinamide/output' # outdir = 'git/Data/pyrazinamide/output'
outdir = paste0('~/git/Data', '/', drug, '/', 'output') outdir = paste0('~/git/Data', '/', drug, '/', 'output')
#out_filename = paste0(tolower(gene), '_meta_data_with_AF_OR.csv') #out_filename = paste0(tolower(gene), '_meta_data_with_AF_OR.csv')
out_filename = paste0(tolower(gene), '_AF_OR.csv') out_filename = paste0(tolower(gene), '_af_or.csv')
outfile = paste0(outdir, '/', out_filename) outfile = paste0(outdir, '/', out_filename)
cat(paste0('Output file with full path:', outfile)) cat(paste0('Output file with full path:', outfile))
#%% end of variable assignment for input and output files #%% end of variable assignment for input and output files

View file

@@ -41,7 +41,7 @@ homedir = os.path.expanduser('~')
# set working dir # set working dir
os.getcwd() os.getcwd()
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis') os.chdir(homedir + '/git/LSHTM_analysis/scripts')
os.getcwd() os.getcwd()
#======================================================================= #=======================================================================
#%% command line args #%% command line args

View file

@@ -5,50 +5,45 @@ Created on Wed Jun 10 11:13:49 2020
@author: tanu @author: tanu
""" """
#=======================================================================
#%% useful links #%% useful links
#https://chrisalbon.com/python/data_wrangling/pandas_join_merge_dataframe/ #https://chrisalbon.com/python/data_wrangling/pandas_join_merge_dataframe/
#https://kanoki.org/2019/11/12/how-to-use-regex-in-pandas/ #https://kanoki.org/2019/11/12/how-to-use-regex-in-pandas/
#https://stackoverflow.com/questions/40348541/pandas-diff-with-string #https://stackoverflow.com/questions/40348541/pandas-diff-with-string
#%% #=======================================================================
#%% specify dirs
import os, sys import os, sys
import pandas as pd import pandas as pd
#import numpy as np #import numpy as np
import re import re
from find_missense import find_missense
import argparse import argparse
#%%
# homedir
homedir = os.path.expanduser('~') homedir = os.path.expanduser('~')
#os.chdir(homedir + '/git/Misc/jody_pza') os.chdir(homedir + '/git/LSHTM_analysis/scripts')
# local import
from find_missense import find_missense
#======================================================================= #=======================================================================
#%% command line args #%% command line args
arg_parser = argparse.ArgumentParser() arg_parser = argparse.ArgumentParser()
#arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
#arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None) arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None)
arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None) # case sensitive arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None) # case sensitive
#arg_parser.add_argument('-p', '--outpath', help = 'output path', default = outpath)
#arg_parser.add_argument('-o', '--outfile', help = 'output filename', default = outfile_or_kin)
arg_parser.add_argument('-s', '--start_coord', help = 'start of coding region (cds) of gene', default = 2288681) # pnca cds arg_parser.add_argument('-s', '--start_coord', help = 'start of coding region (cds) of gene', default = 2288681) # pnca cds
arg_parser.add_argument('-e', '--end_coord', help = 'end of coding region (cds) of gene', default = 2289241) # pnca cds arg_parser.add_argument('-e', '--end_coord', help = 'end of coding region (cds) of gene', default = 2289241) # pnca cds
args = arg_parser.parse_args() args = arg_parser.parse_args()
#======================================================================= #=======================================================================
#%% variables #%% variables
#or_file
#info_file
#short_info_file
#gene = 'pncA' #gene = 'pncA'
#drug = 'pyrazinamide' #drug = 'pyrazinamide'
start_cds = args.start_coord
end_cds = args.end_coord
# cmd variables
gene = args.gene gene = args.gene
drug = args.drug drug = args.drug
start_cds = args.start_coord
end_cds = args.end_coord
#======================================================================= #=======================================================================
#%% input and output dirs and files #%% input and output dirs and files
#======= #=======
@@ -82,7 +77,7 @@ print('gene OR file: ', gene_or
#======= #=======
# output # output
#======= #=======
gene_or_filename = gene.lower() + '_AF_OR_kinship.csv' # other one is called AFandOR gene_or_filename = gene.lower() + '_af_or_kinship.csv' # other one is called AFandOR
outfile_or_kin = outdir + '/' + gene_or_filename outfile_or_kin = outdir + '/' + gene_or_filename
print('Output file: ', outfile_or_kin print('Output file: ', outfile_or_kin
, '\n============================================================') , '\n============================================================')