Script to combine all ORs

This commit is contained in:
Tanushree Tunstall 2020-06-19 14:43:23 +01:00
parent 07258120de
commit c98ca7c8ae
3 changed files with 14 additions and 19 deletions

View file

@ -5,50 +5,45 @@ Created on Wed Jun 10 11:13:49 2020
@author: tanu
"""
#=======================================================================
#%% useful links
#https://chrisalbon.com/python/data_wrangling/pandas_join_merge_dataframe/
#https://kanoki.org/2019/11/12/how-to-use-regex-in-pandas/
#https://stackoverflow.com/questions/40348541/pandas-diff-with-string
#%%
#=======================================================================
#%% specify dirs
import os, sys
import pandas as pd
#import numpy as np
import re
from find_missense import find_missense
import argparse
#%%
# homedir
# Expand '~' to the current user's home directory.
homedir = os.path.expanduser('~')
#os.chdir(homedir + '/git/Misc/jody_pza')
# Switch the working directory to the scripts folder before the local import below.
os.chdir(homedir + '/git/LSHTM_analysis/scripts')
# local import
# NOTE(review): find_missense is already imported with the other imports at the top;
# presumably this repeat exists so the import resolves after the chdir when the
# cells are run interactively -- confirm and drop one of the two if not needed.
from find_missense import find_missense
#=======================================================================
#%% command line args
# Command-line interface: every option is optional and falls back to its default.
arg_parser = argparse.ArgumentParser()
#arg_parser.add_argument('-d', '--drug', help='drug name', default = 'pyrazinamide')
#arg_parser.add_argument('-g', '--gene', help='gene name', default = 'pncA') # case sensitive
# NOTE(review): gene defaults to None, yet the output filename is later built with
# gene.lower(); confirm whether -g should be made mandatory.
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = None)
arg_parser.add_argument('-g', '--gene', help = 'gene name (case sensitive)', default = None) # case sensitive
#arg_parser.add_argument('-p', '--outpath', help = 'output path', default = outpath)
#arg_parser.add_argument('-o', '--outfile', help = 'output filename', default = outfile_or_kin)
# type = int: argparse passes command-line values through as str unless a type is
# given, so without it a user-supplied coordinate would be a str while the
# default is an int.
arg_parser.add_argument('-s', '--start_coord', help = 'start of coding region (cds) of gene', default = 2288681, type = int) # pnca cds
arg_parser.add_argument('-e', '--end_coord', help = 'end of coding region (cds) of gene', default = 2289241, type = int) # pnca cds
# Parse sys.argv once; the parsed values are read from `args` further down.
args = arg_parser.parse_args()
#=======================================================================
#%% variables
#or_file
#info_file
#short_info_file
#gene = 'pncA'
#drug = 'pyrazinamide'
# cmd variables
# Bind each parsed command-line value to its module-level name exactly once.
gene = args.gene
drug = args.drug
# start / end of the gene's coding region (cds), as given by -s / -e
start_cds = args.start_coord
end_cds = args.end_coord
#=======================================================================
#%% input and output dirs and files
#=======
@ -82,7 +77,7 @@ print('gene OR file: ', gene_or
#=======
# output
#=======
# Output csv name: lower-cased gene name followed by a fixed, all-lowercase suffix.
gene_or_filename = gene.lower() + '_af_or_kinship.csv' # other one is called AFandOR
# Full output path inside outdir.
outfile_or_kin = outdir + '/' + gene_or_filename
print('Output file: ', outfile_or_kin
      , '\n============================================================')