added cmd option for dynamut2 formatting results

This commit is contained in:
Tanushree Tunstall 2021-10-18 13:52:29 +01:00
parent c5c4d74acf
commit a67a07db2d
7 changed files with 65 additions and 16 deletions

0
dynamut/format_results_dynamut.py Normal file → Executable file
View file

0
dynamut/format_results_dynamut2.py Normal file → Executable file
View file

49
dynamut/run_format_results_dynamut.py Normal file → Executable file
View file

@ -20,8 +20,45 @@ from format_results_dynamut2 import *
# variables
#%% command line args
# gene and drug come from the command line; nothing is hard-coded here.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name (case sensitive)', default = None)
arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', default = None)
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes <datadir>/<drug>/input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes <datadir>/<drug>/output')
#arg_parser.add_argument('-m', '--make_dirs', help = 'Make dir for input and output', action='store_true') # should be handled elsewhere!
arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode')
args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output paths & filenames
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
#make_dirs = args.make_dirs

# Both values are concatenated into paths/filenames further down; without
# this check a missing flag surfaces as a TypeError (str + None) instead of
# a usage message.
if not drug or not gene:
    arg_parser.error('both -d/--drug and -g/--gene are required')

#%% input and output dirs and files
#=======
# dirs
#=======
# Fill in a default only when the corresponding option was not supplied, so
# that --datadir / -i / -o given on the command line are honoured.
if not datadir:
    datadir = homedir + '/' + 'git/Data'

if not indir:
    indir = datadir + '/' + drug + '/input'

if not outdir:
    outdir = datadir + '/' + drug + '/output'
#%%=====================================================================
@ -29,12 +66,12 @@ outdir_dynamut = outdir + '/dynamut_results/'
# Directory holding the dynamut2 results: a 'dynamut2' subdirectory under
# '<outdir>/dynamut_results/'.
outdir_dynamut2 = outdir + '/dynamut_results/dynamut2/'
# Input file
# File names below are prefixed with the gene name.
# NOTE(review): this view shows both an active and a commented-out copy of
# the dynamut input file line; presumably only one survives in the real
# file -- confirm against the checked-out script.
infile_dynamut = outdir_dynamut + gene + '_dynamut_all_output_clean.csv'
#infile_dynamut = outdir_dynamut + gene + '_dynamut_all_output_clean.csv'
infile_dynamut2 = outdir_dynamut2 + gene + '_dynamut2_output_combined_clean.csv'
# Formatted output filename
# NOTE(review): outfile_dynamut2_f is assigned twice below; as written, the
# later assignment ('_dynamut2_norm.csv') is the one that takes effect and
# the '_complex_dynamut2_norm.csv' value is discarded.
outfile_dynamut_f = outdir_dynamut2 + gene + '_complex_dynamut_norm.csv'
outfile_dynamut2_f = outdir_dynamut2 + gene + '_complex_dynamut2_norm.csv'
#outfile_dynamut_f = outdir_dynamut2 + gene + '_dynamut_norm.csv'
outfile_dynamut2_f = outdir_dynamut2 + gene + '_dynamut2_norm.csv'
#===============================
# CALL: format_results_dynamut
@ -69,4 +106,4 @@ print('Finished writing file:'
, '\nExpected no. of cols:', len(dynamut2_df_f.columns)
, '\n=============================================================')
#%%#####################################################################
#%%#####################################################################

View file

@ -17,8 +17,8 @@ my_host = 'http://biosig.unimelb.edu.au'
#headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"}
# TODO: add cmd line args
#gene = 'gid'
drug = 'streptomycin'
# gene =
# drug =
datadir = homedir + '/git/Data/'
indir = datadir + drug + '/input/'
outdir = datadir + drug + '/output/'
@ -41,4 +41,4 @@ get_results(url_file = my_url_file
, output_dir = outdir
, outfile_suffix = my_suffix)
########################################################################
########################################################################

View file

@ -13,10 +13,25 @@ CHUNK=$3
# Create <OUTDIR>/<CHUNK>/chain_added and work inside it. Stop if either step
# fails so that split never writes its batch files into the wrong directory.
mkdir -p "${OUTDIR}/${CHUNK}/chain_added" || exit 1
cd "${OUTDIR}/${CHUNK}/chain_added" || exit 1
# mkdir above makes 3 nested dirs, hence ../../.. to reach INFILE again
# (assumes OUTDIR is a single path component and INFILE is relative to the
# directory the script was started from).
# Writes CHUNK lines per file, with numeric suffixes: snp_batch_00, snp_batch_01, ...
split -l "${CHUNK}" -d "../../../${INFILE}" snp_batch_
# use case
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps.csv snp_batches 50 #Date: 20/09/2021
########################################################################
# use cases
# Date: 20/09/2021
# sed -e 's/^/A /g' katg_mcsm_formatted_snps.csv > katg_mcsm_formatted_snps_chain.csv
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps_chain.csv snp_batches 50
# Date: 01/10/2021
# sed -e 's/^/A /g' rpob_mcsm_formatted_snps.csv > rpob_mcsm_formatted_snps_chain.csv
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh rpob_mcsm_formatted_snps_chain.csv snp_batches 50
# Date: 02/10/2021
# sed -e 's/^/A /g' alr_mcsm_formatted_snps.csv > alr_mcsm_formatted_snps_chain.csv
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 50
# Date: 05/10/2021
#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 20
# add .txt to the files
########################################################################

View file

@ -81,9 +81,6 @@ indir = args.input_dir
outdir = args.output_dir
make_dirs = args.make_dirs
#drug = 'streptomycin'
#gene = 'gid'
#%% input and output dirs and files
#=======
# dirs
@ -1373,4 +1370,4 @@ if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0:
print(u'\u2698' * 50,
'\nEnd of script: Data extraction and writing files'
'\n' + u'\u2698' * 50 )
#%% end of script
#%% end of script

0
scripts/deepddg_format.py Normal file → Executable file
View file