diff --git a/dynamut/format_results_dynamut.py b/dynamut/format_results_dynamut.py old mode 100644 new mode 100755 diff --git a/dynamut/format_results_dynamut2.py b/dynamut/format_results_dynamut2.py old mode 100644 new mode 100755 diff --git a/dynamut/run_format_results_dynamut.py b/dynamut/run_format_results_dynamut.py old mode 100644 new mode 100755 index 02af524..dd9f7fb --- a/dynamut/run_format_results_dynamut.py +++ b/dynamut/run_format_results_dynamut.py @@ -20,8 +20,45 @@ from format_results_dynamut2 import * # variables # TODO: add cmd line args -gene = 'gid' -drug = 'streptomycin' +#gene = +#drug = + +#%% command line args +arg_parser = argparse.ArgumentParser() +arg_parser.add_argument('-d', '--drug', help='drug name (case sensitive)', default = None) +arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', default = None) +arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') +arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') +arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assmes homedir + + output') +#arg_parser.add_argument('-m', '--make_dirs', help = 'Make dir for input and output', action='store_true') # should be handled elsewhere! 
+ +arg_parser.add_argument('--debug', action ='store_true', help = 'Debug Mode') + +args = arg_parser.parse_args() +#======================================================================= +#%% variable assignment: input and output paths & filenames +drug = args.drug +gene = args.gene +datadir = args.datadir +indir = args.input_dir +outdir = args.output_dir +#make_dirs = args.make_dirs + +#%% input and output dirs and files +#======= +# dirs +#======= +if not datadir: + datadir = homedir + '/' + 'git/Data' + +if not indir: + indir = datadir + '/' + drug + '/input' + +if not outdir: + outdir = datadir + '/' + drug + '/output' + +#%%===================================================================== + datadir = homedir + '/git/Data' indir = datadir + '/' + drug + '/input' outdir = datadir + '/' + drug + '/output' @@ -29,12 +66,12 @@ outdir_dynamut = outdir + '/dynamut_results/' outdir_dynamut2 = outdir + '/dynamut_results/dynamut2/' # Input file -infile_dynamut = outdir_dynamut + gene + '_dynamut_all_output_clean.csv' +#infile_dynamut = outdir_dynamut + gene + '_dynamut_all_output_clean.csv' infile_dynamut2 = outdir_dynamut2 + gene + '_dynamut2_output_combined_clean.csv' # Formatted output filename -outfile_dynamut_f = outdir_dynamut2 + gene + '_complex_dynamut_norm.csv' -outfile_dynamut2_f = outdir_dynamut2 + gene + '_complex_dynamut2_norm.csv' +#outfile_dynamut_f = outdir_dynamut2 + gene + '_dynamut_norm.csv' +outfile_dynamut2_f = outdir_dynamut2 + gene + '_dynamut2_norm.csv' #=============================== # CALL: format_results_dynamut @@ -69,4 +106,4 @@ print('Finished writing file:' , '\nExpected no. 
of cols:', len(dynamut2_df_f.columns) , '\n=============================================================') -#%%##################################################################### \ No newline at end of file +#%%##################################################################### diff --git a/dynamut/run_get_results_dynamut.py b/dynamut/run_get_results_dynamut.py index e9e82ef..029e934 100755 --- a/dynamut/run_get_results_dynamut.py +++ b/dynamut/run_get_results_dynamut.py @@ -17,8 +17,8 @@ my_host = 'http://biosig.unimelb.edu.au' #headers = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} # TODO: add cmd line args -#gene = 'gid' -drug = 'streptomycin' +# gene = +# drug = datadir = homedir + '/git/Data/' indir = datadir + drug + '/input/' outdir = datadir + drug + '/output/' @@ -41,4 +41,4 @@ get_results(url_file = my_url_file , output_dir = outdir , outfile_suffix = my_suffix) -######################################################################## \ No newline at end of file +######################################################################## diff --git a/dynamut/split_csv_chain.sh b/dynamut/split_csv_chain.sh index 2526b3f..ac60faa 100755 --- a/dynamut/split_csv_chain.sh +++ b/dynamut/split_csv_chain.sh @@ -13,10 +13,25 @@ CHUNK=$3 mkdir -p ${OUTDIR}/${CHUNK}/chain_added cd ${OUTDIR}/${CHUNK}/chain_added -# makes the 2 dirs, hence ../.. +# makes the 3 dirs, hence ../../.. 
split ../../../${INFILE} -l ${CHUNK} -d snp_batch_ -# use case -#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps.csv snp_batches 50 #Date: 20/09/2021 +######################################################################## +# use cases +# Date: 20/09/2021 +# sed -e 's/^/A /g' katg_mcsm_formatted_snps.csv > katg_mcsm_formatted_snps_chain.csv +#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh katg_mcsm_formatted_snps_chain.csv snp_batches 50 +# Date: 01/10/2021 +# sed -e 's/^/A /g' rpob_mcsm_formatted_snps.csv > rpob_mcsm_formatted_snps_chain.csv +#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh rpob_mcsm_formatted_snps_chain.csv snp_batches 50 + +# Date: 02/10/2021 +# sed -e 's/^/A /g' alr_mcsm_formatted_snps.csv > alr_mcsm_formatted_snps_chain.csv +#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 50 + +# Date: 05/10/2021 +#~/git/LSHTM_analysis/dynamut/split_csv_chain.sh alr_mcsm_formatted_snps_chain.csv snp_batches 20 + # add .txt to the files +######################################################################## diff --git a/scripts/data_extraction.py b/scripts/data_extraction.py index 5582632..31f8a27 100755 --- a/scripts/data_extraction.py +++ b/scripts/data_extraction.py @@ -81,9 +81,6 @@ indir = args.input_dir outdir = args.output_dir make_dirs = args.make_dirs -#drug = 'streptomycin' -#gene = 'gid' - #%% input and output dirs and files #======= # dirs @@ -1373,4 +1370,4 @@ if dr_muts.isin(other_muts).sum() & other_muts.isin(dr_muts).sum() > 0: print(u'\u2698' * 50, '\nEnd of script: Data extraction and writing files' '\n' + u'\u2698' * 50 ) -#%% end of script \ No newline at end of file +#%% end of script diff --git a/scripts/deepddg_format.py b/scripts/deepddg_format.py old mode 100644 new mode 100755