diff --git a/mcsm/mcsm.py b/mcsm/mcsm.py index 817089c..f992c04 100644 --- a/mcsm/mcsm.py +++ b/mcsm/mcsm.py @@ -243,7 +243,7 @@ def format_mcsm_output(mcsm_outputcsv): mcsm_data = mcsm_data.drop_duplicates(['mutationinformation']) print('Dim of data after removing duplicate muts:', mcsm_data.shape , '\n===========================================================') -#%%===================================================================== +#%%===================================================================== ############# # Create col: duet_outcome ############# diff --git a/mcsm/run_mcsm.py b/mcsm/run_mcsm.py index 9bfd140..bd84e28 100755 --- a/mcsm/run_mcsm.py +++ b/mcsm/run_mcsm.py @@ -79,13 +79,13 @@ gene_match = gene + '_p.' # directories #============ if not datadir: - datadir = homedir + '/' + 'git/Data' + datadir = homedir + '/git/Data/' if not indir: - indir = datadir + '/' + drug + '/input' + indir = datadir + drug + '/input/' if not outdir: - outdir = datadir + '/' + drug + '/output' + outdir = datadir + drug + '/output/' #======= # input @@ -95,7 +95,7 @@ if pdb_filename: else: in_filename_pdb = gene.lower() + '_complex.pdb' -infile_pdb = indir + '/' + in_filename_pdb +infile_pdb = indir + in_filename_pdb #in_filename_snps = gene.lower() + '_mcsm_snps.csv' #(outfile_mcsm_snps, from data_extraction.py) #infile_snps = outdir + '/' + in_filename_snps @@ -104,8 +104,8 @@ if mutation_filename: in_filename_snps = mutation_filename else: in_filename_snps = gene.lower() + '_mcsm_formatted_snps.csv' - -infile_snps = outdir + '/' + in_filename_snps + +infile_snps = outdir + in_filename_snps #======= # output @@ -113,13 +113,13 @@ infile_snps = outdir + '/' + in_filename_snps # mcsm_results globals if not result_urls: result_urls_filename = gene.lower() + '_result_urls.txt' - result_urls = outdir + '/' + result_urls_filename + result_urls = outdir + result_urls_filename if DEBUG: print('DEBUG: Result URLs:', result_urls) if not mcsm_output: mcsm_output_filename = 
gene.lower() + '_mcsm_output.csv' - mcsm_output = outdir + '/' + mcsm_output_filename + mcsm_output = outdir + mcsm_output_filename if DEBUG: print('DEBUG: mCSM output CSV file:', mcsm_output) @@ -127,7 +127,7 @@ if not mcsm_output: #out_filename_format = gene.lower() + '_mcsm_processed.csv' if not outfile_format: out_filename_format = gene.lower() + '_complex_mcsm_norm.csv' - outfile_format = outdir + '/' + out_filename_format + outfile_format = outdir + out_filename_format if DEBUG: print('DEBUG: formatted CSV output:', outfile_format) #%%===================================================================== diff --git a/mcsm_na/examples.py b/mcsm_na/examples.py old mode 100644 new mode 100755 diff --git a/mcsm_na/format_results_mcsm_na.py b/mcsm_na/format_results_mcsm_na.py old mode 100644 new mode 100755 diff --git a/mcsm_na/get_results_mcsm_na.py b/mcsm_na/get_results_mcsm_na.py old mode 100644 new mode 100755 diff --git a/mcsm_na/run_format_results_mcsm_na.py b/mcsm_na/run_format_results_mcsm_na.py old mode 100644 new mode 100755 index a5886b4..b2175f5 --- a/mcsm_na/run_format_results_mcsm_na.py +++ b/mcsm_na/run_format_results_mcsm_na.py @@ -52,15 +52,16 @@ if not outdir: outdir_na = outdir + 'mcsm_na_results/' # Input file -infile_mcsm_na = outdir_na + gene + '_output_combined_clean.tsv' +infile_mcsm_na = outdir_na + gene.lower() + '_output_combined_clean.tsv' # Formatted output file -outfile_mcsm_na_f = outdir_na + gene + '_complex_mcsm_na_norm.csv' +outfile_mcsm_na_f = outdir_na + gene.lower() + '_complex_mcsm_na_norm.csv' -#========================== +#=========================================== # CALL: format_results_mcsm_na() # Data: gid+streptomycin -#========================== +# Data: rpob+rifampicin, date: 18/11/2021 +#=========================================== print('Formatting results for:', infile_mcsm_na) mcsm_na_df_f = format_mcsm_na_output(mcsm_na_output_tsv = infile_mcsm_na) diff --git a/mcsm_na/run_submit_mcsm_na.py 
b/mcsm_na/run_submit_mcsm_na.py index 8f7ed90..4de2455 100755 --- a/mcsm_na/run_submit_mcsm_na.py +++ b/mcsm_na/run_submit_mcsm_na.py @@ -18,14 +18,14 @@ print(my_prediction_url) # TODO: add cmd line args #gene = 'gid' -drug = 'streptomycin' -datadir = homedir + '/git/Data' -indir = datadir + '/' + drug + '/input' -outdir = datadir + '/' + drug + '/output' -outdir_mcsm_na = outdir + 'mcsm_na_results' +drug = '' +datadir = homedir + '/git/Data/' +indir = datadir + drug + '/input/' +outdir = datadir + drug + '/output/' +outdir_mcsm_na = outdir + 'mcsm_na_results/' my_nuc_type = 'RNA' -my_pdb_file = indir + '/gid_complex.pdb' +my_pdb_file = indir + gene.lower() + '_complex.pdb' #============================================================================= # batch 26: 25.txt # RAN: 16 Feb: diff --git a/mcsm_na/split_csv.sh b/mcsm_na/split_csv.sh new file mode 100755 index 0000000..89ccd6d --- /dev/null +++ b/mcsm_na/split_csv.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# FIXME: This is written for expediency to kickstart running dynamut and mcsm-NA + +# Usage: ~/git/LSHTM_analysis/mcsm_na/split_csv.sh <infile> <outdir> <chunk_size> +# copy your snp file to split into the mcsm_na dir + +INFILE=$1 +OUTDIR=$2 +CHUNK=$3 + +mkdir -p "${OUTDIR}/${CHUNK}" +cd "${OUTDIR}/${CHUNK}" || exit 1 + +split "../../${INFILE}" -l "${CHUNK}" -d snp_batch_ + +# use case +#~/git/LSHTM_analysis/mcsm_na/split_csv.sh gid_mcsm_formatted_snps.csv snp_batches 50 +#~/git/LSHTM_analysis/mcsm_na/split_csv.sh embb_mcsm_formatted_snps.csv snp_batches 50 + + +#~/git/LSHTM_analysis/mcsm_na/split_csv.sh rpob_mcsm_formatted_snps_chain.csv snp_batches 20 # date: 17/11/2021 + + +#accidentally replaced the original rpob batch files + +#~/git/LSHTM_analysis/mcsm_na/split_csv.sh 5uhc_mcsm_formatted_snps_chain.csv snp_batches_5uhc 20 # date: 17/11/2021 diff --git a/mcsm_na/submit_mcsm_na.py b/mcsm_na/submit_mcsm_na.py old mode 100644 new mode 100755 diff --git a/scripts/combining_dfs.py b/scripts/combining_dfs.py index cb47d50..e6ea6cc 100755 --- 
a/scripts/combining_dfs.py +++ b/scripts/combining_dfs.py @@ -122,7 +122,9 @@ if gene.lower() == "gid": in_filename_mcsm = gene.lower() + '_complex_mcsm_norm_SRY.csv' # was incorrectly SAM previously if gene.lower() == "embb": print("\nReading mCSM file for gene:", gene) - in_filename_mcsm = gene.lower() + '_complex_mcsm_norm1.csv' + #in_filename_mcsm = gene.lower() + '_complex_mcsm_norm1.csv' #798 + in_filename_mcsm = gene.lower() + '_complex_mcsm_norm2.csv' #844 + if gene.lower() in gene_list_normal: print("\nReading mCSM file for gene:", gene) in_filename_mcsm = gene.lower() + '_complex_mcsm_norm.csv'