saving work after running combining_dfs.py

This commit is contained in:
Tanushree Tunstall 2021-11-12 14:16:48 +00:00
parent dad8f526a2
commit 4eaa0b5d2b
7 changed files with 136 additions and 92 deletions

View file

@ -22,12 +22,11 @@ from pandas.api.types import is_numeric_dtype
sys.path.append(homedir + '/git/LSHTM_analysis/scripts')
from reference_dict import up_3letter_aa_dict
from reference_dict import oneletter_aa_dict
#%%#####################################################################
#%%============================================================================
def format_mcsm_ppi2_output(mcsm_ppi2_output_csv):
"""
@param mcsm_ppi2_output_csv: file containing mcsm_ppi2_results for all muts
@param mcsm_ppi2_output_csv: file containing mcsm_ppi2_results for all mcsm snps
which is the result of combining all mcsm_ppi2 batch results, and using
bash scripts to combine all the batch results into one file.
Formatting df to a pandas df and output as csv.

View file

@ -19,19 +19,22 @@ arg_parser.add_argument('-g', '--gene' , help = 'gene name (case sensitive)
arg_parser.add_argument('--datadir' , help = 'Data Directory. By default, it assumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir' , help = 'Input dir containing pdb files. By default, it assumes homedir + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes homedir + <drug> + output')
arg_parser.add_argument('--input_file' , help = 'Input file with combined mcsm_ppi2 results. By default, it assumes <output_dir> + mcsm_ppi2/ + <gene (lowercase)> + _output_combined_clean.csv')
#arg_parser.add_argument('--mkdir_name' , help = 'Output dir for processed results. This will be created if it does not exist')
arg_parser.add_argument('-m', '--make_dirs' , help = 'Make dir for input and output', action='store_true')
arg_parser.add_argument('--debug' , action = 'store_true' , help = 'Debug Mode')
args = arg_parser.parse_args()
#%%============================================================================
# variable assignment: input and output paths & filenames
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
infile_mcsm_ppi2 = args.input_file
#outdir_ppi2 = args.mkdir_name
make_dirs = args.make_dirs
@ -53,7 +56,8 @@ if not outdir:
outdir_ppi2 = outdir + 'mcsm_ppi2/'
# Input file
infile_mcsm_ppi2 = outdir_ppi2 + gene.lower() + '_output_combined_clean.csv'
if not infile_mcsm_ppi2:
infile_mcsm_ppi2 = outdir_ppi2 + gene.lower() + '_output_combined_clean.csv'
# Formatted output file
outfile_mcsm_ppi2_f = outdir_ppi2 + gene.lower() + '_complex_mcsm_ppi2_norm.csv'