LSHTM_analysis/dynamut/run_format_results_dynamut.py

100 lines
3.9 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 12:15:26 2021
@author: tanu
"""
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# FIXME
# Re-run this script once B07 completes, because the norm is affected by it.
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
#%% load packages
import os
homedir = os.path.expanduser('~')
os.chdir (homedir + '/git/LSHTM_analysis/dynamut')
from format_results_dynamut import *
from format_results_dynamut2 import *
########################################################################
#%% command line args
# argparse is not imported explicitly at the top of this script (it is only
# reachable through the star-imports), so import it here to keep this section
# independent of what those modules happen to expose.
import argparse

# Command line interface of the script.
#   -d/--drug       : used to build the default input/output dirs
#   -g/--gene       : used to build the input/output file names (mandatory)
#   --datadir       : root data dir
#   -i/--input_dir  : overrides the default input dir
#   -o/--output_dir : overrides the default output dir
#   -m/--make_dirs  : flag
#   --debug         : flag
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug'      , help = 'drug name (case sensitive). Needed unless both --input_dir and --output_dir are given', default = None)
# gene is always needed to build the file names, so let argparse report a
# missing value as a usage error instead of failing later with a traceback.
arg_parser.add_argument('-g', '--gene'      , help = 'gene name (case sensitive)', required = True)
arg_parser.add_argument('--datadir'         , help = 'Data Directory. By default, it assumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir' , help = 'Input dir containing pdb files. By default, it assumes <datadir> + <drug> + input')
arg_parser.add_argument('-o', '--output_dir', help = 'Output dir for results. By default, it assumes <datadir> + <drug> + output')
#arg_parser.add_argument('--mkdir_name'     , help = 'Output dir for processed results. This will be created if it does not exist')
arg_parser.add_argument('-m', '--make_dirs' , help = 'Make dir for input and output', action = 'store_true')
arg_parser.add_argument('--debug'           , help = 'Debug Mode', action = 'store_true')
args = arg_parser.parse_args()
#%%============================================================================
# variable assignment: input and output paths & filenames
drug = args.drug
gene = args.gene
datadir = args.datadir
indir = args.input_dir
outdir = args.output_dir
#outdir_dynamut2 = args.mkdir_name
# NOTE(review): make_dirs is read here but not acted on in the visible part of
# this script -- confirm whether directory creation is still intended.
make_dirs = args.make_dirs

#=======
# sanity checks
#=======
# gene is used unconditionally for the file names below; drug is only needed
# when a default input or output dir has to be derived from it. Report missing
# values as usage errors rather than failing on None further down.
if not gene:
    arg_parser.error('--gene is required')
if not drug and not (indir and outdir):
    arg_parser.error('--drug is required unless both --input_dir and --output_dir are given')

#=======
# dirs
#=======
if not datadir:
    datadir = homedir + '/git/Data/'
# os.path.join(path, '') appends a trailing separator only when it is missing,
# so user-supplied dirs work with or without a trailing slash.
datadir = os.path.join(datadir, '')

if not indir:
    indir = datadir + drug + '/input/'
indir = os.path.join(indir, '')

if not outdir:
    outdir = datadir + drug + '/output/'
outdir = os.path.join(outdir, '')

#if not mkdir_name:
outdir_dynamut = outdir + 'dynamut_results/'
outdir_dynamut2 = outdir + 'dynamut_results/dynamut2/'

# Input file
#infile_dynamut = outdir_dynamut + gene.lower() + '_dynamut_all_output_clean.csv'
infile_dynamut2 = outdir_dynamut2 + gene.lower() + '_dynamut2_output_combined_clean.csv'

# Formatted output filename
# NOTE(review): the input name uses gene.lower() while the output names use
# gene as given, and the dynamut output is placed in the dynamut2 dir --
# both kept as-is; confirm they are intentional.
outfile_dynamut_f = outdir_dynamut2 + gene + '_dynamut_norm.csv'
outfile_dynamut2_f = outdir_dynamut2 + gene + '_dynamut2_norm.csv'
#%%========================================================================
#===============================
# CALL: format_results_dynamut
# DYNAMUT results
# #===============================
# print('Formatting results for:', infile_dynamut)
# dynamut_df_f = format_dynamut_output(infile_dynamut)
# # writing file
# print('Writing formatted dynamut df to csv')
# dynamut_df_f.to_csv(outfile_dynamut_f, index = False)
# print('Finished writing file:'
# , '\nFile:', outfile_dynamut_f
# , '\nExpected no. of rows:', len(dynamut_df_f)
# , '\nExpected no. of cols:', len(dynamut_df_f.columns)
# , '\n=============================================================')
#===============================
# DYNAMUT2 results
# Step 1: format the combined dynamut2 csv via format_dynamut2_output()
# Step 2: write the formatted df to csv and report its dimensions
#===============================
print('Formatting results for:', infile_dynamut2)
dynamut2_df_f = format_dynamut2_output(infile_dynamut2)

print('Writing formatted dynamut2 df to csv')
dynamut2_df_f.to_csv(outfile_dynamut2_f, index = False)

# The report pieces are collected first and then unpacked into a single
# print() call, so they are emitted space-separated on one call as before.
summary_fields = (
    'Finished writing file:',
    '\nFile:', outfile_dynamut2_f,
    '\nExpected no. of rows:', len(dynamut2_df_f),
    '\nExpected no. of cols:', len(dynamut2_df_f.columns),
    '\n=============================================================',
)
print(*summary_fields)
#%%#####################################################################