diff --git a/scripts/combining_dfs.py b/scripts/combining_dfs.py index 53361c7..2573f5d 100755 --- a/scripts/combining_dfs.py +++ b/scripts/combining_dfs.py @@ -116,6 +116,7 @@ if not outdir: #======= gene_list_normal = ["pnca", "katg", "rpob", "alr"] +# FIXME: for gid, the mCSM input file below uses SAM; it should probably be SRY, since SRY is the drug. Please check! if gene.lower() == "gid": print("\nReading mCSM file for gene:", gene) in_filename_mcsm = gene.lower() + '_complex_mcsm_norm_SAM.csv' @@ -178,6 +179,8 @@ if gene.lower() in geneL_dy_na : infile_mcsm_na = outdir + 'mcsm_na_results/' + infilename_mcsm_na mcsm_na_df = pd.read_csv(infile_mcsm_na, sep = ',') +# FIXME: mcsm_ppi2 data is not extracted as expected for alr +# TODO: get mcsm_ppi2 data for alr # ONLY:for gene embb and alr: End logic should pick this up! geneL_ppi2 = ["embb", "alr"] #if gene.lower() == "embb" or "alr": @@ -336,6 +339,7 @@ if len(deepddg_df.loc[:,'chain_id'].value_counts()) > 1: , "\nChains:", deepddg_df.loc[:,'chain_id'].value_counts().index) #-------------------------- +# FIXME: the chain subsetting below needs to happen BEFORE scaling, as the scaling result will vary otherwise # subset chain #-------------------------- if gene.lower() == "embb": @@ -709,4 +713,4 @@ combined_all_params.to_csv(outfile_comb, index = False) print('\nFinished writing file:' , '\nNo. of rows:', combined_all_params.shape[0] , '\nNo. of cols:', combined_all_params.shape[1]) -#%% end of script \ No newline at end of file +#%% end of script