much development

This commit is contained in:
Tanushree Tunstall 2021-10-28 10:41:43 +01:00
parent 873fd3a121
commit 057291a561
5 changed files with 266 additions and 89 deletions

View file

@ -117,12 +117,20 @@ deepddg_df['deepddg_outcome'].value_counts()
# Sanity check: count rows with negative vs non-negative 'deepddg' values.
# The results are not assigned to anything.
len(deepddg_df.loc[deepddg_df['deepddg'] < 0])
len(deepddg_df.loc[deepddg_df['deepddg'] >= 0])
#----------------------------------------------
# drop extra columns to allow clean merging
# NOTE(review): this assignment also drops 'chain_id' and is overwritten by the
# later assignment to deepddg_short_df below, which keeps 'chain_id'. It looks
# like the superseded version of that statement -- confirm it can be removed.
deepddg_short_df = deepddg_df.drop(['chain_id', 'wild_type_deepddg', 'position', 'mutant_type_deepddg'], axis = 1)
#----------------------------------------------
#deepddg_short_df = deepddg_df.drop(['chain_id', 'wild_type_deepddg', 'position', 'mutant_type_deepddg'], axis = 1)
#----------------------------------------------
# embb (where the gene-target has > 1 chain):
# keep the chain column, else the numbering will be messed up!
#----------------------------------------------
# Build the short frame from deepddg_df again, this time leaving 'chain_id' in.
deepddg_short_df = deepddg_df.drop(['wild_type_deepddg', 'position', 'mutant_type_deepddg'], axis = 1)
# rearrange columns
# (bare expression: only inspects the column labels, nothing is stored)
deepddg_short_df.columns
# NOTE(review): the next statement keeps only three columns (no 'chain_id'),
# and the statement after it then selects 'chain_id' from that result. If both
# run in sequence, the second selection raises KeyError. These look like the
# old and new versions of the same line -- confirm only the 'chain_id' version
# should remain.
deepddg_short_df = deepddg_short_df[["mutationinformation", "deepddg", "deepddg_outcome"]]
deepddg_short_df = deepddg_short_df[["chain_id", "mutationinformation", "deepddg", "deepddg_outcome"]]
#%% combine with mcsm snps
deepddg_mcsm_muts_dfs = pd.merge(deepddg_short_df