added
This commit is contained in:
parent
82e2da4f3b
commit
f94eadf1d4
3 changed files with 40 additions and 13 deletions
|
@ -1,11 +1,11 @@
|
||||||
# count numbers for ML
|
# count numbers for ML
|
||||||
|
|
||||||
#source("~/git/LSHTM_analysis/config/alr.R")
|
source("~/git/LSHTM_analysis/config/alr.R")
|
||||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||||
#source("~/git/LSHTM_analysis/config/gid.R")
|
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||||
#source("~/git/LSHTM_analysis/config/pnca.R")
|
#source("~/git/LSHTM_analysis/config/pnca.R")
|
||||||
source("~/git/LSHTM_analysis/config/rpob.R")
|
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||||
|
|
||||||
#############################
|
#############################
|
||||||
# GET the actual merged dfs
|
# GET the actual merged dfs
|
||||||
|
|
|
@ -450,15 +450,15 @@ def getmldata(gene, drug
|
||||||
|
|
||||||
|
|
||||||
if gene.lower() in geneL_na:
|
if gene.lower() in geneL_na:
|
||||||
gene_affinity_colnames = ['mcsm_na_affinity']
|
gene_affinity_colnames = ['mcsm_na_affinity', 'nca_distance']
|
||||||
#X_stabilityN = common_cols_stabiltyN + geneL_na_st_cols
|
#X_stabilityN = common_cols_stabiltyN + geneL_na_st_cols
|
||||||
cols_to_mask = ['ligand_affinity_change']#, 'mcsm_na_affinity']
|
cols_to_mask = ['ligand_affinity_change']#, 'mcsm_na_affinity']
|
||||||
cols_to_mask_ppi2 = []
|
cols_to_mask_ppi2 = []
|
||||||
cols_to_mask_na = ['mcsm_na_affinity']
|
cols_to_mask_na = ['mcsm_na_affinity']
|
||||||
|
|
||||||
|
# both
|
||||||
if gene.lower() in geneL_na_ppi2:
|
if gene.lower() in geneL_na_ppi2:
|
||||||
gene_affinity_colnames = ['mcsm_na_affinity'] + ['mcsm_ppi2_affinity', 'interface_dist']
|
gene_affinity_colnames = ['mcsm_na_affinity','nca_distance', 'mcsm_ppi2_affinity', 'interface_dist']
|
||||||
#X_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols
|
#X_stabilityN = common_cols_stabiltyN + geneL_na_ppi2_st_cols
|
||||||
#cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity']
|
#cols_to_mask = ['ligand_affinity_change', 'mcsm_na_affinity', 'mcsm_ppi2_affinity']
|
||||||
cols_to_mask = ['ligand_affinity_change']#, 'mcsm_na_affinity']
|
cols_to_mask = ['ligand_affinity_change']#, 'mcsm_na_affinity']
|
||||||
|
@ -481,27 +481,40 @@ def getmldata(gene, drug
|
||||||
# mask the mcsm ligand affinity AND mcsm_na affinity columns where ligand distance > 10
|
# mask the mcsm ligand affinity AND mcsm_na affinity columns where ligand distance > 10
|
||||||
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
|
my_df_ml.loc[(my_df_ml['ligand_distance'] > 10), cols_to_mask] = 0
|
||||||
|
|
||||||
|
#mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask]
|
||||||
|
mask_check_cols = ['mutationinformation', 'ligand_distance'] + cols_to_mask
|
||||||
|
|
||||||
#---------------------------
|
#---------------------------
|
||||||
# mask the mcsm_ppi2_affinity column where interface_dist > 10
|
# mask the mcsm_ppi2_affinity column where interface_dist > 10
|
||||||
#---------------------------
|
#---------------------------
|
||||||
if len(cols_to_mask_ppi2) > 0:
|
if len(cols_to_mask_ppi2) > 0:
|
||||||
my_df_ml.loc[(my_df_ml['interface_dist'] > 10), cols_to_mask_ppi2] = 0
|
my_df_ml.loc[(my_df_ml['interface_dist'] > 10), cols_to_mask_ppi2] = 0
|
||||||
add_cols_mask = ['interface_dist'] + cols_to_mask_ppi2
|
add_cols_mask = ['interface_dist'] + cols_to_mask_ppi2
|
||||||
mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask + add_cols_mask]
|
#mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask + add_cols_mask]
|
||||||
else:
|
mask_check_cols = mask_check_cols + add_cols_mask
|
||||||
mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask ]
|
|
||||||
|
|
||||||
#---------------------------
|
#---------------------------
|
||||||
# mask the na_affinity column where nca_distance > 10
|
# mask the na_affinity column where nca_distance > 10
|
||||||
#---------------------------
|
#---------------------------
|
||||||
if len(cols_to_mask_na) > 0:
|
if len(cols_to_mask_na) > 0:
|
||||||
my_df_ml.loc[(my_df_ml['nca_distance'] > 10), cols_to_mask_na] = 0
|
my_df_ml.loc[(my_df_ml['nca_distance'] > 10), cols_to_mask_na] = 0
|
||||||
add_cols_mask = ['nca_distance'] + cols_to_mask_na
|
add_cols_mask = ['nca_distance'] + cols_to_mask_na
|
||||||
mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask + add_cols_mask]
|
#mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask + add_cols_mask]
|
||||||
else:
|
mask_check_cols = mask_check_cols + add_cols_mask
|
||||||
mask_check = my_df_ml[['mutationinformation', 'ligand_distance'] + cols_to_mask ]
|
|
||||||
|
# if gene.lower() in geneL_na_ppi2:
|
||||||
|
# #---------------------------
|
||||||
|
# # RPOB: mask ppi2 + na + lig cols
|
||||||
|
# #---------------------------
|
||||||
|
# mask_check = my_df_ml[['mutationinformation',
|
||||||
|
# 'ligand_distance', 'ligand_affinity_change',
|
||||||
|
# 'nca_distance','mcsm_na_affinity',
|
||||||
|
# 'mcsm_ppi2_affinity','interface_dist']]
|
||||||
|
|
||||||
|
|
||||||
|
# GET mask data
|
||||||
|
mask_check = my_df_ml[mask_check_cols]
|
||||||
|
|
||||||
# sanity check: check script SANITY_CHECK_mask.py
|
# sanity check: check script SANITY_CHECK_mask.py
|
||||||
if write_maskfile:
|
if write_maskfile:
|
||||||
# write mask file for sanity check
|
# write mask file for sanity check
|
14
scripts/ml/ml_functions/TEST_GetMLData.py
Normal file
14
scripts/ml/ml_functions/TEST_GetMLData.py
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
"""
|
||||||
|
Created on Thu Sep 1 12:22:27 2022
|
||||||
|
|
||||||
|
@author: tanu
|
||||||
|
"""
|
||||||
|
getmldata(gene = "katG"
|
||||||
|
, drug = "isoniazid"
|
||||||
|
, data_combined_model = False
|
||||||
|
, use_or = False
|
||||||
|
, omit_all_genomic_features = False
|
||||||
|
, write_maskfile = True
|
||||||
|
, write_outfile = False)
|
Loading…
Add table
Add a link
Reference in a new issue