81 lines
2 KiB
Python
81 lines
2 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
Created on Thu Mar 3 17:08:18 2022

@author: tanu
"""
|
|
#%% load packages
|
|
import sys, os
|
|
import pandas as pd
|
|
from pandas import DataFrame
|
|
import numpy as np
|
|
import argparse
|
|
from functools import reduce
|
|
#%%
|
|
# Home directory of the current user; every path below is built from it.
homedir = os.path.expanduser("~")

# Work from the test-data folder inside the ML_AI_training checkout.
os.chdir(f"{homedir}/git/ML_AI_training/test_data")

#gene = ''
#drug = ''

#==============
# directories
#==============
# NOTE(review): `drug` is not assigned in the visible part of this script
# (the placeholder assignments above are commented out), so it has to exist
# in the running namespace already; otherwise the lines below raise NameError.
datadir = f"{homedir}/git/Data/"

# Per-drug input and output folders: <datadir><drug>/input/ and .../output/
indir, outdir = (datadir + drug + leaf for leaf in ('/input/', '/output/'))
|
|
|
|
# gene_baiscL = ['pnca']
|
|
# geneL_naL = ['gid', 'rpob']
|
|
# geneL_ppi2L = ['alr', 'embb', 'katg', 'rpob']
|
|
|
|
#=======
# input
#=======
# NOTE(review): `gene` is not assigned in the visible part of this script;
# it is expected to exist in the running namespace already.
# The CSV is named '<gene, lower-cased>_merged_df3.csv' and is read from
# `outdir` (not `indir`).
infile_ml1 = ''.join((outdir, gene.lower(), '_merged_df3.csv'))
#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'

# Load the whole table into a DataFrame.
my_df = pd.read_csv(filepath_or_buffer=infile_ml1)

# Inspection only: the bare expression displays column dtypes when run
# interactively and has no effect when the file runs as a script.
my_df.dtypes

# Keep the column index around for later use.
my_df_cols = my_df.columns
|
|
|
|
#%%============================================================================
# GET Y
# Name of the column that will hold text labels derived from the `drug`
# column of my_df.
drug_labels = drug + '_labels'
drug_labels

# First pass: copy the raw `drug` values so their counts can be inspected.
my_df[drug_labels] = my_df[drug]
my_df[drug_labels].value_counts()

# Second pass: recode 1 -> 'resistant' and 0 -> 'sensitive'. Any value that
# is not a key of the mapping comes out as NaN.
my_df[drug_labels] = my_df[drug].map({1: 'resistant', 0: 'sensitive'})
my_df[drug_labels].value_counts()

# Third pass: whatever is still NaN is labelled 'unknown'.
my_df[drug_labels] = my_df[drug_labels].fillna('unknown')
my_df[drug_labels].value_counts()

# Number of non-null entries in the 'mutationinformation' column
# (a one-element Series indexed by the column name).
mutC = my_df[['mutationinformation']].count()

# Frequency tables for the three target columns, in the same order as the
# names on the left-hand side.
target1C, target2C, target3C = (
    my_df[col].value_counts()
    for col in ('mutation_info_labels', drug_labels, 'drtype')
)
#target2C.index = target2C.index.to_series().map({1: 'resistant', 0: 'sensitive'})

# Stack all tallies end-to-end into a single Series for a quick overview.
targetsC = pd.concat([mutC, target1C, target2C, target3C])
targetsC
|
|
|
|
# targetsC2 = pd.concat([mutC, target1C, target2C
|
|
# #, target3C
|
|
# ], axis = 1)
|
|
# targetsC2
|
|
|
|
#%% try combinations
|
|
# X_vars = X_stability
|
|
# X_vars = X_evol
|
|
# X_vars = X_str
|
|
|
|
# X_vars = pd.concat([X_stability, X_evol, X_str], axis = 1)
|
|
# X_vars = pd.concat([X_stability, X_evol], axis = 1)
|
|
# X_vars = pd.concat([X_stability, X_str], axis = 1)
|
|
# X_vars = pd.concat([X_evol, X_str], axis = 1)
|
|
|