Added target counts for all genes and, as a starting point, ran multiple classification models for every gene and target
This commit is contained in:
parent
89158bc669
commit
877862acb7
8 changed files with 948 additions and 0 deletions
81
my_data_target_counts.py
Normal file
81
my_data_target_counts.py
Normal file
|
@ -0,0 +1,81 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Created on Thu Mar 3 17:08:18 2022
|
||||
|
||||
@author: tanu
|
||||
"""
|
||||
#%% load packages
|
||||
import sys, os
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import numpy as np
|
||||
import argparse
|
||||
from functools import reduce
|
||||
#%%
|
||||
# Work from the project's test_data directory under the user's home.
homedir = os.path.expanduser("~")
os.chdir(homedir + "/git/ML_AI_training/test_data")

# `gene` and `drug` select the dataset to load. The placeholders below are
# commented out, so this script never assigns them itself: unless both were
# already present in the namespace (e.g. set beforehand in an interactive
# session), building the paths further down raised NameError. When either one
# is missing, read both from the command line instead.
#gene = ''
#drug = ''
if 'gene' not in globals() or 'drug' not in globals():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '-d', '--drug', required=True,
        help='drug name: names the data directory and the drug column')
    arg_parser.add_argument(
        '-g', '--gene', required=True,
        help='gene name: lower-cased to form the input file name')
    cli_args = arg_parser.parse_args()
    drug = cli_args.drug
    gene = cli_args.gene

#==============
# directories
#==============
datadir = homedir + '/git/Data/'
indir = datadir + drug + '/input/'
outdir = datadir + drug + '/output/'

# gene_baiscL = ['pnca']
# geneL_naL = ['gid', 'rpob']
# geneL_ppi2L = ['alr', 'embb', 'katg', 'rpob']

#=======
# input
#=======
# The merged per-gene table is read from the drug's *output* directory.
infile_ml1 = outdir + gene.lower() + '_merged_df3.csv'
#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'

my_df = pd.read_csv(infile_ml1)

# Bare expression: has no effect in a script run; shows the column dtypes
# when the line is evaluated interactively.
my_df.dtypes
my_df_cols = my_df.columns
|
||||
|
||||
#%%============================================================================
# GET Y
# Build a text label column named "<drug>_labels" from the drug column:
# 1 -> 'resistant', 0 -> 'sensitive'; every value the mapping does not cover
# (including missing values) becomes 'unknown'.
drug_labels = drug + '_labels'
label_by_code = {1: 'resistant', 0: 'sensitive'}
my_df[drug_labels] = my_df[drug].map(label_by_code).fillna('unknown')

# Non-null count of the mutation column (a one-entry Series keyed by the
# column name).
mutC = my_df[['mutationinformation']].count()

# Per-class counts for each of the three target columns.
target1C, target2C, target3C = (
    my_df[target_col].value_counts()
    for target_col in ('mutation_info_labels', drug_labels, 'drtype')
)
#target2C.index = target2C.index.to_series().map({1: 'resistant', 0: 'sensitive'})

# Stack all counts end-to-end into a single Series.
targetsC = pd.concat([mutC, target1C, target2C, target3C])
targetsC

# targetsC2 = pd.concat([mutC, target1C, target2C
#                        #, target3C
#                        ], axis = 1)
# targetsC2
|
||||
|
||||
#%% try combinations
|
||||
# X_vars = X_stability
|
||||
# X_vars = X_evol
|
||||
# X_vars = X_str
|
||||
|
||||
# X_vars = pd.concat([X_stability, X_evol, X_str], axis = 1)
|
||||
# X_vars = pd.concat([X_stability, X_evol], axis = 1)
|
||||
# X_vars = pd.concat([X_stability, X_str], axis = 1)
|
||||
# X_vars = pd.concat([X_evol, X_str], axis = 1)
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue