#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Mar  3 17:08:18 2022

@author: tanu

Load the merged per-gene data frame and tabulate the candidate target columns.

What this script does, top to bottom:
  1. changes the working directory to ~/git/ML_AI_training/test_data
  2. reads <datadir>/<drug>/output/<gene>_merged_df3.csv into ``my_df``
  3. adds a ``<drug>_labels`` column ('resistant' / 'sensitive' / 'unknown')
  4. collects the mutation count and the value counts of three target
     columns into ``targetsC``

NOTE: ``gene`` and ``drug`` are NOT assigned in this file (their placeholder
assignments below are commented out). They must already exist in the
namespace in which this script is executed, otherwise a NameError is raised.
"""
#%% load packages
import argparse
import os
import sys
from functools import reduce

import numpy as np
import pandas as pd
from pandas import DataFrame

#%%
homedir = os.path.expanduser("~")
os.chdir(f"{homedir}/git/ML_AI_training/test_data")

# Placeholders for the two required inputs (left disabled, as in the original):
#gene = ''
#drug = ''

#==============
# directories
#==============
datadir = f"{homedir}/git/Data/"
indir = "".join((datadir, drug, '/input/'))
outdir = "".join((datadir, drug, '/output/'))

# Gene groupings kept for reference (not used in this section):
# gene_baiscL = ['pnca']
# geneL_naL = ['gid', 'rpob']
# geneL_ppi2L = ['alr', 'embb', 'katg', 'rpob']

#=======
# input
#=======
infile_ml1 = "".join((outdir, gene.lower(), '_merged_df3.csv'))
#infile_ml2 = outdir + gene.lower() + '_merged_df2.csv'

my_df = pd.read_csv(infile_ml1)
my_df.dtypes  # bare expression: inspection only, no effect on state
my_df_cols = my_df.columns

#%%============================================================================
# GET Y
# Build the labelled version of the drug column in one pass:
#   1 -> 'resistant', 0 -> 'sensitive', anything else (including missing
#   values, which map() leaves as NaN) -> 'unknown'.
drug_labels = drug + '_labels'
my_df[drug_labels] = (
    my_df[drug]
    .map({1: 'resistant', 0: 'sensitive'})
    .fillna('unknown')
)
my_df[drug_labels].value_counts()  # inspection only

# Per-target tallies: non-null mutation count plus class frequencies of the
# three candidate target columns.
mutC = my_df[['mutationinformation']].count()
target1C = my_df['mutation_info_labels'].value_counts()
target2C = my_df[drug_labels].value_counts()
#target2C.index = target2C.index.to_series().map({1: 'resistant', 0: 'sensitive'})
target3C = my_df['drtype'].value_counts()

# Stack all tallies end to end into a single Series.
targetsC = pd.concat([mutC, target1C, target2C, target3C])
targetsC  # inspection only

# Side-by-side alternative (disabled):
# targetsC2 = pd.concat([mutC, target1C, target2C
#                        #, target3C
#                        ], axis = 1)
# targetsC2

#%% try combinations
# Candidate feature sets (X_stability / X_evol / X_str are not defined in
# this section):
# X_vars = X_stability
# X_vars = X_evol
# X_vars = X_str
# X_vars = pd.concat([X_stability, X_evol, X_str], axis = 1)
# X_vars = pd.concat([X_stability, X_evol], axis = 1)
# X_vars = pd.concat([X_stability, X_str], axis = 1)
# X_vars = pd.concat([X_evol, X_str], axis = 1)