updated kd.py to reflect a merging col for combining num params later
This commit is contained in:
parent
de1822f491
commit
87a847109a
3 changed files with 137 additions and 82 deletions
|
@ -10,16 +10,7 @@ Created on Tue Aug 6 12:56:03 2019
|
|||
# concentrate on positions that have structural info?
|
||||
|
||||
# FIXME: import dirs.py to get the basic dir paths available
|
||||
|
||||
#%% load libraries
|
||||
import os, sys
|
||||
import pandas as pd
|
||||
#import numpy as np
|
||||
|
||||
#from pandas.api.types import is_string_dtype
|
||||
#from pandas.api.types import is_numeric_dtype
|
||||
|
||||
#========================================================
|
||||
#=======================================================================
|
||||
# TASK: extract ALL pncA_p. mutations from GWAS data
|
||||
# Input data file has the following format: each row = unique sample id
|
||||
# id,country,lineage,sublineage,drtype,pyrazinamide,dr_mutations_pyrazinamide,other_mutations_pyrazinamide...
|
||||
|
@ -38,46 +29,58 @@ import pandas as pd
|
|||
# 3) pnca_metadata.csv
|
||||
# 4) pnca_all_muts_msa.csv
|
||||
# 5) pnca_mutational_positons.csv
|
||||
#========================================================
|
||||
#=======================================================================
|
||||
#%% load libraries
|
||||
import os, sys
|
||||
import pandas as pd
|
||||
#import numpy as np
|
||||
|
||||
#from pandas.api.types import is_string_dtype
|
||||
#from pandas.api.types import is_numeric_dtype
|
||||
|
||||
#%% specify homedir as python doesn't recognise tilde
|
||||
homedir = os.path.expanduser('~')
|
||||
|
||||
# my working dir
|
||||
# set working dir
|
||||
os.getcwd()
|
||||
os.chdir(homedir + '/git/LSHTM_analysis/meta_data_analysis')
|
||||
os.getcwd()
|
||||
|
||||
# import aa dict
|
||||
from reference_dict import my_aa_dict #CHECK DIR STRUC THERE!
|
||||
#========================================================
|
||||
|
||||
#=======================================================================
|
||||
#%% variable assignment: input and output paths & filenames
|
||||
drug = 'pyrazinamide'
|
||||
gene = 'pncA'
|
||||
gene_match = gene + '_p.'
|
||||
|
||||
#==========
|
||||
# input dir
|
||||
#==========
|
||||
#=======
|
||||
# data dir
|
||||
#=======
|
||||
#indir = 'git/Data/pyrazinamide/input/original'
|
||||
indir = homedir + '/' + 'git/Data'
|
||||
datadir = homedir + '/' + 'git/Data'
|
||||
|
||||
#===========
|
||||
# output dir
|
||||
#===========
|
||||
#=======
|
||||
# input
|
||||
#=======
|
||||
#indir = 'git/Data/pyrazinamide/input/original'
|
||||
in_filename = 'original_tanushree_data_v2.csv'
|
||||
infile = datadir + '/' + in_filename
|
||||
print('Input filename: ', in_filename
|
||||
, '\nInput path: ', indir)
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
# several output files
|
||||
# output filenames in respective sections at the time of outputting files
|
||||
#outdir = 'git/Data/pyrazinamide/output'
|
||||
outdir = homedir + '/' + 'git/Data' + '/' + drug + '/' + 'output'
|
||||
outdir = datadir + '/' + drug + '/' + 'output'
|
||||
print('Output filename: in the respective sections'
|
||||
, '\nOutput path: ', outdir)
|
||||
|
||||
#%%end of variable assignment for input and output files
|
||||
#==============================================================================
|
||||
#%% Read files
|
||||
|
||||
in_filename = 'original_tanushree_data_v2.csv'
|
||||
infile = indir + '/' + in_filename
|
||||
print('Reading input master file:', infile)
|
||||
|
||||
#=======================================================================
|
||||
#%% Read input file
|
||||
master_data = pd.read_csv(infile, sep = ',')
|
||||
|
||||
# column names
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue