add wrapper and mcsm library

This commit is contained in:
Tanushree Tunstall 2020-04-16 17:45:24 +01:00
parent 7aafa72e10
commit e50466da39
6 changed files with 558 additions and 678 deletions

View file

@ -54,11 +54,14 @@ arg_parser.add_argument('-g', '--gene', help='gene name (case sensitive)', defau
args = arg_parser.parse_args()
#=======================================================================
#%% variable assignment: input and output paths & filenames
#drug = 'pyrazinamide'
#gene = 'pncA'
drug = 'cycloserine'
gene = 'alr'
drug = args.drug
gene = args.gene
gene_match = gene + '_p.'
# building cols to extract
dr_muts_col = 'dr_mutations_' + drug
other_muts_col = 'other_mutations_' + drug
@ -82,8 +85,7 @@ datadir = homedir + '/' + 'git/Data'
#=======
in_filename = 'original_tanushree_data_v2.csv'
infile = datadir + '/' + in_filename
print('Input filename: ', in_filename
, '\nInput path: ', datadir
print('Input file: ', infile
, '\n============================================================')
#=======
@ -352,9 +354,8 @@ out_filename0 = gene.lower() + '_common_ids.csv'
outfile0 = outdir + '/' + out_filename0
#FIXME: CHECK line len(common_ids)
print('Writing file: common ids:'
, '\nFilename:', out_filename0
, '\nPath:', outdir
print('Writing file:'
, '\nFile:', outfile0
, '\nExpected no. of rows:', len(common_ids)
, '\n=============================================================')
@ -530,7 +531,7 @@ print('lengths after tidy split and extracting', gene_match, 'muts:',
'\nexpected len:', other_gene_count)
if len(other_gene_WF1) == other_gene_count:
print('PASS: length of dr_gene_WF0 match with expected length'
print('PASS: length matches with expected length'
, '\n===============================================================')
else:
print('FAIL: lengths mismatch'
@ -685,12 +686,12 @@ else:
, '\nmuts should be distinct within dr* and other* type'
, '\ninspecting ...'
, '\n=========================================================')
muts_split = list(gene_LF1.groupby('mutation_info'))
dr_muts = muts_split[0][1].mutation
other_muts = muts_split[1][1].mutation
# print('splitting muts by mut_info:', muts_split)
print('no.of dr_muts samples:', len(dr_muts))
print('no. of other_muts samples', len(other_muts))
muts_split = list(gene_LF1.groupby('mutation_info'))
dr_muts = muts_split[0][1].mutation
other_muts = muts_split[1][1].mutation
print('splitting muts by mut_info:', muts_split)
print('no.of dr_muts samples:', len(dr_muts))
print('no. of other_muts samples', len(other_muts))
#%%
# !!! IMPORTANT !!!!
# sanity check: There should not be any common muts