various changes
This commit is contained in:
parent
c72269dcd1
commit
e4608342a4
3 changed files with 199 additions and 95 deletions
|
@ -81,16 +81,16 @@ gene = args.gene
|
|||
gene_match = gene + '_p.'
|
||||
print('mut pattern for gene', gene, ':', gene_match)
|
||||
|
||||
nssnp_match = gene_match +'[A-Z]{3}[0-9]+[A-Z]{3}'
|
||||
nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
||||
print('nsSNP for gene', gene, ':', nssnp_match)
|
||||
|
||||
wt_regex = gene_match.lower()+'(\w{3})'
|
||||
wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
||||
print('wt regex:', wt_regex)
|
||||
|
||||
mut_regex = r'\d+(\w{3})$'
|
||||
mut_regex = r'[0-9]+(\w{3})$'
|
||||
print('mt regex:', mut_regex)
|
||||
|
||||
pos_regex = r'(\d+)'
|
||||
pos_regex = r'([0-9]+)'
|
||||
print('position regex:', pos_regex)
|
||||
|
||||
# building cols to extract
|
||||
|
@ -154,30 +154,29 @@ if in_filename_master == 'original_tanushree_data_v2.csv':
|
|||
else:
|
||||
core_cols = ['id'
|
||||
, 'sample'
|
||||
, 'patient_id'
|
||||
, 'strain'
|
||||
#, 'patient_id'
|
||||
#, 'strain'
|
||||
, 'lineage'
|
||||
, 'sublineage'
|
||||
, 'country'
|
||||
#, 'country'
|
||||
, 'country_code'
|
||||
, 'geographic_source'
|
||||
#, 'region'
|
||||
, 'location'
|
||||
, 'host_body_site'
|
||||
, 'environment_material'
|
||||
, 'host_status'
|
||||
, 'host_sex'
|
||||
, 'submitted_host_sex'
|
||||
, 'hiv_status'
|
||||
, 'HIV_status'
|
||||
, 'tissue_type'
|
||||
, 'isolation_source'
|
||||
#, 'location'
|
||||
#, 'host_body_site'
|
||||
#, 'environment_material'
|
||||
#, 'host_status'
|
||||
#, 'host_sex'
|
||||
#, 'submitted_host_sex'
|
||||
#, 'hiv_status'
|
||||
#, 'HIV_status'
|
||||
#, 'tissue_type'
|
||||
#, 'isolation_source'
|
||||
, resistance_col]
|
||||
|
||||
variable_based_cols = [drug
|
||||
, dr_muts_col
|
||||
, other_muts_col]
|
||||
#, resistance_col]
|
||||
|
||||
cols_to_extract = core_cols + variable_based_cols
|
||||
print('Extracting', len(cols_to_extract), 'columns from master data')
|
||||
|
@ -200,7 +199,7 @@ print('No. of NAs/column:' + '\n', meta_data.isna().sum()
|
|||
|
||||
#%% Write check file
|
||||
check_file = outdir + '/' + gene.lower() + '_gwas.csv'
|
||||
meta_data.to_csv(check_file)
|
||||
meta_data.to_csv(check_file, index = False)
|
||||
print('Writing subsetted gwas data'
|
||||
, '\nFile', check_file
|
||||
, '\nDim:', meta_data.shape)
|
||||
|
@ -215,9 +214,9 @@ print('Writing subsetted gwas data'
|
|||
# drug counts: complete samples for OR calcs
|
||||
meta_data[drug].value_counts()
|
||||
print('===========================================================\n'
|
||||
, 'RESULT: No. of Sus and Res samples:\n', meta_data[drug].value_counts()
|
||||
, 'RESULT: No. of Sus and Res', drug, 'samples:\n', meta_data[drug].value_counts()
|
||||
, '\n===========================================================\n'
|
||||
, 'RESULT: Percentage of Sus and Res samples:\n', meta_data[drug].value_counts(normalize = True)*100
|
||||
, 'RESULT: Percentage of Sus and Res', drug, 'samples:\n', meta_data[drug].value_counts(normalize = True)*100
|
||||
, '\n===========================================================')
|
||||
|
||||
#%%
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue