saving work before adding files
This commit is contained in:
parent
0e0f7c89df
commit
926d181120
3 changed files with 47 additions and 24 deletions
|
@ -54,13 +54,26 @@ os.getcwd()
|
|||
# FIXME: local imports
|
||||
#from combining import combine_dfs_with_checks
|
||||
from combining_FIXME import detect_common_cols
|
||||
from reference_dict import oneletter_aa_dict # CHECK DIR STRUC THERE!
|
||||
from reference_dict import low_3letter_dict # CHECK DIR STRUC THERE!
|
||||
from reference_dict import oneletter_aa_dict
|
||||
from reference_dict import low_3letter_dict
|
||||
|
||||
from aa_code import get_aa_3lower
|
||||
from aa_code import get_aa_1upper
|
||||
|
||||
# REGEX: as required
|
||||
# mcsm_regex = r'^([A-Za-z]{1})([0-9]+)([A-Za-z]{1})$'
|
||||
# mcsm_wt = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[0]
|
||||
# mcsm_mut = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[2]
|
||||
|
||||
# gwas_regex = r'^([A-Za-z]{3})([0-9]+)([A-Za-z]{3})$'
|
||||
# gwas_wt = mcsm_df['mutation'].str.extract(gwas_regex)[0]
|
||||
# gwas_pos = mcsm_df['mutation'].str.extract(gwas_regex)[1]
|
||||
# gwas_mut = mcsm_df['mutation'].str.extract(gwas_regex)[2]
|
||||
#=======================================================================
|
||||
#%% command line args: case sensitive
|
||||
arg_parser = argparse.ArgumentParser()
|
||||
arg_parser.add_argument('-d', '--drug', help='drug name', default = '')
|
||||
arg_parser.add_argument('-g', '--gene', help='gene name', default = '')
|
||||
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
|
||||
arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '')
|
||||
|
||||
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assumes homedir + git/Data')
|
||||
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assumes homedir + <drug> + input')
|
||||
|
@ -83,17 +96,19 @@ outdir = args.output_dir
|
|||
gene_match = gene + '_p.'
|
||||
print('mut pattern for gene', gene, ':', gene_match)
|
||||
|
||||
nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
||||
print('nsSNP for gene', gene, ':', nssnp_match)
|
||||
# Redundant, now that improvements have been made!
|
||||
# See section "REGEX"
|
||||
# nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
||||
# print('nsSNP for gene', gene, ':', nssnp_match)
|
||||
|
||||
wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
||||
print('wt regex:', wt_regex)
|
||||
# wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
||||
# print('wt regex:', wt_regex)
|
||||
|
||||
mut_regex = r'[0-9]+(\w{3})$'
|
||||
print('mt regex:', mut_regex)
|
||||
# mut_regex = r'[0-9]+(\w{3})$'
|
||||
# print('mt regex:', mut_regex)
|
||||
|
||||
pos_regex = r'([0-9]+)'
|
||||
print('position regex:', pos_regex)
|
||||
# pos_regex = r'([0-9]+)'
|
||||
# print('position regex:', pos_regex)
|
||||
#%%=======================================================================
|
||||
#==============
|
||||
# directories
|
||||
|
@ -168,6 +183,14 @@ print('==================================='
|
|||
, '\n===================================')
|
||||
|
||||
mcsm_df = pd.read_csv(infile_mcsm, sep = ',')
|
||||
|
||||
# add 3-letter lowercase aa code for wt and mutant
|
||||
get_aa_3lower(df = mcsm_df
|
||||
, wt_colname = 'wild_type'
|
||||
, mut_colname = 'mutant_type'
|
||||
, col_wt = 'wt_aa_3lower'
|
||||
, col_mut = 'mut_aa_3lower')
|
||||
|
||||
#mcsm_df.columns = mcsm_df.columns.str.lower()
|
||||
foldx_df = pd.read_csv(infile_foldx , sep = ',')
|
||||
|
||||
|
@ -201,8 +224,9 @@ print('==================================='
|
|||
, '\ndssp_kd_dfs + rd_df'
|
||||
, '\n===================================')
|
||||
#dssp_kd_rd_dfs = combine_dfs_with_checks(dssp_kd_dfs, rd_df, my_join = o_join)
|
||||
merging_cols_m3 = detect_common_cols(dssp_df, kd_df)
|
||||
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3, how = o_join)
|
||||
merging_cols_m3 = detect_common_cols(dssp_kd_dfs, rd_df)
|
||||
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3
|
||||
, how = o_join)
|
||||
|
||||
ncols_m3 = len(dssp_kd_rd_dfs.columns)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue