saving work before adding files
This commit is contained in:
parent
0e0f7c89df
commit
926d181120
3 changed files with 47 additions and 24 deletions
|
@ -20,14 +20,14 @@ source("functions/myaf_or_calcs.R")
|
||||||
# command line args
|
# command line args
|
||||||
#********************
|
#********************
|
||||||
spec = matrix(c(
|
spec = matrix(c(
|
||||||
"drug" ,"d" , 1, "character",
|
"drug" ,"d", 1, "character",
|
||||||
"gene" ,"g" , 1, "character",
|
"gene" ,"g", 1, "character",
|
||||||
"master_data" ,"m", 2, "character",
|
"master_data" ,"m", 2, "character",
|
||||||
"gene_data" ,"G", 2, "character",
|
"gene_data" ,"G", 2, "character",
|
||||||
"outfile" ,"o" , 2, "character",
|
"outfile" ,"o", 2, "character",
|
||||||
"idcol" ,"I", 2, "character",
|
"idcol" ,"I", 2, "character",
|
||||||
"drmuts_col" ,"D", 2, "character",
|
"drmuts_col" ,"D", 2, "character",
|
||||||
"othermuts_col" ,"O", 2, "character"
|
"othermuts_col" ,"O", 2, "character"
|
||||||
|
|
||||||
), byrow = TRUE, ncol = 4)
|
), byrow = TRUE, ncol = 4)
|
||||||
|
|
||||||
|
@ -109,7 +109,6 @@ if (is.null(other_muts_col)){
|
||||||
other_muts_col
|
other_muts_col
|
||||||
cat("\ndrug and other mut colnames not specified, sourcing from globals: "
|
cat("\ndrug and other mut colnames not specified, sourcing from globals: "
|
||||||
, other_muts_col, "\n")
|
, other_muts_col, "\n")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Informing the user of the sensible defaults being used:
|
# Informing the user of the sensible defaults being used:
|
||||||
|
|
|
@ -54,13 +54,26 @@ os.getcwd()
|
||||||
# FIXME: local imports
|
# FIXME: local imports
|
||||||
#from combining import combine_dfs_with_checks
|
#from combining import combine_dfs_with_checks
|
||||||
from combining_FIXME import detect_common_cols
|
from combining_FIXME import detect_common_cols
|
||||||
from reference_dict import oneletter_aa_dict # CHECK DIR STRUC THERE!
|
from reference_dict import oneletter_aa_dict
|
||||||
from reference_dict import low_3letter_dict # CHECK DIR STRUC THERE!
|
from reference_dict import low_3letter_dict
|
||||||
|
|
||||||
|
from aa_code import get_aa_3lower
|
||||||
|
from aa_code import get_aa_1upper
|
||||||
|
|
||||||
|
# REGEX: as required
|
||||||
|
# mcsm_regex = r'^([A-Za-z]{1})([0-9]+)([A-Za-z]{1})$'
|
||||||
|
# mcsm_wt = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[0]
|
||||||
|
# mcsm_mut = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[2]
|
||||||
|
|
||||||
|
# gwas_regex = r'^([A-Za-z]{3})([0-9]+)([A-Za-z]{3})$'
|
||||||
|
# gwas_wt = mcsm_df['mutation'].str.extract(gwas_regex)[0]
|
||||||
|
# gwas_pos = mcsm_df['mutation'].str.extract(gwas_regex)[1]
|
||||||
|
# gwas_mut = mcsm_df['mutation'].str.extract(gwas_regex)[2]
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#%% command line args: case sensitive
|
#%% command line args: case sensitive
|
||||||
arg_parser = argparse.ArgumentParser()
|
arg_parser = argparse.ArgumentParser()
|
||||||
arg_parser.add_argument('-d', '--drug', help='drug name', default = '')
|
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
|
||||||
arg_parser.add_argument('-g', '--gene', help='gene name', default = '')
|
arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '')
|
||||||
|
|
||||||
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
|
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
|
||||||
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
|
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
|
||||||
|
@ -83,17 +96,19 @@ outdir = args.output_dir
|
||||||
gene_match = gene + '_p.'
|
gene_match = gene + '_p.'
|
||||||
print('mut pattern for gene', gene, ':', gene_match)
|
print('mut pattern for gene', gene, ':', gene_match)
|
||||||
|
|
||||||
nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
# NOTE: Redundant, now that improvements have been made!
|
||||||
print('nsSNP for gene', gene, ':', nssnp_match)
|
# See section "REGEX"
|
||||||
|
# nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
||||||
|
# print('nsSNP for gene', gene, ':', nssnp_match)
|
||||||
|
|
||||||
wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
# wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
||||||
print('wt regex:', wt_regex)
|
# print('wt regex:', wt_regex)
|
||||||
|
|
||||||
mut_regex = r'[0-9]+(\w{3})$'
|
# mut_regex = r'[0-9]+(\w{3})$'
|
||||||
print('mt regex:', mut_regex)
|
# print('mt regex:', mut_regex)
|
||||||
|
|
||||||
pos_regex = r'([0-9]+)'
|
# pos_regex = r'([0-9]+)'
|
||||||
print('position regex:', pos_regex)
|
# print('position regex:', pos_regex)
|
||||||
#%%=======================================================================
|
#%%=======================================================================
|
||||||
#==============
|
#==============
|
||||||
# directories
|
# directories
|
||||||
|
@ -168,6 +183,14 @@ print('==================================='
|
||||||
, '\n===================================')
|
, '\n===================================')
|
||||||
|
|
||||||
mcsm_df = pd.read_csv(infile_mcsm, sep = ',')
|
mcsm_df = pd.read_csv(infile_mcsm, sep = ',')
|
||||||
|
|
||||||
|
# add 3 lowercase aa code for wt and mutant
|
||||||
|
get_aa_3lower(df = mcsm_df
|
||||||
|
, wt_colname = 'wild_type'
|
||||||
|
, mut_colname = 'mutant_type'
|
||||||
|
, col_wt = 'wt_aa_3lower'
|
||||||
|
, col_mut = 'mut_aa_3lower')
|
||||||
|
|
||||||
#mcsm_df.columns = mcsm_df.columns.str.lower()
|
#mcsm_df.columns = mcsm_df.columns.str.lower()
|
||||||
foldx_df = pd.read_csv(infile_foldx , sep = ',')
|
foldx_df = pd.read_csv(infile_foldx , sep = ',')
|
||||||
|
|
||||||
|
@ -201,8 +224,9 @@ print('==================================='
|
||||||
, '\ndssp_kd_dfs + rd_df'
|
, '\ndssp_kd_dfs + rd_df'
|
||||||
, '\n===================================')
|
, '\n===================================')
|
||||||
#dssp_kd_rd_dfs = combine_dfs_with_checks(dssp_kd_dfs, rd_df, my_join = o_join)
|
#dssp_kd_rd_dfs = combine_dfs_with_checks(dssp_kd_dfs, rd_df, my_join = o_join)
|
||||||
merging_cols_m3 = detect_common_cols(dssp_df, kd_df)
|
merging_cols_m3 = detect_common_cols(dssp_kd_dfs, rd_df)
|
||||||
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3, how = o_join)
|
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3
|
||||||
|
, how = o_join)
|
||||||
|
|
||||||
ncols_m3 = len(dssp_kd_rd_dfs.columns)
|
ncols_m3 = len(dssp_kd_rd_dfs.columns)
|
||||||
|
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 881ff8f27aaf1db4266a84fb03baad3dab552c64
|
Subproject commit eadbb223f3883be8730ba39e751a24f5ce0cd45d
|
Loading…
Add table
Add a link
Reference in a new issue