saving work before adding files

This commit is contained in:
Tanushree Tunstall 2021-06-18 17:47:09 +01:00
parent 0e0f7c89df
commit 926d181120
3 changed files with 47 additions and 24 deletions

View file

@ -20,14 +20,14 @@ source("functions/myaf_or_calcs.R")
# command line args
#********************
spec = matrix(c(
"drug" ,"d" , 1, "character",
"gene" ,"g" , 1, "character",
"master_data" ,"m", 2, "character",
"gene_data" ,"G", 2, "character",
"outfile" ,"o" , 2, "character",
"idcol" ,"I", 2, "character",
"drmuts_col" ,"D", 2, "character",
"othermuts_col" ,"O", 2, "character"
"drug" ,"d", 1, "character",
"gene" ,"g", 1, "character",
"master_data" ,"m", 2, "character",
"gene_data" ,"G", 2, "character",
"outfile" ,"o", 2, "character",
"idcol" ,"I", 2, "character",
"drmuts_col" ,"D", 2, "character",
"othermuts_col" ,"O", 2, "character"
), byrow = TRUE, ncol = 4)
@ -109,7 +109,6 @@ if (is.null(other_muts_col)){
other_muts_col
cat("\ndrug and other mut colnames not specified, sourcing from globals: "
, other_muts_col, "\n")
}
# Informing the user of the sensible defaults being used:

View file

@ -54,13 +54,26 @@ os.getcwd()
# FIXME: local imports
#from combining import combine_dfs_with_checks
from combining_FIXME import detect_common_cols
from reference_dict import oneletter_aa_dict # CHECK DIR STRUC THERE!
from reference_dict import low_3letter_dict # CHECK DIR STRUC THERE!
from reference_dict import oneletter_aa_dict
from reference_dict import low_3letter_dict
from aa_code import get_aa_3lower
from aa_code import get_aa_1upper
# REGEX: as required
# mcsm_regex = r'^([A-Za-z]{1})([0-9]+)([A-Za-z]{1})$'
# mcsm_wt = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[0]
# mcsm_mut = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[2]
# gwas_regex = r'^([A-Za-z]{3})([0-9]+)([A-Za-z]{3})$'
# gwas_wt = mcsm_df['mutation'].str.extract(gwas_regex)[0]
# gwas_pos = mcsm_df['mutation'].str.extract(gwas_regex)[1]
# gwas_mut = mcsm_df['mutation'].str.extract(gwas_regex)[2]
#=======================================================================
#%% command line args: case sensitive
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('-d', '--drug', help='drug name', default = '')
arg_parser.add_argument('-g', '--gene', help='gene name', default = '')
arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '')
arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '')
arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data')
arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + <drug> + input')
@ -83,17 +96,19 @@ outdir = args.output_dir
gene_match = gene + '_p.'
print('mut pattern for gene', gene, ':', gene_match)
nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
print('nsSNP for gene', gene, ':', nssnp_match)
# Redundant now that improvements have been made.
# See section "REGEX"
# nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
# print('nsSNP for gene', gene, ':', nssnp_match)
wt_regex = gene_match.lower()+'([A-Za-z]{3})'
print('wt regex:', wt_regex)
# wt_regex = gene_match.lower()+'([A-Za-z]{3})'
# print('wt regex:', wt_regex)
mut_regex = r'[0-9]+(\w{3})$'
print('mt regex:', mut_regex)
# mut_regex = r'[0-9]+(\w{3})$'
# print('mt regex:', mut_regex)
pos_regex = r'([0-9]+)'
print('position regex:', pos_regex)
# pos_regex = r'([0-9]+)'
# print('position regex:', pos_regex)
#%%=======================================================================
#==============
# directories
@ -168,6 +183,14 @@ print('==================================='
, '\n===================================')
mcsm_df = pd.read_csv(infile_mcsm, sep = ',')
# add 3-letter lowercase aa codes for the wild-type and mutant residues
get_aa_3lower(df = mcsm_df
, wt_colname = 'wild_type'
, mut_colname = 'mutant_type'
, col_wt = 'wt_aa_3lower'
, col_mut = 'mut_aa_3lower')
#mcsm_df.columns = mcsm_df.columns.str.lower()
foldx_df = pd.read_csv(infile_foldx , sep = ',')
@ -201,8 +224,9 @@ print('==================================='
, '\ndssp_kd_dfs + rd_df'
, '\n===================================')
#dssp_kd_rd_dfs = combine_dfs_with_checks(dssp_kd_dfs, rd_df, my_join = o_join)
merging_cols_m3 = detect_common_cols(dssp_df, kd_df)
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3, how = o_join)
merging_cols_m3 = detect_common_cols(dssp_kd_dfs, rd_df)
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3
, how = o_join)
ncols_m3 = len(dssp_kd_rd_dfs.columns)

@ -1 +1 @@
Subproject commit 881ff8f27aaf1db4266a84fb03baad3dab552c64
Subproject commit eadbb223f3883be8730ba39e751a24f5ce0cd45d