diff --git a/scripts/af_or_calcs.R b/scripts/af_or_calcs.R index 99f7a00..08b614b 100755 --- a/scripts/af_or_calcs.R +++ b/scripts/af_or_calcs.R @@ -20,14 +20,14 @@ source("functions/myaf_or_calcs.R") # command line args #******************** spec = matrix(c( - "drug" ,"d" , 1, "character", - "gene" ,"g" , 1, "character", - "master_data" ,"m", 2, "character", - "gene_data" ,"G", 2, "character", - "outfile" ,"o" , 2, "character", - "idcol" ,"I", 2, "character", - "drmuts_col" ,"D", 2, "character", - "othermuts_col" ,"O", 2, "character" + "drug" ,"d", 1, "character", + "gene" ,"g", 1, "character", + "master_data" ,"m", 2, "character", + "gene_data" ,"G", 2, "character", + "outfile" ,"o", 2, "character", + "idcol" ,"I", 2, "character", + "drmuts_col" ,"D", 2, "character", + "othermuts_col" ,"O", 2, "character" ), byrow = TRUE, ncol = 4) @@ -109,7 +109,6 @@ if (is.null(other_muts_col)){ other_muts_col cat("\ndrug and other mut colnames not specified, sourcing from globals: " , other_muts_col, "\n") - } # Informing the user of the sensible defaults being used: diff --git a/scripts/combining_dfs.py b/scripts/combining_dfs.py index b15f59c..ee6214a 100755 --- a/scripts/combining_dfs.py +++ b/scripts/combining_dfs.py @@ -54,13 +54,26 @@ os.getcwd() # FIXME: local imports #from combining import combine_dfs_with_checks from combining_FIXME import detect_common_cols -from reference_dict import oneletter_aa_dict # CHECK DIR STRUC THERE! -from reference_dict import low_3letter_dict # CHECK DIR STRUC THERE! 
+from reference_dict import oneletter_aa_dict +from reference_dict import low_3letter_dict + +from aa_code import get_aa_3lower +from aa_code import get_aa_1upper + +# REGEX: as required +# mcsm_regex = r'^([A-Za-z]{1})([0-9]+)([A-Za-z]{1})$' +# mcsm_wt = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[0] +# mcsm_mut = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[2] + +# gwas_regex = r'^([A-Za-z]{3})([0-9]+)([A-Za-z]{3})$' +# gwas_wt = mcsm_df['mutation'].str.extract(gwas_regex)[0] +# gwas_pos = mcsm_df['mutation'].str.extract(gwas_regex)[1] +# gwas_mut = mcsm_df['mutation'].str.extract(gwas_regex)[2] #======================================================================= #%% command line args: case sensitive arg_parser = argparse.ArgumentParser() -arg_parser.add_argument('-d', '--drug', help='drug name', default = '') -arg_parser.add_argument('-g', '--gene', help='gene name', default = '') +arg_parser.add_argument('-d', '--drug', help = 'drug name', default = '') +arg_parser.add_argument('-g', '--gene', help = 'gene name', default = '') arg_parser.add_argument('--datadir', help = 'Data Directory. By default, it assmumes homedir + git/Data') arg_parser.add_argument('-i', '--input_dir', help = 'Input dir containing pdb files. By default, it assmumes homedir + + input') @@ -83,17 +96,19 @@ outdir = args.output_dir gene_match = gene + '_p.' print('mut pattern for gene', gene, ':', gene_match) -nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}' -print('nsSNP for gene', gene, ':', nssnp_match) +# Redundant, now that improvements have been made! 
+# See section "REGEX" +# nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}' +# print('nsSNP for gene', gene, ':', nssnp_match) -wt_regex = gene_match.lower()+'([A-Za-z]{3})' -print('wt regex:', wt_regex) +# wt_regex = gene_match.lower()+'([A-Za-z]{3})' +# print('wt regex:', wt_regex) -mut_regex = r'[0-9]+(\w{3})$' -print('mt regex:', mut_regex) +# mut_regex = r'[0-9]+(\w{3})$' +# print('mt regex:', mut_regex) -pos_regex = r'([0-9]+)' -print('position regex:', pos_regex) +# pos_regex = r'([0-9]+)' +# print('position regex:', pos_regex) #%%======================================================================= #============== # directories @@ -168,6 +183,14 @@ print('===================================' , '\n===================================') mcsm_df = pd.read_csv(infile_mcsm, sep = ',') + +# add 3 lowercase aa code for wt and mutant +get_aa_3lower(df = mcsm_df + , wt_colname = 'wild_type' + , mut_colname = 'mutant_type' + , col_wt = 'wt_aa_3lower' + , col_mut = 'mut_aa_3lower') + #mcsm_df.columns = mcsm_df.columns.str.lower() foldx_df = pd.read_csv(infile_foldx , sep = ',') @@ -201,8 +224,9 @@ print('===================================' , '\ndssp_kd_dfs + rd_df' , '\n===================================') #dssp_kd_rd_dfs = combine_dfs_with_checks(dssp_kd_dfs, rd_df, my_join = o_join) -merging_cols_m3 = detect_common_cols(dssp_df, kd_df) -dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3, how = o_join) +merging_cols_m3 = detect_common_cols(dssp_kd_dfs, rd_df) +dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3 + , how = o_join) ncols_m3 = len(dssp_kd_rd_dfs.columns) diff --git a/scripts/my_pdbtools/pdbtools b/scripts/my_pdbtools/pdbtools index 881ff8f..eadbb22 160000 --- a/scripts/my_pdbtools/pdbtools +++ b/scripts/my_pdbtools/pdbtools @@ -1 +1 @@ -Subproject commit 881ff8f27aaf1db4266a84fb03baad3dab552c64 +Subproject commit eadbb223f3883be8730ba39e751a24f5ce0cd45d