saving work before adding files
This commit is contained in:
parent
86ed1805fc
commit
f6a2e029cb
3 changed files with 47 additions and 24 deletions
|
@ -109,7 +109,6 @@ if (is.null(other_muts_col)){
|
||||||
other_muts_col
|
other_muts_col
|
||||||
cat("\ndrug and other mut colnames not specified, sourcing from globals: "
|
cat("\ndrug and other mut colnames not specified, sourcing from globals: "
|
||||||
, other_muts_col, "\n")
|
, other_muts_col, "\n")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Informing the user of the sensible defaults being used:
|
# Informing the user of the sensible defaults being used:
|
||||||
|
|
|
@ -54,8 +54,21 @@ os.getcwd()
|
||||||
# FIXME: local imports
|
# FIXME: local imports
|
||||||
#from combining import combine_dfs_with_checks
|
#from combining import combine_dfs_with_checks
|
||||||
from combining_FIXME import detect_common_cols
|
from combining_FIXME import detect_common_cols
|
||||||
from reference_dict import oneletter_aa_dict # CHECK DIR STRUC THERE!
|
from reference_dict import oneletter_aa_dict
|
||||||
from reference_dict import low_3letter_dict # CHECK DIR STRUC THERE!
|
from reference_dict import low_3letter_dict
|
||||||
|
|
||||||
|
from aa_code import get_aa_3lower
|
||||||
|
from aa_code import get_aa_1upper
|
||||||
|
|
||||||
|
# REGEX: as required
|
||||||
|
# mcsm_regex = r'^([A-Za-z]{1})([0-9]+)([A-Za-z]{1})$'
|
||||||
|
# mcsm_wt = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[0]
|
||||||
|
# mcsm_mut = mcsm_df['mutationinformation'].str.extract(mcsm_regex)[2]
|
||||||
|
|
||||||
|
# gwas_regex = r'^([A-Za-z]{3})([0-9]+)([A-Za-z]{3})$'
|
||||||
|
# gwas_wt = mcsm_df['mutation'].str.extract(gwas_regex)[0]
|
||||||
|
# gwas_pos = mcsm_df['mutation'].str.extract(gwas_regex)[1]
|
||||||
|
# gwas_mut = mcsm_df['mutation'].str.extract(gwas_regex)[2]
|
||||||
#=======================================================================
|
#=======================================================================
|
||||||
#%% command line args: case sensitive
|
#%% command line args: case sensitive
|
||||||
arg_parser = argparse.ArgumentParser()
|
arg_parser = argparse.ArgumentParser()
|
||||||
|
@ -83,17 +96,19 @@ outdir = args.output_dir
|
||||||
gene_match = gene + '_p.'
|
gene_match = gene + '_p.'
|
||||||
print('mut pattern for gene', gene, ':', gene_match)
|
print('mut pattern for gene', gene, ':', gene_match)
|
||||||
|
|
||||||
nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
# Redundant, now that improvements have been made!
|
||||||
print('nsSNP for gene', gene, ':', nssnp_match)
|
# See section "REGEX"
|
||||||
|
# nssnp_match = gene_match +'[A-Za-z]{3}[0-9]+[A-Za-z]{3}'
|
||||||
|
# print('nsSNP for gene', gene, ':', nssnp_match)
|
||||||
|
|
||||||
wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
# wt_regex = gene_match.lower()+'([A-Za-z]{3})'
|
||||||
print('wt regex:', wt_regex)
|
# print('wt regex:', wt_regex)
|
||||||
|
|
||||||
mut_regex = r'[0-9]+(\w{3})$'
|
# mut_regex = r'[0-9]+(\w{3})$'
|
||||||
print('mt regex:', mut_regex)
|
# print('mt regex:', mut_regex)
|
||||||
|
|
||||||
pos_regex = r'([0-9]+)'
|
# pos_regex = r'([0-9]+)'
|
||||||
print('position regex:', pos_regex)
|
# print('position regex:', pos_regex)
|
||||||
#%%=======================================================================
|
#%%=======================================================================
|
||||||
#==============
|
#==============
|
||||||
# directories
|
# directories
|
||||||
|
@ -168,6 +183,14 @@ print('==================================='
|
||||||
, '\n===================================')
|
, '\n===================================')
|
||||||
|
|
||||||
mcsm_df = pd.read_csv(infile_mcsm, sep = ',')
|
mcsm_df = pd.read_csv(infile_mcsm, sep = ',')
|
||||||
|
|
||||||
|
# add three-letter lowercase amino-acid codes for the wild-type and mutant residues
|
||||||
|
get_aa_3lower(df = mcsm_df
|
||||||
|
, wt_colname = 'wild_type'
|
||||||
|
, mut_colname = 'mutant_type'
|
||||||
|
, col_wt = 'wt_aa_3lower'
|
||||||
|
, col_mut = 'mut_aa_3lower')
|
||||||
|
|
||||||
#mcsm_df.columns = mcsm_df.columns.str.lower()
|
#mcsm_df.columns = mcsm_df.columns.str.lower()
|
||||||
foldx_df = pd.read_csv(infile_foldx , sep = ',')
|
foldx_df = pd.read_csv(infile_foldx , sep = ',')
|
||||||
|
|
||||||
|
@ -201,8 +224,9 @@ print('==================================='
|
||||||
, '\ndssp_kd_dfs + rd_df'
|
, '\ndssp_kd_dfs + rd_df'
|
||||||
, '\n===================================')
|
, '\n===================================')
|
||||||
#dssp_kd_rd_dfs = combine_dfs_with_checks(dssp_kd_dfs, rd_df, my_join = o_join)
|
#dssp_kd_rd_dfs = combine_dfs_with_checks(dssp_kd_dfs, rd_df, my_join = o_join)
|
||||||
merging_cols_m3 = detect_common_cols(dssp_df, kd_df)
|
merging_cols_m3 = detect_common_cols(dssp_kd_dfs, rd_df)
|
||||||
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3, how = o_join)
|
dssp_kd_rd_dfs = pd.merge(dssp_kd_dfs, rd_df, on = merging_cols_m3
|
||||||
|
, how = o_join)
|
||||||
|
|
||||||
ncols_m3 = len(dssp_kd_rd_dfs.columns)
|
ncols_m3 = len(dssp_kd_rd_dfs.columns)
|
||||||
|
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 881ff8f27aaf1db4266a84fb03baad3dab552c64
|
Subproject commit eadbb223f3883be8730ba39e751a24f5ce0cd45d
|
Loading…
Add table
Add a link
Reference in a new issue