added maf column in appendix_tables
This commit is contained in:
parent
6f354ab390
commit
2acea43bcf
3 changed files with 84 additions and 283 deletions
|
@ -23,31 +23,82 @@ geneL_normal = c("pnca")
|
||||||
geneL_na = c("gid", "rpob")
|
geneL_na = c("gid", "rpob")
|
||||||
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
|
|
||||||
# LigDist_colname # from globals used
|
#from plotting_globals()
|
||||||
# ppi2Dist_colname #from globals used
|
LigDist_colname
|
||||||
# naDist_colname #from globals used
|
ppi2Dist_colname
|
||||||
|
naDist_colname
|
||||||
|
|
||||||
|
delta_symbol #delta_symbol = "\u0394"; delta_symbol
|
||||||
|
angstroms_symbol
|
||||||
|
|
||||||
|
#===========
|
||||||
|
# Data used
|
||||||
|
#===========
|
||||||
|
|
||||||
df3 = merged_df3
|
df3 = merged_df3
|
||||||
|
|
||||||
# Add adjusted P-values
|
cols_to_output = c("mutationinformation"
|
||||||
df3$p_adj_fdr = p.adjust(df3$pval_fisher, method = "fdr")
|
, "position"
|
||||||
df3$signif_fdr = df3$p_adj_fdr
|
, affinity_dist_colnames[1]
|
||||||
df3 = dplyr::mutate(df3
|
, "ligand_affinity_change"
|
||||||
|
, "ligand_outcome"
|
||||||
|
, "mmcsm_lig"
|
||||||
|
, "mmcsm_lig_outcome"
|
||||||
|
, affinity_dist_colnames[2]
|
||||||
|
, "mcsm_ppi2_affinity"
|
||||||
|
, "mcsm_ppi2_outcome"
|
||||||
|
, "maf"
|
||||||
|
, "or_mychisq"
|
||||||
|
, "pval_fisher")
|
||||||
|
|
||||||
|
cols_to_output
|
||||||
|
df3_output = df3[, cols_to_output]
|
||||||
|
colnames(df3_output)
|
||||||
|
|
||||||
|
#===============================================
|
||||||
|
# Add COLS and rounding: adjusted P-values + MAF
|
||||||
|
#==============================================
|
||||||
|
#-----------------------------
|
||||||
|
# adjusted P-values
|
||||||
|
#-----------------------------
|
||||||
|
# add cols: p_adj_fdr and signif_fdr
|
||||||
|
df3_output$p_adj_fdr = p.adjust(df3_output$pval_fisher, method = "fdr")
|
||||||
|
df3_output$signif_fdr = df3_output$p_adj_fdr
|
||||||
|
df3_output = dplyr::mutate(df3_output
|
||||||
, signif_fdr = case_when(signif_fdr == 0.05 ~ "."
|
, signif_fdr = case_when(signif_fdr == 0.05 ~ "."
|
||||||
, signif_fdr <=0.0001 ~ '****'
|
, signif_fdr <=0.0001 ~ '****'
|
||||||
, signif_fdr <=0.001 ~ '***'
|
, signif_fdr <=0.001 ~ '***'
|
||||||
, signif_fdr <=0.01 ~ '**'
|
, signif_fdr <=0.01 ~ '**'
|
||||||
, signif_fdr <0.05 ~ '*'
|
, signif_fdr <0.05 ~ '*'
|
||||||
, TRUE ~ 'ns'))
|
, TRUE ~ 'ns'))
|
||||||
|
# rounding
|
||||||
|
df3_output$or_mychisq = round(df3_output$or_mychisq,2)
|
||||||
|
df3_output$p_adj_fdr = round(df3_output$p_adj_fdr,2)
|
||||||
|
head(df3_output)
|
||||||
|
|
||||||
|
#----------
|
||||||
|
# MAF (%)
|
||||||
|
#----------
|
||||||
|
# add col maf_percent
|
||||||
|
df3_output$maf_percent = df3_output$maf*100
|
||||||
|
|
||||||
|
# rounding
|
||||||
|
df3_output$maf_percent = round(df3_output$maf_percent,2)
|
||||||
|
head(df3_output$af); head(df3_output$maf);head(df3_output$maf_percent)
|
||||||
|
|
||||||
|
#----------
|
||||||
|
# P-value
|
||||||
|
#----------
|
||||||
|
df3_output$pval_fisher = round(df3_output$pval_fisher,2)
|
||||||
|
|
||||||
|
|
||||||
|
class(df3_output)
|
||||||
|
head(df3_output)
|
||||||
|
|
||||||
####################################
|
####################################
|
||||||
# Appendix: ligand affinity
|
# Appendix: ligand affinity
|
||||||
####################################
|
####################################
|
||||||
df_lig = df3[df3[[LigDist_colname]]<DistCutOff,]
|
df_lig = df3_output[df3_output[[LigDist_colname]]<DistCutOff,]
|
||||||
|
|
||||||
|
|
||||||
cols_to_output_lig = c("mutationinformation"
|
cols_to_output_lig = c("mutationinformation"
|
||||||
, "position"
|
, "position"
|
||||||
|
@ -56,20 +107,20 @@ cols_to_output_lig = c("mutationinformation"
|
||||||
, "ligand_outcome"
|
, "ligand_outcome"
|
||||||
, "mmcsm_lig"
|
, "mmcsm_lig"
|
||||||
, "mmcsm_lig_outcome"
|
, "mmcsm_lig_outcome"
|
||||||
|
, "maf_percent"
|
||||||
, "or_mychisq"
|
, "or_mychisq"
|
||||||
|
, "pval_fisher"
|
||||||
, "p_adj_fdr"
|
, "p_adj_fdr"
|
||||||
, "signif_fdr")
|
, "signif_fdr")
|
||||||
|
# select cols
|
||||||
# counting
|
|
||||||
|
|
||||||
Out_df_lig = df_lig[, cols_to_output_lig]
|
Out_df_lig = df_lig[, cols_to_output_lig]
|
||||||
Out_df_ligS = Out_df_lig[order(Out_df_lig$or_mychisq, decreasing = T), ]
|
|
||||||
|
|
||||||
delta_symbol = "\u0394"; delta_symbol
|
# sort df by OR (descending), then by MAF (ascending as coded; use -maf_percent if highest MAF first is intended)
|
||||||
|
#Out_df_ligS1 = Out_df_lig[order(Out_df_lig$or_mychisq, decreasing = T), ]
|
||||||
|
Out_df_ligS = Out_df_lig[order(-Out_df_lig$or_mychisq, Out_df_lig$maf_percent), ]
|
||||||
|
|
||||||
Out_df_ligS$or_mychisq = round(Out_df_ligS$or_mychisq,2)
|
#head(Out_df_ligS1); tail(Out_df_ligS1)
|
||||||
Out_df_ligS$p_adj_fdr = round(Out_df_ligS$p_adj_fdr,2)
|
head(Out_df_ligS); tail(Out_df_ligS)
|
||||||
Out_df_ligS
|
|
||||||
|
|
||||||
colsNames_to_output_lig = c("Mutation"
|
colsNames_to_output_lig = c("Mutation"
|
||||||
, "position"
|
, "position"
|
||||||
|
@ -78,12 +129,15 @@ colsNames_to_output_lig = c("Mutation"
|
||||||
, "mCSM ligand_outcome"
|
, "mCSM ligand_outcome"
|
||||||
, "mmCSM-ligand affinity"
|
, "mmCSM-ligand affinity"
|
||||||
, "mmCSM ligand_outcome"
|
, "mmCSM ligand_outcome"
|
||||||
|
, paste0("MAF ","(%)")
|
||||||
, "Odds Ratio"
|
, "Odds Ratio"
|
||||||
, "Adj P-value"
|
, "P-value"
|
||||||
|
, "Adjusted P-value"
|
||||||
, "P-value significance")
|
, "P-value significance")
|
||||||
|
|
||||||
colnames(Out_df_ligS) = colsNames_to_output_lig
|
colnames(Out_df_ligS) = colsNames_to_output_lig
|
||||||
Out_df_ligS
|
head(Out_df_ligS)
|
||||||
|
|
||||||
#--------------------
|
#--------------------
|
||||||
# write output file: KS test within group
|
# write output file: KS test within group
|
||||||
#----------------------
|
#----------------------
|
||||||
|
@ -99,27 +153,28 @@ write.csv(Out_df_ligS, Out_ligT, row.names = FALSE)
|
||||||
# Appendix: PPi2 affinity
|
# Appendix: PPi2 affinity
|
||||||
####################################
|
####################################
|
||||||
|
|
||||||
df_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
|
# Filtered data
|
||||||
|
df_ppi2 = df3_output[df3_output[[ppi2Dist_colname]]<DistCutOff,]
|
||||||
|
|
||||||
|
# select cols
|
||||||
cols_to_output_ppi2 = c("mutationinformation"
|
cols_to_output_ppi2 = c("mutationinformation"
|
||||||
, "position"
|
, "position"
|
||||||
, ppi2Dist_colname
|
, ppi2Dist_colname
|
||||||
, "mcsm_ppi2_affinity"
|
, "mcsm_ppi2_affinity"
|
||||||
, "mcsm_ppi2_outcome"
|
, "mcsm_ppi2_outcome"
|
||||||
|
, "maf_percent"
|
||||||
, "or_mychisq"
|
, "or_mychisq"
|
||||||
|
, "pval_fisher"
|
||||||
, "p_adj_fdr"
|
, "p_adj_fdr"
|
||||||
, "signif_fdr")
|
, "signif_fdr")
|
||||||
|
|
||||||
# extract output cols
|
# extract output cols
|
||||||
Out_df_ppi2 = df_ppi2[, cols_to_output_ppi2]
|
Out_df_ppi2 = df_ppi2[, cols_to_output_ppi2]
|
||||||
Out_df_ppi2S = Out_df_ppi2[order(Out_df_ppi2$or_mychisq, decreasing = T), ]
|
|
||||||
|
|
||||||
delta_symbol = "\u0394"; delta_symbol
|
# sort df by OR (descending), then by MAF (ascending as coded; use -maf_percent if highest MAF first is intended)
|
||||||
|
#Out_df_ppi2S = Out_df_ppi2[order(Out_df_ppi2$or_mychisq, decreasing = T), ]
|
||||||
|
Out_df_ppi2S = Out_df_ppi2[order(-Out_df_ppi2$or_mychisq, Out_df_ppi2$maf_percent), ]
|
||||||
|
|
||||||
Out_df_ppi2S$or_mychisq = round(Out_df_ppi2S$or_mychisq,2)
|
|
||||||
Out_df_ppi2S$p_adj_fdr = round(Out_df_ppi2S$p_adj_fdr,2)
|
|
||||||
Out_df_ppi2S
|
|
||||||
|
|
||||||
colsNames_to_output_ppi2 = c("Mutation"
|
colsNames_to_output_ppi2 = c("Mutation"
|
||||||
, "position"
|
, "position"
|
||||||
|
@ -127,7 +182,8 @@ colsNames_to_output_ppi2 = c("Mutation"
|
||||||
, paste0("mCSM-PPI2 (", delta_symbol, ")")
|
, paste0("mCSM-PPI2 (", delta_symbol, ")")
|
||||||
, "mCSM-PPI2 outcome"
|
, "mCSM-PPI2 outcome"
|
||||||
, "Odds Ratio"
|
, "Odds Ratio"
|
||||||
, "Adj P-value"
|
, "P-value"
|
||||||
|
, "Adjusted P-value"
|
||||||
, "P-value significance")
|
, "P-value significance")
|
||||||
|
|
||||||
colnames(Out_df_ppi2S) = colsNames_to_output_ppi2
|
colnames(Out_df_ppi2S) = colsNames_to_output_ppi2
|
||||||
|
|
|
@ -1,156 +0,0 @@
|
||||||
"colnames(merged_df3)"
|
|
||||||
"mutationinformation"
|
|
||||||
"id"
|
|
||||||
"sample"
|
|
||||||
"lineage"
|
|
||||||
"sublineage"
|
|
||||||
"country_code"
|
|
||||||
"drtype"
|
|
||||||
"ethambutol"
|
|
||||||
"mutation"
|
|
||||||
"drug_name"
|
|
||||||
"mutation_info"
|
|
||||||
"mutation_info_orig"
|
|
||||||
"mutation_info_v1"
|
|
||||||
"wild_type"
|
|
||||||
"mutant_type"
|
|
||||||
"position"
|
|
||||||
"Mut"
|
|
||||||
"Mut_copy"
|
|
||||||
"index_orig"
|
|
||||||
"index_orig_copy"
|
|
||||||
"snp_frequency"
|
|
||||||
"pos_count"
|
|
||||||
"wt_prop_water"
|
|
||||||
"mut_prop_water"
|
|
||||||
"wt_prop_polarity"
|
|
||||||
"mut_prop_polarity"
|
|
||||||
"wt_calcprop"
|
|
||||||
"mut_calcprop"
|
|
||||||
"total_id_ucount"
|
|
||||||
"maf"
|
|
||||||
"drtype_numeric"
|
|
||||||
"drtype_all_vals"
|
|
||||||
"drtype_all_names"
|
|
||||||
"drtype_multimode"
|
|
||||||
"drtype_mode"
|
|
||||||
"drtype_max"
|
|
||||||
"mutation_info_labels"
|
|
||||||
"dm_om_numeric"
|
|
||||||
"dm_om_numeric_orig"
|
|
||||||
"dst"
|
|
||||||
"dst_multimode"
|
|
||||||
"dst_mode"
|
|
||||||
"mutation_info_labels_v1"
|
|
||||||
"mutation_info_labels_orig"
|
|
||||||
"lineage_list_all"
|
|
||||||
"lineage_count_all"
|
|
||||||
"lineage_count_unique"
|
|
||||||
"lineage_list_unique"
|
|
||||||
"lineage_multimode"
|
|
||||||
"chain"
|
|
||||||
"ligand_id"
|
|
||||||
"ligand_distance"
|
|
||||||
"duet_stability_change"
|
|
||||||
"duet_outcome"
|
|
||||||
"ligand_affinity_change"
|
|
||||||
"ligand_outcome"
|
|
||||||
"duet_scaled"
|
|
||||||
"affinity_scaled"
|
|
||||||
"wild_pos"
|
|
||||||
"wild_chain_pos"
|
|
||||||
"ddg_foldx"
|
|
||||||
"contacts"
|
|
||||||
"electro_rr"
|
|
||||||
"electro_mm"
|
|
||||||
"electro_sm"
|
|
||||||
"electro_ss"
|
|
||||||
"disulfide_rr"
|
|
||||||
"disulfide_mm"
|
|
||||||
"disulfide_sm"
|
|
||||||
"disulfide_ss"
|
|
||||||
"hbonds_rr"
|
|
||||||
"hbonds_mm"
|
|
||||||
"hbonds_sm"
|
|
||||||
"hbonds_ss"
|
|
||||||
"partcov_rr"
|
|
||||||
"partcov_mm"
|
|
||||||
"partcov_sm"
|
|
||||||
"partcov_ss"
|
|
||||||
"vdwclashes_rr"
|
|
||||||
"vdwclashes_mm"
|
|
||||||
"vdwclashes_sm"
|
|
||||||
"vdwclashes_ss"
|
|
||||||
"volumetric_rr"
|
|
||||||
"volumetric_mm"
|
|
||||||
"volumetric_sm"
|
|
||||||
"volumetric_ss"
|
|
||||||
"foldx_scaled"
|
|
||||||
"foldx_outcome"
|
|
||||||
"deepddg"
|
|
||||||
"deepddg_outcome"
|
|
||||||
"deepddg_scaled"
|
|
||||||
"asa"
|
|
||||||
"rsa"
|
|
||||||
"ss"
|
|
||||||
"ss_class"
|
|
||||||
"kd_values"
|
|
||||||
"rd_values"
|
|
||||||
"wt_3upper"
|
|
||||||
"consurf_score"
|
|
||||||
"consurf_scaled"
|
|
||||||
"consurf_colour"
|
|
||||||
"consurf_colour_rev"
|
|
||||||
"consurf_ci_upper"
|
|
||||||
"consurf_ci_lower"
|
|
||||||
"consurf_ci_colour"
|
|
||||||
"consurf_msa_data"
|
|
||||||
"consurf_aa_variety"
|
|
||||||
"snap2_score"
|
|
||||||
"snap2_scaled"
|
|
||||||
"snap2_accuracy_pc"
|
|
||||||
"snap2_outcome"
|
|
||||||
"af"
|
|
||||||
"beta_logistic"
|
|
||||||
"or_logistic"
|
|
||||||
"pval_logistic"
|
|
||||||
"se_logistic"
|
|
||||||
"zval_logistic"
|
|
||||||
"ci_low_logistic"
|
|
||||||
"ci_hi_logistic"
|
|
||||||
"or_mychisq"
|
|
||||||
"log10_or_mychisq"
|
|
||||||
"or_fisher"
|
|
||||||
"pval_fisher"
|
|
||||||
"neglog_pval_fisher"
|
|
||||||
"ci_low_fisher"
|
|
||||||
"ci_hi_fisher"
|
|
||||||
"est_chisq"
|
|
||||||
"pval_chisq"
|
|
||||||
"ddg_dynamut2"
|
|
||||||
"ddg_dynamut2_scaled"
|
|
||||||
"ddg_dynamut2_outcome"
|
|
||||||
"mcsm_ppi2_affinity"
|
|
||||||
"mcsm_ppi2_scaled"
|
|
||||||
"mcsm_ppi2_outcome"
|
|
||||||
"interface_dist"
|
|
||||||
"mut_3upper"
|
|
||||||
"seq_offset4pdb"
|
|
||||||
"provean_score"
|
|
||||||
"provean_outcome"
|
|
||||||
"provean_scaled"
|
|
||||||
"mmcsm_lig"
|
|
||||||
"mmcsm_lig_scaled"
|
|
||||||
"mmcsm_lig_outcome"
|
|
||||||
"gene_name"
|
|
||||||
"pdb_file"
|
|
||||||
"lineage_labels"
|
|
||||||
"consurf_outcome"
|
|
||||||
"sensitivity"
|
|
||||||
"foldx_scaled_signC"
|
|
||||||
"avg_stability"
|
|
||||||
"avg_stability_outcome"
|
|
||||||
"avg_stability_scaled"
|
|
||||||
"avg_lig_affinity"
|
|
||||||
"avg_lig_affinity_outcome"
|
|
||||||
"avg_lig_affinity_scaled"
|
|
|
|
@ -1,99 +0,0 @@
|
||||||
"mutationinformation"
|
|
||||||
"id"
|
|
||||||
"lineage"
|
|
||||||
"drtype"
|
|
||||||
drug
|
|
||||||
"mutation"
|
|
||||||
"drug_name"
|
|
||||||
"mutation_info"
|
|
||||||
#"mutation_info_orig"
|
|
||||||
#"mutation_info_v1"
|
|
||||||
#"wild_type"
|
|
||||||
#"mutant_type"
|
|
||||||
#"position"
|
|
||||||
#"Mut"
|
|
||||||
"snp_frequency"
|
|
||||||
"pos_count"
|
|
||||||
"total_id_ucount"
|
|
||||||
"maf"
|
|
||||||
"drtype_mode"
|
|
||||||
"drtype_max"
|
|
||||||
"mutation_info_labels"
|
|
||||||
"dst"
|
|
||||||
"dst_mode"
|
|
||||||
#"mutation_info_labels_v1"
|
|
||||||
#"mutation_info_labels_orig"
|
|
||||||
"lineage_count_all"
|
|
||||||
"lineage_count_unique"
|
|
||||||
"chain"
|
|
||||||
"ligand_id"
|
|
||||||
LigDist_colname
|
|
||||||
"duet_stability_change"
|
|
||||||
"duet_outcome"
|
|
||||||
"ligand_affinity_change"
|
|
||||||
"ligand_outcome"
|
|
||||||
"duet_scaled"
|
|
||||||
"affinity_scaled"
|
|
||||||
"wild_pos"
|
|
||||||
"wild_chain_pos"
|
|
||||||
"ddg_foldx"
|
|
||||||
"foldx_scaled"
|
|
||||||
"foldx_outcome"
|
|
||||||
"deepddg"
|
|
||||||
"deepddg_outcome"
|
|
||||||
"deepddg_scaled"
|
|
||||||
"asa"
|
|
||||||
"rsa"
|
|
||||||
"ss"
|
|
||||||
"ss_class"
|
|
||||||
"kd_values"
|
|
||||||
"rd_values"
|
|
||||||
"wt_3upper"
|
|
||||||
"consurf_score"
|
|
||||||
"consurf_scaled"
|
|
||||||
"consurf_colour"
|
|
||||||
"consurf_colour_rev"
|
|
||||||
"consurf_ci_upper"
|
|
||||||
"consurf_ci_lower"
|
|
||||||
"consurf_ci_colour"
|
|
||||||
"consurf_msa_data"
|
|
||||||
"consurf_aa_variety"
|
|
||||||
"snap2_score"
|
|
||||||
"snap2_scaled"
|
|
||||||
"snap2_accuracy_pc"
|
|
||||||
"snap2_outcome"
|
|
||||||
"af"
|
|
||||||
"or_logistic"
|
|
||||||
"pval_logistic"
|
|
||||||
"or_mychisq"
|
|
||||||
"log10_or_mychisq"
|
|
||||||
"or_fisher"
|
|
||||||
"pval_fisher"
|
|
||||||
"neglog_pval_fisher"
|
|
||||||
"ddg_dynamut2"
|
|
||||||
"ddg_dynamut2_scaled"
|
|
||||||
"ddg_dynamut2_outcome"
|
|
||||||
"mcsm_ppi2_affinity"
|
|
||||||
"mcsm_ppi2_scaled"
|
|
||||||
"mcsm_ppi2_outcome"
|
|
||||||
ppi2Dist_colname
|
|
||||||
"mut_3upper"
|
|
||||||
"seq_offset4pdb"
|
|
||||||
"provean_score"
|
|
||||||
"provean_outcome"
|
|
||||||
"provean_scaled"
|
|
||||||
"mmcsm_lig"
|
|
||||||
"mmcsm_lig_scaled"
|
|
||||||
"mmcsm_lig_outcome"
|
|
||||||
"gene_name"
|
|
||||||
"pdb_file"
|
|
||||||
"lineage_labels"
|
|
||||||
"consurf_outcome"
|
|
||||||
"sensitivity"
|
|
||||||
"foldx_scaled_signC"
|
|
||||||
"avg_stability"
|
|
||||||
"avg_stability_outcome"
|
|
||||||
"avg_stability_scaled"
|
|
||||||
"avg_lig_affinity"
|
|
||||||
"avg_lig_affinity_outcome"
|
|
||||||
"avg_lig_affinity_scaled"
|
|
Can't render this file because it contains an unexpected character in line 9 and column 2.
|
Loading…
Add table
Add a link
Reference in a new issue