added maf column in appendix_tables

This commit is contained in:
Tanushree Tunstall 2022-08-14 12:18:12 +01:00
parent 6f354ab390
commit 2acea43bcf
3 changed files with 84 additions and 283 deletions

View file

@ -23,31 +23,82 @@ geneL_normal = c("pnca")
# Gene name vectors; how they are consumed is not visible in this section.
# NOTE(review): names suggest per-analysis gene groupings (na / ppi2) — confirm against usage.
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
# LigDist_colname # from globals used
# ppi2Dist_colname #from globals used
# naDist_colname #from globals used
#from plotting_globals()
# The bare symbols below are evaluated only (no assignment). Evaluating a name
# that has not been defined raises an error in R, so these lines also act as a
# check that the globals exist before the tables are built.
LigDist_colname
ppi2Dist_colname
naDist_colname
delta_symbol #delta_symbol = "\u0394"; delta_symbol
angstroms_symbol
#===========
# Data used
#===========
# Working copy of merged_df3 (created outside this section).
df3 = merged_df3
# Add adjusted P-values
# NOTE(review): the `dplyr::mutate(df3` call below is never closed in this view
# and is followed directly by the `cols_to_output` assignment. The same
# adjusted-P-value steps are repeated on df3_output further down, so these three
# lines look like superseded code — confirm against the actual script.
df3$p_adj_fdr = p.adjust(df3$pval_fisher, method = "fdr")
df3$signif_fdr = df3$p_adj_fdr
df3 = dplyr::mutate(df3
# Columns carried into df3_output. affinity_dist_colnames is defined outside
# this section: its first element is listed ahead of the ligand columns and its
# second element ahead of the mCSM-PPI2 columns.
cols_to_output = c("mutationinformation"
, "position"
, affinity_dist_colnames[1]
, "ligand_affinity_change"
, "ligand_outcome"
, "mmcsm_lig"
, "mmcsm_lig_outcome"
, affinity_dist_colnames[2]
, "mcsm_ppi2_affinity"
, "mcsm_ppi2_outcome"
, "maf"
, "or_mychisq"
, "pval_fisher")
cols_to_output
# Subset df3 to the selected columns; every name in cols_to_output must exist
# in df3 or the `[` call errors.
df3_output = df3[, cols_to_output]
colnames(df3_output)
#===============================================
# Add COLS and rounding: adjusted P-values + MAF
#===============================================
#-----------------------------
# adjusted P-values
#-----------------------------
# add col p_adj_fdr: FDR-adjusted Fisher P-values
df3_output$p_adj_fdr = p.adjust(df3_output$pval_fisher, method = "fdr")
# add col signif_fdr: significance label derived from the UNROUNDED adjusted
# P-value (rounding happens afterwards so the labels are not affected by it).
# Conditions are tested top to bottom and the first match wins:
#   exactly 0.05 -> ".", <=0.0001 -> "****", <=0.001 -> "***",
#   <=0.01 -> "**", <0.05 -> "*", anything else -> "ns".
# Rows whose adjusted P-value is NA match none of the comparisons and therefore
# fall through to "ns".
# case_when is namespace-qualified, like mutate, so this works even when dplyr
# has not been attached with library().
df3_output$signif_fdr = df3_output$p_adj_fdr
df3_output = dplyr::mutate(df3_output
                           , signif_fdr = dplyr::case_when(signif_fdr == 0.05 ~ "."
                                                           , signif_fdr <=0.0001 ~ '****'
                                                           , signif_fdr <=0.001 ~ '***'
                                                           , signif_fdr <=0.01 ~ '**'
                                                           , signif_fdr <0.05 ~ '*'
                                                           , TRUE ~ 'ns'))
# rounding (2 d.p.) for display
df3_output$or_mychisq = round(df3_output$or_mychisq,2)
df3_output$p_adj_fdr = round(df3_output$p_adj_fdr,2)
head(df3_output)
#----------
# MAF (%)
#----------
# add col maf_percent: maf expressed as a percentage, rounded to 2 d.p.
df3_output$maf_percent = df3_output$maf*100
df3_output$maf_percent = round(df3_output$maf_percent,2)
# sanity check: compare raw and percentage values
# (an `af` column is not among the selected cols_to_output, so it is not inspected here)
head(df3_output$maf); head(df3_output$maf_percent)
#----------
# P-value
#----------
# Unadjusted Fisher P-value rounded to 2 d.p. for display; done last so that
# p_adj_fdr above is computed from the unrounded values.
df3_output$pval_fisher = round(df3_output$pval_fisher,2)
class(df3_output)
head(df3_output)
####################################
# Appendix: ligand affinity
####################################
# Keep rows whose value in the column named by LigDist_colname is below
# DistCutOff (both defined outside this section). df_lig is assigned twice; the
# second assignment overwrites the first, so the result comes from df3_output.
# NOTE(review): assumes LigDist_colname is one of the columns kept in
# df3_output (cols_to_output uses affinity_dist_colnames[1]) — if it is not,
# `[[` returns NULL and the filter selects zero rows. TODO confirm.
df_lig = df3[df3[[LigDist_colname]]<DistCutOff,]
df_lig = df3_output[df3_output[[LigDist_colname]]<DistCutOff,]
# Columns written to the ligand appendix table, in output order.
# NOTE(review): the line starting with "@" below is a diff hunk header, so some
# entries of this vector are not visible here.
cols_to_output_lig = c("mutationinformation"
, "position"
@ -56,20 +107,20 @@ cols_to_output_lig = c("mutationinformation"
, "ligand_outcome"
, "mmcsm_lig"
, "mmcsm_lig_outcome"
, "maf_percent"
, "or_mychisq"
, "pval_fisher"
, "p_adj_fdr"
, "signif_fdr")
# counting
# select cols
Out_df_lig = df_lig[, cols_to_output_lig]
delta_symbol = "\u0394"; delta_symbol
# Sort for the appendix table: highest odds ratio first; rows with equal OR are
# ordered by highest MAF (%) first. Both keys are negated so that order(), which
# sorts ascending, returns them in descending order ("highest OR and highest MAF").
Out_df_ligS = Out_df_lig[order(-Out_df_lig$or_mychisq, -Out_df_lig$maf_percent), ]
# round to 2 d.p. for display
Out_df_ligS$or_mychisq = round(Out_df_ligS$or_mychisq,2)
Out_df_ligS$p_adj_fdr = round(Out_df_ligS$p_adj_fdr,2)
Out_df_ligS
# quick look at both ends of the sorted table
head(Out_df_ligS); tail(Out_df_ligS)
# Display names for the ligand appendix table; assigned positionally to the
# columns of Out_df_ligS, so order and length must match cols_to_output_lig.
# NOTE(review): the line starting with "@" below is a diff hunk header, so some
# entries are not visible here. Both "Adj P-value" and "Adjusted P-value" appear
# in this view, which looks like an old and a new version of the same label —
# confirm the final vector has exactly one name per output column.
colsNames_to_output_lig = c("Mutation"
, "position"
@ -78,12 +129,15 @@ colsNames_to_output_lig = c("Mutation"
, "mCSM ligand_outcome"
, "mmCSM-ligand affinity"
, "mmCSM ligand_outcome"
, paste0("MAF ","(%)")
, "Odds Ratio"
, "Adj P-value"
, "P-value"
, "Adjusted P-value"
, "P-value significance")
# Replace the raw column names with the display names above.
colnames(Out_df_ligS) = colsNames_to_output_lig
Out_df_ligS
head(Out_df_ligS)
#--------------------
# write output file: KS test within group
#----------------------
@ -99,27 +153,28 @@ write.csv(Out_df_ligS, Out_ligT, row.names = FALSE)
# Appendix: PPi2 affinity
####################################
# Keep rows whose value in the column named by ppi2Dist_colname is below
# DistCutOff (both defined outside this section). df_ppi2 is assigned twice; the
# second assignment overwrites the first, so the result comes from df3_output.
# NOTE(review): assumes ppi2Dist_colname is one of the columns kept in
# df3_output (cols_to_output uses affinity_dist_colnames[2]) — TODO confirm.
df_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,]
# Filtered data
df_ppi2 = df3_output[df3_output[[ppi2Dist_colname]]<DistCutOff,]
# select cols
# Columns written to the PPI2 appendix table, in output order.
cols_to_output_ppi2 = c("mutationinformation"
, "position"
, ppi2Dist_colname
, "mcsm_ppi2_affinity"
, "mcsm_ppi2_outcome"
, "maf_percent"
, "or_mychisq"
, "pval_fisher"
, "p_adj_fdr"
, "signif_fdr")
# extract output cols
Out_df_ppi2 = df_ppi2[, cols_to_output_ppi2]
delta_symbol = "\u0394"; delta_symbol
# Sort for the appendix table: highest odds ratio first; rows with equal OR are
# ordered by highest MAF (%) first. Both keys are negated so that order(), which
# sorts ascending, returns them in descending order ("Highest OR and Highest MAF").
Out_df_ppi2S = Out_df_ppi2[order(-Out_df_ppi2$or_mychisq, -Out_df_ppi2$maf_percent), ]
# round to 2 d.p. for display
Out_df_ppi2S$or_mychisq = round(Out_df_ppi2S$or_mychisq,2)
Out_df_ppi2S$p_adj_fdr = round(Out_df_ppi2S$p_adj_fdr,2)
Out_df_ppi2S
# Display names for the PPI2 appendix table; assigned positionally to the
# columns of Out_df_ppi2S, so order and length must match cols_to_output_ppi2.
# NOTE(review): the line starting with "@" below is a diff hunk header, so some
# entries are not visible here. In this view there is no MAF label between
# "mCSM-PPI2 outcome" and "Odds Ratio", although cols_to_output_ppi2 has
# "maf_percent" in that position, and both "Adj P-value" and "Adjusted P-value"
# appear — confirm the final vector lines up one-to-one with the output columns,
# otherwise the labels shift onto the wrong columns.
colsNames_to_output_ppi2 = c("Mutation"
, "position"
@ -127,7 +182,8 @@ colsNames_to_output_ppi2 = c("Mutation"
, paste0("mCSM-PPI2 (", delta_symbol, ")")
, "mCSM-PPI2 outcome"
, "Odds Ratio"
, "Adj P-value"
, "P-value"
, "Adjusted P-value"
, "P-value significance")
# Replace the raw column names with the display names above.
colnames(Out_df_ppi2S) = colsNames_to_output_ppi2