200 lines
6.5 KiB
R
200 lines
6.5 KiB
R
#!/usr/bin/env Rscript
|
|
#source("~/git/LSHTM_analysis/config/alr.R")
|
|
source("~/git/LSHTM_analysis/config/embb.R")
|
|
#source("~/git/LSHTM_analysis/config/katg.R")
|
|
#source("~/git/LSHTM_analysis/config/gid.R")
|
|
#source("~/git/LSHTM_analysis/config/pnca.R")
|
|
#source("~/git/LSHTM_analysis/config/rpob.R")
|
|
|
|
# get plottting dfs
|
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|
|
|
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
|
|
#=======
|
|
# output
|
|
#=======
|
|
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
|
|
outdir_stats = paste0(outdir_images,"stats/")
|
|
|
|
###################################################################
|
|
# FIXME: ADD distance to NA when SP replies
|
|
|
|
geneL_normal = c("pnca")
|
|
geneL_na = c("gid", "rpob")
|
|
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
|
|
|
#from plotting_globals()
|
|
LigDist_colname
|
|
ppi2Dist_colname
|
|
naDist_colname
|
|
|
|
delta_symbol #delta_symbol = "\u0394"; delta_symbol
|
|
angstroms_symbol
|
|
|
|
#===========
|
|
# Data used
|
|
#===========
|
|
|
|
df3 = merged_df3
|
|
|
|
cols_to_output = c("mutationinformation"
|
|
, "position"
|
|
, affinity_dist_colnames[1]
|
|
, "ligand_affinity_change"
|
|
, "ligand_outcome"
|
|
, "mmcsm_lig"
|
|
, "mmcsm_lig_outcome"
|
|
, affinity_dist_colnames[2]
|
|
, "mcsm_ppi2_affinity"
|
|
, "mcsm_ppi2_outcome"
|
|
, "maf"
|
|
, "or_mychisq"
|
|
, "pval_fisher")
|
|
|
|
cols_to_output
|
|
df3_output = df3[, cols_to_output]
|
|
colnames(df3_output)
|
|
|
|
#===============================================
|
|
# Add COLS and rounding: adjusted P-values + MAF
|
|
#==============================================
|
|
#-----------------------------
|
|
# adjusted P-values
|
|
#-----------------------------
|
|
# add cols: p_adj_fdr and signif_fdr
|
|
df3_output$p_adj_fdr = p.adjust(df3_output$pval_fisher, method = "fdr")
|
|
df3_output$signif_fdr = df3_output$p_adj_fdr
|
|
df3_output = dplyr::mutate(df3_output
|
|
, signif_fdr = case_when(signif_fdr == 0.05 ~ "."
|
|
, signif_fdr <=0.0001 ~ '****'
|
|
, signif_fdr <=0.001 ~ '***'
|
|
, signif_fdr <=0.01 ~ '**'
|
|
, signif_fdr <0.05 ~ '*'
|
|
, TRUE ~ 'ns'))
|
|
# rounding
|
|
df3_output$or_mychisq = round(df3_output$or_mychisq,2)
|
|
df3_output$p_adj_fdr = round(df3_output$p_adj_fdr,2)
|
|
head(df3_output)
|
|
|
|
#----------
|
|
# MAF (%)
|
|
#----------
|
|
# add col maf_percent
|
|
df3_output$maf_percent = df3_output$maf*100
|
|
|
|
# rounding
|
|
df3_output$maf_percent = round(df3_output$maf_percent,2)
|
|
head(df3_output$af); head(df3_output$maf);head(df3_output$maf_percent)
|
|
|
|
#----------
|
|
# P-value
|
|
#----------
|
|
df3_output$pval_fisher = round(df3_output$pval_fisher,2)
|
|
|
|
|
|
class(df3_output)
|
|
head(df3_output)
|
|
|
|
####################################
|
|
# Appendix: ligand affinity
|
|
####################################
|
|
df_lig = df3_output[df3_output[[LigDist_colname]]<DistCutOff,]
|
|
|
|
cols_to_output_lig = c("mutationinformation"
|
|
, "position"
|
|
, LigDist_colname
|
|
, "ligand_affinity_change"
|
|
, "ligand_outcome"
|
|
, "mmcsm_lig"
|
|
, "mmcsm_lig_outcome"
|
|
, "maf_percent"
|
|
, "or_mychisq"
|
|
, "pval_fisher"
|
|
, "p_adj_fdr"
|
|
, "signif_fdr")
|
|
# select cols
|
|
Out_df_lig = df_lig[, cols_to_output_lig]
|
|
|
|
# sort df by OR and then MAF: highest OR and highest MAF
|
|
#Out_df_ligS1 = Out_df_lig[order(Out_df_lig$or_mychisq, decreasing = T), ]
|
|
Out_df_ligS = Out_df_lig[order(-Out_df_lig$or_mychisq, Out_df_lig$maf_percent), ]
|
|
|
|
#head(Out_df_ligS1); tail(Out_df_ligS1)
|
|
head(Out_df_ligS); tail(Out_df_ligS)
|
|
|
|
colsNames_to_output_lig = c("Mutation"
|
|
, "position"
|
|
, paste0("Lig-Dist (", angstroms_symbol, ")")
|
|
, "mCSM-ligand affinity"
|
|
, "mCSM ligand_outcome"
|
|
, "mmCSM-ligand affinity"
|
|
, "mmCSM ligand_outcome"
|
|
, paste0("MAF ","(%)")
|
|
, "Odds Ratio"
|
|
, "P-value"
|
|
, "Adjusted P-value"
|
|
, "P-value significance")
|
|
|
|
colnames(Out_df_ligS) = colsNames_to_output_lig
|
|
head(Out_df_ligS)
|
|
|
|
#--------------------
|
|
# write output file: KS test within grpup
|
|
#----------------------
|
|
Out_ligT = paste0(outdir_stats
|
|
, tolower(gene)
|
|
, "_lig_muts.csv")
|
|
|
|
cat("Output of Ligand muts:", Out_ligT )
|
|
write.csv(Out_df_ligS, Out_ligT, row.names = FALSE)
|
|
|
|
########################################################################
|
|
####################################
|
|
# Appendix: PPi2 affinity
|
|
####################################
|
|
|
|
# Filtered data
|
|
df_ppi2 = df3_output[df3_output[[ppi2Dist_colname]]<DistCutOff,]
|
|
|
|
# select cols
|
|
cols_to_output_ppi2 = c("mutationinformation"
|
|
, "position"
|
|
, ppi2Dist_colname
|
|
, "mcsm_ppi2_affinity"
|
|
, "mcsm_ppi2_outcome"
|
|
, "maf_percent"
|
|
, "or_mychisq"
|
|
, "pval_fisher"
|
|
, "p_adj_fdr"
|
|
, "signif_fdr")
|
|
|
|
# extract output cols
|
|
Out_df_ppi2 = df_ppi2[, cols_to_output_ppi2]
|
|
|
|
# sort df by OR and then MAF: Highest OR and Highest MAF
|
|
#Out_df_ppi2S = Out_df_ppi2[order(Out_df_ppi2$or_mychisq, decreasing = T), ]
|
|
Out_df_ppi2S = Out_df_ppi2[order(-Out_df_ppi2$or_mychisq, Out_df_ppi2$maf_percent), ]
|
|
|
|
|
|
colsNames_to_output_ppi2 = c("Mutation"
|
|
, "position"
|
|
, paste0("PPI2-Dist (", angstroms_symbol, ")")
|
|
, paste0("mCSM-PPI2 (", delta_symbol, ")")
|
|
, "mCSM-PPI2 outcome"
|
|
, "Odds Ratio"
|
|
, "P-value"
|
|
, "Adjusted P-value"
|
|
, "P-value significance")
|
|
|
|
colnames(Out_df_ppi2S) = colsNames_to_output_ppi2
|
|
Out_df_ppi2S
|
|
#--------------------
|
|
# write output file: KS test within grpup
|
|
#----------------------
|
|
Out_ppi2T = paste0(outdir_stats
|
|
, tolower(gene)
|
|
, "_ppi2_muts.csv")
|
|
|
|
cat("Output of PPI2 muts:", Out_ppi2T )
|
|
write.csv(Out_df_ppi2S, Out_ppi2T, row.names = FALSE)
|
|
|