generated corr plots with MAF and provean
This commit is contained in:
parent
1a513913ce
commit
569e372476
6 changed files with 115 additions and 104 deletions
|
@ -223,20 +223,20 @@ consurf_palette2 = c("0" = "yellow2"
|
||||||
, "9" = "darkorchid4")
|
, "9" = "darkorchid4")
|
||||||
|
|
||||||
|
|
||||||
consurf_colours = c(#"0" = rgb(1.00,1.00,0.59)
|
consurf_colours = c(
|
||||||
"nsd" = rgb(1.00,1.00,0.59)
|
"nsd" = rgb(1.00,1.00,0.59)
|
||||||
, "1" = rgb(0.63,0.16,0.37)
|
, "1" = rgb(0.63,0.16,0.37)
|
||||||
, "2" = rgb(0.94,0.49,0.67)
|
, "2" = rgb(0.94,0.49,0.67)
|
||||||
, "3" = rgb(0.98,0.78,0.86)
|
, "3" = rgb(0.98,0.78,0.86)
|
||||||
, "4" = rgb(0.98,0.92,0.96)
|
, "4" = rgb(0.98,0.92,0.96)
|
||||||
, "5" = rgb(1.00,1.00,1.00)
|
, "5" = rgb(1.00,1.00,1.00)
|
||||||
, "6" = rgb(0.84,0.94,0.94)
|
, "6" = rgb(0.84,0.94,0.94)
|
||||||
, "7" = rgb(0.65,0.86,0.90)
|
, "7" = rgb(0.65,0.86,0.90)
|
||||||
, "8" = rgb(0.29,0.69,0.75)
|
, "8" = rgb(0.29,0.69,0.75)
|
||||||
, "9" = rgb(0.04,0.49,0.51)
|
, "9" = rgb(0.04,0.49,0.51)
|
||||||
)
|
)
|
||||||
|
|
||||||
consurf_bp_colours = c(#"0" = rgb(1.00,1.00,0.59)
|
consurf_bp_colours = c(
|
||||||
"0" = rgb(1.00,1.00,0.59)
|
"0" = rgb(1.00,1.00,0.59)
|
||||||
, "1" = rgb(0.63,0.16,0.37)
|
, "1" = rgb(0.63,0.16,0.37)
|
||||||
, "2" = rgb(0.94,0.49,0.67)
|
, "2" = rgb(0.94,0.49,0.67)
|
||||||
|
|
|
@ -22,9 +22,12 @@
|
||||||
#1) Corr type?
|
#1) Corr type?
|
||||||
#2)
|
#2)
|
||||||
##################################################################
|
##################################################################
|
||||||
|
# LigDist_colname #from globals: plotting_globals.R
|
||||||
|
# ppi2Dist_colname #from globals: plotting_globals.R
|
||||||
|
# naDist_colname #from globals: plotting_globals.R
|
||||||
corr_data_extract <- function(df
|
corr_data_extract <- function(df
|
||||||
#, gene_name = gene
|
, gene
|
||||||
, drug_name = drug
|
, drug
|
||||||
#, ligand_dist_colname = LigDist_colname
|
#, ligand_dist_colname = LigDist_colname
|
||||||
, colnames_to_extract
|
, colnames_to_extract
|
||||||
, colnames_display_key
|
, colnames_display_key
|
||||||
|
@ -38,34 +41,48 @@ corr_data_extract <- function(df
|
||||||
, "\n=========================================")
|
, "\n=========================================")
|
||||||
|
|
||||||
cat("\nExtracting default columns for"
|
cat("\nExtracting default columns for"
|
||||||
#, "\nGene name:", gene
|
, "\nGene name:", gene
|
||||||
, "\nDrug name:", drug)
|
, "\nDrug name:", drug)
|
||||||
|
|
||||||
colnames_to_extract = c(drug
|
geneL_normal = c("pnca")
|
||||||
#, "mutationinformation"
|
geneL_na = c("gid", "rpob")
|
||||||
#, "mutation_info_labels"
|
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
|
||||||
, "dst_mode"
|
|
||||||
, "duet_stability_change"
|
common_colnames = c(drug, "dst_mode"
|
||||||
, "ligand_affinity_change"
|
, "duet_stability_change" , "ddg_foldx" , "deepddg" , "ddg_dynamut2"
|
||||||
, "ligand_distance"
|
, "asa" , "rsa" , "kd_values" , "rd_values"
|
||||||
#, ligand_dist_colname
|
, "maf" , "log10_or_mychisq" , "neglog_pval_fisher"
|
||||||
, "interface_dist"
|
, LigDist_colname
|
||||||
, "ddg_foldx"
|
, "consurf_score" , "snap2_score" , "provean_score"
|
||||||
, "deepddg"
|
, "ligand_affinity_change"
|
||||||
, "asa"
|
#, "ddg_dynamut", "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet"
|
||||||
, "rsa"
|
)
|
||||||
, "kd_values"
|
|
||||||
, "rd_values"
|
display_common_colnames = c( drug, "dst_mode"
|
||||||
, "af"
|
, "DUET" , "FoldX" , "DeepDDG", "Dynamut2"
|
||||||
, "log10_or_mychisq"
|
, "ASA" , "RSA" , "KD" , "RD"
|
||||||
, "neglog_pval_fisher"
|
, "MAF" , "Log(OR)" , "-Log(P)"
|
||||||
, "ddg_dynamut2"
|
, "Lig-Dist"
|
||||||
, "consurf_score"
|
, "ConSurf" , "SNAP2" , "PROVEAN"
|
||||||
, "snap2_score"
|
, "mCSM-lig"
|
||||||
, "ddg_dynamut", "ddg_encom", "dds_encom", "ddg_mcsm", "ddg_sdm", "ddg_duet"
|
# , "Dynamut" , "ENCoM-DDG" , "mCSM" , "SDM" , "DUET-d" , "ENCoM-DDS"
|
||||||
, "mcsm_na_affinity"
|
)
|
||||||
, "mcsm_ppi2_affinity"
|
|
||||||
)
|
if (tolower(gene)%in%geneL_normal){
|
||||||
|
colnames_to_extract = c(common_colnames)
|
||||||
|
display_colnames = c(display_common_colnames)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_ppi2){
|
||||||
|
colnames_to_extract = c(common_colnames ,"mcsm_ppi2_affinity", ppi2Dist_colname)
|
||||||
|
display_colnames = c(display_common_colnames,"mCSM-PPI2" , "PPI-Dist")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tolower(gene)%in%geneL_na){
|
||||||
|
colnames_to_extract = c(common_colnames,"mcsm_na_affinity", naDist_colname)
|
||||||
|
display_colnames = c(display_common_colnames, "mCSM-NA", "NA-Dist")
|
||||||
|
}
|
||||||
|
|
||||||
# [optional] arg: extract_scaled_cols
|
# [optional] arg: extract_scaled_cols
|
||||||
if (extract_scaled_cols){
|
if (extract_scaled_cols){
|
||||||
|
@ -77,46 +94,15 @@ corr_data_extract <- function(df
|
||||||
colnames_to_extract = colnames_to_extract
|
colnames_to_extract = colnames_to_extract
|
||||||
}
|
}
|
||||||
|
|
||||||
corr_df = df[, colnames(df)%in%colnames_to_extract]
|
# extract df based on gene
|
||||||
|
corr_df = df[,colnames_to_extract]
|
||||||
|
colnames(corr_df)
|
||||||
|
display_colnames
|
||||||
|
|
||||||
# arg: colnames_display_key
|
# arg: colnames_display_key
|
||||||
colnames_display_key = c(duet_stability_change = "DUET"
|
colnames(corr_df)[colnames(corr_df)%in%colnames_to_extract] <- display_colnames
|
||||||
, ligand_affinity_change = "mCSM-lig"
|
colnames(corr_df)
|
||||||
, ligand_distance = "ligand_distance"
|
|
||||||
#, ligand_dist_colname = "ligand_distance"
|
|
||||||
, interface_dist = "interface_dist"
|
|
||||||
, ddg_foldx = "FoldX"
|
|
||||||
, deepddg = "DeepDDG"
|
|
||||||
, asa = "ASA"
|
|
||||||
, rsa = "RSA"
|
|
||||||
, kd_values = "KD"
|
|
||||||
, rd_values = "RD"
|
|
||||||
, af = "MAF"
|
|
||||||
, log10_or_mychisq = "Log (OR)"
|
|
||||||
, neglog_pval_fisher = "-Log (P)"
|
|
||||||
, ddg_dynamut2 = "Dynamut2"
|
|
||||||
, consurf_score = "Consurf"
|
|
||||||
, snap2_score = "SNAP2"
|
|
||||||
, ddg_dynamut = "Dynamut"
|
|
||||||
, ddg_encom = "ENCoM-DDG"
|
|
||||||
, ddg_mcsm = "mCSM"
|
|
||||||
, ddg_sdm = "SDM"
|
|
||||||
, ddg_duet = "DUET-d"
|
|
||||||
, dds_encom = "ENCoM-DDS"
|
|
||||||
, mcsm_na_affinity = "mCSM-NA"
|
|
||||||
, mcsm_ppi2_affinity = "mCSM-PPI2")
|
|
||||||
|
|
||||||
# COMMENT: This only works when all the columns are in the namekey vector.
|
|
||||||
# If one is missing, there is no error, but the column is also renamed to "NA".
|
|
||||||
#names(corr_df) <- colnames_display_key[names(corr_df)]
|
|
||||||
|
|
||||||
# Solution: to use plyr::rename()
|
|
||||||
# Consider using requireNamespace() instead of library() so its function names don't collide with dplyr's.
|
|
||||||
corr_df = plyr::rename(corr_df
|
|
||||||
, replace = colnames_display_key
|
|
||||||
, warn_missing = T
|
|
||||||
, warn_duplicated = T)
|
|
||||||
|
|
||||||
cat("\nExtracted ncols:", ncol(corr_df)
|
cat("\nExtracted ncols:", ncol(corr_df)
|
||||||
,"\nRenaming successful")
|
,"\nRenaming successful")
|
||||||
|
|
||||||
|
|
|
@ -39,9 +39,9 @@ resistance_col <<- "drtype"
|
||||||
LigDist_colname <<- "ligand_distance"
|
LigDist_colname <<- "ligand_distance"
|
||||||
LigDist_cutoff <<- 10
|
LigDist_cutoff <<- 10
|
||||||
|
|
||||||
DistCutOff = 10
|
DistCutOff <<- 10
|
||||||
ppi2Dist_colname = "interface_dist"
|
ppi2Dist_colname <<- "interface_dist"
|
||||||
naDist_colname = "TBC"
|
naDist_colname <<- "TBC"
|
||||||
|
|
||||||
#==================
|
#==================
|
||||||
# Angstroms symbol
|
# Angstroms symbol
|
||||||
|
|
|
@ -186,10 +186,16 @@ cat(s3)
|
||||||
# make sure the above script works because merged_df2_combined is needed
|
# make sure the above script works because merged_df2_combined is needed
|
||||||
merged_df3 = as.data.frame(merged_df3)
|
merged_df3 = as.data.frame(merged_df3)
|
||||||
|
|
||||||
corr_df_m3_f = corr_data_extract(merged_df3, extract_scaled_cols = F)
|
corr_df_m3_f = corr_data_extract(merged_df3
|
||||||
|
, gene = gene
|
||||||
|
, drug = drug
|
||||||
|
, extract_scaled_cols = F)
|
||||||
head(corr_df_m3_f)
|
head(corr_df_m3_f)
|
||||||
|
|
||||||
corr_df_m2_f = corr_data_extract(merged_df2, extract_scaled_cols = F)
|
corr_df_m2_f = corr_data_extract(merged_df2
|
||||||
|
, gene = gene
|
||||||
|
, drug = drug
|
||||||
|
, extract_scaled_cols = F)
|
||||||
head(corr_df_m2_f)
|
head(corr_df_m2_f)
|
||||||
|
|
||||||
s4 = c("\nSuccessfully sourced Corr_data.R")
|
s4 = c("\nSuccessfully sourced Corr_data.R")
|
||||||
|
|
|
@ -1,5 +1,9 @@
|
||||||
merged_df3 = as.data.frame(merged_df3)
|
merged_df3 = as.data.frame(merged_df3)
|
||||||
corr_plotdf = corr_data_extract(merged_df3, extract_scaled_cols = F)
|
corr_plotdf = corr_data_extract(merged_df3
|
||||||
|
, gene = gene
|
||||||
|
, drug = drug
|
||||||
|
, extract_scaled_cols = F)
|
||||||
|
colnames(corr_plotdf)
|
||||||
|
|
||||||
#================
|
#================
|
||||||
# stability
|
# stability
|
||||||
|
@ -9,12 +13,13 @@ corr_ps_colnames = c("DUET"
|
||||||
, "DeepDDG"
|
, "DeepDDG"
|
||||||
, "Dynamut2"
|
, "Dynamut2"
|
||||||
, "MAF"
|
, "MAF"
|
||||||
, "Log (OR)"
|
, "Log(OR)"
|
||||||
, "-Log (P)"
|
, "-Log(P)"
|
||||||
#, "ligand_distance"
|
#, "ligand_distance"
|
||||||
, "dst_mode"
|
, "dst_mode"
|
||||||
, drug)
|
, drug)
|
||||||
|
|
||||||
|
corr_ps_colnames%in%colnames(corr_plotdf)
|
||||||
corr_df_ps = corr_plotdf[, corr_ps_colnames]
|
corr_df_ps = corr_plotdf[, corr_ps_colnames]
|
||||||
|
|
||||||
color_coln = which(colnames(corr_df_ps) == "dst_mode")
|
color_coln = which(colnames(corr_df_ps) == "dst_mode")
|
||||||
|
@ -46,10 +51,10 @@ my_corr_pairs(corr_data_all = corr_df_ps
|
||||||
|
|
||||||
dev.off()
|
dev.off()
|
||||||
#####################################################
|
#####################################################
|
||||||
DistCutOff = 10
|
#DistCutOff = 10
|
||||||
LigDist_colname # = "ligand_distance" # from globals
|
#LigDist_colname # = "ligand_distance" # from globals
|
||||||
ppi2Dist_colname = "interface_dist"
|
#ppi2Dist_colname = "interface_dist"
|
||||||
naDist_colname = "TBC"
|
#naDist_colname = "TBC"
|
||||||
#####################################################
|
#####################################################
|
||||||
|
|
||||||
#================
|
#================
|
||||||
|
@ -57,14 +62,15 @@ naDist_colname = "TBC"
|
||||||
#================
|
#================
|
||||||
corr_lig_colnames = c("mCSM-lig"
|
corr_lig_colnames = c("mCSM-lig"
|
||||||
, "MAF"
|
, "MAF"
|
||||||
, "Log (OR)"
|
, "Log(OR)"
|
||||||
, "-Log (P)"
|
, "-Log(P)"
|
||||||
, "ligand_distance"
|
, "Lig-Dist"
|
||||||
, "dst_mode"
|
, "dst_mode"
|
||||||
, drug)
|
, drug)
|
||||||
|
|
||||||
|
corr_lig_colnames%in%colnames(corr_plotdf)
|
||||||
corr_df_lig = corr_plotdf[, corr_lig_colnames]
|
corr_df_lig = corr_plotdf[, corr_lig_colnames]
|
||||||
corr_df_lig = corr_df_lig[corr_df_lig[[LigDist_colname]]<DistCutOff,]
|
corr_df_lig = corr_df_lig[corr_df_lig["Lig-Dist"]<DistCutOff,]
|
||||||
|
|
||||||
color_coln = which(colnames(corr_df_lig) == "dst_mode")
|
color_coln = which(colnames(corr_df_lig) == "dst_mode")
|
||||||
end = which(colnames(corr_df_lig) == drug)
|
end = which(colnames(corr_df_lig) == drug)
|
||||||
|
@ -99,15 +105,15 @@ dev.off()
|
||||||
#================
|
#================
|
||||||
corr_ppi2_colnames = c("mCSM-PPI2"
|
corr_ppi2_colnames = c("mCSM-PPI2"
|
||||||
, "MAF"
|
, "MAF"
|
||||||
, "Log (OR)"
|
, "Log(OR)"
|
||||||
, "-Log (P)"
|
, "-Log(P)"
|
||||||
, "interface_dist"
|
, "PPI-Dist" # "interface_dist"
|
||||||
, "dst_mode"
|
, "dst_mode"
|
||||||
, drug)
|
, drug)
|
||||||
|
|
||||||
|
corr_ppi2_colnames%in%colnames(corr_plotdf)
|
||||||
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
|
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
|
||||||
corr_df_ppi2 = corr_df_ppi2[corr_df_ppi2[[ppi2Dist_colname]]<DistCutOff,]
|
corr_df_ppi2 = corr_df_ppi2[corr_df_ppi2["PPI-Dist"]<DistCutOff,]
|
||||||
|
|
||||||
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
|
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
|
||||||
end = which(colnames(corr_df_ppi2) == drug)
|
end = which(colnames(corr_df_ppi2) == drug)
|
||||||
|
@ -146,10 +152,21 @@ dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# FIXME: ADD PROVEAN
|
|
||||||
####################################################
|
####################################################
|
||||||
# CONSERVATION
|
# CONSERVATION
|
||||||
|
corr_conservation_cols = c("ConSurf"
|
||||||
|
, "SNAP2"
|
||||||
|
, "PROVEAN"
|
||||||
|
, "MAF"
|
||||||
|
, "Log(OR)"
|
||||||
|
, "-Log(P)"
|
||||||
|
, "dst_mode"
|
||||||
|
, drug)
|
||||||
|
|
||||||
####################################################
|
####################################################
|
||||||
|
colnames(corr_plotdf)
|
||||||
|
corr_conservation_cols%in%colnames(corr_plotdf)
|
||||||
|
|
||||||
corr_df_cons = corr_plotdf[, corr_conservation_cols]
|
corr_df_cons = corr_plotdf[, corr_conservation_cols]
|
||||||
|
|
||||||
color_coln = which(colnames(corr_df_cons) == "dst_mode")
|
color_coln = which(colnames(corr_df_cons) == "dst_mode")
|
||||||
|
|
|
@ -111,7 +111,9 @@ df3 = merged_df3
|
||||||
#df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
|
#df3[[consurf_colNew]] = as.factor(df3[[consurf_colNew]])
|
||||||
#df3[[consurf_colNew]]
|
#df3[[consurf_colNew]]
|
||||||
# not this bit
|
# not this bit
|
||||||
levels(df3$consurf_outcome) = c( "nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
#!!!!!!!!!!!!!1
|
||||||
|
#levels(df3$consurf_outcome) = c( "nsd", 1, 2, 3, 4, 5, 6, 7, 8, 9)
|
||||||
|
|
||||||
#levels(df3$consurf_outcome)
|
#levels(df3$consurf_outcome)
|
||||||
|
|
||||||
# SNAP2 labels
|
# SNAP2 labels
|
||||||
|
@ -242,9 +244,9 @@ corr_ppi2_colnames = c("mCSM-PPI2"
|
||||||
, "dst_mode"
|
, "dst_mode"
|
||||||
, drug)
|
, drug)
|
||||||
|
|
||||||
#FIXME: Add provean
|
|
||||||
corr_conservation_cols = c("Consurf"
|
corr_conservation_cols = c("Consurf"
|
||||||
, "SNAP2"
|
, "SNAP2"
|
||||||
|
, "PROVEAN"
|
||||||
, "MAF"
|
, "MAF"
|
||||||
, "Log (OR)"
|
, "Log (OR)"
|
||||||
, "-Log (P)"
|
, "-Log (P)"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue