starting corr plots

This commit is contained in:
Tanushree Tunstall 2022-08-09 21:55:24 +01:00
parent cd86fcf8e8
commit a6d93b3fa8
2 changed files with 92 additions and 107 deletions

View file

@ -1,28 +1,7 @@
#!/usr/bin/env Rscript #!/usr/bin/env Rscript
######################################################### #########################################################
# TASK: Script to format data for Correlation plots: # TASK: Script to format data for Correlation plots:
# corr_data_extract() # corr_data_extract()
# INPUT:
# df: data with all parameters (my_use case)
# merged_df3 or merged_df2!?
# gene: [sanity check]
# drug: relates to a column name that will need to be extracted
# ligand_dist_colname = LigDist_colname (variable from plotting_globals())
# colnames_to_extract = c("mutationinformation", "duet_affinity_change")
# display_colnames_key = c(mutationinformation = "MUT" , duet_affinity_change = "DUET")
# extract_scaled_cols = T or F, so that parameters with the _scaled suffix can be extracted.
# NOTE*: No formatting applied to these cols, i.e. display name
# RETURNS: DF
# containing all the columns required for generating downstream correlation plots
# TODO: ADD
#lineage_count_all
#lineage_count_unique
#my_df['lineage_proportion'] = my_df['lineage_count_unique']/my_df['lineage_count_all']
#my_df['dist_lineage_proportion'] = my_df['lineage_count_unique']/total_mtblineage_uc
################################################################## ##################################################################
# LigDist_colname #from globals: plotting_globals.R # LigDist_colname #from globals: plotting_globals.R
@ -31,14 +10,11 @@
corr_data_extract <- function(df corr_data_extract <- function(df
, gene , gene
, drug , drug
#, ligand_dist_colname = LigDist_colname
, colnames_to_extract , colnames_to_extract
, colnames_display_key , colnames_display_key
, extract_scaled_cols = F){ , extract_scaled_cols = F){
if ( missing(colnames_to_extract) || missing(colnames_display_key) ){ if ( missing(colnames_to_extract) || missing(colnames_display_key) ){
#if ( missing(colnames_to_extract) ){
cat("\n==========================================" cat("\n=========================================="
, "\nCORR PLOTS data: ALL params" , "\nCORR PLOTS data: ALL params"
, "\n=========================================") , "\n=========================================")

View file

@ -127,48 +127,50 @@ dev.off()
#================ #================
# ppi2 affinity # ppi2 affinity
#================ #================
corr_ppi2_colnames = c("mCSM-PPI2" if (tolower(gene)%in%geneL_ppi2){
, "MAF" corr_ppi2_colnames = c("mCSM-PPI2"
, "Log(OR)" , "MAF"
, "-Log(P)" , "Log(OR)"
, "PPI-Dist" # "interface_dist" , "-Log(P)"
, "dst_mode" , "PPI-Dist" # "interface_dist"
, drug) , "dst_mode"
, drug)
corr_ppi2_colnames%in%colnames(corr_plotdf) corr_ppi2_colnames%in%colnames(corr_plotdf)
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames] corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
corr_df_ppi2 = corr_df_ppi2[corr_df_ppi2["PPI-Dist"]<DistCutOff,] corr_df_ppi2 = corr_df_ppi2[corr_df_ppi2["PPI-Dist"]<DistCutOff,]
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`)) complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2) cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
color_coln = which(colnames(corr_df_ppi2) == "dst_mode") color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
end = which(colnames(corr_df_ppi2) == drug) end = which(colnames(corr_df_ppi2) == drug)
ncol_omit = 3 #omit dist col ncol_omit = 3 #omit dist col
corr_end = end-ncol_omit corr_end = end-ncol_omit
#------------------------ #------------------------
# Output: ppi2 corrP # Output: ppi2 corrP
#------------------------ #------------------------
corr_ppi2P = paste0(outdir_images corr_ppi2P = paste0(outdir_images
,tolower(gene) ,tolower(gene)
,"_corr_ppi2.svg" ) ,"_corr_ppi2.svg" )
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P) cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
svg(corr_ppi2P, width = 10, height = 10) svg(corr_ppi2P, width = 10, height = 10)
my_corr_pairs(corr_data_all = corr_df_ppi2 my_corr_pairs(corr_data_all = corr_df_ppi2
, corr_cols = colnames(corr_df_ppi2[1:corr_end]) , corr_cols = colnames(corr_df_ppi2[1:corr_end])
, corr_method = "spearman" , corr_method = "spearman"
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode" , colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
, categ_colour = c("red", "blue") , categ_colour = c("red", "blue")
, density_show = F , density_show = F
, hist_col = "coral4" , hist_col = "coral4"
, dot_size = 2 , dot_size = 2
, ats = 1.5 , ats = 1.5
, corr_lab_size = 3 , corr_lab_size = 3
, corr_value_size = 1) , corr_value_size = 1)
dev.off() dev.off()
}
# FIXME: ADD distance # FIXME: ADD distance
#================== #==================
@ -177,48 +179,52 @@ dev.off()
#================ #================
# NA affinity # NA affinity
#================ #================
corr_na_colnames = c("mCSM-NA" if (tolower(gene)%in%geneL_na){
, "MAF"
, "Log(OR)"
, "-Log(P)"
, "NA-Dist" # "NA_dist"
, "dst_mode"
, drug)
corr_na_colnames%in%colnames(corr_plotdf) corr_na_colnames = c("mCSM-NA"
corr_df_na = corr_plotdf[, corr_na_colnames] , "MAF"
corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,] , "Log(OR)"
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`)) , "-Log(P)"
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na) , "NA-Dist" # "NA_dist"
, "dst_mode"
, drug)
color_coln = which(colnames(corr_df_na) == "dst_mode") corr_na_colnames%in%colnames(corr_plotdf)
end = which(colnames(corr_df_na) == drug) corr_df_na = corr_plotdf[, corr_na_colnames]
ncol_omit = 3 #omit dist col corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
corr_end = end-ncol_omit complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
#------------------------ color_coln = which(colnames(corr_df_na) == "dst_mode")
# Output: mCSM-NA corrP end = which(colnames(corr_df_na) == drug)
#------------------------ ncol_omit = 3 #omit dist col
corr_naP = paste0(outdir_images corr_end = end-ncol_omit
,tolower(gene)
,"_corr_na.svg" )
cat("Corr plot mCSM-NA with coloured dots:", corr_naP) #------------------------
svg(corr_naP, width = 10, height = 10) # Output: mCSM-NA corrP
#------------------------
corr_naP = paste0(outdir_images
,tolower(gene)
,"_corr_na.svg" )
my_corr_pairs(corr_data_all = corr_df_na cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
, corr_cols = colnames(corr_df_na[1:corr_end]) svg(corr_naP, width = 10, height = 10)
, corr_method = "spearman"
, colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode" my_corr_pairs(corr_data_all = corr_df_na
, categ_colour = c("red", "blue") , corr_cols = colnames(corr_df_na[1:corr_end])
, density_show = F , corr_method = "spearman"
, hist_col = "coral4" , colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
, dot_size = 2 , categ_colour = c("red", "blue")
, ats = 1.5 , density_show = F
, corr_lab_size = 3 , hist_col = "coral4"
, corr_value_size = 1) , dot_size = 2
, ats = 1.5
, corr_lab_size = 3
, corr_value_size = 1)
dev.off()
}
dev.off()
#################################################### ####################################################
# CONSERVATION # CONSERVATION
@ -265,3 +271,6 @@ my_corr_pairs(corr_data_all = corr_df_cons
, corr_value_size = 1) , corr_value_size = 1)
dev.off() dev.off()
#