starting corr plots
This commit is contained in:
parent
cd86fcf8e8
commit
a6d93b3fa8
2 changed files with 92 additions and 107 deletions
|
@ -1,28 +1,7 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for Correlation plots:
|
||||
|
||||
# corr_data_extract()
|
||||
# INPUT:
|
||||
# df: data with all parameters (my_use case)
|
||||
# merged_df3 or merged_df2!?
|
||||
# gene: [sanity check]
|
||||
# drug: relates to a column name that will need to be extracted
|
||||
# ligand_dist_colname = LigDist_colname (variable from plotting_globals())
|
||||
|
||||
# colnames_to_extract = c("mutationinformation", "duet_affinity_change")
|
||||
# colnames_display_key = c(mutationinformation = "MUT", duet_affinity_change = "DUET")
|
||||
# extract_scaled_cols = T or F, so that parameters with the _scaled suffix can be extracted.
|
||||
# NOTE*: No formatting (i.e. display names) is applied to these cols
|
||||
|
||||
# RETURNS: DF
|
||||
# containing all the columns required for generating downstream correlation plots
|
||||
|
||||
# TODO: ADD
|
||||
#lineage_count_all
|
||||
#lineage_count_unique
|
||||
#my_df['lineage_proportion'] = my_df['lineage_count_unique']/my_df['lineage_count_all']
|
||||
#my_df['dist_lineage_proportion'] = my_df['lineage_count_unique']/total_mtblineage_uc
|
||||
|
||||
##################################################################
|
||||
# LigDist_colname #from globals: plotting_globals.R
|
||||
|
@ -31,14 +10,11 @@
|
|||
corr_data_extract <- function(df
|
||||
, gene
|
||||
, drug
|
||||
#, ligand_dist_colname = LigDist_colname
|
||||
, colnames_to_extract
|
||||
, colnames_display_key
|
||||
, extract_scaled_cols = F){
|
||||
|
||||
if ( missing(colnames_to_extract) || missing(colnames_display_key) ){
|
||||
#if ( missing(colnames_to_extract) ){
|
||||
|
||||
cat("\n=========================================="
|
||||
, "\nCORR PLOTS data: ALL params"
|
||||
, "\n=========================================")
|
||||
|
|
|
@ -127,7 +127,8 @@ dev.off()
|
|||
#================
|
||||
# ppi2 affinity
|
||||
#================
|
||||
corr_ppi2_colnames = c("mCSM-PPI2"
|
||||
if (tolower(gene)%in%geneL_ppi2){
|
||||
corr_ppi2_colnames = c("mCSM-PPI2"
|
||||
, "MAF"
|
||||
, "Log(OR)"
|
||||
, "-Log(P)"
|
||||
|
@ -135,28 +136,28 @@ corr_ppi2_colnames = c("mCSM-PPI2"
|
|||
, "dst_mode"
|
||||
, drug)
|
||||
|
||||
corr_ppi2_colnames%in%colnames(corr_plotdf)
|
||||
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
|
||||
corr_df_ppi2 = corr_df_ppi2[corr_df_ppi2["PPI-Dist"]<DistCutOff,]
|
||||
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
|
||||
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
|
||||
corr_ppi2_colnames%in%colnames(corr_plotdf)
|
||||
corr_df_ppi2 = corr_plotdf[, corr_ppi2_colnames]
|
||||
corr_df_ppi2 = corr_df_ppi2[corr_df_ppi2["PPI-Dist"]<DistCutOff,]
|
||||
complete_obs_ppi2 = nrow(corr_df_ppi2) - sum(is.na(corr_df_ppi2$`Log(OR)`))
|
||||
cat("\nComplete muts for ppi2 affinity for", gene, ":", complete_obs_ppi2)
|
||||
|
||||
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
|
||||
end = which(colnames(corr_df_ppi2) == drug)
|
||||
ncol_omit = 3 #omit dist col
|
||||
corr_end = end-ncol_omit
|
||||
color_coln = which(colnames(corr_df_ppi2) == "dst_mode")
|
||||
end = which(colnames(corr_df_ppi2) == drug)
|
||||
ncol_omit = 3 #omit dist col
|
||||
corr_end = end-ncol_omit
|
||||
|
||||
#------------------------
|
||||
# Output: ppi2 corrP
|
||||
#------------------------
|
||||
corr_ppi2P = paste0(outdir_images
|
||||
#------------------------
|
||||
# Output: ppi2 corrP
|
||||
#------------------------
|
||||
corr_ppi2P = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_ppi2.svg" )
|
||||
|
||||
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
|
||||
svg(corr_ppi2P, width = 10, height = 10)
|
||||
cat("Corr plot ppi2 with coloured dots:", corr_ppi2P)
|
||||
svg(corr_ppi2P, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_ppi2
|
||||
my_corr_pairs(corr_data_all = corr_df_ppi2
|
||||
, corr_cols = colnames(corr_df_ppi2[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_ppi2[color_coln]) #"dst_mode"
|
||||
|
@ -168,7 +169,8 @@ my_corr_pairs(corr_data_all = corr_df_ppi2
|
|||
, corr_lab_size = 3
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
dev.off()
|
||||
}
|
||||
|
||||
# FIXME: ADD distance
|
||||
#==================
|
||||
|
@ -177,7 +179,9 @@ dev.off()
|
|||
#================
|
||||
# NA affinity
|
||||
#================
|
||||
corr_na_colnames = c("mCSM-NA"
|
||||
if (tolower(gene)%in%geneL_na){
|
||||
|
||||
corr_na_colnames = c("mCSM-NA"
|
||||
, "MAF"
|
||||
, "Log(OR)"
|
||||
, "-Log(P)"
|
||||
|
@ -185,28 +189,28 @@ corr_na_colnames = c("mCSM-NA"
|
|||
, "dst_mode"
|
||||
, drug)
|
||||
|
||||
corr_na_colnames%in%colnames(corr_plotdf)
|
||||
corr_df_na = corr_plotdf[, corr_na_colnames]
|
||||
corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
|
||||
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
|
||||
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
|
||||
corr_na_colnames%in%colnames(corr_plotdf)
|
||||
corr_df_na = corr_plotdf[, corr_na_colnames]
|
||||
corr_df_na = corr_df_na[corr_df_na["NA-Dist"]<DistCutOff,]
|
||||
complete_obs_na = nrow(corr_df_na) - sum(is.na(corr_df_na$`Log(OR)`))
|
||||
cat("\nComplete muts for NA affinity for", gene, ":", complete_obs_na)
|
||||
|
||||
color_coln = which(colnames(corr_df_na) == "dst_mode")
|
||||
end = which(colnames(corr_df_na) == drug)
|
||||
ncol_omit = 3 #omit dist col
|
||||
corr_end = end-ncol_omit
|
||||
color_coln = which(colnames(corr_df_na) == "dst_mode")
|
||||
end = which(colnames(corr_df_na) == drug)
|
||||
ncol_omit = 3 #omit dist col
|
||||
corr_end = end-ncol_omit
|
||||
|
||||
#------------------------
|
||||
# Output: mCSM-NA corrP
|
||||
#------------------------
|
||||
corr_naP = paste0(outdir_images
|
||||
#------------------------
|
||||
# Output: mCSM-NA corrP
|
||||
#------------------------
|
||||
corr_naP = paste0(outdir_images
|
||||
,tolower(gene)
|
||||
,"_corr_na.svg" )
|
||||
|
||||
cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
|
||||
svg(corr_naP, width = 10, height = 10)
|
||||
cat("Corr plot mCSM-NA with coloured dots:", corr_naP)
|
||||
svg(corr_naP, width = 10, height = 10)
|
||||
|
||||
my_corr_pairs(corr_data_all = corr_df_na
|
||||
my_corr_pairs(corr_data_all = corr_df_na
|
||||
, corr_cols = colnames(corr_df_na[1:corr_end])
|
||||
, corr_method = "spearman"
|
||||
, colour_categ_col = colnames(corr_df_na[color_coln]) #"dst_mode"
|
||||
|
@ -218,7 +222,9 @@ my_corr_pairs(corr_data_all = corr_df_na
|
|||
, corr_lab_size = 3
|
||||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
dev.off()
|
||||
}
|
||||
|
||||
|
||||
####################################################
|
||||
# CONSERVATION
|
||||
|
@ -265,3 +271,6 @@ my_corr_pairs(corr_data_all = corr_df_cons
|
|||
, corr_value_size = 1)
|
||||
|
||||
dev.off()
|
||||
|
||||
|
||||
#
|
Loading…
Add table
Add a link
Reference in a new issue