From 92af9fd56504866322c26a7d59ffaa61fe58c58d Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 26 Jan 2022 11:06:04 +0000 Subject: [PATCH] combined logolas and raw data msa plots into 1 script and called it the same as before logoP_msa.R --- .../{logoP_logolas.R => logoP_msa.R} | 366 ++++++++++-------- 1 file changed, 196 insertions(+), 170 deletions(-) rename scripts/functions/{logoP_logolas.R => logoP_msa.R} (51%) diff --git a/scripts/functions/logoP_logolas.R b/scripts/functions/logoP_msa.R similarity index 51% rename from scripts/functions/logoP_logolas.R rename to scripts/functions/logoP_msa.R index 5a85f9f..637098e 100644 --- a/scripts/functions/logoP_logolas.R +++ b/scripts/functions/logoP_msa.R @@ -1,14 +1,17 @@ +source("~/git/LSHTM_analysis/scripts/plotting/Header_TT.R") +source("~/git/LSHTM_analysis/scripts/functions/ed_pfm_data.R") ########################################### PlotLogolasMSA <- function(msaSeq_mut # chr vector , msaSeq_wt # chr vector - , msa_method = c("custom") # will be c("EDLogo", "Logo)# - , ED_score = c("log")# can be: "log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL" + #, msa_method = c("custom") # can be "bits", "probability" or "custom" + , logo_type = c("EDLogo") #"bits_pfm", "probability_pfm", "bits_raw", "probability_raw") # can be "bits", "probability" or "custom" + , EDScore_type = c("log") # see if this relevant, or source function should have it! , bg_prob = NULL , my_logo_col = "chemistry" , plot_positions , y_breaks , x_lab_mut = "nsSNP-position" - , y_lab_mut = "" + , y_lab_mut , x_ats = 13 # text size , x_tangle = 90 # text angle , x_axis_offset = 0.05 # dist b/w y-axis and plot start @@ -25,115 +28,148 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector ) { - #''' Can be put into a separate EDData plot function''' - dash_control = list() - dash_control_default <- list(concentration = NULL, mode = NULL, - optmethod = "mixEM", sample_weights = NULL, verbose = FALSE, - bf = TRUE, pi_init = NULL, squarem_control = list(), - dash_control = list(), reportcov = FALSE) - dash_control <- modifyList(dash_control_default, dash_control) + # Get PFM matrix for mut and wt MSA provided + data_ed = DataED_PFM(msaSeq_mut + , msaSeq_wt + , ED_score = EDScore_type) + names(data_ed) + #"pfm_mutM" "pfm_mut_scaledM" "combED_mutM" "pfm_wtM" "pfm_wt_scaledM" "combED_wtM" - - ############################################ - # Data processing for logo plot for nsSNPS - ########################################### - cat("\nLength of MSA", length(msaSeq_mut) - , "\nlength of WT seq:", length(msaSeq_wt)) - - cat("\n=======================" - , "\nPlotting entire MSA" - , "\n========================") + if (logo_type == "EDLogo"){ + msa_method = "custom" + y_label = "Enrichment Score" + data_logo_mut = data_ed[['combED_mutM']] + data_logo_wt = data_ed[['combED_wtM']] - #-------------------------- - # Getting PFM: mutant MSA - #-------------------------- - pfm_mut <- Biostrings::consensusMatrix(msaSeq_mut) - colnames(pfm_mut) <- 1:dim(pfm_mut)[2] - pfm_mut_scaled <- do.call(dash, append(list(comp_data = pfm_mut), - dash_control))$posmean + msa_pos = as.numeric(colnames(data_logo_mut)) + wt_pos = as.numeric(colnames(data_logo_wt)) - logo_mut_h = get_logo_heights(pfm_mut_scaled - , bg = bg_prob - , score = ED_score) - - cat("\nGetting logo_heights from Logolas package...") + # Construct Y-axis for MSA mut plot: + cat("\nCalculating y-axis for MSA mut plot") + + if ( missing(y_breaks) ){ + # Y-axis: Calculating + cat("\n----------------------------------------" + , "\nY-axis being generated from data" + , "\n-----------------------------------------") + ylim_low <- floor(min(data_logo_mut)); ylim_low + if( ylim_low == 0){ + ylim_low = ylim_low + cat("\nY-axis lower limit:", ylim_low) + y_rlow = seq(0, ylim_low, length.out = 3); y_rlow + + ylim_up <- ceiling(max(data_logo_mut)) + 4; ylim_up + cat("\nY-axis upper limit:", ylim_up) + y_rup = seq(0, ylim_up, by = 2); y_rup + }else{ + ylim_low = ylim_low + (-0.5) + cat("\nY-axis lower limit is <0:", ylim_low) + y_rlow = seq(0, ylim_low, length.out = 3); y_rlow + + ylim_up <- ceiling(max(data_logo_mut)) + 3; ylim_up + cat("\nY-axis upper limit:", ylim_up) + y_rup = seq(0, ylim_up, by = 3); y_rup + } + #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale + ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale + cat("\nY-axis generated: see below\n" + , ylim_scale) + }else{ + # Y-axis: User provided + cat("\n--------------------------------" + , "\nUsing y-axis:: User provided" + ,"\n---------------------------------") + ylim_scale = sort(y_breaks) + ylim_low = min(ylim_scale); ylim_low + ylim_up = max(ylim_scale); ylim_up + } - pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM - pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS - pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED - - neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1) - neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS - neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED - - if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mut)[2]){ - cat("\nPASS: pfm calculated successfully including scaled matrix" - , "\nDim of pfm matrix:", dim(pfm_mut)[1], dim(pfm_mut)[2]) } - combED_mutM = pos_mutED + neg_mutED + if (logo_type == "bits_pfm"){ + msa_method = "bits" + y_label = "Bits (PFM)" + data_logo_mut = data_ed[['pfm_mut_scaledM']] + data_logo_wt = data_ed[['pfm_wtM']] - # Construct the x-axis: mutant MSA - msa_all_pos = as.numeric(colnames(combED_mutM)) - - #--------------------- - # Getting PFM: WT - #--------------------- - pfm_wt <- Biostrings::consensusMatrix(msaSeq_wt) - colnames(pfm_wt) <- 1:dim(pfm_wt)[2] - pfm_wt_scaled <- do.call(dash, append(list(comp_data = pfm_wt), - dash_control))$posmean - - logo_wt_h = get_logo_heights(pfm_wt_scaled - , bg = bg_prob - , score = ED_score) - - pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM - pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS - pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED - - neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1) - neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS - neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED - - if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wt)[2]){ - cat("\nPASS: pfm calculated successfully including scaled matrix" - , "\nDim of pfm matrix:", dim(pfm_wt)[1], dim(pfm_wt)[2]) + msa_pos = as.numeric(colnames(data_logo_mut)) + wt_pos = as.numeric(colnames(data_logo_wt)) } - combED_wtM = pos_wtED + neg_wtED + if (logo_type == "probability_pfm"){ + msa_method = "probability" + y_label = "Probability (PFM)" + data_logo_mut = data_ed[['pfm_mut_scaledM']] + data_logo_wt = data_ed[['pfm_wtM']] - # Construct the x-axis: mutant MSA - wt_all_pos = as.numeric(colnames(combED_wtM)) + msa_pos = as.numeric(colnames(data_logo_mut)) + wt_pos = as.numeric(colnames(data_logo_wt)) + } + if (logo_type == "bits_raw"){ + msa_method = "bits" + y_label = "Bits" + data_logo_mut = msaSeq_mut + msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) + msa_interimDF = data.frame(msa_interim) + msa_pos = as.numeric(rownames(msa_interimDF)) + + data_logo_wt = msaSeq_wt + wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) + wt_interimDF = data.frame(wt_interim) + wt_pos = as.numeric(rownames(wt_interimDF)) + + } + + if (logo_type == "probability_raw"){ + msa_method = "probability" + y_label = "Probability" + + data_logo_mut = msaSeq_mut + msa_interim = sapply(data_logo_mut, function(x) unlist(strsplit(x,""))) + msa_interimDF = data.frame(msa_interim) + msa_pos = as.numeric(rownames(msa_interimDF)) + + data_logo_wt = msaSeq_wt + wt_interim = sapply(data_logo_wt, function(x) unlist(strsplit(x,""))) + wt_interimDF = data.frame(wt_interim) + wt_pos = as.numeric(rownames(wt_interimDF)) + } + + ################################################################################# + # param: plot_position + ################################################################################# + if(missing(plot_positions)){ - - #------------------------------ - # MSA mut: All, no filtering - #------------------------------- + + #================================ + # NO filtering of positions + #================================ + #--------- + # MSA mut + #--------- cat("\n===========================================" , "\nGenerated PFM mut: No filtering" , "\n===========================================") - plot_mut_edM = combED_mutM + plot_mut_edM = data_logo_mut - #------------------------------ - # MSA WT: All, no filtering - #------------------------------- + #--------- + # MSA WT + #--------- cat("\n===========================================" , "\nGenerated PFM WT: No filtering" , "\n===========================================") - plot_wt_edM = combED_wtM + plot_wt_edM = data_logo_wt }else{ - #------------------------------ - # PFM mut: Filtered positions - #------------------------------- - + #================================ + # Filtering of positions + #================================ cat("\n===========================================" , "\nGenerating PFM MSA: filtered positions" , "\n===========================================" @@ -146,17 +182,56 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector cat("\nPlotting positions sorted:\n" , plot_positions) - if ( all(plot_positions%in%msa_all_pos) && all(plot_positions%in%wt_all_pos) ){ + if ( all(plot_positions%in%msa_pos) && all(plot_positions%in%wt_pos) ){ cat("\nAll positions within range" , "\nFiltering positions as specified..." , "\nNo. of positions in plot:", length(plot_positions)) - i_extract = plot_positions - plot_mut_edM = combED_mutM[, i_extract] - plot_wt_edM = combED_wtM[, i_extract] + i_extract = plot_positions + #plot_mut_edM = data_logo_mut[, i_extract] + #plot_wt_edM = data_logo_wt[, i_extract] + #----------------- + # PFM: mut + wt + #------------------ + if (logo_type%in%c("EDLogo", "bits_pfm", "probability_pfm")){ + + plot_mut_edM = data_logo_mut[, i_extract] + plot_wt_edM = data_logo_wt[, i_extract] + } + if (logo_type%in%c("bits_raw", "probability_raw")){ + + #-------- + # Mut + #-------- + dfP1 = msa_interimDF[i_extract,] + dfP1 = data.frame(t(dfP1)) + names(dfP1) = i_extract + cols_to_paste = names(dfP1) + dfP1['chosen_seq'] = apply(dfP1[, cols_to_paste] + , 1 + , paste, sep = '' + , collapse = "") + plot_mut_edM = dfP1$chosen_seq + + #-------- + # WT + #-------- + dfP2 = wt_interimDF[i_extract,] + dfP2 = data.frame(t(dfP2)) + names(dfP2) = i_extract + cols_to_paste2 = names(dfP2) + dfP2['chosen_seq'] = apply( dfP2[, cols_to_paste2] + , 1 + , paste, sep = '' + , collapse = "") + + plot_wt_edM = dfP2$chosen_seq + + } + }else{ cat("\nNo. of positions selected:", length(plot_positions)) - i_ofr = plot_positions[!plot_positions%in%msa_all_pos] + i_ofr = plot_positions[!plot_positions%in%msa_pos] cat("\n1 or more plot_positions out of range..." , "\nThese are:\n", i_ofr , "\nQuitting! Resubmit with correct plot_positions") @@ -164,64 +239,14 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector } } - # Construct Y-axis for MSA mut plot: - cat("\nCalculating y-axis for MSA mut plot") - if (missing(y_breaks)){ - - # Y-axis: Calculating - cat("\n----------------------------------------" - , "\nY-axis being generated from data" - , "\n-----------------------------------------") - - ylim_low <- floor(min(combED_mutM)); ylim_low - - if( ylim_low == 0){ - - ylim_low = ylim_low - cat("\nY-axis lower limit:", ylim_low) - y_rlow = seq(0, ylim_low, length.out = 3); y_rlow - - ylim_up <- ceiling(max(combED_mutM)) + 4; ylim_up - cat("\nY-axis upper limit:", ylim_up) - y_rup = seq(0, ylim_up, by = 2); y_rup - - }else{ - - ylim_low = ylim_low + (-0.5) - cat("\nY-axis lower limit is <0:", ylim_low) - y_rlow = seq(0, ylim_low, length.out = 3); y_rlow - - ylim_up <- ceiling(max(combED_mutM)) + 3; ylim_up - cat("\nY-axis upper limit:", ylim_up) - y_rup = seq(0, ylim_up, by = 3); y_rup - } - - #ylim_scale <- unique(sort(c(y_rlow, y_rup, ylim_up))); ylim_scale - ylim_scale <- unique(sort(c(y_rlow, y_rup))); ylim_scale - - cat("\nY-axis generated: see below\n" - , ylim_scale) - - }else{ - - # Y-axis: User provided - cat("\n--------------------------------" - , "\nUsing y-axis:: User provided" - ,"\n---------------------------------") - ylim_scale = sort(y_breaks) - ylim_low = min(ylim_scale); ylim_low - ylim_up = max(ylim_scale); ylim_up - } - ###################################### # Generating plots for muts and wt ##################################### - if (my_logo_col %in% c('clustalx','taylor')) { cat("\nSelected colour scheme:", my_logo_col , "\nUsing black theme\n") - + theme_bgc = "black" xfont_bgc = "white" yfont_bgc = "white" @@ -252,7 +277,6 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector # Mutant logo plot #------------------- p0 = ggseqlogo(plot_mut_edM - #, facet = "grid" , method = msa_method , col_scheme = my_logo_col , seq_type = 'auto') + @@ -279,39 +303,41 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector , colour = xtt_col) , axis.title.y = element_text(size = y_tts , colour = ytt_col) - , plot.background = element_rect(fill = theme_bgc))+ + , plot.background = element_rect(fill = theme_bgc)) + - xlab(x_lab_mut) + ylab(y_lab_mut) + xlab(x_lab_mut) if (missing(plot_positions)){ - ed_mut_logo_P = p0 + - scale_x_discrete(breaks = msa_all_pos + scale_x_discrete(breaks = msa_pos , expand = c(x_axis_offset, 0) - , labels = msa_all_pos - , limits = factor(msa_all_pos))+ - scale_y_continuous(limits = c(ylim_low, ylim_up) - , breaks = ylim_scale - , expand = c(0, y_axis_offset)) + - geom_hline(yintercept = 0 - , linetype = "solid" - , color = "grey" - , size = 1) + , labels = msa_pos + , limits = factor(msa_pos)) }else{ - ed_mut_logo_P = p0 + + ed_mut_logo_P = p0 + scale_x_discrete(breaks = i_extract - , expand = c(x_axis_offset_filtered,0) + , expand = c(x_axis_offset_filtered, 0) , labels = i_extract - , limits = factor(i_extract)) + - #scale_y_continuous(expand = c(0,0.09)) + - scale_y_continuous(limits = c(ylim_low, ylim_up) - , breaks = ylim_scale - , expand = c(0, y_axis_offset))+ - geom_hline(yintercept = 0 - , linetype = "solid" - , color = "grey" - , size = 1) + , limits = factor(i_extract)) + + } + + if (logo_type == "EDLogo"){ + ed_mut_logo_P = ed_mut_logo_P + + scale_y_continuous(limits = c(ylim_low, ylim_up) + , breaks = ylim_scale + , expand = c(0, y_axis_offset)) + + geom_hline(yintercept = 0 + , linetype = "solid" + , color = "grey" + , size = 1) + } + + if (missing(y_lab_mut)){ + ed_mut_logo_P = ed_mut_logo_P + ylab(y_label) + } else{ + ed_mut_logo_P = ed_mut_logo_P + ylab(y_lab_mut) } cat('\nDone: MSA plot for mutations') @@ -354,10 +380,10 @@ PlotLogolasMSA <- function(msaSeq_mut # chr vector # No y-axis needed ed_wt_logo_P = p1 + - scale_x_discrete(breaks = wt_all_pos + scale_x_discrete(breaks = wt_pos , expand = c(x_axis_offset, 0) - , labels = wt_all_pos - , limits = factor(wt_all_pos)) + , labels = wt_pos + , limits = factor(wt_pos)) }else{ ed_wt_logo_P = p1 +