source("~/git/LSHTM_analysis/scripts/functions/my_logolas.R") ##################################################################################### # DataED_PFM(): # Input: # Data: # msaSeq_mut: MSA chr vector for muts # msaSeq_wt [Optional]: MSA chr vector for wt # Others params: # ED_score = c("log", log-odds", "diff", "probKL", "ratio", "unscaled_log", "wKL") # bg_prob: background probability, default is equal i.e NULL # Returns data for ED plot from MSA # Mut matrix: # PFM matrix # PFM matrix scaled # ED matrix # Wt matrix [optional] # For my case, I always use it as it helps see what is at the wild-type already! # TODO: SHINY # drop down: ED score type (in the actual plot function!) # drop down/enter field : bg probability (in the actual plot function!) # Make it hover over position and then get the corresponding data table! ########################a########################################################### DataED_PFM <- function(msaSeq_mut , msaSeq_wt , ED_score = c("log") , bg_prob = NULL) { dash_control = list() dash_control_default <- list(concentration = NULL, mode = NULL, optmethod = "mixEM", sample_weights = NULL, verbose = FALSE, bf = TRUE, pi_init = NULL, squarem_control = list(), dash_control = list(), reportcov = FALSE) dash_control <- modifyList(dash_control_default, dash_control) ############################################ # Data processing for logo plot for nsSNPS ########################################### cat("\nLength of MSA", length(msaSeq_mut)) pfm_mutM = matrix() pfm_mut_scaledM = matrix() combED_mutM = matrix() #-------------------------- # Getting PFM: mutant MSA #-------------------------- pfm_mutM <- Biostrings::consensusMatrix(msaSeq_mut) colnames(pfm_mutM) <- 1:dim(pfm_mutM)[2] pfm_mut_scaledM <- do.call(dash, append(list(comp_data = pfm_mutM), dash_control))$posmean logo_mut_h = get_logo_heights(pfm_mut_scaledM , bg = bg_prob , score = ED_score) cat("\nGetting logo_heights from Logolas package...") pos_mutM = logo_mut_h[['table_mat_pos_norm']]; pos_mutM pos_mutS = logo_mut_h[['pos_ic']]; pos_mutS pos_mutED = t(pos_mutS*t(pos_mutM)); pos_mutED neg_mutM = logo_mut_h[['table_mat_neg_norm']]*(-1) neg_mutS = logo_mut_h[['neg_ic']]; neg_mutS neg_mutED = t(neg_mutS*t(neg_mutM)); neg_mutED if (length(pos_mutS) && length(neg_mutS) == dim(pfm_mutM)[2]){ cat("\nPASS: pfm calculated successfully including scaled matrix" , "\nDim of pfm matrix:", dim(pfm_mutM)[1], dim(pfm_mutM)[2]) } combED_mutM = pos_mutED + neg_mutED # initialise the mut list names_mutL = c("pfm_mutM", "pfm_mut_scaledM", "combED_mutM") EDmutDataL = vector("list", length(names_mutL)) EDmutDataL = list(pfm_mutM, pfm_mut_scaledM, combED_mutM) names(EDmutDataL) = names_mutL #--------------------- # Getting PFM: WT #--------------------- if(!missing(msaSeq_wt)){ cat("\nLength of WT seq", length(msaSeq_wt)) pfm_wtM = matrix() pfm_wt_scaledM = matrix() combED_wtM = matrix() pfm_wtM <- Biostrings::consensusMatrix(msaSeq_wt) colnames(pfm_wtM) <- 1:dim(pfm_wtM)[2] pfm_wt_scaledM <- do.call(dash, append(list(comp_data = pfm_wtM), dash_control))$posmean logo_wt_h = get_logo_heights(pfm_wt_scaledM , bg = bg_prob , score = ED_score) pos_wtM = logo_wt_h[['table_mat_pos_norm']]; pos_wtM pos_wtS = logo_wt_h[['pos_ic']]; pos_wtS pos_wtED = t(pos_wtS*t(pos_wtM)); pos_wtED neg_wtM = logo_wt_h[['table_mat_neg_norm']]*(-1) neg_wtS = logo_wt_h[['neg_ic']]; neg_wtS neg_wtED = t(neg_wtS*t(neg_wtM)); neg_wtED if (length(pos_wtS) && length(neg_wtS) == dim(pfm_wtM)[2]){ cat("\nPASS: pfm calculated successfully including scaled matrix" , "\nDim of pfm matrix:", dim(pfm_wtM)[1], dim(pfm_wtM)[2]) } combED_wtM = pos_wtED + neg_wtED # initialise the wt list names_wtL = c("pfm_wtM", "pfm_wt_scaledM", "combED_wtM") EDwtDataL = vector("list", length(names_wtL)) EDwtDataL = list(pfm_wtM, pfm_wt_scaledM, combED_wtM) names(EDwtDataL) = names_wtL # Combine two lists EDallDataL = append(EDmutDataL, EDwtDataL) cat("\nReturning output for Mut + WT" , "\nLength of all data:", length(EDallDataL)) return(EDallDataL) }else{ cat("\nReturning output for Mut data only" , "\nLength of Mut data:", length(EDmutDataL)) return(EDmutDataL) } }