####################################################################################
# LogoPlotSnps(): logo plots of all nsSNPs per position
#
# Input:
#   plot_df            : data frame (e.g. merged_df3). Must contain the columns
#                        named by x_axis_colname, symbol_mut_colname and
#                        symbol_wt_colname, plus "position"; a "ligD_colours"
#                        column is expected after generate_distance_colour_map()
#   x_axis_colname     : column holding the position (default "position")
#   symbol_mut_colname : column holding the mutant residue (default "mutant_type")
#   symbol_wt_colname  : column holding the wild-type residue (default "wild_type")
#   omit_snp_count     : c(0, 1, 2...) positions having exactly this many nsSNPs
#                        are dropped; 0 means "no filtering"
#   my_logo_col        : one of "chemistry", "hydrophobicity", "clustalx", "taylor"
#      --> clustalx and taylor           : black background + white font
#      --> chemistry and hydrophobicity  : white background + black font
#   x_lab, y_lab       : axis labels
#   x_ats, y_ats       : axis text sizes;  x_tangle, y_tangle: axis text angles
#   x_tts, y_tts       : axis title sizes
#   leg_pos, leg_dir   : legend position / direction
#   leg_ts, leg_tts    : legend text / legend title size
#   tpos0, tW0, tH0    : accepted for interface compatibility; not used in this body
#   debug              : if TRUE, print diagnostics (also forwarded to
#                        generate_distance_colour_map())
#
# Returns: the cowplot::plot_grid() object stacking, top to bottom, the mutant
#          logo, the wild-type logo, a tile strip coloured by ligD_colours and
#          the output of position_annotation().
#          Helps to see the overview of SNP diversity.
#
# Side effects: always prints the nsSNP position frequency table to the console.
#
# TODO: SHINY
#   select/drop down: omit_snp_count
#   select/drop down: my_logo_col
#   should include WT??
#   Make it hover over position and then get the corresponding data table!
####################################################################################

#==================
# logo data: OR
#==================
LogoPlotSnps <- function(plot_df
                         , x_axis_colname = "position"
                         , symbol_mut_colname = "mutant_type"
                         , symbol_wt_colname = "wild_type"
                         , omit_snp_count = c(0) # can be 1, 2, etc.
                         , my_logo_col = "chemistry"
                         , x_lab = "Position"
                         , y_lab = "Count"
                         , x_ats = 14 # text size
                         , x_tangle = 90 # text angle
                         , y_ats = 22
                         , y_tangle = 0
                         , x_tts = 20 # title size
                         , y_tts = 23
                         , leg_pos = "none" # can be top, left, right and bottom or c(0.8, 0.9)
                         , leg_dir = "horizontal" # can be vertical or horizontal
                         , leg_ts = 20 # leg text size
                         , leg_tts = 16 # leg title size
                         , tpos0 = 0 # 0 is a magic number that does my sensible default
                         , tW0 = 1
                         , tH0 = 0.2
                         , debug = FALSE
) {

  # Non-numeric omit_snp_count (e.g. "1,2" from a text input): pull out every
  # run of digits. is.numeric() is TRUE for integer and double alike, unlike
  # comparing class() against "numeric".
  if (!is.numeric(omit_snp_count)) {
    omit_chr <- as.character(omit_snp_count)
    omit_snp_count <- as.numeric(
      unlist(regmatches(omit_chr, gregexpr("[0-9]+", omit_chr)))
    )
  }

  ############################################
  # Data processing for logo plot for nsSNPS
  ############################################

  # Generate "ligand distance" colour map
  # NOTE(review): debug used to be hard-coded to TRUE here; it now follows the
  # caller's flag -- confirm the helper's debug only controls diagnostics.
  plot_df <- generate_distance_colour_map(plot_df, debug = debug)

  # One row per position, ordered by position
  unique_colour_map <- unique(plot_df[, c("position", "ligD_colours")])
  unique_colour_map <- unique_colour_map[order(unique_colour_map$position), ]
  rownames(unique_colour_map) <- unique_colour_map$position

  # Factor version used as the data of the tile strip
  unique_colour_map2 <- unique_colour_map
  unique_colour_map2$position <- as.factor(unique_colour_map2$position)
  unique_colour_map2$ligD_colours <- as.factor(unique_colour_map2$ligD_colours)

  # Number of rows (nsSNPs) per position; setDT() makes plot_df a data.table.
  # The column name is passed as a character vector instead of eval(parse()).
  setDT(plot_df)[, mut_pos_occurrence := .N, by = c(x_axis_colname)]

  cat("\nDisplaying nsSNP position frequency:\n")
  print(table(plot_df$mut_pos_occurrence))

  #----------------------------------
  # Subset data as specified by user
  #----------------------------------
  no_filter <- (length(omit_snp_count) == 1) && (omit_snp_count == 0)

  if (no_filter) {
    my_data_snp <- plot_df
  } else {
    my_data_snp <- subset(plot_df, !(mut_pos_occurrence %in% omit_snp_count))
  }

  # Without rows, max(table()) is -Inf and the axis sequences below break
  if (nrow(my_data_snp) == 0) {
    stop("LogoPlotSnps: no nsSNPs left to plot (empty input or everything ",
         "removed by omit_snp_count)", call. = FALSE)
  }

  u <- unique(my_data_snp[[x_axis_colname]])
  max_mult_mut <- max(table(my_data_snp[[x_axis_colname]]))

  if (debug) {
    n_total <- sum(!is.na(plot_df$mut_pos_occurrence))
    # Count by frequency VALUE; indexing table(...) with omit_snp_count would
    # index by table slot and miscount when frequencies are not 1, 2, 3, ...
    n_omitted <- sum(plot_df$mut_pos_occurrence %in% omit_snp_count)

    if (no_filter) {
      cat("\nNo filtering requested:"
          , "\nTotal no. of nsSNPs:", n_total
          , "\nTotal no. of nsSNPs omitted:", n_omitted
          , "\nDim of data:", dim(my_data_snp)
          , "\nNo. of positions:", length(u)
          , "\nMax no. of muts at any position:", max_mult_mut)
    } else {
      exp_nrows <- n_total - n_omitted
      got_rows <- sum(!is.na(my_data_snp$mut_pos_occurrence))

      if (got_rows == exp_nrows) {
        cat("\nPass: Position with the stated nsSNP frequency filtered:", omit_snp_count
            , "\nTotal no. of nsSNPs:", n_total
            , "\nTotal no. of nsSNPs omitted:", n_omitted
            , "\nDim of subsetted data:", dim(my_data_snp)
            , "\nNo. of positions:", length(u)
            , "\nMax no. of muts at any position:", max_mult_mut)
      } else {
        cat("\nFAIL:Position with the stated nsSNP frequency COULD NOT be filtered..."
            , "\nExpected:", exp_nrows
            , "\nGot:", got_rows
        )
      }
    }
  }

  #--------------------------------------
  # matrix for mutant type
  # frequency of mutant type by position
  #---------------------------------------
  # unclass() turns the two-way table into a plain count matrix
  tab_mt <- unclass(table(my_data_snp[[symbol_mut_colname]]
                          , my_data_snp[[x_axis_colname]]))

  if (debug && is.matrix(tab_mt)) {
    cat("\nPASS: Mutant matrix successfully created...")
  }

  #-------------------------------------
  # matrix for wild type
  # frequency of wild type by position
  #-------------------------------------
  # Important: remove wt duplicates, so each (position, wild type) pair is
  # counted once. Columns are picked by name from a plain data.frame.
  wt <- unique(
    as.data.frame(my_data_snp)[, c(x_axis_colname, symbol_wt_colname), drop = FALSE]
  )
  tab_wt <- table(wt[[symbol_wt_colname]], wt[[x_axis_colname]]) # should all be 1

  if (debug) {
    if (identical(colnames(tab_mt), colnames(tab_wt)) &&
        identical(ncol(tab_mt), ncol(tab_wt))) {
      cat("\nPASS: Wild type matrix successfully created"
          , "\nDim of wt matrix:", dim(tab_wt)
          , "\nDim of mutant matrix:", dim(tab_mt)
          , "\n")
    }
  }

  ######################################
  # Theme colours for the chosen scheme
  ######################################
  if (my_logo_col %in% c("clustalx", "taylor")) {
    cat("\nSelected colour scheme:", my_logo_col
        , "\nUsing black theme\n")
    theme_bgc <- "black"
    xfont_bgc <- "white"
    yfont_bgc <- "white"
    xtt_col <- "white"
    ytt_col <- "white"
  } else if (my_logo_col %in% c("chemistry", "hydrophobicity")) {
    cat("\nSelected colour scheme:", my_logo_col
        , "\nUsing grey theme")
    theme_bgc <- "white"
    xfont_bgc <- "black"
    yfont_bgc <- "black"
    xtt_col <- "black"
    ytt_col <- "black"
  } else {
    # Previously this fell through and failed later with "object 'theme_bgc'
    # not found"
    stop("LogoPlotSnps: my_logo_col must be one of 'chemistry', ",
         "'hydrophobicity', 'clustalx', 'taylor'", call. = FALSE)
  }

  #####################################
  # Generating logo plots for nsSNPs
  #####################################

  #-------------------
  # Mutant logo plot
  #-------------------
  # y breaks sit at 0..(max-1) but are labelled 1..max
  mut_logo <- ggseqlogo(tab_mt
                        , method = "custom"
                        , col_scheme = my_logo_col
                        , seq_type = "aa") +
    scale_x_continuous(breaks = seq_len(ncol(tab_mt))
                       , expand = c(0.01, 0)
                       , labels = colnames(tab_mt)) +
    scale_y_continuous(breaks = 0:(max_mult_mut - 1)
                       , labels = seq_len(max_mult_mut)
                       , limits = c(0, max_mult_mut)) +
    ylab(y_lab) +
    theme(text = element_text(family = "FreeSans")
          , legend.position = leg_pos
          , legend.direction = leg_dir
          , legend.title = element_text(size = leg_tts
                                        , colour = ytt_col)
          , legend.text = element_text(size = leg_ts)
          , axis.text.x = element_text(size = x_ats
                                       , angle = x_tangle
                                       , hjust = 1
                                       , vjust = 0.4
                                       , colour = xfont_bgc)
          , axis.text.y = element_text(size = y_ats
                                       , angle = y_tangle
                                       , hjust = 1
                                       , vjust = -1.0
                                       , colour = yfont_bgc)
          , axis.title.x = element_text(size = x_tts
                                        , colour = xtt_col)
          , axis.title.y = element_text(size = y_tts
                                        , colour = ytt_col)
          , plot.background = element_rect(fill = theme_bgc, colour = NA)
    )

  #------------------
  # Wild logo plot
  #------------------
  wt_logo <- ggseqlogo(tab_wt
                       , method = "custom"
                       , col_scheme = my_logo_col
                       , seq_type = "aa") +
    scale_x_continuous(breaks = seq_len(ncol(tab_wt))
                       , expand = c(0.01, 0)
                       , labels = as.factor(colnames(tab_wt))) +
    theme(text = element_text(family = "FreeSans")
          , legend.position = "none"
          , axis.text.x = element_blank()
          , axis.text.y = element_blank()
          , axis.title.x = element_blank()
          , axis.title.y = element_blank()
          , plot.background = element_rect(fill = theme_bgc, colour = NA)
    ) +
    labs(x = NULL, y = NULL)

  #---------------------------------------------------
  # heat-mapped distance tiles along the bottom
  #---------------------------------------------------
  # NOTE(review): the strip is built from every position in plot_df, whereas
  # the logos use the filtered my_data_snp -- confirm the columns still line
  # up when omit_snp_count removes positions.
  distance_tiles <- ggplot(data = unique_colour_map2
                           , aes(x = factor(position)
                                 , y = 0
                                 , fill = position
                                 , colour = position
                                 , linetype = "blank")) +
    geom_tile() +
    theme(axis.text.x = element_blank()
          , axis.ticks.x = element_blank()
          , axis.text.y = element_blank()
          , axis.ticks.y = element_blank()
          , axis.title.x = element_blank()
          , legend.text = element_text(size = leg_ts)
          , legend.position = leg_pos
          , legend.direction = leg_dir
          , plot.background = element_rect(fill = theme_bgc, colour = NA)
          , plot.margin = margin(t = 0)
          , panel.grid = element_blank()
          , panel.background = element_rect(fill = theme_bgc, colour = NA)
    ) +
    scale_x_discrete(x_lab, labels = unique_colour_map$position) +
    scale_color_manual(values = unique_colour_map$ligD_colours) +
    scale_fill_manual(values = unique_colour_map$ligD_colours) +
    labs(y = NULL)

  #---------------------------------------------------
  # Stack the panels; the NULL entry with a negative
  # relative height is kept exactly as in the original layout
  #---------------------------------------------------
  cowplot::plot_grid(mut_logo
                     , wt_logo
                     , distance_tiles
                     , NULL
                     , position_annotation(plot_df, bg = theme_bgc)
                     , ncol = 1
                     , align = "v"
                     , axis = "lr"
                     , rel_heights = c(7/10, 2/7, 1/7, -0.1, 0.5/7))
}
#LogoPlotSnps(small_df3)