getwd() #setwd("~/Documents/git/LSHTM_Y1_PNCA/combined_v3/logo_plot") # wor_mychisqk setwd("~/git/LSHTM_Y1_PNCA/combined_v3/logo_plot") # thinkpad #setwd("/Users/tanu/git/LSHTM_Y1_PNCA/combined_v3/logo_plot") # mac getwd() ######################################################### # 1: Installing and loading required packages ######################################################### source("../../Header_TT.R") #source("barplot_colour_function.R") #install.packages("ggseqlogo") library(ggseqlogo) ######################################################################## # end of loading libraries and functions # ######################################################################## setwd("/home/tanu/git/LSHTM_analysis/plotting_test") source("../scripts/plotting/combining_dfs_plotting.R") # since we will be using df without NA, its best to delete the ones with NA rm(merged_df2, merged_df3) ########################### # 3: Data for_mychisq DUET plots # you need merged_df3_comp # since these have unique SNPs ########################### #<<<<<<<<<<<<<<<<<<<<<<<<< # REASSIGNMENT my_df = merged_df3_comp my_df = merged_df3 #try! #<<<<<<<<<<<<<<<<<<<<<<<<< colnames(my_df) str(my_df) rownames(my_df) = my_df$Mutationinfor_mychisqmation c1 = unique(my_df$position) #96 nrow(my_df) #189 #get freq count of positions so you can subset freq<1 require(data.table) setDT(my_df)[, occurrence := .N, by = .(position)] #189, 36 table(my_df$position); table(my_df$occurrence) #extract freq_pos>1 my_data_snp = my_df[my_df$occurrence!=1,] #144, 36 u = unique(my_data_snp$position) #51 ######################################################################## # end of data extraction and cleaning for_mychisq plots # ######################################################################## ######################################################### #Task: To generate a logo plot or_mychisq bar plot but coloured #aa properties. #step1: read mcsm file and or_mychisq file #step2: plot wild type positions #step3: plot mutants per position coloured by aa properties #step4: make the size of the letters/bars prop to or_mychisq if you can! ######################################################### ##useful links #https://stackoverflow.com/questions/5438474/plotting-a-sequence-logo-using-ggplot2 #https://omarwagih.github.io/ggseqlogo/ #https://kkdey.github.io/Logolas-pages/wor_mychisqkflow.html #A new sequence logo plot to highlight enrichment and depletion. # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6288878/ ##very good: http://www.cbs.dtu.dk/biotools/Seq2Logo-2.0/ ############# #PLOTS: Bar plot with aa properties #using gglogo #useful links: https://stackoverflow.com/questions/5438474/plotting-a-sequence-logo-using-ggplot2 ############# #following example require(ggplot2) require(reshape2) library(gglogo) library(ggrepel) #lmf <- melt(logodf, id.var='pos') foo = my_data_snp[, c("position", "mutant_type","duet_scaled", "or_mychisq", "mut_prop_polarity", "mut_prop_water") ] #144, 6 head(foo) foo = foo[or_mychisqder(foo$position),] head(foo) #============== # matrix for_mychisq mutant type # frequency of mutant type by position #============== table(my_data_snp$mutant_type, my_data_snp$position) tab = table(my_data_snp$mutant_type, my_data_snp$position) class(tab) # unclass to convert to matrix tab = unclass(tab) tab = as.matrix(tab, rownames = T) #should be TRUE is.matrix(tab) rownames(tab) #aa colnames(tab) #pos #************** # Plot 1: mutant logo #************** # generate seq logo p0 = ggseqlogo(tab , method = 'custom' , seq_type = 'aa' #, col_scheme = "taylor_mychisq" #, col_scheme = "chemistry2" ) + #ylab('my custom height') + theme(axis.text.x = element_blank()) + theme_logo()+ # scale_x_continuous(breaks=1:51, parse (text = colnames(tab)) ) scale_x_continuous(breaks = 1:ncol(tab) , labels = colnames(tab))+ scale_y_continuous( breaks = 1:5 , limits = c(0, 6)) p0 # further customisation p1 = p0 + theme(legend.position = "bottom" , legend.title = element_blank() , legend.text = element_text(size = 20) , axis.text.x = element_text(size = 20, angle = 90) , axis.text.y = element_text(size = 20, angle = 90)) p1 #============== # matrix for_mychisq wild type # frequency of wild type by position #============== tab_wt = table(my_data_snp$wild_type, my_data_snp$position); tab_wt #17, 51 tab_wt = unclass(tab_wt) #remove wt duplicates wt = my_data_snp[, c("position", "wild_type")] #144, 2 wt = wt[!duplicated(wt),]#51, 2 tab_wt = table(wt$wild_type, wt$position); tab_wt # should all be 1 rownames(tab_wt) rownames(tab) #************** # Plot 2: for_mychisq wild_type # with custom x axis to reflect my aa positions #************** # sanity check: MUST BE TRUE # for_mychisq the cor_mychisqrectnes of the x axis identical(colnames(tab), colnames(tab_wt)) identical(ncol(tab), ncol(tab_wt)) p2 = ggseqlogo(tab_wt , method = 'custom' , seq_type = 'aa' #, col_scheme = "taylor" #, col_scheme = chemistry2 ) + #ylab('my custom height') + theme(axis.text.x = element_blank() , axis.text.y = element_blank()) + theme_logo() + scale_x_continuous(breaks = 1:ncol(tab_wt) , labels = colnames(tab_wt)) + scale_y_continuous( limits = c(0, 5)) p2 # further customise p3 = p2 + theme(legend.position = "none" , axis.text.x = element_text(size = 20 , angle = 90) , axis.text.y = element_blank()) p3 # Now combine using cowplot, which ensures the plots are aligned suppressMessages( require(cowplot) ) plot_grid(p1, p3, ncol = 1, align = 'v') #+ # background_grid(minor_mychisq = "xy" # , size.minor_mychisq = 1 # , colour.minor_mychisq = "grey86") #colour scheme #https://rdrr.io/cran/ggseqlogo/src/R/col_schemes.r