getwd() setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") getwd() ######################################################################## # Installing and loading required packages # ######################################################################## source("../Header_TT.R") #source("barplot_colour_function.R") require(data.table) ######################################################################## # Read file: call script for combining df # ######################################################################## source("../combining_two_df.R") #---------------------- PAY ATTENTION # the above changes the working dir #[1] "git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts" #---------------------- PAY ATTENTION #========================== # This will return: # df with NA: # merged_df2 # merged_df3 # df without NA: # merged_df2_comp # merged_df3_comp #========================== ########################### # Data for plots # you need merged_df2, comprehensive one # since this has one-many relationship # i.e the same SNP can belong to multiple lineages ########################### # uncomment as necessary #<<<<<<<<<<<<<<<<<<<<<<<<< # REASSIGNMENT my_df = merged_df2 #my_df = merged_df2_comp #<<<<<<<<<<<<<<<<<<<<<<<<< # delete variables not required rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) # quick checks colnames(my_df) str(my_df) # Ensure correct data type in columns to plot: need to be factor is.factor(my_df$lineage) my_df$lineage = as.factor(my_df$lineage) is.factor(my_df$lineage) #========================== # Plot: Lineage barplot # x = lineage y = No. of samples # col = Lineage # fill = lineage #============================ table(my_df$lineage) # lineage1 lineage2 lineage3 lineage4 lineage5 lineage6 lineageBOV #3 104 1293 264 1311 6 6 105 #=========================== # Plot: Lineage Barplots #=========================== #=================== # Data for plots #=================== #<<<<<<<<<<<<<<<<<<<<<<<<< # REASSIGNMENT df <- my_df #<<<<<<<<<<<<<<<<<<<<<<<<< rm(my_df) # get freq count of positions so you can subset freq<1 #setDT(df)[, lineage_count := .N, by = .(lineage)] #****************** # generate plot: barplot of mutation by lineage #****************** sel_lineages = c("lineage1" , "lineage2" , "lineage3" , "lineage4") df_lin = subset(df, subset = lineage %in% sel_lineages ) #FIXME; add sanity check for numbers. # Done this manually ############################################################ ######### # Data for barplot: Lineage barplot # to show total samples and number of unique mutations # within each linege ########## # Create df with lineage inform & no. of unique mutations # per lineage and total samples within lineage # this is essentially barplot with two y axis bar = bar = as.data.frame(sel_lineages) #4, 1 total_snps_u = NULL total_samples = NULL for (i in sel_lineages){ #print(i) curr_total = length(unique(df$id)[df$lineage==i]) total_samples = c(total_samples, curr_total) print(total_samples) foo = df[df$lineage==i,] print(paste0(i, "=======")) print(length(unique(foo$Mutationinformation))) curr_count = length(unique(foo$Mutationinformation)) total_snps_u = c(total_snps_u, curr_count) } print(total_snps_u) bar$num_snps_u = total_snps_u bar$total_samples = total_samples bar #***************** # generate plot: lineage barplot with two y-axis #https://stackoverflow.com/questions/13035295/overlay-bar-graphs-in-ggplot2 #***************** bar$num_snps_u = y1 bar$total_samples = y2 sel_lineages = x to_plot = data.frame(x = x , y1 = y1 , y2 = y2) to_plot melted = melt(to_plot, id = "x") melted # set output dir for plots getwd() setwd("~/git/Data/pyrazinamide/output/plots") getwd() svg('lineage_basic_barplot.svg') my_ats = 20 # axis text size my_als = 22 # axis label size g = ggplot(melted , aes(x = x , y = value , fill = variable) ) printFile = g + geom_bar( #g + geom_bar( stat = "identity" , position = position_stack(reverse = TRUE) , alpha=.75 , colour='grey75' ) + theme( axis.text.x = element_text( size = my_ats # , angle= 30 ) , axis.text.y = element_text(size = my_ats #, angle = 30 , hjust = 1 , vjust = 0) , axis.title.x = element_text( size = my_als , colour = 'black' ) , axis.title.y = element_text( size = my_als , colour = 'black' ) , legend.position = "top" , legend.text = element_text(size = my_als) #) + geom_text( ) + geom_label( aes(label = value) , size = 5 , hjust = 0.5 , vjust = 0.5 , colour = 'black' , show.legend = FALSE #, check_overlap = TRUE , position = position_stack(reverse = T) #, position = (' ) + labs( title = '' , x = '' , y = "Number" , fill = 'Variable' , colour = 'black' ) + scale_fill_manual( values = c('grey50', 'gray75') , name='' , labels=c('Mutations', 'Total Samples') ) + scale_x_discrete( breaks = c('lineage1', 'lineage2', 'lineage3', 'lineage4') , labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4') ) print(printFile) dev.off()