diff --git a/mcsm_analysis/pyrazinamide/scripts/.Rhistory b/mcsm_analysis/pyrazinamide/scripts/.Rhistory index 8055405..0112d24 100644 --- a/mcsm_analysis/pyrazinamide/scripts/.Rhistory +++ b/mcsm_analysis/pyrazinamide/scripts/.Rhistory @@ -1,512 +1,7 @@ -########################### -# you need merged_df3 -# or -# merged_df3_comp -# since these have unique SNPs -# I prefer to use the merged_df3 -# because using the _comp dataset means -# we lose some muts and at this level, we should use -# as much info as available -########################### -# uncomment as necessary -#%%%%%%%%%%%%%%%%%%%%%%%% -# REASSIGNMENT -my_df = merged_df3 -#my_df = merged_df3_comp -#%%%%%%%%%%%%%%%%%%%%%%%% -# delete variables not required -rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) -# quick checks -colnames(my_df) -str(my_df) -########################### -# Data for bfactor figure -# PS average -# Lig average -########################### -head(my_df$Position) -head(my_df$ratioDUET) -# order data frame -df = my_df[order(my_df$Position),] -head(df$Position) -head(df$ratioDUET) -#*********** -# PS: average by position -#*********** -mean_DUET_by_position <- df %>% -group_by(Position) %>% -summarize(averaged.DUET = mean(ratioDUET)) -#*********** -# Lig: average by position -#*********** -mean_Lig_by_position <- df %>% -group_by(Position) %>% -summarize(averaged.Lig = mean(ratioPredAff)) -#*********** -# cbind:mean_DUET_by_position and mean_Lig_by_position -#*********** -combined = as.data.frame(cbind(mean_DUET_by_position, mean_Lig_by_position )) -# sanity check -# mean_PS_Lig_Bfactor -colnames(combined) -colnames(combined) = c("Position" -, "average_DUETR" -, "Position2" -, "average_PredAffR") -colnames(combined) -identical(combined$Position, combined$Position2) -n = which(colnames(combined) == "Position2"); n -combined_df = combined[,-n] -max(combined_df$average_DUETR) ; min(combined_df$average_DUETR) -max(combined_df$average_PredAffR) ; min(combined_df$average_PredAffR) -#============= -# output csv -#============ -outDir = "~/Data/pyrazinamide/input/processed/" -outFile = paste0(outDir, "mean_PS_Lig_Bfactor.csv") -print(paste0("Output file with path will be:","", outFile)) -head(combined_df$Position); tail(combined_df$Position) -write.csv(combined_df, outFile -, row.names = F) getwd() setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") getwd() -######################################################################## -# Installing and loading required packages # -######################################################################## source("../Header_TT.R") -#source("barplot_colour_function.R") -require(data.table) -require(dplyr) -######################################################################## -# Read file: call script for combining df for PS # -######################################################################## source("../combining_two_df.R") -########################### -# This will return: -# df with NA: -# merged_df2 -# merged_df3 -# df without NA: -# merged_df2_comp -# merged_df3_comp -########################### -#---------------------- PAY ATTENTION -# the above changes the working dir -#[1] "git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts" -#---------------------- PAY ATTENTION -########################### -# you need merged_df3 -# or -# merged_df3_comp -# since these have unique SNPs -# I prefer to use the merged_df3 -# because using the _comp dataset means -# we lose some muts and at this level, we should use -# as much info as available -########################### -# uncomment as necessary -#%%%%%%%%%%%%%%%%%%%%%%%% -# REASSIGNMENT -my_df = merged_df3 -#my_df = merged_df3_comp -#%%%%%%%%%%%%%%%%%%%%%%%% -# delete variables not required -rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) -# quick checks -colnames(my_df) -str(my_df) -########################### -# Data for bfactor figure -# PS average -# Lig average -########################### -head(my_df$Position) -head(my_df$ratioDUET) -# order data frame -df = my_df[order(my_df$Position),] -head(df$Position) -head(df$ratioDUET) -#*********** -# PS: average by position -#*********** -mean_DUET_by_position <- df %>% -group_by(Position) %>% -summarize(averaged.DUET = mean(ratioDUET)) -#*********** -# Lig: average by position -#*********** -mean_Lig_by_position <- df %>% -group_by(Position) %>% -summarize(averaged.Lig = mean(ratioPredAff)) -#*********** -# cbind:mean_DUET_by_position and mean_Lig_by_position -#*********** -combined = as.data.frame(cbind(mean_DUET_by_position, mean_Lig_by_position )) -# sanity check -# mean_PS_Lig_Bfactor -colnames(combined) -colnames(combined) = c("Position" -, "average_DUETR" -, "Position2" -, "average_PredAffR") -colnames(combined) -identical(combined$Position, combined$Position2) -n = which(colnames(combined) == "Position2"); n -combined_df = combined[,-n] -max(combined_df$average_DUETR) ; min(combined_df$average_DUETR) -max(combined_df$average_PredAffR) ; min(combined_df$average_PredAffR) -#============= -# output csv -#============ -outDir = "~/git/Data/pyrazinamide/input/processed/" -outFile = paste0(outDir, "mean_PS_Lig_Bfactor.csv") -print(paste0("Output file with path will be:","", outFile)) -head(combined_df$Position); tail(combined_df$Position) -write.csv(combined_df, outFile -, row.names = F) -# read in pdb file complex1 -inDir = "~/git/Data/pyrazinamide/input/structure" -inFile = paste0(inDir, "complex1_no_water.pdb") -# read in pdb file complex1 -inDir = "~/git/Data/pyrazinamide/input/structure/" -inFile = paste0(inDir, "complex1_no_water.pdb") -complex1 = inFile -my_pdb = read.pdb(complex1 -, maxlines = -1 -, multi = FALSE -, rm.insert = FALSE -, rm.alt = TRUE -, ATOM.only = FALSE -, hex = FALSE -, verbose = TRUE) -######################### -#3: Read complex pdb file -########################## -source("Header_TT.R") -# list of 8 -my_pdb = read.pdb(complex1 -, maxlines = -1 -, multi = FALSE -, rm.insert = FALSE -, rm.alt = TRUE -, ATOM.only = FALSE -, hex = FALSE -, verbose = TRUE) -rm(inDir, inFile) -#====== end of script -inDir = "~/git/Data/pyrazinamide/input/structure/" -inFile = paste0(inDir, "complex1_no_water.pdb") -complex1 = inFile -complex1 = inFile -my_pdb = read.pdb(complex1 -, maxlines = -1 -, multi = FALSE -, rm.insert = FALSE -, rm.alt = TRUE -, ATOM.only = FALSE -, hex = FALSE -, verbose = TRUE) -inFile -inDir = "~/git/Data/pyrazinamide/input/structure/" -inFile = paste0(inDir, "complex1_no_water.pdb") -complex1 = inFile -#inFile2 = paste0(inDir, "complex2_no_water.pdb") -#complex2 = inFile2 -# list of 8 -my_pdb = read.pdb(complex1 -, maxlines = -1 -, multi = FALSE -, rm.insert = FALSE -, rm.alt = TRUE -, ATOM.only = FALSE -, hex = FALSE -, verbose = TRUE) -rm(inDir, inFile, complex1) -getwd() -setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts") -getwd() -source("Header_TT.R") -getwd() -setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts") -getwd() -######################################################################## -# Installing and loading required packages # -######################################################################## -source("Header_TT.R") -######################################################### -# TASK: replace B-factors in the pdb file with normalised values -# use the complex file with no water as mCSM lig was -# performed on this file. You can check it in the script: read_pdb file. -######################################################### -########################### -# 2: Read file: average stability values -# or mcsm_normalised file, output of step 4 mcsm pipeline -########################### -inDir = "~/git/Data/pyrazinamide/input/processed/" -inFile = paste0(inDir, "mean_PS_Lig_Bfactor.csv"); inFile -my_df <- read.csv(inFile -# , row.names = 1 -# , stringsAsFactors = F -, header = T) -str(my_df) -source("read_pdb.R") # list of 8 -# extract atom list into a variable -# since in the list this corresponds to data frame, variable will be a df -d = my_pdb[[1]] -# make a copy: required for downstream sanity checks -d2 = d -# sanity checks: B factor -max(d$b); min(d$b) -par(oma = c(3,2,3,0) -, mar = c(1,3,5,2) -, mfrow = c(3,2)) -#par(mfrow = c(3,2)) -#1: Original B-factor -hist(d$b -, xlab = "" -, main = "B-factor") -plot(density(d$b) -, xlab = "" -, main = "B-factor") -# 2: DUET scores -hist(my_df$average_DUETR -, xlab = "" -, main = "Norm_DUET") -plot(density(my_df$average_DUETR) -, xlab = "" -, main = "Norm_DUET") -# Set the margin on all sides -par(oma = c(3,2,3,0) -, mar = c(1,3,5,2) -, mfrow = c(3,2)) -#par(mfrow = c(3,2)) -#1: Original B-factor -hist(d$b -, xlab = "" -, main = "B-factor") -plot(density(d$b) -, xlab = "" -, main = "B-factor") -# 2: DUET scores -hist(my_df$average_DUETR -, xlab = "" -, main = "Norm_DUET") -plot(density(my_df$average_DUETR) -, xlab = "" -, main = "Norm_DUET") -#========= -# step 1_P1 -#========= -# Be brave and replace in place now (don't run sanity check) -# this makes all the B-factor values in the non-matched positions as NA -d$b = my_df$average_DUETR[match(d$resno, my_df$Position)] -#========= -# step 2_P1 -#========= -# count NA in Bfactor -b_na = sum(is.na(d$b)) ; b_na -# count number of 0's in Bactor -sum(d$b == 0) -# replace all NA in b factor with 0 -d$b[is.na(d$b)] = 0 -# sanity check: should be 0 -sum(is.na(d$b)) -# sanity check: should be True -if (sum(d$b == 0) == b_na){ -print ("Sanity check passed: NA's replaced with 0's successfully") -} else { -print("Error: NA replacement NOT successful, Debug code!") -} -max(d$b); min(d$b) -# sanity checks: should be True -if(max(d$b) == max(my_df$average_DUETR)){ -print("Sanity check passed: B-factors replaced correctly") -} else { -print ("Error: Debug code please") -} -if (min(d$b) == min(my_df$average_DUETR)){ -print("Sanity check passed: B-factors replaced correctly") -} else { -print ("Error: Debug code please") -} -#========= -# step 3_P1 -#========= -# sanity check: dim should be same before reassignment -# should be TRUE -dim(d) == dim(d2) -#========= -# step 4_P1 -#========= -# assign it back to the pdb file -my_pdb[[1]] = d -max(d$b); min(d$b) -#========= -# step 5_P1 -#========= -# output dir -getwd() -outDir = "~/git/Data/pyrazinamide/output/" -getwd() -outFile = paste0(outDir, "complex1_BwithNormDUET.pdb") -outFile = paste0(outDir, "complex1_BwithNormDUET.pdb"); outFile -outDir = "~/git/Data/pyrazinamide/input/structure" -outDir = "~/git/Data/pyrazinamide/input/structure/" -outFile = paste0(outDir, "complex1_BwithNormDUET.pdb"); outFile -write.pdb(my_pdb, outFile) -hist(d$b -, xlab = "" -, main = "repalced-B") -plot(density(d$b) -, xlab = "" -, main = "replaced-B") -# graph titles -mtext(text = "Frequency" -, side = 2 -, line = 0 -, outer = TRUE) -mtext(text = "DUET_stability" -, side = 3 -, line = 0 -, outer = TRUE) -#========================================================= -# Processing P2: Replacing B values with PredAff Scores -#========================================================= -# clear workspace -rm(list = ls()) -#========================================================= -# Processing P2: Replacing B values with PredAff Scores -#========================================================= -# clear workspace -rm(list = ls()) -########################### -# 2: Read file: average stability values -# or mcsm_normalised file, output of step 4 mcsm pipeline -########################### -inDir = "~/git/Data/pyrazinamide/input/processed/" -inFile = paste0(inDir, "mean_PS_Lig_Bfactor.csv"); inFile -my_df <- read.csv("../Data/mean_PS_Lig_Bfactor.csv" -# , row.names = 1 -# , stringsAsFactors = F -, header = T) -str(my_df) -#========================================================= -# Processing P2: Replacing B factor with mean ratioLig scores -#========================================================= -######################### -# 3: Read complex pdb file -# form the R script -########################## -source("read_pdb.R") # list of 8 -# extract atom list into a vari -inDir = "~/git/Data/pyrazinamide/input/processed/" -inFile = paste0(inDir, "mean_PS_Lig_Bfactor.csv"); inFile -my_df <- read.csv(inFile -# , row.names = 1 -# , stringsAsFactors = F -, header = T) -str(my_df) -# extract atom list into a variable -# since in the list this corresponds to data frame, variable will be a df -d = my_pdb[[1]] -# make a copy: required for downstream sanity checks -d2 = d -# sanity checks: B factor -max(d$b); min(d$b) -par(oma = c(3,2,3,0) -, mar = c(1,3,5,2) -, mfrow = c(3,2)) -#par(mfrow = c(3,2)) -# 1: Original B-factor -hist(d$b -, xlab = "" -, main = "B-factor") -plot(density(d$b) -, xlab = "" -, main = "B-factor") -# 2: Pred Aff scores -hist(my_df$average_PredAffR -, xlab = "" -, main = "Norm_lig_average") -plot(density(my_df$average_PredAffR) -, xlab = "" -, main = "Norm_lig_average") -# 3: After the following replacement -#******************************** -par(oma = c(3,2,3,0) -, mar = c(1,3,5,2) -, mfrow = c(3,2)) -#par(mfrow = c(3,2)) -# 1: Original B-factor -hist(d$b -, xlab = "" -, main = "B-factor") -plot(density(d$b) -, xlab = "" -, main = "B-factor") -# 2: Pred Aff scores -hist(my_df$average_PredAffR -, xlab = "" -, main = "Norm_lig_average") -plot(density(my_df$average_PredAffR) -, xlab = "" -, main = "Norm_lig_average") -# 3: After the following replacement -#******************************** -#========= -# step 1_P2: BE BRAVE and replace in place now (don't run step 0) -#========= -# this makes all the B-factor values in the non-matched positions as NA -d$b = my_df$average_PredAffR[match(d$resno, my_df$Position)] -#========= -# step 2_P2 -#========= -# count NA in Bfactor -b_na = sum(is.na(d$b)) ; b_na -# count number of 0's in Bactor -sum(d$b == 0) -# replace all NA in b factor with 0 -d$b[is.na(d$b)] = 0 -# sanity check: should be 0 -sum(is.na(d$b)) -if (sum(d$b == 0) == b_na){ -print ("Sanity check passed: NA's replaced with 0's successfully") -} else { -print("Error: NA replacement NOT successful, Debug code!") -} -max(d$b); min(d$b) -# sanity checks: should be True -if (max(d$b) == max(my_df$average_PredAffR)){ -print("Sanity check passed: B-factors replaced correctly") -} else { -print ("Error: Debug code please") -} -if (min(d$b) == min(my_df$average_PredAffR)){ -print("Sanity check passed: B-factors replaced correctly") -} else { -print ("Error: Debug code please") -} -#========= -# step 3_P2 -#========= -# sanity check: dim should be same before reassignment -# should be TRUE -dim(d) == dim(d2) -#========= -# step 4_P2 -#========= -# assign it back to the pdb file -my_pdb[[1]] = d -max(d$b); min(d$b) -#========= -# step 5_P2 -#========= -write.pdb(my_pdb, "Plotting/structure/complex1_BwithNormLIG.pdb") -# output dir -getwd() -# output dir -outDir = "~/git/Data/pyrazinamide/input/structure/" -outFile = paste0(outDir, "complex1_BwithNormLIG.pdb") -outFile = paste0(outDir, "complex1_BwithNormLIG.pdb"); outFile -write.pdb(my_pdb, outFile) +source("../combining_two_df.R") +source("../combining_two_df.R") diff --git a/mcsm_analysis/pyrazinamide/scripts/Header_TT.R b/mcsm_analysis/pyrazinamide/scripts/Header_TT.R index 2069fa7..9eae42a 100644 --- a/mcsm_analysis/pyrazinamide/scripts/Header_TT.R +++ b/mcsm_analysis/pyrazinamide/scripts/Header_TT.R @@ -1,25 +1,31 @@ ######################################################### ### A) Installing and loading required packages ######################################################### +#lib_loc = "/usr/local/lib/R/site-library") #if (!require("gplots")) { # install.packages("gplots", dependencies = TRUE) # library(gplots) #} -if (!require("tidyverse")) { - install.packages("tidyverse", dependencies = TRUE) - library(tidyverse) -} +#if (!require("tidyverse")) { +# install.packages("tidyverse", dependencies = TRUE) +# library(tidyverse) +#} if (!require("ggplot2")) { install.packages("ggplot2", dependencies = TRUE) library(ggplot2) } +if (!require("plotly")) { + install.packages("plotly", dependencies = TRUE) + library(plotly) +} + if (!require("cowplot")) { install.packages("copwplot", dependencies = TRUE) - library(ggplot2) + library(cowplot) } if (!require("ggcorrplot")) { @@ -43,37 +49,33 @@ if (!require ("GOplot")) { } if(!require("VennDiagram")) { - install.packages("VennDiagram", dependencies = T) library(VennDiagram) } if(!require("scales")) { - install.packages("scales", dependencies = T) library(scales) } if(!require("plotrix")) { - install.packages("plotrix", dependencies = T) library(plotrix) } if(!require("stats")) { - install.packages("stats", dependencies = T) library(stats) } if(!require("stats4")) { - install.packages("stats4", dependencies = T) library(stats4) } if(!require("data.table")) { - library(stats4) +install.packages("data.table") + library(data.table) } if (!require("PerformanceAnalytics")){ @@ -98,18 +100,17 @@ if (!require ("psych")){ if (!require ("dplyr")){ install.packages("dplyr") - library(psych) + library(dplyr) } - if (!require ("compare")){ install.packages("compare") - library(psych) + library(compare) } if (!require ("arsenal")){ install.packages("arsenal") - library(psych) + library(arsenal) } diff --git a/mcsm_analysis/pyrazinamide/scripts/combining_two_df.R b/mcsm_analysis/pyrazinamide/scripts/combining_two_df.R index 57e476a..d0e13cd 100644 --- a/mcsm_analysis/pyrazinamide/scripts/combining_two_df.R +++ b/mcsm_analysis/pyrazinamide/scripts/combining_two_df.R @@ -11,7 +11,7 @@ getwd() # Installing and loading required packages # ######################################################################## -source("Header_TT.R") +#source("Header_TT.R") #require(data.table) #require(arsenal) #require(compare) @@ -286,7 +286,7 @@ outDir = "~/git/Data/pyrazinamide/output/" getwd() outFile1 = paste0(outDir, "merged_df3.csv"); outFile1 -write.csv(merged_df3, outFile1) +#write.csv(merged_df3, outFile1) #outFile2 = paste0(outDir, "merged_df3_comp.csv"); outFile2 #write.csv(merged_df3_comp, outFile2) diff --git a/mcsm_analysis/pyrazinamide/scripts/plotting/lineage_dist_PS.R b/mcsm_analysis/pyrazinamide/scripts/plotting/lineage_dist_PS.R index cc7e15a..4c54681 100644 --- a/mcsm_analysis/pyrazinamide/scripts/plotting/lineage_dist_PS.R +++ b/mcsm_analysis/pyrazinamide/scripts/plotting/lineage_dist_PS.R @@ -1,5 +1,5 @@ getwd() -setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") # thinkpad +setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") getwd() ######################################################################## @@ -24,11 +24,11 @@ source("../combining_two_df.R") #========================== # This will return: -# df with NA: +# df with NA for pyrazinamide: # merged_df2 # merged_df3 -# df without NA: +# df without NA for pyrazinamide: # merged_df2_comp # merged_df3_comp #=========================== @@ -38,14 +38,17 @@ source("../combining_two_df.R") # you need merged_df2 or merged_df2_comp # since this is one-many relationship # i.e the same SNP can belong to multiple lineages +# using the _comp dataset means +# we lose some muts and at this level, we should use +# as much info as available, hence use df with NA ########################### # uncomment as necessary -#<<<<<<<<<<<<<<<<<<<<<<<<< +#!!!!!!!!!!!!!!!!!!!!!!! # REASSIGNMENT my_df = merged_df2 #my_df = merged_df2_comp -#<<<<<<<<<<<<<<<<<<<<<<<<< +#!!!!!!!!!!!!!!!!!!!!!!! # delete variables not required rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) @@ -59,12 +62,39 @@ is.factor(my_df$lineage) my_df$lineage = as.factor(my_df$lineage) is.factor(my_df$lineage) -table(my_df$mutation_info) +table(my_df$mutation_info); str(my_df$mutation_info) + +# subset df with dr muts only +my_df_dr = subset(my_df, mutation_info == "dr_mutations_pyrazinamide") ######################################################################## # end of data extraction and cleaning for plots # ######################################################################## +#========================== +# Data for plot: assign as +# necessary +#=========================== + +# uncomment as necessary +#!!!!!!!!!!!!!!!!!!!!!!! +# REASSIGNMENT + +#================== +# data for ALL muts +#================== +plot_df = my_df +my_plot_name = 'lineage_dist_PS.svg' +#my_plot_name = 'lineage_dist_PS_comp.svg' + +#======================= +# data for dr_muts ONLY +#======================= +#plot_df = my_df_dr +#my_plot_name = 'lineage_dist_dr_PS.svg' +#my_plot_name = 'lineage_dist_dr_PS_comp.svg' +#!!!!!!!!!!!!!!!!!!!!!!! + #========================== # Plot: Lineage Distribution # x = mcsm_values, y = dist @@ -74,6 +104,7 @@ table(my_df$mutation_info) #=================== # Data for plots #=================== +table(plot_df$lineage); str(plot_df$lineage) # subset only lineages1-4 sel_lineages = c("lineage1" @@ -82,34 +113,29 @@ sel_lineages = c("lineage1" , "lineage4") # uncomment as necessary -df_lin = subset(my_df, subset = lineage %in% sel_lineages ) +df_lin = subset(plot_df, subset = lineage %in% sel_lineages ) # refactor df_lin$lineage = factor(df_lin$lineage) table(df_lin$lineage) #{RESULT: No of samples within lineage} #lineage1 lineage2 lineage3 lineage4 -#104 1293 264 1311 - -# when merged_df2_comp is used -#lineage1 lineage2 lineage3 lineage4 -#99 1275 263 1255 length(unique(df_lin$Mutationinformation)) #{Result: No. of unique mutations the 4 lineages contribute to} # sanity checks r1 = 2:5 # when merged_df2 used: because there is missing lineages -if(sum(table(my_df$lineage)[r1]) == nrow(df_lin)) { +if(sum(table(plot_df$lineage)[r1]) == nrow(df_lin)) { print ("sanity check passed: numbers match") } else{ print("Error!: check your numbers") } -#<<<<<<<<<<<<<<<<<<<<<<<<< +#!!!!!!!!!!!!!!!!!!!!!!!!! # REASSIGNMENT df <- df_lin -#<<<<<<<<<<<<<<<<<<<<<<<<< +#!!!!!!!!!!!!!!!!!!!!!!!!! rm(df_lin) @@ -117,8 +143,8 @@ rm(df_lin) # generate distribution plot of lineages #****************** # basic: could improve this! -library(plotly) -library(ggridges) +#library(plotly) +#library(ggridges) g <- ggplot(df, aes(x = ratioDUET)) + geom_density(aes(fill = DUET_outcome) @@ -129,64 +155,68 @@ g <- ggplot(df, aes(x = ratioDUET)) + ggplotly(g) # 2 : ggridges (good!) - my_ats = 15 # axis text size my_als = 20 # axis label size -fooNames=c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4') -names(fooNames)=c('lineage1', 'lineage2', 'lineage3', 'lineage4') +my_labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4') +names(my_labels) = c('lineage1', 'lineage2', 'lineage3', 'lineage4') # set output dir for plots getwd() setwd("~/git/Data/pyrazinamide/output/plots") getwd() -svg('lineage_dist_PS.svg') +# check plot name +my_plot_name -printFile = ggplot( df, aes(x = ratioDUET - , y = DUET_outcome) )+ +# output svg +svg(my_plot_name) +printFile = ggplot(df, aes(x = ratioDUET + , y = DUET_outcome))+ #printFile=geom_density_ridges_gradient( - geom_density_ridges_gradient( aes(fill = ..x..) + geom_density_ridges_gradient(aes(fill = ..x..) , scale = 3 , size = 0.3 ) + facet_wrap( ~lineage , scales = "free" # , switch = 'x' - , labeller = labeller(lineage = fooNames) ) + + , labeller = labeller(lineage = my_labels) ) + coord_cartesian( xlim = c(-1, 1) -# , ylim = c(0, 6) -# , clip = "off" - ) + - scale_fill_gradientn( colours = c("#f8766d", "white", "#00bfc4") +# , ylim = c(0, 6) +# , clip = "off" +) + + scale_fill_gradientn(colours = c("#f8766d", "white", "#00bfc4") , name = "DUET" ) + - theme( axis.text.x = element_text( size = my_ats + theme(axis.text.x = element_text(size = my_ats , angle = 90 , hjust = 1 , vjust = 0.4) -# , axis.text.y = element_text( size = my_ats -# , angle = 0 -# , hjust = 1 -# , vjust = 0) +# , axis.text.y = element_text(size = my_ats +# , angle = 0 +# , hjust = 1 +# , vjust = 0) , axis.text.y = element_blank() , axis.title.x = element_blank() , axis.title.y = element_blank() , axis.ticks.y = element_blank() , plot.title = element_blank() - , strip.text = element_text(size=my_als) - , legend.text = element_text(size=10) - , legend.title = element_text(size=my_als) -# , legend.position = c(0.3, 0.8) -# , legend.key.height = unit(1, 'mm') - ) + , strip.text = element_text(size = my_als) + , legend.text = element_text(size = 10) + , legend.title = element_text(size = my_als) +# , legend.position = c(0.3, 0.8) +# , legend.key.height = unit(1, 'mm') + ) print(printFile) dev.off() -#=!=!=!=!=!=! -# COMMENT: When you look at all mutations, the lineage differences disappear... +#=!=!=!=!=!=!=! +# COMMENT: Not much differences in the distributions +# when using merged_df2 or merged_df2_comp. +# Also, the lineage differences disappear when looking at all muts # The pattern we are interested in is possibly only for dr_mutations -#=!=!=!=!=!=! +#=!=!=!=!=!=!=! #=================================================== # COMPARING DISTRIBUTIONS