import commit
This commit is contained in:
commit
bccfe68192
39 changed files with 6837 additions and 0 deletions
215
mcsm_analysis/pyrazinamide/scripts/plotting/basic_barplots_LIG.R
Normal file
215
mcsm_analysis/pyrazinamide/scripts/plotting/basic_barplots_LIG.R
Normal file
|
@ -0,0 +1,215 @@
|
|||
# Move into the plotting scripts directory so that the relative paths used
# by the source() calls below resolve; the directory is echoed before and after.
getwd()
setwd(dir = "~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting")
getwd()

########################################################################
# Installing and loading required packages                             #
########################################################################

# shared header script (presumably loads the packages used below -- verify)
source(file = "../Header_TT.R")

#require(data.table)
#require(dplyr)

########################################################################
# Read file: call script for combining df for lig                      #
########################################################################

# creates the merged data frames used for the ligand plots
source(file = "../combining_two_df_lig.R")
|
||||
|
||||
#---------------------- PAY ATTENTION
|
||||
# the above changes the working dir
|
||||
#[1] "git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts"
|
||||
#---------------------- PAY ATTENTION
|
||||
|
||||
#==========================
|
||||
# This will return:
|
||||
|
||||
# df with NA:
|
||||
# merged_df2
|
||||
# merged_df3
|
||||
|
||||
# df without NA:
|
||||
# merged_df2_comp
|
||||
# merged_df3_comp
|
||||
#===========================
|
||||
|
||||
###########################
|
||||
# Data for Lig plots
|
||||
# you need merged_df3
|
||||
# or
|
||||
# merged_df3_comp
|
||||
# since these have unique SNPs
|
||||
# I prefer to use the merged_df3
|
||||
# because using the _comp dataset means
|
||||
# we lose some muts and at this level, we should use
|
||||
# as much info as available
|
||||
###########################
|
||||
|
||||
# uncomment as necessary
|
||||
#<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
# REASSIGNMENT
|
||||
# Dataset used for every plot below; swap in the commented line to use the
# complete-case dataset instead.
my_df <- merged_df3
#my_df <- merged_df3_comp
#<<<<<<<<<<<<<<<<<<<<<<<<<

# the merged data frames are not needed once my_df has been chosen
rm(list = c("merged_df2", "merged_df2_comp", "merged_df3", "merged_df3_comp"))

# quick look at the available columns and their types
colnames(my_df)
str(my_df)
|
||||
|
||||
# Ensure correct data type in columns to plot: need to be factor
|
||||
# sanity check
|
||||
# Lig_outcome is the column mapped to x/fill in the barplot, so it must be a
# factor. The first call shows the incoming type, the last one confirms the
# conversion.
is.factor(my_df$Lig_outcome)
# Column names are case-sensitive: convert 'Lig_outcome' itself rather than
# reading from a differently-cased 'lig_outcome' column.
my_df$Lig_outcome <- as.factor(my_df$Lig_outcome)
is.factor(my_df$Lig_outcome)
|
||||
#[1] TRUE
|
||||
|
||||
#############################
|
||||
# Extra sanity check:
|
||||
# for mcsm_lig ONLY
|
||||
# Dis_lig_Ang should be <10
|
||||
#############################
|
||||
|
||||
# Report whether the largest ligand distance in my_df is below the 10 Ang cutoff.
# Only a message is printed in either case; execution continues regardless.
if (max(my_df$Dis_lig_Ang) >= 10) {
  print ("Error: data should be filtered to be within 10Ang")
} else {
  print ("Sanity check passed: lig data is <10Ang")
}
|
||||
|
||||
########################################################################
|
||||
# end of data extraction and cleaning for plots #
|
||||
########################################################################
|
||||
|
||||
#===========================
|
||||
# Plot: Basic barplots
|
||||
#===========================
|
||||
|
||||
#===================
|
||||
# Data for plots
|
||||
#===================
|
||||
|
||||
#<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
# REASSIGNMENT
|
||||
df <- my_df
#<<<<<<<<<<<<<<<<<<<<<<<<<
rm(list = "my_df")

# sanity checks
str(df)

# 'Position' and 'position' are expected to hold the same values; report the
# outcome of the comparison without stopping the script.
if (!identical(df$Position, df$position)) {
  print("Error!: Check column names and info contained")
} else {
  print("Sanity check passed: Columns 'Position' and 'position' are identical")
}
|
||||
|
||||
#****************
|
||||
# generate plot: No of stabilising and destabilising muts
|
||||
#****************
|
||||
# set output dir for plots
|
||||
# Switch to the plot output directory (echoed before and after the change);
# the svg file below is written relative to it.
getwd()
setwd("~/git/Data/pyrazinamide/output/plots")
getwd()

# open the svg device; it is closed by dev.off() once the plot is printed
svg("basic_barplots_LIG.svg")

my_ats <- 25 # axis text size
my_als <- 22 # axis label size

# Barplot of the number of SNPs per ligand outcome, with the count shown as a
# label on each bar.
# The plot is stored in 'prinfFile' and printed explicitly so that it is
# written to the svg device and print(prinfFile) refers to an existing object.
# To draw on the screen instead, swap the two commented/uncommented lines.
g <- ggplot(df, aes(x = Lig_outcome))
prinfFile <- g + geom_bar(
#g + geom_bar(
  aes(fill = Lig_outcome)
  , show.legend = TRUE
) + geom_label(
  stat = "count"
  , aes(label = ..count..)
  , color = "black"
  , show.legend = FALSE
  , size = 10) + theme(
    axis.text.x = element_blank()
    , axis.title.x = element_blank()
    , axis.title.y = element_text(size = my_als)
    , axis.text.y = element_text(size = my_ats)
    , legend.position = c(0.73, 0.8)
    , legend.text = element_text(size = my_als - 2)
    , legend.title = element_text(size = my_als)
    , plot.title = element_blank()
  ) + labs(
    title = ""
    , y = "Number of SNPs"
    #, fill='Ligand Outcome'
  ) + scale_fill_discrete(name = "Ligand Outcome"
                          , labels = c("Destabilising", "Stabilising"))
print(prinfFile)
dev.off()
|
||||
|
||||
#****************
|
||||
# generate plot: No of positions
|
||||
#****************
|
||||
#get freq count of positions so you can subset freq<1
|
||||
#require(data.table)
|
||||
# Convert df to a data.table by reference, then add pos_count: the number of
# rows sharing each Position.
setDT(df)
df[, pos_count := .N, by = .(Position)] #169, 36

head(df$pos_count)
table(df$pos_count)
# this is cumulative (one entry per row of df, not per position)
#1  2  3  4  5  6
#5 24 36 56 30 18

# collapse to one row per position; pos_count is constant within a position,
# so its mean is simply that count
snpsBYpos_df <- summarize(
  group_by(df, Position),
  snpsBYpos = mean(pos_count)
)

table(snpsBYpos_df$snpsBYpos)
#1  2  3  4  5  6
#5 12 12 14  6  3
# this is what will get plotted
|
||||
# this is what will get plotted
|
||||
|
||||
# open the svg device for the per-site SNP count plot; closed by dev.off() below
svg("position_count_LIG.svg")

my_ats <- 25 # axis text size
my_als <- 22 # axis label size

# Barplot of how many sites carry 1, 2, 3, ... SNPs, with the count shown as a
# label on each bar. To draw on the screen instead of the file, use the
# commented 'g + geom_bar(' form without the assignment.
g <- ggplot(data = snpsBYpos_df, mapping = aes(x = snpsBYpos))
prinfFile <- g +
  #g +
  geom_bar(mapping = aes(alpha = 0.5), show.legend = FALSE) +
  geom_label(
    mapping = aes(label = ..count..),
    stat = "count",
    color = "black",
    size = 10
  ) +
  theme(
    axis.text.x = element_text(size = my_ats, angle = 0),
    axis.text.y = element_text(size = my_ats, angle = 0, hjust = 1),
    axis.title.x = element_text(size = my_als),
    axis.title.y = element_text(size = my_als),
    plot.title = element_blank()
  ) +
  labs(x = "Number of SNPs", y = "Number of Sites")
print(prinfFile)
dev.off()
|
||||
########################################################################
|
||||
# end of Lig barplots #
|
||||
########################################################################
|
||||
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue