adding plots as I tidy and generate
This commit is contained in:
parent
acc6a42880
commit
b2b95b80fd
2 changed files with 346 additions and 0 deletions
130
scripts/plotting/Header_TT.R
Normal file
130
scripts/plotting/Header_TT.R
Normal file
|
@ -0,0 +1,130 @@
|
|||
#########################################################
|
||||
### A) Installing and loading required packages
|
||||
#########################################################
|
||||
#lib_loc = "/usr/local/lib/R/site-library")
|
||||
|
||||
#if (!require("gplots")) {
|
||||
# install.packages("gplots", dependencies = TRUE)
|
||||
# library(gplots)
|
||||
#}
|
||||
|
||||
#if (!require("tidyverse")) {
|
||||
# install.packages("tidyverse", dependencies = TRUE)
|
||||
# library(tidyverse)
|
||||
#}
|
||||
|
||||
if (!require("ggplot2")) {
|
||||
install.packages("ggplot2", dependencies = TRUE)
|
||||
library(ggplot2)
|
||||
}
|
||||
|
||||
if (!require("plotly")) {
|
||||
install.packages("plotly", dependencies = TRUE)
|
||||
library(plotly)
|
||||
}
|
||||
|
||||
if (!require("cowplot")) {
|
||||
install.packages("copwplot", dependencies = TRUE)
|
||||
library(cowplot)
|
||||
}
|
||||
|
||||
if (!require("ggcorrplot")) {
|
||||
install.packages("ggcorrplot", dependencies = TRUE)
|
||||
library(ggcorrplot)
|
||||
}
|
||||
|
||||
if (!require("ggpubr")) {
|
||||
install.packages("ggpubr", dependencies = TRUE)
|
||||
library(ggpubr)
|
||||
}
|
||||
|
||||
if (!require("RColorBrewer")) {
|
||||
install.packages("RColorBrewer", dependencies = TRUE)
|
||||
library(RColorBrewer)
|
||||
}
|
||||
|
||||
if (!require ("GOplot")) {
|
||||
install.packages("GOplot")
|
||||
library(GOplot)
|
||||
}
|
||||
|
||||
if(!require("VennDiagram")) {
|
||||
install.packages("VennDiagram", dependencies = T)
|
||||
library(VennDiagram)
|
||||
}
|
||||
|
||||
if(!require("scales")) {
|
||||
install.packages("scales", dependencies = T)
|
||||
library(scales)
|
||||
}
|
||||
|
||||
if(!require("plotrix")) {
|
||||
install.packages("plotrix", dependencies = T)
|
||||
library(plotrix)
|
||||
}
|
||||
|
||||
if(!require("stats")) {
|
||||
install.packages("stats", dependencies = T)
|
||||
library(stats)
|
||||
}
|
||||
|
||||
if(!require("stats4")) {
|
||||
install.packages("stats4", dependencies = T)
|
||||
library(stats4)
|
||||
}
|
||||
|
||||
if(!require("data.table")) {
|
||||
install.packages("data.table")
|
||||
library(data.table)
|
||||
}
|
||||
|
||||
if (!require("PerformanceAnalytics")){
|
||||
install.packages("PerformanceAnalytics", dependencies = T)
|
||||
library(PerformaceAnalytics)
|
||||
}
|
||||
|
||||
if (!require ("GGally")){
|
||||
install.packages("GGally")
|
||||
library(GGally)
|
||||
}
|
||||
|
||||
if (!require ("corrr")){
|
||||
install.packages("corrr")
|
||||
library(corrr)
|
||||
}
|
||||
|
||||
if (!require ("psych")){
|
||||
install.packages("psych")
|
||||
library(psych)
|
||||
}
|
||||
|
||||
if (!require ("dplyr")){
|
||||
install.packages("dplyr")
|
||||
library(dplyr)
|
||||
}
|
||||
|
||||
if (!require ("compare")){
|
||||
install.packages("compare")
|
||||
library(compare)
|
||||
}
|
||||
|
||||
if (!require ("arsenal")){
|
||||
install.packages("arsenal")
|
||||
library(arsenal)
|
||||
}
|
||||
|
||||
|
||||
####TIDYVERSE
|
||||
# Install
|
||||
#if(!require(devtools)) install.packages("devtools")
|
||||
#devtools::install_github("kassambara/ggcorrplot")
|
||||
|
||||
library(ggcorrplot)
|
||||
|
||||
|
||||
###for PDB files
|
||||
#install.packages("bio3d")
|
||||
if(!require(bio3d)){
|
||||
install.packages("bio3d")
|
||||
library(bio3d)
|
||||
}
|
216
scripts/plotting/basic_barplots_PS.R
Normal file
216
scripts/plotting/basic_barplots_PS.R
Normal file
|
@ -0,0 +1,216 @@
|
|||
getwd()
|
||||
setwd('~/git/LSHTM_analysis/scripts/plotting')
|
||||
getwd()
|
||||
|
||||
#########################################################
|
||||
# TASK:
|
||||
|
||||
#########################################################
|
||||
|
||||
|
||||
########################################################################
|
||||
# Installing and loading required packages and functions #
|
||||
########################################################################
|
||||
#source("Header_TT.R")
|
||||
#https://stackoverflow.com/questions/38851592/r-append-column-in-a-dataframe-with-frequency-count-based-on-two-columns
|
||||
|
||||
########################################################################
|
||||
# Read file: call script for combining df for PS #
|
||||
########################################################################
|
||||
|
||||
#?????????????
|
||||
#
|
||||
|
||||
#%% variable assignment: input and output paths & filenames
|
||||
drug = 'pyrazinamide'
|
||||
gene = 'pncA'
|
||||
gene_match = paste0(gene,'_p.')
|
||||
cat(gene_match)
|
||||
|
||||
#=============
|
||||
# directories
|
||||
#=============
|
||||
datadir = paste0('~/git/Data')
|
||||
indir = paste0(datadir, '/', drug, '/input')
|
||||
outdir = paste0('~/git/Data', '/', drug, '/output')
|
||||
|
||||
#======
|
||||
# input
|
||||
#======
|
||||
#in_filename = 'mcsm_complex1_normalised.csv'
|
||||
in_filename_params = paste0(tolower(gene), '_all_params.csv')
|
||||
infile_params = paste0(outdir, '/', in_filename_params)
|
||||
cat(paste0('Input file 1:', infile_params) )
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
# plot 1
|
||||
duet_basic_bp = 'basic_barplots_DUET.svg'
|
||||
outfile_duet_basic_bp = paste0(outdir, '/plots/', duet_basic_bp)
|
||||
|
||||
# plot 2
|
||||
duet_pos_count = 'position_count_DUET.svg'
|
||||
outfile_duet_pos_count = paste0(outdir, '/plots/', duet_pos_count)
|
||||
|
||||
#%%===============================================================
|
||||
###########################
|
||||
# Read file: struct params
|
||||
###########################
|
||||
cat('Reading struct params including mcsm:', in_filename_params)
|
||||
|
||||
my_df = read.csv(infile_params
|
||||
#, stringsAsFactors = F
|
||||
, header = T)
|
||||
|
||||
cat('Input dimensions:', dim(my_df))
|
||||
|
||||
# clear variables
|
||||
rm(in_filename_params, infile_params)
|
||||
|
||||
# quick checks
|
||||
colnames(my_df)
|
||||
str(my_df)
|
||||
|
||||
# check for duplicate mutations
|
||||
if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){
|
||||
cat(paste0('CAUTION:', ' Duplicate mutations identified'
|
||||
, '\nExtracting these...'))
|
||||
dup_muts = my_df[duplicated(my_df$mutationinformation),]
|
||||
dup_muts_nu = length(unique(dup_muts$mutationinformation))
|
||||
cat(paste0('\nDim of duplicate mutation df:', nrow(dup_muts)
|
||||
, '\nNo. of unique duplicate mutations:', dup_muts_nu
|
||||
, '\n\nExtracting df with unique mutations only'))
|
||||
my_df_u = my_df[!duplicated(my_df$mutationinformation),]
|
||||
}else{
|
||||
cat(paste0('No duplicate mutations detected'))
|
||||
my_df_u = my_df
|
||||
}
|
||||
|
||||
#upos = unique(my_df_u$position)
|
||||
cat('Dim of clean df:'); cat(dim(my_df_u))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(unique(my_df_u$position)))
|
||||
|
||||
########################################################################
|
||||
# end of data extraction and cleaning for plots #
|
||||
########################################################################
|
||||
|
||||
#================
|
||||
# Data for plots
|
||||
#================
|
||||
# REASSIGNMENT as necessary
|
||||
df = my_df_u
|
||||
rm(my_df)
|
||||
|
||||
# sanity checks
|
||||
str(df)
|
||||
library(ggplot2)
|
||||
#%%=======================================================================
|
||||
#****************
|
||||
# Plot 1:Count of stabilising and destabilsing muts
|
||||
#****************
|
||||
#svg('basic_barplots_DUET.svg')
|
||||
svg(outfile_duet_basic_bp)
|
||||
print(paste0('plot filename:', duet_basic_bp))
|
||||
|
||||
my_ats = 25 # axis text size
|
||||
my_als = 22 # axis label size
|
||||
|
||||
theme_set(theme_grey())
|
||||
|
||||
# uncomment as necessary for either directly outputting results or
|
||||
# printing on the screen
|
||||
g = ggplot(df, aes(x = duet_outcome))
|
||||
prinfFile = g + geom_bar(aes(fill = duet_outcome)
|
||||
, show.legend = TRUE) +
|
||||
geom_label(stat = "count"
|
||||
, aes(label = ..count..)
|
||||
, color = "black"
|
||||
, show.legend = FALSE
|
||||
, size = 10) +
|
||||
theme(axis.text.x = element_blank()
|
||||
, axis.title.x = element_blank()
|
||||
, axis.title.y = element_text(size=my_als)
|
||||
, axis.text.y = element_text(size = my_ats)
|
||||
, legend.position = c(0.73,0.8)
|
||||
, legend.text = element_text(size=my_als-2)
|
||||
, legend.title = element_text(size=my_als)
|
||||
, plot.title = element_blank()) +
|
||||
labs(title = ""
|
||||
, y = "Number of SNPs"
|
||||
#, fill='DUET Outcome'
|
||||
) +
|
||||
scale_fill_discrete(name = "DUET Outcome"
|
||||
, labels = c("Destabilising", "Stabilising"))
|
||||
|
||||
print(prinfFile)
|
||||
dev.off()
|
||||
#%%=======================================================================
|
||||
#****************
|
||||
# Plot 2: frequency of positions
|
||||
#****************
|
||||
library(data.table)
|
||||
#setDT(df)[, .(pos_count := .N), by = .(position)]
|
||||
setDT(df)[, pos_count := .N, by = .(position)]
|
||||
|
||||
# this is cummulative
|
||||
table(df$pos_count)
|
||||
|
||||
# use group by on this
|
||||
library(dplyr)
|
||||
|
||||
snpsBYpos_df <- df %>%
|
||||
group_by(position) %>%
|
||||
summarize(snpsBYpos = mean(pos_count))
|
||||
|
||||
table(snpsBYpos_df$snpsBYpos)
|
||||
|
||||
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
# FIXME, get this mutation_info, perhaps useful!
|
||||
foo = select(df, mutationinformation
|
||||
, wild_pos
|
||||
, wild_type
|
||||
, mutant_type
|
||||
#, mutation_info # comes from meta data, notused yet
|
||||
, position
|
||||
, pos_count)
|
||||
|
||||
#write.csv(foo, "/pos_count_freq.csv")
|
||||
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
|
||||
#svg('position_count_DUET.svg')
|
||||
svg(outfile_duet_pos_count)
|
||||
print(paste0('plot filename:', outfile_duet_pos_count))
|
||||
|
||||
my_ats = 25 # axis text size
|
||||
my_als = 22 # axis label size
|
||||
|
||||
my_x = sort(unique(snpsBYpos_df$snpsBYpos))
|
||||
|
||||
|
||||
g = ggplot(snpsBYpos_df, aes(x = snpsBYpos))
|
||||
prinfFile = g + geom_bar(aes (alpha = 0.5)
|
||||
, show.legend = FALSE) +
|
||||
scale_x_continuous(breaks = unique(snpsBYpos_df$snpsBYpos)) +
|
||||
#scale_x_continuous(breaks = my_x) +
|
||||
geom_label(stat = "count", aes(label = ..count..)
|
||||
, color = "black"
|
||||
, size = 10) +
|
||||
theme(axis.text.x = element_text(size = my_ats
|
||||
, angle = 0)
|
||||
, axis.text.y = element_text(size = my_ats
|
||||
, angle = 0
|
||||
, hjust = 1)
|
||||
, axis.title.x = element_text(size = my_als)
|
||||
, axis.title.y = element_text(size = my_als)
|
||||
, plot.title = element_blank()) +
|
||||
|
||||
labs(x = "Number of SNPs"
|
||||
, y = "Number of Sites")
|
||||
|
||||
print(prinfFile)
|
||||
dev.off()
|
||||
########################################################################
|
||||
# end of DUET barplots
|
||||
########################################################################
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue