Combining dfs for PS and lig in one
This commit is contained in:
parent
93e19e3186
commit
739e9eadf8
6 changed files with 464 additions and 621 deletions
|
@ -5,27 +5,30 @@ getwd()
|
|||
#########################################################
|
||||
# TASK: Basic lineage barplot showing numbers
|
||||
|
||||
# Output:
|
||||
# Output: Basic barplot with lineage samples and mut count
|
||||
|
||||
##########################################################
|
||||
# Installing and loading required packages
|
||||
##########################################################
|
||||
source("Header_TT.R")
|
||||
require(data.table)
|
||||
source("combining_two_df.R")
|
||||
|
||||
#==========================
|
||||
source("combining_dfs_plotting.R")
|
||||
# should return the following dfs, directories and variables
|
||||
|
||||
# df with NA:
|
||||
# merged_df2
|
||||
# merged_df3
|
||||
# PS combined:
|
||||
# 1) merged_df2
|
||||
# 2) merged_df2_comp
|
||||
# 3) merged_df3
|
||||
# 4) merged_df3_comp
|
||||
|
||||
# df without NA:
|
||||
# merged_df2_comp
|
||||
# merged_df3_comp
|
||||
# LIG combined:
|
||||
# 5) merged_df2_lig
|
||||
# 6) merged_df2_comp_lig
|
||||
# 7) merged_df3_lig
|
||||
# 8) merged_df3_comp_lig
|
||||
|
||||
# my_df_u
|
||||
# 9) my_df_u
|
||||
# 10) my_df_u_lig
|
||||
|
||||
cat(paste0("Directories imported:"
|
||||
, "\ndatadir:", datadir
|
||||
|
@ -38,13 +41,16 @@ cat(paste0("Variables imported:"
|
|||
, "\ngene:", gene
|
||||
, "\ngene_match:", gene_match
|
||||
, "\nAngstrom symbol:", angstroms_symbol
|
||||
, "\nNo. of cols:", df_ncols
|
||||
, "\nNo. of duplicated muts:", dup_muts_nu
|
||||
, "\nNA count for ORs:", na_count
|
||||
, "\nNA count in df2:", na_count_df2
|
||||
, "\nNA count in df3:", na_count_df3))
|
||||
|
||||
#=========================
|
||||
#===========
|
||||
# input
|
||||
#===========
|
||||
# output of combining_dfs_plotting.R
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
|
@ -82,15 +88,11 @@ is.factor(my_df$lineage)
|
|||
# fill = lineage
|
||||
#============================
|
||||
table(my_df$lineage)
|
||||
|
||||
#****************
|
||||
# Plot: Lineage Barplot
|
||||
#****************
|
||||
as.data.frame(table(my_df$lineage))
|
||||
|
||||
#=============
|
||||
# Data for plots
|
||||
#=============
|
||||
|
||||
# REASSIGNMENT
|
||||
df <- my_df
|
||||
|
||||
|
@ -111,18 +113,7 @@ sel_lineages = c("lineage1"
|
|||
#, "lineage7"
|
||||
)
|
||||
|
||||
df_lin = subset(df, subset = lineage %in% sel_lineages )
|
||||
|
||||
#FIXME; add sanity check for numbers.
|
||||
# Done this manually
|
||||
|
||||
############################################################
|
||||
|
||||
#########
|
||||
# Data for barplot: Lineage barplot
|
||||
# to show total samples and number of unique mutations
|
||||
# within each lineage
|
||||
##########
|
||||
df_lin = subset(df, subset = lineage %in% sel_lineages)
|
||||
|
||||
# Create df with lineage information & no. of unique mutations
|
||||
# per lineage and total samples within lineage
|
||||
|
@ -193,7 +184,7 @@ printFile = g + geom_bar(stat = "identity"
|
|||
, axis.title.y = element_text(size = my_als
|
||||
, colour = 'black')
|
||||
, legend.position = "top"
|
||||
, legend.text = element_text(size = my_als) +
|
||||
, legend.text = element_text(size = my_als)) +
|
||||
#geom_text() +
|
||||
geom_label(aes(label = value)
|
||||
, size = 5
|
||||
|
@ -212,7 +203,7 @@ printFile = g + geom_bar(stat = "identity"
|
|||
, name=''
|
||||
, labels=c('Mutations', 'Total Samples')) +
|
||||
scale_x_discrete(breaks = c('lineage1', 'lineage2', 'lineage3', 'lineage4')
|
||||
, labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4')))
|
||||
, labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4'))
|
||||
|
||||
print(printFile)
|
||||
dev.off()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue