Combining dfs for PS and lig in one

This commit is contained in:
Tanushree Tunstall 2020-09-07 14:05:46 +01:00
parent 2ef767f046
commit b4affa0c94
6 changed files with 464 additions and 621 deletions

View file

@ -5,27 +5,30 @@ getwd()
#########################################################
# TASK: Basic lineage barplot showing numbers
# Output:
# Output: Basic barplot with lineage samples and mut count
##########################################################
# Installing and loading required packages
##########################################################
source("Header_TT.R")
require(data.table)
source("combining_two_df.R")
#==========================
source("combining_dfs_plotting.R")
# should return the following dfs, directories and variables
# df with NA:
# merged_df2
# merged_df3
# PS combined:
# 1) merged_df2
# 2) merged_df2_comp
# 3) merged_df3
# 4) merged_df3_comp
# df without NA:
# merged_df2_comp
# merged_df3_comp
# LIG combined:
# 5) merged_df2_lig
# 6) merged_df2_comp_lig
# 7) merged_df3_lig
# 8) merged_df3_comp_lig
# my_df_u
# 9) my_df_u
# 10) my_df_u_lig
cat(paste0("Directories imported:"
, "\ndatadir:", datadir
@ -38,13 +41,16 @@ cat(paste0("Variables imported:"
, "\ngene:", gene
, "\ngene_match:", gene_match
, "\nAngstrom symbol:", angstroms_symbol
, "\nNo. of cols:", df_ncols
, "\nNo. of duplicated muts:", dup_muts_nu
, "\nNA count for ORs:", na_count
, "\nNA count in df2:", na_count_df2
, "\nNA count in df3:", na_count_df3))
#=========================
#===========
# input
#===========
# output of combining_dfs_plotting.R
#=======
# output
#=======
@ -82,15 +88,11 @@ is.factor(my_df$lineage)
# fill = lineage
#============================
table(my_df$lineage)
#****************
# Plot: Lineage Barplot
#****************
as.data.frame(table(my_df$lineage))
#=============
# Data for plots
#=============
# REASSIGNMENT
df <- my_df
@ -111,18 +113,7 @@ sel_lineages = c("lineage1"
#, "lineage7"
)
df_lin = subset(df, subset = lineage %in% sel_lineages )
# FIXME: add sanity check for numbers.
# Currently done manually
############################################################
#########
# Data for barplot: Lineage barplot
# to show total samples and number of unique mutations
# within each lineage
##########
df_lin = subset(df, subset = lineage %in% sel_lineages)
# Create df with lineage information & no. of unique mutations
# per lineage and total samples within lineage
@ -193,7 +184,7 @@ printFile = g + geom_bar(stat = "identity"
, axis.title.y = element_text(size = my_als
, colour = 'black')
, legend.position = "top"
, legend.text = element_text(size = my_als) +
, legend.text = element_text(size = my_als)) +
#geom_text() +
geom_label(aes(label = value)
, size = 5
@ -212,7 +203,7 @@ printFile = g + geom_bar(stat = "identity"
, name=''
, labels=c('Mutations', 'Total Samples')) +
scale_x_discrete(breaks = c('lineage1', 'lineage2', 'lineage3', 'lineage4')
, labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4')))
, labels = c('Lineage 1', 'Lineage 2', 'Lineage 3', 'Lineage 4'))
print(printFile)
dev.off()