324 lines
12 KiB
R
324 lines
12 KiB
R
####################################################################################
|
|
# Input:
|
|
# Data
|
|
# plot_df: merged_df3 containing the OR column to use as y-axis or any other relevant column
|
|
|
|
# x_axis_colname = "position"
|
|
# symbol_mut_colname = "mutant_type"
|
|
# symbol_wt_colname = "wild_type"
|
|
# omit_snp_count = c(0, 1, 2...) can be used to filter positions with specified snp count
|
|
|
|
# my_logo_col = c("chemistry", "hydrophobicity", "clustalx", "taylor")
|
|
# --> if clustalx and taylor, set variable to black bg + white font
|
|
# --> if chemistry and hydrophobicity, then white bg + black font (reported as "grey theme")
|
|
|
|
# ...other params
|
|
|
|
# Returns: Logo plot from combined data containing all nsSNPs per position.
|
|
# Helps to see the overview of SNP diversity
|
|
|
|
# TODO: SHINY
|
|
# select/drop down: omit_snp_count
|
|
# select/drop down: my_logo_col
|
|
# should include WT??
|
|
|
|
# Make it hover over position and then get the corresponding data table!
|
|
####################################################################################
|
|
|
|
#==================
|
|
# logo data: OR
|
|
#==================
|
|
# NOTE: my_logo_col
|
|
|
|
# LogoPlotSnps: stacked logo plot of nsSNPs per position.
#
# Builds a single-column cowplot grid made of, top to bottom:
#   1. a ggseqlogo plot of the mutant residues observed at each position
#      (built from a residue x position count matrix),
#   2. a ggseqlogo plot of the wild-type residue at each position,
#   3. a strip of tiles, one per `position`, coloured with `ligD_colours`,
#   4. an empty spacer (NULL),
#   5. the plot returned by position_annotation().
#
# Arguments:
#   plot_df            data frame holding the columns named by
#                      x_axis_colname, symbol_mut_colname and
#                      symbol_wt_colname. It is first passed through
#                      generate_distance_colour_map(); the columns
#                      `position` and `ligD_colours` are read from the result.
#   x_axis_colname     name of the column used for the x axis of both logos.
#   symbol_mut_colname name of the column holding the mutant residue.
#   symbol_wt_colname  name of the column holding the wild-type residue.
#   omit_snp_count     occurrence counts to drop: every position whose number
#                      of rows equals one of these values is removed.
#                      c(0) means "no filtering". Non-numeric input (e.g. the
#                      string "1, 2") has its digit runs extracted.
#   my_logo_col        ggseqlogo colour scheme; one of "chemistry",
#                      "hydrophobicity" (white background, black text) or
#                      "clustalx", "taylor" (black background, white text).
#   x_lab              name given to the x scale of the colour strip.
#   y_lab              y-axis label of the mutant logo.
#   x_ats, y_ats       axis text sizes (mutant logo).
#   x_tangle, y_tangle axis text angles (mutant logo).
#   x_tts, y_tts       axis title sizes (mutant logo).
#   leg_pos, leg_dir   legend position / direction.
#   leg_ts, leg_tts    legend text / title sizes.
#   tpos0, tW0, tH0    accepted for interface compatibility; not used in
#                      this function's body.
#   debug              if TRUE, print diagnostic messages with cat().
#
# Returns: the object produced by cowplot::plot_grid().
#
# Side effects: prints the nsSNP position-frequency table and the selected
# colour scheme; setDT() converts the colour-mapped plot_df to a data.table
# by reference and adds a `mut_pos_occurrence` column to it.
LogoPlotSnps <- function(plot_df
                         , x_axis_colname = "position"
                         , symbol_mut_colname = "mutant_type"
                         , symbol_wt_colname = "wild_type"
                         , omit_snp_count = c(0) # can be 1, 2, etc.
                         , my_logo_col = "chemistry"
                         , x_lab = "Position"
                         , y_lab = "Count"
                         , x_ats = 14 # text size
                         , x_tangle = 90 # text angle
                         , y_ats = 22
                         , y_tangle = 0
                         , x_tts = 20 # title size
                         , y_tts = 23
                         , leg_pos = "none" # can be top, left, right and bottom or c(0.8, 0.9)
                         , leg_dir = "horizontal" # can be vertical or horizontal
                         , leg_ts = 20 # leg text size
                         , leg_tts = 16 # leg title size
                         , tpos0 = 0 # 0 is a magic number that does my sensible default
                         , tW0 = 1
                         , tH0 = 0.2
                         , debug=FALSE
) {
  #----------------------------------------------------------------------
  # Argument checks
  #----------------------------------------------------------------------
  dark_schemes <- c("clustalx", "taylor")
  light_schemes <- c("chemistry", "hydrophobicity")

  # Fail early with a clear message: any other value used to leave the theme
  # colours undefined and surface later as an "object not found" error.
  if (!is.character(my_logo_col) || length(my_logo_col) != 1 ||
      !(my_logo_col %in% c(dark_schemes, light_schemes))) {
    stop("my_logo_col must be one of: "
         , paste(c(light_schemes, dark_schemes), collapse = ", ")
         , call. = FALSE)
  }

  # Numeric AND integer vectors are used as-is; anything else is coerced to
  # character and every run of digits is taken as one count.
  if (!is.numeric(omit_snp_count)) {
    omit_chr <- as.character(omit_snp_count)
    omit_snp_count <- as.numeric(
      unlist(regmatches(omit_chr, gregexpr("[0-9]+", omit_chr)))
    )
  }

  ############################################
  # Data processing for logo plot for nsSNPS
  ############################################

  # Generate "ligand distance" colour map
  # NOTE(review): debug is hard-coded to TRUE here instead of forwarding this
  # function's `debug` argument; kept as in the original -- confirm intent.
  plot_df <- generate_distance_colour_map(plot_df, debug = TRUE)

  required_cols <- c(x_axis_colname, symbol_mut_colname, symbol_wt_colname
                     , "position", "ligD_colours")
  missing_cols <- setdiff(required_cols, names(plot_df))
  if (length(missing_cols) > 0) {
    stop("plot_df is missing required column(s): "
         , paste(missing_cols, collapse = ", ")
         , call. = FALSE)
  }

  # One colour per position, ordered by position. The colour strip always
  # reads the `position` column, independently of x_axis_colname.
  unique_colour_map <- unique(plot_df[, c("position", "ligD_colours")])
  unique_colour_map <- unique_colour_map[order(unique_colour_map$position), ]
  rownames(unique_colour_map) <- unique_colour_map$position

  # Factor version used as the plotting data for the colour strip.
  unique_colour_map2 <- unique_colour_map
  unique_colour_map2$position <- as.factor(unique_colour_map2$position)
  unique_colour_map2$ligD_colours <- as.factor(unique_colour_map2$ligD_colours)

  # Number of rows (nsSNPs) sharing each x-axis value, added by reference.
  # by = c(<character>) groups by the named column without eval(parse()).
  setDT(plot_df)[, mut_pos_occurrence := .N, by = c(x_axis_colname)]

  # Subset Data as specified by user
  cat("\nDisplaying nsSNP position frequency:\n")
  occurrence_tab <- table(plot_df$mut_pos_occurrence)
  print(occurrence_tab)

  total_snps <- sum(occurrence_tab)
  # Look the omitted counts up by VALUE (the table's names), not by index:
  # occurrence_tab[omit_snp_count] would pick the n-th entry of the table.
  is_omitted <- as.numeric(names(occurrence_tab)) %in% omit_snp_count
  n_omitted <- sum(occurrence_tab[is_omitted])

  no_filtering <- length(omit_snp_count) == 1 && isTRUE(omit_snp_count == 0)

  if (no_filtering) {
    my_data_snp <- plot_df
  } else {
    my_data_snp <- subset(plot_df, !(mut_pos_occurrence %in% omit_snp_count))
  }

  if (nrow(my_data_snp) == 0) {
    stop("No nsSNPs left to plot after omitting positions with snp count: "
         , paste(omit_snp_count, collapse = ", ")
         , call. = FALSE)
  }

  u <- unique(my_data_snp[[x_axis_colname]])
  max_mult_mut <- max(table(my_data_snp[[x_axis_colname]]))

  if (debug) {
    if (no_filtering) {
      cat("\nNo filtering requested:"
          , "\nTotal no. of nsSNPs:", total_snps
          , "\nTotal no. of nsSNPs omitted:", n_omitted
          , "\nDim of data:", dim(my_data_snp)
          , "\nNo. of positions:", length(u)
          , "\nMax no. of muts at any position:", max_mult_mut)
    } else {
      # Sanity check: rows kept by subset() vs. rows expected from the
      # frequency table.
      exp_nrows <- total_snps - n_omitted
      got_rows <- nrow(my_data_snp)
      if (got_rows == exp_nrows) {
        cat("\nPass: Position with the stated nsSNP frequency filtered:", omit_snp_count
            , "\nTotal no. of nsSNPs:", total_snps
            , "\nTotal no. of nsSNPs omitted:", n_omitted
            , "\nDim of subsetted data:", dim(my_data_snp)
            , "\nNo. of positions:", length(u)
            , "\nMax no. of muts at any position:", max_mult_mut)
      } else {
        cat("\nFAIL:Position with the stated nsSNP frequency COULD NOT be filtered..."
            , "\nExpected:", exp_nrows
            , "\nGot:", got_rows)
      }
    }
  }

  #--------------------------------------
  # matrix for mutant type
  # frequency of mutant type by position
  #---------------------------------------
  # unclass() turns the 2-d contingency table into a plain count matrix
  # (rows = residues, columns = positions).
  tab_mt <- unclass(table(my_data_snp[[symbol_mut_colname]]
                          , my_data_snp[[x_axis_colname]]))
  if (debug) {
    cat("\nPASS: Mutant matrix successfully created...")
  }

  #-------------------------------------
  # matrix for wild type
  # frequency of wild type by position
  #-------------------------------------
  # Important: remove wt duplicates, so each (position, wild type) pair is
  # counted once and every cell of the matrix is 0 or 1.
  wt <- unique(
    as.data.frame(my_data_snp)[, c(x_axis_colname, symbol_wt_colname), drop = FALSE]
  )
  tab_wt <- unclass(table(wt[[symbol_wt_colname]], wt[[x_axis_colname]]))

  if (debug) {
    if (identical(colnames(tab_mt), colnames(tab_wt)) &&
        identical(ncol(tab_mt), ncol(tab_wt))) {
      cat("\nPASS: Wild type matrix successfully created"
          , "\nDim of wt matrix:", dim(tab_wt)
          , "\nDim of mutant matrix:", dim(tab_mt)
          , "\n")
    }
  }

  ######################################
  # Theme colours for the chosen scheme
  ######################################
  if (my_logo_col %in% dark_schemes) {
    cat("\nSelected colour scheme:", my_logo_col
        , "\nUsing black theme\n")
    theme_bgc <- "black"
    xfont_bgc <- "white"
    yfont_bgc <- "white"
    xtt_col <- "white"
    ytt_col <- "white"
  } else {
    cat('\nSelected colour scheme:', my_logo_col
        , "\nUsing grey theme")
    theme_bgc <- "white"
    xfont_bgc <- "black"
    yfont_bgc <- "black"
    xtt_col <- "black"
    ytt_col <- "black"
  }

  #####################################
  # Generating logo plots for nsSNPs
  #####################################

  #-------------------
  # Mutant logo plot
  #-------------------
  mutant_logo <- ggseqlogo(tab_mt
                           , method = 'custom'
                           , col_scheme = my_logo_col
                           , seq_type = 'aa') +
    scale_x_continuous(breaks = seq_len(ncol(tab_mt))
                       , expand = c(0.01, 0)
                       , labels = colnames(tab_mt)) +
    # Breaks sit at 0..(max-1) but are labelled 1..max; together with the
    # vjust on axis.text.y this is kept exactly as in the original layout.
    scale_y_continuous(breaks = 0:(max_mult_mut - 1)
                       , labels = seq_len(max_mult_mut)
                       , limits = c(0, max_mult_mut)) +
    ylab(y_lab) +
    theme(text = element_text(family = "FreeSans")
          , legend.position = leg_pos
          , legend.direction = leg_dir
          , legend.title = element_text(size = leg_tts
                                        , colour = ytt_col)
          , legend.text = element_text(size = leg_ts)
          , axis.text.x = element_text(size = x_ats
                                       , angle = x_tangle
                                       , hjust = 1
                                       , vjust = 0.4
                                       , colour = xfont_bgc)
          , axis.text.y = element_text(size = y_ats
                                       , angle = y_tangle
                                       , hjust = 1
                                       , vjust = -1.0
                                       , colour = yfont_bgc)
          , axis.title.x = element_text(size = x_tts
                                        , colour = xtt_col)
          , axis.title.y = element_text(size = y_tts
                                        , colour = ytt_col)
          , plot.background = element_rect(fill = theme_bgc, colour = NA))

  #------------------
  # Wild logo plot
  #------------------
  wild_logo <- ggseqlogo(tab_wt
                         , method = 'custom'
                         , col_scheme = my_logo_col
                         , seq_type = 'aa') +
    scale_x_continuous(breaks = seq_len(ncol(tab_wt))
                       , expand = c(0.01, 0)
                       , labels = as.factor(colnames(tab_wt))) +
    theme(text = element_text(family = "FreeSans")
          , legend.position = "none"
          , axis.text.x = element_blank()
          , axis.text.y = element_blank()
          , axis.title.x = element_blank()
          , axis.title.y = element_blank()
          , plot.background = element_rect(fill = theme_bgc, colour = NA)) +
    labs(x = NULL, y = NULL)

  #----------------------------
  # Distance colour tile strip
  #----------------------------
  # NOTE(review): this strip and position_annotation() below are built from
  # the unfiltered positions in plot_df, while the logos use my_data_snp;
  # when omit_snp_count removes positions the panels may not line up --
  # confirm whether that is intended.
  distance_strip <- ggplot(data = unique_colour_map2
                           , aes(x = factor(position)
                                 , y = 0 # heat-mapped distance tiles along the bot
                                 , fill = position
                                 , colour = position
                                 , linetype = "blank")) +
    geom_tile() +
    theme(axis.text.x = element_blank()
          , axis.ticks.x = element_blank()
          , axis.text.y = element_blank()
          , axis.ticks.y = element_blank()
          , axis.title.x = element_blank()
          , legend.text = element_text(size = leg_ts)
          , legend.position = leg_pos
          , legend.direction = leg_dir
          , plot.background = element_rect(fill = theme_bgc, colour = NA)
          , plot.margin = margin(t = 0)
          , panel.grid = element_blank()
          , panel.background = element_rect(fill = theme_bgc, colour = NA)) +
    scale_x_discrete(x_lab, labels = unique_colour_map$position) +
    # One manual colour per position, in position order.
    scale_color_manual(values = unique_colour_map$ligD_colours) +
    scale_fill_manual(values = unique_colour_map$ligD_colours) +
    labs(y = NULL)

  #----------------------------
  # Assemble the final figure
  #----------------------------
  # rel_heights has one entry per grid item; the NULL spacer is given a
  # negative height (presumably to pull the annotation up towards the strip).
  cowplot::plot_grid(mutant_logo
                     , wild_logo
                     , distance_strip
                     , NULL
                     , position_annotation(plot_df, bg = theme_bgc)
                     , ncol = 1
                     , align = "v"
                     , axis = 'lr'
                     , rel_heights = c(7/10, 2/7, 1/7, -0.1, 0.5/7))
}
|
|
|
|
#LogoPlotSnps(small_df3)
|