From da9075458f660ffe9f2167a5554df1e3910ee333 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Mon, 14 Sep 2020 15:13:52 +0100 Subject: [PATCH] saving logoplot attempts --- meta_data_analysis/run_mutate.sh | 9 ++- scripts/plotting/Header_TT.R | 2 + scripts/plotting/logo_plot.R | 135 +++++++++++++++++++++++++------ 3 files changed, 119 insertions(+), 27 deletions(-) diff --git a/meta_data_analysis/run_mutate.sh b/meta_data_analysis/run_mutate.sh index edfb436..1865a5e 100755 --- a/meta_data_analysis/run_mutate.sh +++ b/meta_data_analysis/run_mutate.sh @@ -1,8 +1,15 @@ #!/bin/bash +#https://www.biostars.org/p/336891/ +#python Mutate.py -v -o /path/to/output.fasta mutation_map_file.csv input.fasta + +# pnca_all_muts_msa_FIXME: This should be formatted like this from python script +# change to a cmd script that takes this "prefix" as an input +for i in $(cat pnca_all_muts_msa_FIXME.csv); do echo "3PL1,${i}"; done > pnca_copy.txt + # make sure there is no new line at the end of the mutation file (snps.csv) #python3 Mutate.py -v -o /home/tanu/git/Data/pyrazinamide/input/output.fasta mut_map.csv 3pl1.fasta.txt -python3 mutate.py -v -o /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt /home/tanu/git/Data/pyrazinamide/output/pnca_all_muts_msa.csv /home/tanu/git/Data/pyrazinamide/input/3pl1.fasta.txt +python3 mutate.py -v -o /home/tanu/git/Data/pyrazinamide/output/pnca_msa.txt /home/tanu/git/Data/pyrazinamide/output/pnca_all_muts_msa.csv /home/tanu/git/Data/pyrazinamide/input/pnca_fasta.txt # remove fasta style header lines in the output i.e # lines beginning with '>' so the file is just the mutated seqs diff --git a/scripts/plotting/Header_TT.R b/scripts/plotting/Header_TT.R index aa00f66..4bcd7d8 100755 --- a/scripts/plotting/Header_TT.R +++ b/scripts/plotting/Header_TT.R @@ -139,3 +139,5 @@ if(!require(bio3d)){ library(bio3d) } +#install.packages("protr") +library(protr) diff --git a/scripts/plotting/logo_plot.R b/scripts/plotting/logo_plot.R index 5b91392..a96cdb9 100644 --- a/scripts/plotting/logo_plot.R +++ b/scripts/plotting/logo_plot.R @@ -57,12 +57,13 @@ plot_logo_plot = paste0(plotdir,"/", logo_plot) # REASSIGNMENT #my_data = merged_df2 #my_data = merged_df2_comp -#my_data = merged_df3 +my_data = merged_df3 my_data = merged_df3_comp #%%%%%%%%%%%%%%%%%%%%%%%%%% # delete variables not required -rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) +rm(merged_df2, merged_df2_comp) +#rm(merged_df3, merged_df3_comp) # quick checks colnames(my_data) @@ -107,40 +108,63 @@ foo = my_data_snp[, c("position", "mutant_type","duet_scaled", "or_mychisq" , "mut_prop_polarity", "mut_prop_water") ] my_data_snp$log10or = log10(my_data_snp$or_mychisq) -bar = my_data_snp[, c("position", "mutant_type", "or_mychisq", "log10or")] +logo_data = my_data_snp[, c("position", "mutant_type", "or_mychisq", "log10or")] + +logo_data_or = my_data_snp[, c("position", "mutant_type", "or_mychisq")] +wide_df_or <- logo_data_or %>% spread(position, or_mychisq, fill = 0.0) -bar_or = my_data_snp[, c("position", "mutant_type", "or_mychisq")] -wide_df_or <- bar_or %>% spread(position, or_mychisq, fill = 0) wide_df_or = as.matrix(wide_df_or) rownames(wide_df_or) = wide_df_or[,1] wide_df_or = wide_df_or[,-1] +position_or = as.numeric(colnames(wide_df_or)) + +#=========================================== +#custom height (OR) logo plot: CORRECT x-axis labelling +#============================================ # custom height (OR) logo plot: yayy works + ggseqlogo(wide_df_or, method="custom", seq_type="aa") + ylab("my custom height") + - theme(legend.position = "bottom" - , axis.text.x = element_text(size = 11 - , angle = 90 - , hjust = 1 - , vjust = 0.4) - , axis.text.y = element_text(size = 15 - , angle = 0 - , hjust = 1 - , vjust = 0))+ - labs(title = "AA logo plot" - , x = "Wild-type position" - , y = "OR") + theme(legend.position = "bottom" + , axis.text.x = element_text(size = 11 + , angle = 90 + , hjust = 1 + , vjust = 0.4) + , axis.text.y = element_text(size = 15 + , angle = 0 + , hjust = 1 + , vjust = 0))+ + scale_x_discrete("Position" + #, breaks + , labels = position_or + , limits = factor(1:length(position_or))) + + ylab("Odds Ratio") + #%% end of logo plot with OR as height + #======================================================================= # extracting data with log10R -bar_logor = my_data_snp[, c("position", "mutant_type", "log10or")] -wide_df_logor <- bar_logor %>% spread(position, log10or, fill = 0) +logo_data_logor = my_data_snp[, c("position", "mutant_type", "log10or")] +wide_df_logor <- logo_data_logor %>% spread(position, log10or, fill = 0.0) wide_df_logor = as.matrix(wide_df_logor) + rownames(wide_df_logor) = wide_df_logor[,1] -wide_df_logor = wide_df_logor[,-1] +wide_df_logor = subset(wide_df_logor, select = -c(1) ) +colnames(wide_df_logor) +wide_df_logor_m = data.matrix(wide_df_logor) + +rownames(wide_df_logor_m) +colnames(wide_df_logor_m) + +# FIXME +#my_ylim_up = as.numeric(max(wide_df_logor_m)) * 5 +#my_ylim_low = as.numeric(min(wide_df_logor_m)) + +position_logor = as.numeric(colnames(wide_df_logor_m)) # custom height (log10OR) logo plot: yayy works -ggseqlogo(wide_df_logor, method="custom", seq_type="aa") + ylab("my custom height") + +ggseqlogo(wide_df_logor_m, method="custom", seq_type="aa") + ylab("my custom height") + theme(legend.position = "bottom" , axis.text.x = element_text(size = 11 , angle = 90 @@ -150,9 +174,12 @@ ggseqlogo(wide_df_logor, method="custom", seq_type="aa") + ylab("my custom heigh , angle = 0 , hjust = 1 , vjust = 0))+ - labs(title = "AA logo plot" - , x = "Wild-type position" - , y = "Log10(OR)") + scale_x_discrete("Position" + #, breaks + , labels = position_logor + , limits = factor(1:length(position_logor)))+ + ylab("Log (Odds Ratio)") + + scale_y_continuous(limits = c(0, 9)) #======================================================================= #%% logo plot from sequence @@ -167,7 +194,7 @@ library(Logolas) # data was pnca_msa.txt #FIXME: generate this file -seqs = read.csv("~/git//Data/pyrazinamide/snp_seqsfile.txt" +seqs = read.csv("~/git/Data/pyrazinamide/output/pnca_msa.txt" , header = FALSE , stringsAsFactors = FALSE)$V1 @@ -175,7 +202,63 @@ seqs = read.csv("~/git//Data/pyrazinamide/snp_seqsfile.txt" # my_data: useful! logomaker(seqs, type = "EDLogo", color_type = "per_symbol" , return_heights = TRUE) -logomaker(seqs, type = "Logo", color_type = "per_symbol") + +logomaker(seqs, type = "Logo", color_type = "per_symbol", return_heights = TRUE) #%% end of script #======================================================================= +#============== +# online logo: +#http://www.cbs.dtu.dk/biotools/Seq2Logo/ # good for getting pssm and psfm matrices +#https://weblogo.berkeley.edu/logo.cgi +#http://weblogo.threeplusone.com/create.cgi # weblogo3 + +#=============== +# PSSM logos= example from logomaker +#=============== + +data(pssm) +logo_pssm(pssm, control = list(round_off = 0)) + +#================= +# PSSM: output from http://www.cbs.dtu.dk/biotools/Seq2Logo/ +# of MSA: pnca_msa.txt +#================== +foo = read.csv("/home/tanu/git/Data/pyrazinamide/pssm_transpose.csv") +rownames(foo) = foo[,1] +df = subset(foo, select = -c(1) ) +colnames(df) +colnames(df) = seq(1:length(df)) + +df_m = as.matrix(df) +logo_pssm(df_m, control = list(round_off = 0)) + +#================= +# # PSFM: output from http://www.cbs.dtu.dk/biotools/Seq2Logo/ +# of MSA: pnca_msa.txt +#================= +# not designed for PSFM! +# may want to figure out how to do it! +logo_data = read.csv("/home/tanu/git/Data/pyrazinamide/psfm_transpose.csv") +rownames(logo_data) = logo_data[,1] +df2 = subset(logo_data, select = -c(1) ) +colnames(df2) +colnames(df2) = seq(1:length(df2)) + +df2_m = as.matrix(df2) +logo_pssm(df2_m, control = list(round_off = 0)) + + +#================= +# ggplots: +#https://stackoverflow.com/questions/5438474/plotting-a-sequence-logo-using-ggplot2 +#================= + +library(ggplot2) +library(gglogo) +ggplot(data = ggfortify(sequences, "peptide")) + + geom_logo(aes(x=position, y=bits, group=element, + label=element, fill=interaction(Polarity, Water)), + alpha = 0.6) + + scale_fill_brewer(palette="Paired") + + theme(legend.position = "bottom")