diff --git a/config/gid.R b/config/gid.R index 226af91..c00bd97 100644 --- a/config/gid.R +++ b/config/gid.R @@ -1,2 +1,8 @@ gene = "gid" drug = "streptomycin" + +rna_bind_aa_pos = c(96, 97, 118, 163) +bin_aa_pos = c(48, 51, 137, 200) +active_aa_pos = c(rna_bind_aa_pos, bin_aa_pos) + +#rna_site = G518 diff --git a/scripts/functions/logoP_msa.R b/scripts/functions/logoP_msa.R index 9673d09..f070b0c 100644 --- a/scripts/functions/logoP_msa.R +++ b/scripts/functions/logoP_msa.R @@ -20,33 +20,116 @@ LogoPlotMSA <- function(msaSeq_mut , msaSeq_wt + , plot_positions , msa_method = 'bits' # or probability - , my_logo_col = "chemistry" - , x_lab = "Wild-type position" - , y_lab = "Count" - , x_ats = 13 # text size - , x_tangle = 90 # text angle - , y_ats = 13 - , y_tangle = 0 - , x_tts = 13 # title size - , y_tts = 13 - , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) - , leg_dir = "horizontal" #can be vertical or horizontal - , leg_ts = 16 # leg text size - , leg_tts = 16 # leg title size + , my_logo_col = "chemistry" + , x_lab = "Wild-type position" + , y_lab = "" + , x_ats = 13 # text size + , x_tangle = 90 # text angle + , y_ats = 13 + , y_tangle = 0 + , x_tts = 13 # title size + , y_tts = 13 + , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) + , leg_dir = "horizontal" #can be vertical or horizontal + , leg_ts = 16 # leg text size + , leg_tts = 16 # leg title size ) { - ############################################ - # Data processing for logo plot for nsSNPS - ############################################ + + ############################################ + # Data processing for logo plot for nsSNPS + ########################################### + cat("\nLength of MSA", length(msaSeq_mut) + , "\nlength of WT seq:", length(msaSeq_wt)) + + if(missing(plot_positions)){ + #if(is.null(plot_positions)){ + cat("\nPlotting entire MSA") + msa_seq_plot = msaSeq_mut + wt_seq_plot = msaSeq_wt + + } else { + cat("\nUser specified plotting positions for MSA:" + , "These are:", plot_positions) - cat("\nLength of MSA", nrow(msaSeq_mut) - , "\nlength of WT seq:", nrow(msaSeq_wt)) + #----------- + # MSA: mut + #----------- + cat("\nGenerating MSA: filtered positions") + msa_interim = sapply(msaSeq_mut, function(x) unlist(strsplit(x,""))) - ###################################### - # Generating plots for muts and wt - ##################################### + if (any(is.na(msa_interim[plot_positions]))){ + cat("Plot_positions selected:", length(plot_positions)) + i_ofr = plot_positions[is.na(msa_interim[plot_positions])] + cat("\nIndex out of range: 1 or more" + , "\nThese are:", i_ofr + , "\nOmitting these and proceeding...") + i_extract = na.omit(msa_interim[plot_positions]) + cat("\nFinal positions being plottted:", length(i_extract) + , "\nNo. of positions dropped from request:", length(i_ofr)) + + }else{ + cat("\nAll positions within range" + , "\nProceeing with generating requested position MSA seqs...") + i_extract = plot_positions + } + + matP1 = msa_interim[i_extract, 1:ncol(msa_interim)] + + dfP1 = data.frame(t(matP1)) + names(dfP1) = i_extract + cols_to_paste = names(dfP1) + dfP1['chosen_seq'] = apply( dfP1[ , cols_to_paste] + , 1 + , paste, sep = '' + , collapse = "") + + msa_seq_plot = dfP1$chosen_seq + + #----------- + # WT: fasta + #----------- + cat("\nGenerating WT fasta: filtered positions") + + wt_interim = sapply(msaSeq_wt, function(x) unlist(strsplit(x,""))) + + if (any(is.na(wt_interim[plot_positions]))){ + cat("Plot_positions selected:", length(plot_positions)) + i2_ofr = plot_positions[is.na(wt_interim[plot_positions])] + cat("\nIndex out of range: 1 or more" + , "\nThese are:", i2_ofr + , "\nOmitting these and proceeding...") + i2_extract = na.omit(wt_interim[plot_positions]) + cat("\nFinal positions being plottted:", length(i2_extract) + , "\nNo. of positions dropped from request:", length(i2_ofr)) + + }else{ + cat("\nAll positions within range" + , "\nProceeing with generating requested position MSA seqs...") + i2_extract = plot_positions + } + + matP2 = wt_interim[i_extract, 1:ncol(wt_interim)] + + dfP2 = data.frame(t(matP2)) + names(dfP2) = i2_extract + cols_to_paste_P2 = names(dfP2) + + dfP2['chosen_seq'] = apply( dfP2[ , cols_to_paste_P2] + , 1 + , paste, sep = '' + , collapse = "") + + wt_seq_plot = dfP2$chosen_seq + } + + + ###################################### + # Generating plots for muts and wt + ##################################### LogoPlotMSAL <- list() if (my_logo_col %in% c('clustalx','taylor')) { @@ -78,12 +161,16 @@ LogoPlotMSA <- function(msaSeq_mut #------------------- # Mutant logo plot #------------------- - p0 = ggseqlogo(msaSeq_mut - #msaSeq_mut$V1 + p0 = ggseqlogo(msa_seq_plot , facet = "grid" , method = msa_method , col_scheme = my_logo_col - , seq_type = 'aa') + , seq_type = 'aa') + + scale_x_discrete(x_lab + , breaks = i_extract + , labels = i_extract + #, limits = min(i_extract): max(i_extract)) + , limits = factor(i_extract)) # further customisation msa_mut_logo_P = p0 + theme(legend.position = leg_pos @@ -111,6 +198,7 @@ LogoPlotMSA <- function(msaSeq_mut , plot.background = element_rect(fill = theme_bgc)) + cat('\nDone: msa_mut_logo_P') #return(msa_mut_logoP) LogoPlotMSAL[['msa_mut_logoP']] <- msa_mut_logo_P @@ -118,12 +206,16 @@ LogoPlotMSA <- function(msaSeq_mut #--------------------------------- # Wild-type MSA: gene_fasta file #--------------------------------- - p1 = ggseqlogo(msaSeq_wt - #msaSeq_wt$V1 + p1 = ggseqlogo(wt_seq_plot , facet = "grid" , method = msa_method , col_scheme = my_logo_col - , seq_type = 'aa') + , seq_type = 'aa')+ + scale_x_discrete(x_lab + , breaks = i_extract + , labels = i_extract + #, limits = min(i_extract): max(i_extract)) + , limits = factor(i_extract)) # further customisation msa_wt_logo_P = p1 + diff --git a/scripts/functions/tests/test_logo_plots.R b/scripts/functions/tests/test_logo_plots.R index aa0143e..3ca9666 100644 --- a/scripts/functions/tests/test_logo_plots.R +++ b/scripts/functions/tests/test_logo_plots.R @@ -2,10 +2,10 @@ source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/pnca.R") # YES #--------------------------------------------------- # FIXME -source("~/git/LSHTM_analysis/config/alr.R") -source("~/git/LSHTM_analysis/config/embb.R") -source("~/git/LSHTM_analysis/config/katg.R") -source("~/git/LSHTM_analysis/config/rpob.R") +# source("~/git/LSHTM_analysis/config/alr.R") +# source("~/git/LSHTM_analysis/config/embb.R") +# source("~/git/LSHTM_analysis/config/katg.R") +# source("~/git/LSHTM_analysis/config/rpob.R") #--------------------------------------------------- source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") @@ -72,16 +72,25 @@ LogoPlotSnps(plot_df = merged_df3 # wild-type and mutant aa # script: logoP_msa.R ######################################## -# msa1 = read.csv("/home/tanu/git/Data/cycloserine/output/alr_msa.csv", header = F) -# head(msa1) -# msa_seq= msa1$V1 -# head(msa_seq) -# -# msa2 = read.csv("/home/tanu/git/Data/cycloserine/input/alr.1fasta", header = F) -# head(msa2) -# wt_seq = msa2$V1 -# head(wt_seq) -# -# # BOTH WORK -# LogoPlotMSA(msa_seq, wt_seq) -# LogoPlotMSA(msa1, msa2) +# BOTH WORK +#LogoPlotMSA(msa_seq, wt_seq) + +LogoPlotMSA(msaSeq_mut = msa_seq + , msaSeq_wt = wt_seq + , msa_method = 'bits' # or probability + , my_logo_col = "chemistry" + #, plot_positions = active_aa_pos + , plot_positions + , x_lab = "Wild-type position" + , y_lab = "" + , x_ats = 13 # text size + , x_tangle = 90 # text angle + , y_ats = 13 + , y_tangle = 0 + , x_tts = 13 # title size + , y_tts = 13 + , leg_pos = "top" # can be top, left, right and bottom or c(0.8, 0.9) + , leg_dir = "horizontal" #can be vertical or horizontal + , leg_ts = 16 # leg text size + , leg_tts = 16 # leg title size +) \ No newline at end of file diff --git a/scripts/plotting/get_plotting_dfs.R b/scripts/plotting/get_plotting_dfs.R index 12e9671..bd41eee 100644 --- a/scripts/plotting/get_plotting_dfs.R +++ b/scripts/plotting/get_plotting_dfs.R @@ -110,9 +110,16 @@ merged_df3_comp = all_plot_dfs[[4]] #################################################################### #source(paste0(plot_script_path, "logo_data.R")) - #s1 = c("\nSuccessfully sourced logo_data.R") #cat(s1) +# input data is merged_df3 +# so repurposed it into a function so params can be passed instead to generate +# data required for plotting. +# Moved "logo_data.R" to redundant/ + +source(paste0(plot_script_path, "logo_data_msa.R")) +s1 = c("\nSuccessfully sourced logo_data_msa.R") +cat(s1) #################################################################### # Data for DM OM Plots: Long format dfs @@ -173,4 +180,4 @@ rm(c1 , vars0 , vars1 , vars2 - , vars3) \ No newline at end of file + , vars3) diff --git a/scripts/plotting/logo_data_msa.R b/scripts/plotting/logo_data_msa.R new file mode 100644 index 0000000..461ca9b --- /dev/null +++ b/scripts/plotting/logo_data_msa.R @@ -0,0 +1,49 @@ +#================================================= +# Data for Logo MSA plots +#================================================= +cat("\n==========================================" + , "\nLogo MSA Plots Data: ALL params" + , "\n=========================================") + +#msa1 = read.csv("/home/tanu/git/Data/cycloserine/output/gid_msa.csv", header = F) +#head(msa1) +#msa_seq= msa1$V1 +#head(msa_seq) + +#msa2 = read.csv("/home/tanu/git/Data/cycloserine/input/gid.1fasta", header = F) +#head(msa2) +#wt_seq = msa2$V1 +#head(wt_seq) + +# BOTH WORK +#LogoPlotMSA(msa_seq, wt_seq) +#LogoPlotMSA(msa1, msa2) +##################################### + +#================ +# MSA file: muts +#================ + +in_filename_msa = paste0(tolower(gene), "_msa.csv") +infile_msa = paste0(outdir, "/", in_filename_msa) +cat("\nInput file for MSA plots: ", infile_msa, "\n") + +msa1 = read.csv(infile_msa, header = F) +head(msa1) +cat("\nLength of MSA:", nrow(msa1)) +msa_seq = msa1$V1 +head(msa_seq) + +#================ +# fasta file: wt +#================ + +in_filename_fasta = paste0(tolower(gene), ".1fasta") +infile_fasta = paste0(indir, "/", in_filename_fasta) +cat("\nInput fasta file for WT: ", infile_fasta, "\n") + +msa2 = read.csv(infile_fasta, header = F) +head(msa2) +cat("\nLength of WT fasta:", nrow(msa2)) +wt_seq = msa2$V1 +head(wt_seq) diff --git a/scripts/plotting/logo_data.R b/scripts/plotting/redundant/logo_data.R similarity index 100% rename from scripts/plotting/logo_data.R rename to scripts/plotting/redundant/logo_data.R