minor tidy up to check interactive graphs Rshiny
This commit is contained in:
parent
7d60a09297
commit
c48fa1dbb0
6 changed files with 3 additions and 375 deletions
|
@ -1,63 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
library(ggplot2)
|
|
||||||
library(tidyverse)
|
|
||||||
library(data.table)
|
|
||||||
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/functions/")
|
|
||||||
getwd()
|
|
||||||
#############################################################
|
|
||||||
#===========================================
|
|
||||||
# load functions, data, dirs, hardcoded vars
|
|
||||||
# that will be used in testing the functions
|
|
||||||
#===========================================
|
|
||||||
source("plotting_data.R")
|
|
||||||
infile = "/home/tanu/git/Data/streptomycin/output/"
|
|
||||||
pd_df = plotting_data(infile)
|
|
||||||
my_df = pd_df[[1]]
|
|
||||||
my_df_u = pd_df[[2]]
|
|
||||||
my_df_u_lig = pd_df[[3]]
|
|
||||||
dup_muts = pd_df[[4]]
|
|
||||||
|
|
||||||
source("../plotting_globals.R")
|
|
||||||
drug = "streptomycin"
|
|
||||||
gene = "gid"
|
|
||||||
|
|
||||||
import_dirs(drug, gene)
|
|
||||||
|
|
||||||
#=====================
|
|
||||||
# functions to test
|
|
||||||
#=====================
|
|
||||||
source("stability_count_bp.R")
|
|
||||||
source("position_count_bp.R")
|
|
||||||
#################################################################
|
|
||||||
##############################################
|
|
||||||
# read a sample file containing muts and prop
|
|
||||||
###############################################
|
|
||||||
df<- read.csv(file.choose())
|
|
||||||
|
|
||||||
setDT(df)[, pos_count := .N, by = .(position)]
|
|
||||||
foo = data.frame(df$position, df$pos_count)
|
|
||||||
|
|
||||||
#snpsBYpos_df <- df %>%
|
|
||||||
# group_by(position) %>%
|
|
||||||
# summarize(snpsBYpos = mean(pos_count))
|
|
||||||
|
|
||||||
# subset df without duplicates for position
|
|
||||||
df2 = df[!duplicated(df$position)]
|
|
||||||
##################################################################
|
|
||||||
# ---------------------------------------
|
|
||||||
# barplot for nssnps, coloured by aa prop
|
|
||||||
# ---------------------------------------
|
|
||||||
pos_colname = "position"
|
|
||||||
aa_prop_colname = "mut_prop_water"
|
|
||||||
aa_prop_colours = c("black", "blue")
|
|
||||||
my_legname = "aa_prop: water"
|
|
||||||
|
|
||||||
# call function
|
|
||||||
aa_prop_bp(plotdf = df # data read interactively above, with pos_count added per position
|
|
||||||
, position_colname = pos_colname
|
|
||||||
, fill_colname = aa_prop_colname
|
|
||||||
, fill_colours = aa_prop_colours # was aa_prop_cols, which this script never defines; the palette is assigned to aa_prop_colours above
|
|
||||||
, leg_name = my_legname)
|
|
||||||
|
|
||||||
#===============================================================
|
|
|
@ -1,59 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: To calculate Allele Frequency and
|
|
||||||
# Odds Ratio from master data
|
|
||||||
#########################################################
|
|
||||||
# load libraries
|
|
||||||
#source("Header_TT.R")
|
|
||||||
require("getopt", quietly = TRUE) # cmd parse arguments
|
|
||||||
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/functions/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
# load functions
|
|
||||||
source("plotting_globals.R")
|
|
||||||
source("mychisq_or.R")
|
|
||||||
source("myaf_or_calcs.R")
|
|
||||||
|
|
||||||
# cmd options + sensible defaults
|
|
||||||
drug = "streptomycin"
|
|
||||||
gene = "gid"
|
|
||||||
|
|
||||||
# call function
|
|
||||||
import_dirs(drug, gene)
|
|
||||||
|
|
||||||
# input file 1: master data
|
|
||||||
#in_filename_master = 'original_tanushree_data_v2.csv' #19K
|
|
||||||
in_filename_master = 'mtb_gwas_meta_v6.csv' #35k
|
|
||||||
infile_master = paste0(datadir, in_filename_master)
|
|
||||||
cat(paste0('Reading infile1: raw data', ' ', infile_master) )
|
|
||||||
|
|
||||||
# input file 2: gene associated meta data file to extract valid snps and add calcs to.
|
|
||||||
# This is outfile_metadata from data_extraction.py
|
|
||||||
in_filename_metadata = paste0(tolower(gene), '_metadata.csv')
|
|
||||||
infile_metadata = paste0(outdir, '/', in_filename_metadata)
|
|
||||||
cat(paste0('Reading input file 2 i.e gene associated metadata:', infile_metadata))
|
|
||||||
|
|
||||||
# out_filename_af_or = paste0(tolower(gene), '_meta_data_with_AF_OR.csv')
|
|
||||||
out_filename_af_or = paste0(tolower(gene), '_af_or.csv')
|
|
||||||
outfile_af_or = paste0(outdir, '/', out_filename_af_or)
|
|
||||||
cat(paste0('Output file with full path:', outfile_af_or))
|
|
||||||
|
|
||||||
cat("master data:", infile_master)
|
|
||||||
cat("gene data:", infile_metadata)
|
|
||||||
|
|
||||||
dr_muts_col # comes from global (dr_mutations_<drug>)
|
|
||||||
other_muts_col # comes from global (other_mutations_<drug>)
|
|
||||||
#################################################
|
|
||||||
my_afor ( infile_master
|
|
||||||
, infile_metadata
|
|
||||||
, outfile = outfile_af_or
|
|
||||||
#, outfile = "FOO_TEST.csv"
|
|
||||||
, drug
|
|
||||||
, gene
|
|
||||||
, idcol = "id"
|
|
||||||
, dr_muts_col
|
|
||||||
, other_muts_col
|
|
||||||
)
|
|
|
@ -1,113 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/functions/")
|
|
||||||
getwd()
|
|
||||||
#############################################################
|
|
||||||
#===========================================
|
|
||||||
# load functions, data, dirs, hardcoded vars
|
|
||||||
# that will be used in testing the functions
|
|
||||||
#===========================================
|
|
||||||
drug = "streptomycin"
|
|
||||||
gene = "gid"
|
|
||||||
|
|
||||||
source("plotting_data.R")
|
|
||||||
|
|
||||||
infile = paste0("~/git/Data/", drug, "/output/", gene, "_comb_stab_struc_params.csv")
|
|
||||||
infile_df = read.csv(infile)
|
|
||||||
|
|
||||||
lig_dist = 5
|
|
||||||
pd_df = plotting_data(infile_df
|
|
||||||
, lig_dist_colname = 'ligand_distance'
|
|
||||||
, lig_dist_cutoff = lig_dist)
|
|
||||||
|
|
||||||
my_df = pd_df[[1]]
|
|
||||||
my_df_u = pd_df[[2]]
|
|
||||||
my_df_u_lig = pd_df[[3]]
|
|
||||||
dup_muts = pd_df[[4]]
|
|
||||||
|
|
||||||
#=====================
|
|
||||||
# functions to test
|
|
||||||
#=====================
|
|
||||||
source("stability_count_bp.R")
|
|
||||||
source("position_count_bp.R")
|
|
||||||
|
|
||||||
##################################################################
|
|
||||||
# ------------------------------
|
|
||||||
# barplot for mcsm stability
|
|
||||||
# ------------------------------
|
|
||||||
basic_bp_duet = paste0(tolower(gene), "_basic_barplot_PS.svg")
|
|
||||||
plot_basic_bp_duet = paste0(plotdir,"/", basic_bp_duet)
|
|
||||||
|
|
||||||
svg(plot_basic_bp_duet)
|
|
||||||
print(paste0("plot filename:", basic_bp_duet))
|
|
||||||
|
|
||||||
# function only
|
|
||||||
stability_count_bp(plotdf = my_df_u
|
|
||||||
, df_colname = "duet_outcome"
|
|
||||||
, leg_title = "DUET outcome"
|
|
||||||
, label_categories = c("Destabilising", "Stabilising")
|
|
||||||
, leg_position = "top")
|
|
||||||
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
# ------------------------------
|
|
||||||
# barplot for ligand affinity
|
|
||||||
# ------------------------------
|
|
||||||
basic_bp_ligand = paste0(tolower(gene), "_basic_barplot_LIG.svg")
|
|
||||||
plot_basic_bp_ligand = paste0(plotdir, "/", basic_bp_ligand)
|
|
||||||
|
|
||||||
svg(plot_basic_bp_ligand)
|
|
||||||
print(paste0("plot filename:", basic_bp_ligand))
|
|
||||||
|
|
||||||
# function only
|
|
||||||
# lig_dist is NOT reassigned here: my_df_u_lig came from plotting_data(..., lig_dist_cutoff = lig_dist), so the axis label must reuse that same value
|
|
||||||
stability_count_bp(plotdf = my_df_u_lig
|
|
||||||
, df_colname = "ligand_outcome"
|
|
||||||
, leg_title = "Ligand outcome"
|
|
||||||
, yaxis_title = paste0("Number of nsSNPs\nLigand dist: <", lig_dist, "\u212b") # "\u212b" is the Angstrom sign
|
|
||||||
#, bp_plot_title = "Sites < 10 Ang of ligand"
|
|
||||||
)
|
|
||||||
|
|
||||||
dev.off()
|
|
||||||
# ------------------------------
|
|
||||||
# barplot for foldX
|
|
||||||
# ------------------------------
|
|
||||||
basic_bp_foldx = paste0(tolower(gene), "_basic_barplot_foldx.svg")
|
|
||||||
plot_basic_bp_foldx = paste0(plotdir,"/", basic_bp_foldx)
|
|
||||||
|
|
||||||
svg(plot_basic_bp_foldx)
|
|
||||||
print(paste0("plot filename:", plot_basic_bp_foldx))
|
|
||||||
|
|
||||||
stability_count_bp(plotdf = my_df_u
|
|
||||||
, df_colname = "foldx_outcome"
|
|
||||||
, leg_title = "FoldX outcome")
|
|
||||||
dev.off()
|
|
||||||
#===============================================================
|
|
||||||
# ------------------------------
|
|
||||||
# barplot for nssnp site count: all
|
|
||||||
# ------------------------------
|
|
||||||
pos_count_duet = paste0(tolower(gene), "_position_count_PS.svg")
|
|
||||||
plot_pos_count_duet = paste0(plotdir, "/", pos_count_duet)
|
|
||||||
|
|
||||||
svg(plot_pos_count_duet)
|
|
||||||
print(paste0("plot filename:", plot_pos_count_duet))
|
|
||||||
|
|
||||||
# function only
|
|
||||||
site_snp_count_bp(plotdf = my_df_u
|
|
||||||
, df_colname = "position")
|
|
||||||
|
|
||||||
dev.off()
|
|
||||||
# ------------------------------
|
|
||||||
# barplot for nssnp site count: within 10 Ang
|
|
||||||
# ------------------------------
|
|
||||||
pos_count_ligand = paste0(tolower(gene), "_position_count_LIG.svg")
|
|
||||||
plot_pos_count_ligand = paste0(plotdir, "/", pos_count_ligand)
|
|
||||||
|
|
||||||
svg(plot_pos_count_ligand)
|
|
||||||
print(paste0("plot filename:", plot_pos_count_ligand))
|
|
||||||
|
|
||||||
# function only
|
|
||||||
site_snp_count_bp(plotdf = my_df_u_lig
|
|
||||||
, df_colname = "position")
|
|
||||||
|
|
||||||
dev.off()
|
|
||||||
#===============================================================
|
|
|
@ -1,100 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
|
|
||||||
# working dir and loading libraries
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/functions/")
|
|
||||||
getwd()
|
|
||||||
|
|
||||||
# infile_params = paste0(outdir, "/" , tolower(gene), "_comb_afor.csv")
|
|
||||||
# infile_metadata = paste0(outdir, "/", tolower(gene), "_metadata")
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# source("combining_dfs_plotting_func.R")
|
|
||||||
#
|
|
||||||
####################################################################
|
|
||||||
# in_file_params = "~/git/Data/streptomycin/output/gid_comb_afor.csv"
|
|
||||||
# in_file_metadata = "~/git/Data/streptomycin/output/gid_metadata.csv"
|
|
||||||
#
|
|
||||||
# all_plot_dfs = combining_dfs_plotting(df1_mcsm_comb = infile_params
|
|
||||||
# , df2_gene_metadata = infile_metadata
|
|
||||||
# , lig_dist_colname = 'ligand_distance'
|
|
||||||
# , lig_dist_cutoff = 10)
|
|
||||||
#
|
|
||||||
# merged_df2 = all_plot_dfs[[1]]
|
|
||||||
# merged_df3 = all_plot_dfs[[2]]
|
|
||||||
# merged_df2_comp = all_plot_dfs[[3]]
|
|
||||||
# merged_df3_comp = all_plot_dfs[[4]]
|
|
||||||
# merged_df2_lig = all_plot_dfs[[5]]
|
|
||||||
# merged_df3_lig = all_plot_dfs[[6]]
|
|
||||||
#
|
|
||||||
# bar_colnames = data.frame(colnames(merged_df2))
|
|
||||||
###########################################################
|
|
||||||
source("plotting_globals.R")
|
|
||||||
source("plotting_data.R")
|
|
||||||
source("combining_dfs_plotting.R")
|
|
||||||
|
|
||||||
#---------------------
|
|
||||||
# call: import_dirs()
|
|
||||||
#---------------------
|
|
||||||
gene = 'gid'
|
|
||||||
drug = 'streptomycin'
|
|
||||||
|
|
||||||
import_dirs(drug_name = drug, gene_name = gene)
|
|
||||||
|
|
||||||
|
|
||||||
#============================
|
|
||||||
# Input 1: plotting_data()
|
|
||||||
#============================
|
|
||||||
if (!exists("infile_params") && exists("gene")){
|
|
||||||
#if (!is.character(infile_params) && exists("gene")){
|
|
||||||
#in_filename_params = paste0(tolower(gene), "_all_params.csv")
|
|
||||||
in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
|
|
||||||
infile_params = paste0(outdir, "/", in_filename_params)
|
|
||||||
cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
mcsm_comb_data = read.csv(infile_params, header = T)
|
|
||||||
|
|
||||||
#-------------------------------
|
|
||||||
# call function: plotting_data()
|
|
||||||
#-------------------------------
|
|
||||||
pd_df = plotting_data(df = mcsm_comb_data # data frame read from infile_params above
|
|
||||||
, ligand_dist_colname = 'ligand_distance'
|
|
||||||
, lig_dist_cutoff = 10) # closing parenthesis was missing, which made the next assignment part of this call and broke parsing
|
|
||||||
my_df_u = pd_df[[2]]
|
|
||||||
|
|
||||||
#======================================
|
|
||||||
# Input 2: read <gene>_meta data.csv
|
|
||||||
#======================================
|
|
||||||
if (!exists("infile_metadata") && exists("gene")){
|
|
||||||
#if (!is.character(infile_params) && exists("gene")){{
|
|
||||||
in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # part combined for gid
|
|
||||||
infile_metadata = paste0(outdir, "/", in_filename_metadata)
|
|
||||||
cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
|
|
||||||
}
|
|
||||||
|
|
||||||
cat("\nReading meta data file:", infile_metadata)
|
|
||||||
|
|
||||||
gene_metadata <- read.csv(infile_metadata
|
|
||||||
, stringsAsFactors = F
|
|
||||||
, header = T)
|
|
||||||
|
|
||||||
#-----------------------------------------
|
|
||||||
# test function: combining_dfs_plotting()
|
|
||||||
#-----------------------------------------
|
|
||||||
all_plot_dfs = combining_dfs_plotting(my_df_u
|
|
||||||
, gene_metadata
|
|
||||||
, lig_dist_colname = 'ligand_distance'
|
|
||||||
, lig_dist_cutoff = 10)
|
|
||||||
|
|
||||||
merged_df2 = all_plot_dfs[[1]]
|
|
||||||
merged_df3 = all_plot_dfs[[2]]
|
|
||||||
merged_df2_comp = all_plot_dfs[[3]]
|
|
||||||
merged_df3_comp = all_plot_dfs[[4]]
|
|
||||||
merged_df2_lig = all_plot_dfs[[5]]
|
|
||||||
merged_df3_lig = all_plot_dfs[[6]]
|
|
||||||
merged_df2_comp_lig = all_plot_dfs[[7]]
|
|
||||||
merged_df3_comp_lig = all_plot_dfs[[8]]
|
|
||||||
########################################################################
|
|
||||||
# End of script
|
|
||||||
########################################################################
|
|
|
@ -1,35 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
getwd()
|
|
||||||
setwd("~/git/LSHTM_analysis/scripts/functions/")
|
|
||||||
getwd()
|
|
||||||
#############################################################
|
|
||||||
#===========================================
|
|
||||||
# load functions, data, dirs, hardcoded vars
|
|
||||||
# that will be used in testing the functions
|
|
||||||
#===========================================
|
|
||||||
source("plotting_globals.R")
|
|
||||||
|
|
||||||
drug = "streptomycin"
|
|
||||||
gene = "gid"
|
|
||||||
|
|
||||||
import_dirs(drug_name = drug, gene_name = gene)
|
|
||||||
|
|
||||||
#-------------------------------
|
|
||||||
# test function: plotting_data()
|
|
||||||
#-------------------------------
|
|
||||||
source("plotting_data.R")
|
|
||||||
|
|
||||||
infile_params = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
|
|
||||||
mcsm_comb_data = read.csv(infile_params, header = T)
|
|
||||||
|
|
||||||
pd_df = plotting_data(df = mcsm_comb_data
|
|
||||||
, ligand_dist_colname = 'ligand_distance'
|
|
||||||
, lig_dist_cutoff = 10)
|
|
||||||
|
|
||||||
my_df = pd_df[[1]]
|
|
||||||
my_df_u = pd_df[[2]]
|
|
||||||
my_df_u_lig = pd_df[[3]]
|
|
||||||
dup_muts = pd_df[[4]]
|
|
||||||
########################################################################
|
|
||||||
# End of script
|
|
||||||
########################################################################
|
|
|
@ -5,10 +5,9 @@
|
||||||
#########################################################
|
#########################################################
|
||||||
# working dir and loading libraries
|
# working dir and loading libraries
|
||||||
getwd()
|
getwd()
|
||||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
#setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||||
getwd()
|
|
||||||
|
|
||||||
source("Header_TT.R")
|
source("/home/tanu/git/LSHTM_analysis/scripts/plotting/Header_TT.R")
|
||||||
|
|
||||||
#********************
|
#********************
|
||||||
# cmd args passed
|
# cmd args passed
|
||||||
|
@ -36,8 +35,7 @@ import_dirs(drug, gene)
|
||||||
#---------------------------
|
#---------------------------
|
||||||
if (!exists("infile_params") && exists("gene")){
|
if (!exists("infile_params") && exists("gene")){
|
||||||
#if (!is.character(infile_params) && exists("gene")){ # when running as cmd
|
#if (!is.character(infile_params) && exists("gene")){ # when running as cmd
|
||||||
in_filename_params = paste0(tolower(gene), "_all_params.csv") #for pncA (and for gid finally) 10/09/21
|
in_filename_params = paste0(tolower(gene), "_all_params.csv")
|
||||||
#in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
|
|
||||||
infile_params = paste0(outdir, "/", in_filename_params)
|
infile_params = paste0(outdir, "/", in_filename_params)
|
||||||
cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
|
cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue