tweaking basic bp to make generic
This commit is contained in:
parent
4f60e93abb
commit
d21605b31f
2 changed files with 55 additions and 73 deletions
|
@ -14,7 +14,49 @@ getwd()
|
|||
library(ggplot2)
|
||||
library(data.table)
|
||||
library(dplyr)
|
||||
|
||||
# Set globals:
|
||||
source("plotting_globals.R")
|
||||
# pretend cli
|
||||
drug = "streptomycin"
|
||||
gene = "gid"
|
||||
infile = "merged_df3_short.csv"
|
||||
|
||||
import_dirs(drug, gene)
|
||||
|
||||
source("plotting_data.R")
|
||||
plotting_data("merged_df3_short.csv")
|
||||
|
||||
if (!exists("infile") && exists("gene")){
|
||||
#in_filename_params = paste0(tolower(gene), "_all_params.csv")
|
||||
in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined for gid
|
||||
infile = paste0(outdir, "/", in_filename_params)
|
||||
}
|
||||
|
||||
plotting_data(infile)
|
||||
|
||||
|
||||
#=======================================================================
|
||||
# command line args
|
||||
spec = matrix(c(
|
||||
"drug" , "d", 1, "character",
|
||||
"gene" , "g", 1, "character"
|
||||
), byrow = TRUE, ncol = 4)
|
||||
|
||||
opt = getopt(spec)
|
||||
|
||||
#FIXME: detect if script running from cmd, then set these
|
||||
drug = opt$drug
|
||||
gene = opt$gene
|
||||
|
||||
# hardcoding when not using cmd
|
||||
#drug = "streptomycin"
|
||||
#gene = "gid"
|
||||
|
||||
if(is.null(drug)|is.null(gene)) {
|
||||
stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
|
||||
}
|
||||
#=======================================================================
|
||||
|
||||
# should return the following dfs, directories and variables
|
||||
# my_df
|
||||
|
@ -31,7 +73,7 @@ cat(paste0("Directories imported:"
|
|||
cat(paste0("Variables imported:"
|
||||
, "\ndrug:", drug
|
||||
, "\ngene:", gene
|
||||
, "\ngene_match:", gene_match
|
||||
#, "\ngene_match:", gene_match
|
||||
, "\nLength of upos:", length(upos)
|
||||
, "\nAngstrom symbol:", angstroms_symbol))
|
||||
|
||||
|
|
|
@ -6,52 +6,19 @@
|
|||
#https://stackoverflow.com/questions/38851592/r-append-column-in-a-dataframe-with-frequency-count-based-on-two-columns
|
||||
#########################################################
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
getwd()
|
||||
|
||||
#source("Header_TT.R")
|
||||
library(ggplot2)
|
||||
#getwd()
|
||||
#setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
#getwd()
|
||||
library(data.table)
|
||||
library(dplyr)
|
||||
require("getopt", quietly = TRUE) #cmd parse arguments
|
||||
#========================================================
|
||||
# command line args
|
||||
spec = matrix(c(
|
||||
"drug" , "d", 1, "character",
|
||||
"gene" , "g", 1, "character"
|
||||
), byrow = TRUE, ncol = 4)
|
||||
|
||||
opt = getopt(spec)
|
||||
#=========================================================
|
||||
|
||||
#FIXME: detect if script running from cmd, then set these
|
||||
#drug = opt$drug
|
||||
#gene = opt$gene
|
||||
|
||||
# hardcoding when not using cmd
|
||||
drug = "streptomycin"
|
||||
gene = "gid"
|
||||
|
||||
if(is.null(drug)|is.null(gene)) {
|
||||
stop("Missing arguments: --drug and --gene must both be specified (case-sensitive)")
|
||||
}
|
||||
#========================================================
|
||||
# Load functions
|
||||
# import dir structure
|
||||
source("dirs.R")
|
||||
import_dirs(drug, gene)
|
||||
#=======================================================
|
||||
|
||||
#======
|
||||
# input
|
||||
#======
|
||||
#in_filename = "mcsm_complex1_normalised.csv"
|
||||
#in_filename_params = paste0(tolower(gene), "_all_params.csv")
|
||||
in_filename_params = paste0(tolower(gene), "_comb_stab_struc_params.csv") # part combined
|
||||
infile_params = paste0(outdir, "/", in_filename_params)
|
||||
cat(paste0("Input file 1:", infile_params) )
|
||||
plotting_data <- function(infile_params) {
|
||||
|
||||
cat(paste0("Input file 1:", infile_params, '\n') )
|
||||
|
||||
# These globals are created by import_dirs()
|
||||
cat('columns based on variables:\n'
|
||||
, drug
|
||||
, '\n'
|
||||
|
@ -66,7 +33,7 @@ cat('columns based on variables:\n'
|
|||
###########################
|
||||
# Read file: struct params
|
||||
###########################
|
||||
cat("Reading struct params including mcsm:", in_filename_params)
|
||||
#cat("Reading struct params including mcsm:", in_filename_params)
|
||||
|
||||
my_df = read.csv(infile_params, header = T)
|
||||
|
||||
|
@ -146,41 +113,14 @@ cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
|
|||
###########################
|
||||
# extract mutations <10Angstroms and symbols
|
||||
###########################
|
||||
table(my_df_u$ligand_distance<10)
|
||||
table(my_df_u$ligand_distance<mcsm_lig_cutoff)
|
||||
|
||||
my_df_u_lig = my_df_u[my_df_u$ligand_distance <10,]
|
||||
my_df_u_lig = my_df_u[my_df_u$ligand_distance <mcsm_lig_cutoff,]
|
||||
|
||||
|
||||
#==================
|
||||
# Angstroms symbol
|
||||
#==================
|
||||
|
||||
angstroms_symbol = "\u212b"
|
||||
cat(paste0("There are ", nrow(my_df_u_lig), " sites lying within 10", angstroms_symbol, " of the ligand\n"))
|
||||
|
||||
#==================
|
||||
# Delta symbol
|
||||
#==================
|
||||
|
||||
delta_symbol = "\u0394"; delta_symbol
|
||||
|
||||
###########################
|
||||
# variables for my cols
|
||||
###########################
|
||||
|
||||
mcsm_red2 = "#ae301e" # most negative
|
||||
mcsm_red1 = "#f8766d"
|
||||
|
||||
mcsm_mid = "white" # middle
|
||||
|
||||
mcsm_blue1 = "#00bfc4"
|
||||
mcsm_blue2 = "#007d85" # most positive
|
||||
|
||||
|
||||
########################################################################
|
||||
# end of data extraction and cleaning for plots #
|
||||
########################################################################
|
||||
# clear variables
|
||||
rm(opt, spec)
|
||||
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue