ml df stuff

This commit is contained in:
Tanushree Tunstall 2022-09-01 11:39:11 +01:00
parent c2b46286d8
commit 82e2da4f3b
4 changed files with 815 additions and 26 deletions

View file

@ -0,0 +1,103 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Get formatted data for plots
#########################################################
# Working directory and shared setup.
# getwd() only reports the current working directory (the value is
# auto-printed when this runs at top level, e.g. via Rscript); it does not
# change it.
getwd()
# Load project setup scripts. The paths are hard-coded to
# ~/git/LSHTM_analysis, so the repository must be checked out there.
# NOTE(review): presumably Header_TT.R loads the required libraries and
# plotting_colnames.R defines column-name globals -- confirm in those files.
source("~/git/LSHTM_analysis/scripts/Header_TT.R")
source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
# `drug` and `gene` are not assigned anywhere in this script: they are
# expected to be set beforehand by the script that calls this one
# (e.g. from that script's command-line arguments).
#==========================================
# Echo the global settings used for the affinity analyses.
# Per the original note these come from functions/plotting_globals.R;
# none of them is assigned in this script, so they must already exist.
# Note: the mCSM-PPI2 and mCSM-NA sections both report the same DistCutOff.
#==========================================
cat("\nGlobal variables for Ligand:"
    , "\nligand distance colname:", LigDist_colname
    , "\nligand distance cut off:", LigDist_cutoff)

cat("\nGlobal variables for mCSM-PPI2 affinity:"
    , "\nPPI2 distance colname:", ppi2Dist_colname
    , "\nPPI2 cut off:", DistCutOff)

# Labels say "NA" here (they previously said "ligand", copied from the Ligand
# block above) so the log matches the variables actually being printed.
cat("\nGlobal variables for mCSM-NA affinity:"
    , "\nNA distance colname:", naDist_colname
    , "\nNA distance cut off:", DistCutOff)
#===========
# input
#===========
#--------------------------------------------
# call: import_dirs()
# comes from functions/plotting_globals.R
#--------------------------------------------
# `drug` and `gene` must already exist when this line runs; they are not
# assigned in this script.
# NOTE(review): `outdir` is used further down to build the default input
# paths but is never assigned in this script; presumably import_dirs()
# creates it (and possibly other directory variables) as a side effect --
# confirm in functions/plotting_globals.R.
import_dirs(drug, gene)
#---------------------------
# call: plotting_data()
#---------------------------
# Default input: when the caller has not defined `infile_params`, fall back to
# <outdir>/<lower-cased gene>_all_params.csv and report the assumed path.
# (Alternative guard kept for reference, for when running as cmd:
#   if (!is.character(infile_params) && exists("gene")) { ... })
if (exists("gene") && !exists("infile_params")) {
  in_filename_params <- paste0(tolower(gene), "_all_params.csv")
  infile_params <- paste0(outdir, "/", in_filename_params)
  cat("\nInput file for mcsm comb data not specified, assuming filename: "
      , infile_params
      , "\n")
}
# Input 1: combined mCSM data for the gene, read from `infile_params`
cat("\nReading mcsm combined data file: ", infile_params)
mcsm_df <- read.csv(infile_params, header = TRUE)

# rpoB only: drop the rows at position 1148 (the reason is not recorded in
# this script). `%in%` is used rather than `!=` so that a missing `position`
# value cannot turn a row into an all-NA row during logical subsetting;
# rows with NA position are kept unchanged.
if (tolower(gene) %in% c("rpob")) {
  mcsm_df <- mcsm_df[!(mcsm_df$position %in% 1148), ]
}
# Build the plotting data from the combined mCSM data.
# plotting_data() is defined outside this script; its result is indexed
# below as a list with (at least) two elements.
pd_df <- plotting_data(
  mcsm_df,
  gene = gene, # ADDED
  lig_dist_colname = LigDist_colname,
  lig_dist_cutoff = LigDist_cutoff
)
my_df <- pd_df[[1]]
# second element: one of the inputs for combining_dfs_plotting()
my_df_u <- pd_df[[2]]

# Largest and smallest value of the ligand-distance column, rounded to the
# nearest whole number, then reported with the angstrom sign (\u212b).
max_ang <- round(max(my_df_u[[LigDist_colname]]))
min_ang <- round(min(my_df_u[[LigDist_colname]]))
cat(
  "\nLigand distance colname:", LigDist_colname,
  "\nThe max distance", gene, "structure df", ":", max_ang, "\u212b",
  "\nThe min distance", gene, "structure df", ":", min_ang, "\u212b"
)
#--------------------------------
# call: combining_dfs_plotting()
#--------------------------------
# Default input: when the caller has not defined `infile_metadata`, fall back
# to <outdir>/<lower-cased gene>_metadata.csv and report the assumed path.
# (Alternative guard kept for reference, for when running as cmd:
#   if (!is.character(infile_metadata) && exists("gene")) { ... })
if (exists("gene") && !exists("infile_metadata")) {
  # part combined for gid
  in_filename_metadata <- paste0(tolower(gene), "_metadata.csv")
  infile_metadata <- paste0(outdir, "/", in_filename_metadata)
  cat("\nInput file for gene metadata not specified, assuming filename: "
      , infile_metadata
      , "\n")
}
# Input 2: read the gene metadata CSV (<gene>_metadata.csv by default)
cat("\nReading meta data file: ", infile_metadata)
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, which would silently change how the file is parsed.
gene_metadata <- read.csv(infile_metadata,
                          stringsAsFactors = FALSE,
                          header = TRUE)
# Report rows and columns of the metadata that was read
cat("\nDim of meta data file: ", dim(gene_metadata))
# Combine the plotting data (my_df_u) with the gene metadata.
# combining_dfs_plotting() is defined outside this script. The ligand-distance
# arguments are passed as empty strings and plotting is switched off.
all_plot_dfs <- combining_dfs_plotting(
  my_df_u,
  gene_metadata,
  # gene = gene, # ADDED (currently disabled)
  lig_dist_colname = '',
  lig_dist_cutoff = '',
  plotting = FALSE
)

# Only the first two elements of the result are used here.
merged_df2 <- all_plot_dfs[[1]]
merged_df3 <- all_plot_dfs[[2]]
#merged_df2_comp = all_plot_dfs[[3]]
#merged_df3_comp = all_plot_dfs[[4]]