# File-viewer metadata captured with this script: 131 lines, 3 KiB, R
# Move into the plotting directory; the relative source() paths used by this
# script are resolved from here.
# NOTE(review): the path is hard-coded to one user's checkout layout.
getwd()
setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting")
getwd()

########################################################################
# Installing and loading required packages                             #
########################################################################

source("../Header_TT.R")
#source("barplot_colour_function.R")

# library() stops immediately when a package is missing, whereas require()
# only returns FALSE and lets the script fail later at the first use.
library(data.table)
library(dplyr)

########################################################################
# Read file: call script for combining df for PS                       #
########################################################################

source("../combining_two_df.R")

# Objects made available by the script sourced above:
#   with NA    : merged_df2,      merged_df3
#   without NA : merged_df2_comp, merged_df3_comp
#
# PAY ATTENTION: sourcing that script also changes the working directory to
#   "git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts"

# Either merged_df3 or merged_df3_comp is suitable here, since both hold
# unique SNPs. merged_df3 is preferred: the _comp dataset loses some
# mutations, and at this level as much information as available should be
# used.

# REASSIGNMENT (switch the active line as necessary)
my_df <- merged_df3
# my_df <- merged_df3_comp

# Drop the source data frames that are not required any further.
rm(list = c("merged_df2", "merged_df2_comp", "merged_df3", "merged_df3_comp"))

# Quick checks on the selected data.
colnames(my_df)
str(my_df)

###########################
# Data for bfactor figure
# PS average
# Lig average
###########################

head(my_df$Position)
head(my_df$ratioDUET)

# Order the data frame by Position.
df <- my_df[order(my_df$Position), ]

head(df$Position)
head(df$ratioDUET)

# NOTE(review): my_df may contain NA values (it is taken from the data frame
# that keeps NAs). mean() then returns NA for the affected position; confirm
# whether na.rm = TRUE is wanted before changing the averages below.

#***********
# PS: average by position
#***********
mean_DUET_by_position <- df %>%
  group_by(Position) %>%
  summarize(averaged.DUET = mean(ratioDUET))

#***********
# Lig: average by position
#***********
mean_Lig_by_position <- df %>%
  group_by(Position) %>%
  summarize(averaged.Lig = mean(ratioPredAff))

#***********
# cbind: mean_DUET_by_position and mean_Lig_by_position
#***********
combined <- as.data.frame(cbind(mean_DUET_by_position, mean_Lig_by_position))

# Both summaries carry their own Position column; rename so the second copy
# can be told apart (and removed) after the alignment check.
colnames(combined)
colnames(combined) <- c("Position",
                        "average_DUETR",
                        "Position2",
                        "average_PredAffR")
colnames(combined)

# Sanity check: the two summaries were bound side by side, so their rows must
# refer to the same positions in the same order. Stop instead of merely
# printing the result, otherwise misaligned averages would be written out.
if (!identical(combined$Position, combined$Position2)) {
  stop("Position columns of the DUET and Lig summaries do not match",
       call. = FALSE)
}

# Drop the redundant second Position column.
n <- which(colnames(combined) == "Position2"); n
combined_df <- combined[, -n]

# Range of the averaged values.
max(combined_df$average_DUETR)
min(combined_df$average_DUETR)

max(combined_df$average_PredAffR)
min(combined_df$average_PredAffR)

#=============
# output csv
#=============
# Destination of the per-position averages (mean_PS_Lig_Bfactor.csv).
outDir <- "~/git/Data/pyrazinamide/input/processed/"
outFile <- paste0(outDir, "mean_PS_Lig_Bfactor.csv")
print(paste0("Output file with path will be:", outFile))

# Last look at the first and last positions before writing.
head(combined_df$Position)
tail(combined_df$Position)

# FALSE is spelled out: F is an ordinary variable that can be reassigned.
write.csv(combined_df, outFile, row.names = FALSE)