import commit

2020-01-08 16:15:33 +00:00 · 2020-01-08 16:15:33 +00:00 · bccfe68192
commit bccfe68192
39 changed files with 6837 additions and 0 deletions
--- a/mcsm_analysis/pyrazinamide/scripts/mcsm_mean_stability.R
+++ b/mcsm_analysis/pyrazinamide/scripts/mcsm_mean_stability.R
@ -0,0 +1,131 @@
+getwd() # working directory before the change
+setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting") # the relative source("../...") calls below resolve from here
+getwd() # working directory after the change
+
+########################################################################
+# 				Installing and loading required packages 			   #
+########################################################################
+
+source("../Header_TT.R")
+#source("barplot_colour_function.R")
+library(data.table) # library() stops with an error if the package is missing; require() would only return FALSE
+library(dplyr)      # group_by() and summarize() are used for the per-position means below
+
+########################################################################
+#		 Read file: call script for combining df for PS		   	   #
+########################################################################
+
+source("../combining_two_df.R") # per the notes below: creates merged_df2, merged_df3 and their _comp variants, and changes the working dir
+
+###########################
+# This will return:
+
+# df with NA:
+# merged_df2 
+# merged_df3
+
+# df without NA:
+# merged_df2_comp
+# merged_df3_comp
+###########################
+
+#---------------------- PAY ATTENTION
+# the above changes the working dir
+#[1] "git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts"
+#---------------------- PAY ATTENTION
+
+###########################
+# you need merged_df3 
+# or
+# merged_df3_comp
+# since these have unique SNPs
+# I prefer to use the merged_df3
+# because using the _comp dataset means
+# we lose some muts and at this level, we should use
+# as much info as available
+###########################
+
+# uncomment as necessary
+#%%%%%%%%%%%%%%%%%%%%%%%%
+# REASSIGNMENT
+my_df <- merged_df3
+#my_df <- merged_df3_comp
+#%%%%%%%%%%%%%%%%%%%%%%%%
+
+# drop the four merged data frames now that my_df holds the working copy
+rm(list = c("merged_df2", "merged_df2_comp", "merged_df3", "merged_df3_comp"))
+
+# quick checks
+colnames(my_df)
+str(my_df)
+
+###########################
+# Inputs for the bfactor figure:
+# PS average  (mean ratioDUET per Position)
+# Lig average (mean ratioPredAff per Position)
+###########################
+
+head(my_df[["Position"]])
+head(my_df[["ratioDUET"]])
+
+# sort the rows by ascending Position
+df <- my_df[order(my_df[["Position"]]), ]
+
+head(df[["Position"]])
+head(df[["ratioDUET"]])
+
+#***********
+# PS: average by position
+#***********
+
+mean_DUET_by_position <- summarize(
+  group_by(df, Position),
+  averaged.DUET = mean(ratioDUET))
+
+#***********
+# Lig: mean ratioPredAff for each Position
+#***********
+mean_Lig_by_position <- summarize(
+  group_by(df, Position),
+  averaged.Lig = mean(ratioPredAff))
+
+
+#***********
+# cbind:mean_DUET_by_position and mean_Lig_by_position
+#***********
+
+combined = as.data.frame(cbind(mean_DUET_by_position, mean_Lig_by_position ))
+
+# cbind() pairs rows purely by their order, so both Position columns are kept
+# (the second renamed to Position2) until they have been verified to match
+
+colnames(combined)
+
+colnames(combined) = c("Position"
+                       , "average_DUETR"
+                       , "Position2"
+                       , "average_PredAffR")
+
+colnames(combined)
+# abort rather than just print FALSE: misaligned rows would pair means from different positions
+stopifnot(identical(combined$Position, combined$Position2))
+
+n = which(colnames(combined) == "Position2"); n
+
+combined_df = combined[,-n] # drop the now-redundant duplicate Position column
+
+max(combined_df$average_DUETR) ; min(combined_df$average_DUETR)
+
+max(combined_df$average_PredAffR) ; min(combined_df$average_PredAffR)
+
+#=============
+# output csv
+#============
+outDir = "~/git/Data/pyrazinamide/input/processed/" # must already exist: write.csv() does not create directories
+outFile = paste0(outDir, "mean_PS_Lig_Bfactor.csv")
+print(paste0("Output file with path will be:", outFile))
+
+head(combined_df$Position); tail(combined_df$Position)
+
+write.csv(combined_df, outFile
+          , row.names = FALSE) # spell out FALSE: F is an ordinary variable that can be reassigned