########################################################################
# Purpose: build the data for the B-factor figure.
# For every Position in the merged mutation data, average the ratioDUET
# (PS) and ratioPredAff (Lig) columns and write the result to
# mean_PS_Lig_Bfactor.csv.
########################################################################

getwd()
# The relative source() calls below are resolved against this directory,
# so the working directory has to be set before anything else runs.
setwd("~/git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts/plotting")
getwd()

########################################################################
# Installing and loading required packages                             #
########################################################################

source("../Header_TT.R")
#source("barplot_colour_function.R")

# library() stops immediately if a package is missing, whereas require()
# only warns and returns FALSE, deferring the failure to a later line.
library(data.table)
library(dplyr)

########################################################################
# Read file: call script for combining df for PS                       #
########################################################################

source("../combining_two_df.R")

###########################
# This will return:
# df with NA:
#   merged_df2
#   merged_df3
# df without NA:
#   merged_df2_comp
#   merged_df3_comp
###########################

#---------------------- PAY ATTENTION
# the above changes the working dir
#[1] "git/LSHTM_analysis/mcsm_analysis/pyrazinamide/scripts"
#---------------------- PAY ATTENTION

###########################
# you need merged_df3
# or
# merged_df3_comp
# since these have unique SNPs.
# merged_df3 is preferred because using the _comp dataset means
# we lose some muts and, at this level, we should use
# as much info as available.
###########################

# uncomment as necessary
#%%%%%%%%%%%%%%%%%%%%%%%%
# REASSIGNMENT
my_df <- merged_df3
#my_df <- merged_df3_comp
#%%%%%%%%%%%%%%%%%%%%%%%%

# delete variables not required
rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp)

# quick checks
colnames(my_df)
str(my_df)

###########################
# Data for bfactor figure
# PS average
# Lig average
###########################

head(my_df$Position)
head(my_df$ratioDUET)

# order data frame by position
df <- my_df[order(my_df$Position), ]

head(df$Position)
head(df$ratioDUET)

# NOTE(review): mean() is called without na.rm, so a single NA in
# ratioDUET / ratioPredAff makes that position's average NA. merged_df3 is
# listed above as a "df with NA" -- confirm whether these two columns can
# contain NA before relying on the averages.

#***********
# PS: average by position
#***********
mean_DUET_by_position <- df %>%
  group_by(Position) %>%
  summarize(averaged.DUET = mean(ratioDUET))

#***********
# Lig: average by position
#***********
mean_Lig_by_position <- df %>%
  group_by(Position) %>%
  summarize(averaged.Lig = mean(ratioPredAff))

#***********
# cbind: mean_DUET_by_position and mean_Lig_by_position
#***********
combined <- as.data.frame(cbind(mean_DUET_by_position, mean_Lig_by_position))

# sanity check
# mean_PS_Lig_Bfactor
colnames(combined)

colnames(combined) <- c(
  "Position",
  "average_DUETR",
  "Position2",
  "average_PredAffR"
)

colnames(combined)

# cbind() pairs rows purely by order, so the two Position columns must match
# exactly; abort instead of silently writing misaligned averages.
stopifnot(identical(combined$Position, combined$Position2))

# drop the duplicated position column by name rather than by computed index
combined_df <- combined[, c("Position", "average_DUETR", "average_PredAffR")]

max(combined_df$average_DUETR); min(combined_df$average_DUETR)

max(combined_df$average_PredAffR); min(combined_df$average_PredAffR)

#=============
# output csv
#=============
outDir <- "~/git/Data/pyrazinamide/input/processed/"
outFile <- paste0(outDir, "mean_PS_Lig_Bfactor.csv")
print(paste0("Output file with path will be:", outFile))

head(combined_df$Position); tail(combined_df$Position)

write.csv(combined_df, outFile, row.names = FALSE)