getwd()
# The source() calls below use paths relative to this directory.
# NOTE(review): hard-coded, machine-specific working directory -- consider
# running the script from this directory instead of calling setwd().
setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd()

#########################################################
# TASK: average duet_scaled and affinity_scaled by position and write the
# combined per-position table to <gene>_mean_stability.csv
#########################################################

source("Header_TT.R")
# library() stops immediately when a package is missing; require() would
# only return FALSE and let the script fail later with an unrelated error.
library(data.table)
library(dplyr)
source("plotting_data.R")

# should return
# my_df
# my_df_u
# dup_muts

#========================================================
# Read file: call script for combining df for PS

#source("../combining_two_df.R")

#========================================================

#%% variable assignment: input and output paths & filenames
drug <- "pyrazinamide"
gene <- "pncA"
gene_match <- paste0(gene, "_p.")
cat(gene_match, "\n", sep = "")

#=============
# directories
#=============
datadir <- "~/git/Data"
indir <- file.path(datadir, drug, "input")
outdir <- file.path(datadir, drug, "output")

#======
# input
#======
#in_filename = "mcsm_complex1_normalised.csv"
in_filename_params <- paste0(tolower(gene), "_all_params.csv")
# NOTE(review): this input path is built from outdir and is only reported
# here; it is not read in the visible part of this script.
infile_params <- file.path(outdir, in_filename_params)
cat(paste0("Input file 1:", infile_params), "\n", sep = "")

#=======
# output
#=======
out_filename_mean_stability <- paste0(tolower(gene), "_mean_stability.csv")
outfile_mean_stability <- file.path(outdir, out_filename_mean_stability)
print(paste0("Output file:", outfile_mean_stability))

#%%===============================================================

#================
# Data for plots
#================
# REASSIGNMENT as necessary
df <- my_df_u
rm(my_df)

###########################
# Data for bfactor figure
# PS average
# Lig average
###########################

head(df$position)
head(df$mutationinformation)
head(df$duet_scaled)

# order data frame
#df = df[order(df$position),] #already done
head(df$position)
head(df$mutationinformation)
head(df$duet_scaled)

#***********
# PS: average by position
#***********
# NOTE(review): mean() is called without na.rm, so any NA in duet_scaled
# makes that position's average NA -- confirm this is intended.
mean_duet_by_position <- df %>%
  group_by(position) %>%
  summarize(averaged.duet = mean(duet_scaled))

#***********
# Lig: average by position
#***********
# Mean affinity_scaled for each position; same grouping as the DUET summary,
# so both tables are expected to have one row per position in the same order.
mean_affinity_by_position <- df %>%
  group_by(position) %>%
  summarize(averaged.affinity = mean(affinity_scaled))

#***********
# cbind:mean_duet_by_position and mean_affinity_by_position
#***********
# Column-wise bind: each input contributes its own position column, so the
# result has four columns (position, duet mean, position, affinity mean).
combined <- as.data.frame(
  cbind(mean_duet_by_position, mean_affinity_by_position)
)

# sanity check
# mean_PS_affinity_Bfactor
colnames(combined)

colnames(combined) <- c(
  "position",
  "average_duet_scaled",
  "position2",
  "average_affinity_scaled"
)

colnames(combined)

# cbind() pairs rows purely by order, so stop rather than write a misaligned
# table if the two position columns ever disagree.
stopifnot(identical(combined$position, combined$position2))

# Drop the duplicate position column by name; drop = FALSE keeps a data frame.
combined_df <- combined[
  , setdiff(colnames(combined), "position2"),
  drop = FALSE
]

max(combined_df$average_duet_scaled)
min(combined_df$average_duet_scaled)

max(combined_df$average_affinity_scaled)
min(combined_df$average_affinity_scaled)

head(combined_df$position)
tail(combined_df$position)

#%%============================================================
# output
write.csv(combined_df, outfile_mean_stability, row.names = FALSE)
cat(
  "Finished writing file:\n",
  outfile_mean_stability,
  "\nNo. of rows:", nrow(combined_df),
  "\nNo. of cols:", ncol(combined_df),
  "\n"
)

# end of script
#===============================================================