#!/usr/bin/env Rscript
#########################################################
# TASK: Script to format data for dm om plots:
# generating WF and LF data for each of the parameters:
# duet, mcsm-lig, foldx, deepddg, dynamut2, mcsm-na, mcsm-ppi2, encom, dynamut..etc
# Called by get_plotting_dfs.R
#
# dm_om_wf_lf_data()
# INPUT:
#   df: merged_df3 (data with all parameters)
#     NOTE*: merged_df2 will not be appropriate as it brings up most params as significant!?, at least for gid
#   gene: [conditional generation of dfs like mcsm-NA, mcsm-ppi2 as not all genes have all these values]
#   colnames_to_extract : columns to extract; either user-specified or the default.
#     By default it is c("mutationinformation" , "duet_affinity_change...")
#   ligand_dist_colname : column name containing ligand distance. By default, it is LigDist_colname (imported from globals)
#   dr_muts : dr_muts_col (imported from globals; dr_mutations_)
#   other_muts : other_muts_col (imported from globals ...other_mutations_)
#   snp_colname : SNP column name. By default it is "mutationinformation"
#   aa_pos_colname : Column name containing the aa position. This is used to sort the df by.
#   mut_colname : Column name containing the snp info. By default, it is "mutation"
#   mut_info_colname : Column name containing mutation info whether it is DM or OM. By default, it is "mutation_info"
#   mut_info_label_colname : Column containing pre-formatted labels for mutation info.
#     For my use case, this is called "mutation_info_labels"
#     This column has short labels like DM and OM corresponding to dr_muts and other_muts.
#     NOTE*: if this is left empty, then the arg ('dr_other_muts_labels') will be used
#   dr_other_muts_labels : User specified labels, must correspond to dr_muts and other_muts.
#     NOTE*: Only used if the arg (mut_info_label_colname) is empty!
#   categ_cols_to_factor : Column names to convert to factors. These mainly correspond to the outcome columns associated with the
#     arg ('colnames_to_extract').
#     These have the suffix "_outcome" in their colnames. Additionally column 'mutation_info' is also
#     converted to factor. By default, it converts the cols matching '_outcome' and '_info' to factor.
#     Users are able to provide a vector of their corresponding column names
# RETURNS: List
#   WF and LF data grouped by mutation_info i.e DM (drug mutations) and OM (other mutations)
# TO DO: SHINY
#1) df to choose (merged_df3 or merged_df2)
#2)
##################################################################
DistCutOff <- 10
LigDist_colname # = "ligand_distance" # from globals
ppi2Dist_colname <- "interface_dist"
naDist_colname <- "TBC"

#-----------------------------------------------------------------------
# .dm_om_build_wf_lf(): private helper for dm_om_wf_lf_data().
# Builds the wide-format (WF) and long-format (LF) data frames for ONE
# predictor.
#   data         : data frame holding the already renamed columns
#   id_cols      : identifier columns kept as-is (first columns of the output)
#   value_cols   : shared numeric columns gathered alongside the predictor
#   display_name : column holding the predictor value (its display label)
#   key          : short predictor name, only used in messages
#   outcome_col  : optional outcome column kept as an identifier; NULL if the
#                  predictor has no outcome column
# Returns list(wf = <wide df>, lf = <long df>); stops with an error if a
# required column is absent or the long data has an unexpected row count.
#-----------------------------------------------------------------------
.dm_om_build_wf_lf <- function(data, id_cols, value_cols, display_name, key,
                               outcome_col = NULL) {
  pivot_cols <- c(id_cols, outcome_col)
  measure_cols <- c(display_name, value_cols)
  wanted_cols <- c(pivot_cols, measure_cols)

  absent_cols <- setdiff(wanted_cols, colnames(data))
  if (length(absent_cols) > 0) {
    stop("FAIL: columns required for ", key, " are missing: ",
         paste(absent_cols, collapse = ", "), call. = FALSE)
  }

  wf <- data[, wanted_cols, drop = FALSE]

  # every non-pivot column contributes nrow(wf) rows to the long format
  expected_rows_lf <- nrow(wf) * length(measure_cols)

  # gather() is kept on purpose: it stacks the data column by column, whereas
  # pivot_longer() would return the rows in a different order.
  lf <- tidyr::gather(wf,
                      key = "param_type",
                      value = "param_value",
                      dplyr::all_of(measure_cols),
                      factor_key = TRUE)

  if (nrow(lf) != expected_rows_lf) {
    stop("FAIL: long format data could not be created for ", key,
         call. = FALSE)
  }
  cat("\nPASS: long format data created for", display_name)

  list(wf = wf, lf = lf)
}

#-----------------------------------------------------------------------
# dm_om_wf_lf_data(): generate WF and LF data frames per predictor.
# Arguments are described in the file header. Returns a named list with
# elements wf_<predictor> and lf_<predictor>; which predictors are present
# depends on gene_name (see the geneL_* vectors below).
# Errors (stop) instead of quitting the session when a sanity check fails.
#-----------------------------------------------------------------------
dm_om_wf_lf_data <- function(df
                             , gene_name = gene # from globals
                             , colnames_to_extract
                             , ligand_dist_colname = LigDist_colname # from globals
                             , dr_muts = dr_muts_col # from globals
                             , other_muts = other_muts_col # from globals
                             , snp_colname = "mutationinformation"
                             , aa_pos_colname = "position" # to sort df by
                             , mut_colname = "mutation"
                             , mut_info_colname = "mutation_info"
                             , mut_info_label_colname = "mutation_info_labels" # if empty, below used
                             , dr_other_muts_labels = c("DM", "OM") # only used if ^^ = ""
                             , categ_cols_to_factor) {

  df <- as.data.frame(df)

  # TRUE when the caller asked for 'dr_other_muts_labels' to be used instead
  # of a pre-formatted label column
  use_custom_labels <- identical(mut_info_label_colname, "")

  # 'sensitivity' is only derivable when the dst_mode column is available
  if ("dst_mode" %in% colnames(df)) {
    df[["sensitivity"]] <- ifelse(df[["dst_mode"]] == 1, "R", "S")
  }

  # Recode the pre-formatted labels: "DM" --> "R", everything else --> "S".
  # Skipped when no label column is requested (an empty column name cannot be
  # assigned to, which previously made the custom-label branch unreachable).
  if (!use_custom_labels) {
    if (!mut_info_label_colname %in% colnames(df)) {
      stop("FAIL: label column '", mut_info_label_colname,
           "' not found in df", call. = FALSE)
    }
    df[[mut_info_label_colname]] <- ifelse(
      df[[mut_info_label_colname]] == "DM", "R", "S"
    )
  }

  #=======================================================================
  # Initialise the required dfs based on gene name
  #=======================================================================
  gene_lc <- tolower(gene_name)

  geneL_na   <- c("gid", "rpob")
  geneL_dy   <- c("gid")
  geneL_ppi2 <- c("alr", "embb", "katg", "rpob")

  common_keys <- c("duet", "mcsm_lig", "foldx", "deepddg", "dynamut2",
                   "consurf", "snap2")
  na_keys   <- "mcsm_na"
  ppi2_keys <- "mcsm_ppi2"
  dy_keys   <- c("dynamut", "encomddg", "encomdds", "sdm", "mcsm")

  in_na   <- gene_lc %in% geneL_na
  in_ppi2 <- gene_lc %in% geneL_ppi2
  in_dy   <- gene_lc %in% geneL_dy

  # Placeholder layout: the LAST matching gene group decides which extra
  # placeholders are pre-created (kept for backward-compatible element order);
  # anything else is appended when it is filled in below. Genes matching no
  # group get the common predictors only instead of failing.
  extra_keys <- character(0)
  if (in_na)   extra_keys <- na_keys
  if (in_ppi2) extra_keys <- ppi2_keys
  if (in_dy)   extra_keys <- c(na_keys, dy_keys)

  placeholder_keys <- c(common_keys, extra_keys)
  placeholder_names <- as.vector(rbind(paste0("wf_", placeholder_keys),
                                       paste0("lf_", placeholder_keys)))
  wf_lf_dataL <- lapply(placeholder_names, function(nm) data.frame())
  names(wf_lf_dataL) <- placeholder_names

  cat("\nInitializing an empty list of length:", length(wf_lf_dataL))

  #=======================================================================
  # Columns to extract
  #=======================================================================
  if (missing(colnames_to_extract)) {
    colnames_to_extract <- c(
      snp_colname, mut_colname, mut_info_colname, mut_info_label_colname
      , aa_pos_colname
      , ligand_dist_colname
      , ppi2Dist_colname, naDist_colname
      , "duet_stability_change" , "duet_scaled" , "duet_outcome"
      , "ligand_affinity_change", "affinity_scaled" , "ligand_outcome"
      , "ddg_foldx" , "foldx_scaled" , "foldx_outcome"
      , "deepddg" , "deepddg_scaled" , "deepddg_outcome"
      , "asa" , "rsa"
      , "rd_values" , "kd_values"
      , "log10_or_mychisq" , "neglog_pval_fisher" , "af"
      , "ddg_dynamut2" , "ddg_dynamut2_scaled", "ddg_dynamut2_outcome"
      , "mcsm_ppi2_affinity" , "mcsm_ppi2_scaled" , "mcsm_ppi2_outcome"
      , "consurf_score" , "consurf_scaled" #, "consurf_outcome"
      , "snap2_score" , "snap2_scaled" , "snap2_outcome"
      , "mcsm_na_affinity" , "mcsm_na_scaled" , "mcsm_na_outcome"
      , "ddg_dynamut" , "ddg_dynamut_scaled" , "ddg_dynamut_outcome"
      , "ddg_encom" , "ddg_encom_scaled" , "ddg_encom_outcome"
      , "dds_encom" , "dds_encom_scaled" , "dds_encom_outcome"
      , "ddg_mcsm" , "ddg_mcsm_scaled" , "ddg_mcsm_outcome"
      , "ddg_sdm" , "ddg_sdm_scaled" , "ddg_sdm_outcome"
      , "ddg_duet" , "ddg_duet_scaled" , "ddg_duet_outcome"
    )
  } else {
    # snp_colname is always needed further down (static_cols_start), and the
    # ligand distance column must be the one named by the argument
    colnames_to_extract <- c(snp_colname, mut_colname, mut_info_colname
                             , mut_info_label_colname
                             , aa_pos_colname, ligand_dist_colname
                             , colnames_to_extract)
  }

  comb_df <- df[, colnames(df) %in% colnames_to_extract, drop = FALSE]

  # sort by the aa position column; .data[[ ]] is required because the column
  # name is held in a string
  comb_df_s <- dplyr::arrange(comb_df, .data[[aa_pos_colname]])

  #=======================================================================
  # Categorical columns --> factor
  #=======================================================================
  if (missing(categ_cols_to_factor)) {
    fact_cols <- grep("_outcome|_info", colnames(comb_df_s), value = TRUE)
  } else if (is.character(categ_cols_to_factor)) {
    # column names, as documented
    unknown_cols <- setdiff(categ_cols_to_factor, colnames(comb_df_s))
    if (length(unknown_cols) > 0) {
      warning("Ignoring unknown categ_cols_to_factor: ",
              paste(unknown_cols, collapse = ", "), call. = FALSE)
    }
    fact_cols <- intersect(categ_cols_to_factor, colnames(comb_df_s))
  } else {
    # positional / logical index, as previously supported
    fact_cols <- colnames(comb_df_s)[categ_cols_to_factor]
  }

  if (any(vapply(comb_df_s[fact_cols], is.character, logical(1)))) {
    cat("\nChanging", length(fact_cols), "cols to factor")
    comb_df_s[fact_cols] <- lapply(comb_df_s[fact_cols], as.factor)
    if (all(vapply(comb_df_s[fact_cols], is.factor, logical(1)))) {
      cat("\nSuccessful: cols changed to factor")
    }
  } else {
    cat("\nRequested cols aready factors")
  }

  #=======================================================================
  # further checks to make sure dr and other muts are indeed unique
  #=======================================================================
  mut_info_values <- comb_df_s[[mut_info_colname]]
  mut_values <- comb_df_s[[mut_colname]]

  dr_muts_names <- unique(mut_values[mut_info_values %in% dr_muts])
  other_muts_names <- unique(mut_values[mut_info_values %in% other_muts])

  if (length(intersect(dr_muts_names, other_muts_names)) == 0) {
    cat("PASS: dr and other muts are indeed unique")
  } else {
    stop("FAIL: dr and others muts are NOT unique!", call. = FALSE)
  }

  #=======================================================================
  # pretty display names i.e. labels to reduce major code duplication later
  # (delta_symbol and angstroms_symbol are expected from globals)
  #=======================================================================
  stability_suffix <- paste0(delta_symbol, delta_symbol, "G")
  flexibility_suffix <- paste0(delta_symbol, delta_symbol, "S")

  lig_dn       <- paste0("Ligand distance (", angstroms_symbol, ")")
  mcsm_lig_dn  <- paste0("Ligand affinity (log fold change)")
  duet_dn      <- paste0("DUET ", stability_suffix)
  foldx_dn     <- paste0("FoldX ", stability_suffix)
  deepddg_dn   <- paste0("Deepddg ", stability_suffix)
  dynamut2_dn  <- paste0("Dynamut2 ", stability_suffix)
  mcsm_na_dn   <- paste0("mCSM-NA affinity ", stability_suffix)
  mcsm_ppi2_dn <- paste0("mCSM-PPI2 affinity ", stability_suffix)
  consurf_dn   <- paste0("Consurf")
  snap2_dn     <- paste0("SNAP2")
  dynamut_dn   <- paste0("Dynamut ", stability_suffix)
  encom_ddg_dn <- paste0("EnCOM ", stability_suffix)
  encom_dds_dn <- paste0("EnCOM ", flexibility_suffix)
  sdm_dn       <- paste0("SDM ", stability_suffix)
  mcsm_dn      <- paste0("mCSM ", stability_suffix)

  # change column names: plyr
  # NOTE(review): mcsm_ppi2, consurf, snap2, sdm and mcsm map the unscaled
  # column while the other predictors map the *_scaled column -- confirm that
  # this is intended.
  new_colnames <- c(asa = "ASA"
                    , rsa = "RSA"
                    , rd_values = "RD"
                    , kd_values = "KD"
                    , log10_or_mychisq = "Log10 (OR)"
                    , neglog_pval_fisher = "-Log (P)"
                    , af = "MAF"
                    , affinity_scaled = mcsm_lig_dn
                    , duet_scaled = duet_dn
                    , foldx_scaled = foldx_dn
                    , deepddg_scaled = deepddg_dn
                    , ddg_dynamut2_scaled = dynamut2_dn
                    , mcsm_na_scaled = mcsm_na_dn
                    , mcsm_ppi2_affinity = mcsm_ppi2_dn
                    , consurf_score = consurf_dn
                    , snap2_score = snap2_dn
                    , ddg_dynamut_scaled = dynamut_dn
                    , ddg_encom_scaled = encom_ddg_dn
                    , dds_encom_scaled = encom_dds_dn
                    , ddg_sdm = sdm_dn
                    , ddg_mcsm = mcsm_dn)

  comb_df_sl1 <- plyr::rename(comb_df_s
                              , replace = new_colnames
                              , warn_missing = TRUE
                              , warn_duplicated = TRUE)

  # renaming colname using variable i.e ligand_dist_colname: dplyr
  comb_df_sl <- dplyr::rename(comb_df_sl1,
                              !!lig_dn := dplyr::all_of(ligand_dist_colname))

  #####################################################################
  if (use_custom_labels) {
    cat("\nAssigning labels:", dr_other_muts_labels, "--> to column:", mut_info_colname)

    # levels can only be relabelled on a factor
    if (!is.factor(comb_df_sl[[mut_info_colname]])) {
      comb_df_sl[[mut_info_colname]] <- as.factor(comb_df_sl[[mut_info_colname]])
    }
    info_levels <- levels(comb_df_sl[[mut_info_colname]])
    info_levels[info_levels == dr_muts] <- dr_other_muts_labels[[1]]    # dr_muts
    info_levels[info_levels == other_muts] <- dr_other_muts_labels[[2]] # other_muts
    levels(comb_df_sl[[mut_info_colname]]) <- info_levels

    static_cols1 <- mut_info_colname
  } else {
    static_cols1 <- mut_info_label_colname
  }

  #######################################################################
  #======================
  # Selecting dfs
  # with appropriate cols
  #=======================
  static_cols_start <- c(snp_colname
                         , aa_pos_colname
                         , mut_colname
                         , static_cols1)

  # ordering is important!
  static_cols_end <- c(lig_dn
                       , "ASA"
                       , "RSA"
                       , "RD"
                       , "KD"
                       , "MAF"
                       , "Log10 (OR)"
                       , "-Log (P)")

  # One entry per predictor: its outcome column (NULL if none) and the
  # display name of its value column.
  # Consurf: https://consurf.tau.ac.il/overview.php
  #   consurf_score <0 (below average): slowly evolving i.e CONSERVED
  #   consurf_score >0 (above average): rapidly evolving, i.e VARIABLE
  # FIXME: set outcome = "consurf_outcome" if a category column is added
  predictor_specs <- list(
    duet        = list(outcome = "duet_outcome",         dn = duet_dn)
    , mcsm_lig  = list(outcome = "ligand_outcome",       dn = mcsm_lig_dn)
    , foldx     = list(outcome = "foldx_outcome",        dn = foldx_dn)
    , deepddg   = list(outcome = "deepddg_outcome",      dn = deepddg_dn)
    , dynamut2  = list(outcome = "ddg_dynamut2_outcome", dn = dynamut2_dn)
    , consurf   = list(outcome = NULL,                   dn = consurf_dn)
    , snap2     = list(outcome = "snap2_outcome",        dn = snap2_dn)
    , mcsm_na   = list(outcome = "mcsm_na_outcome",      dn = mcsm_na_dn)
    , mcsm_ppi2 = list(outcome = "mcsm_ppi2_outcome",    dn = mcsm_ppi2_dn)
    , dynamut   = list(outcome = "ddg_dynamut_outcome",  dn = dynamut_dn)
    , encomddg  = list(outcome = "ddg_encom_outcome",    dn = encom_ddg_dn)
    , encomdds  = list(outcome = "dds_encom_outcome",    dn = encom_dds_dn)
    , sdm       = list(outcome = "ddg_sdm_outcome",      dn = sdm_dn)
    , mcsm      = list(outcome = "ddg_mcsm_outcome",     dn = mcsm_dn)
  )

  # common predictors first, then the gene-specific ones
  keys_to_build <- c(common_keys
                     , if (in_na) na_keys
                     , if (in_ppi2) ppi2_keys
                     , if (in_dy) dy_keys)

  for (key in keys_to_build) {
    spec <- predictor_specs[[key]]
    wf_lf <- .dm_om_build_wf_lf(data = comb_df_sl
                                , id_cols = static_cols_start
                                , value_cols = static_cols_end
                                , display_name = spec$dn
                                , key = key
                                , outcome_col = spec$outcome)

    # Assign them to the output list
    wf_lf_dataL[[paste0("wf_", key)]] <- wf_lf$wf
    wf_lf_dataL[[paste0("lf_", key)]] <- wf_lf$lf
  }

  #-------------------------------------------------------------------------
  return(wf_lf_dataL)
}
############################################################################