Added shorter scripts for each kind of plot processing to make the code easier to read
This commit is contained in:
parent
27f0b15d4c
commit
3f3fe89a6b
6 changed files with 1292 additions and 0 deletions
117
scripts/plotting/redundant/other_dfs_data.R
Normal file
117
scripts/plotting/redundant/other_dfs_data.R
Normal file
|
@ -0,0 +1,117 @@
|
|||
#!/usr/bin/env Rscript
|
||||
|
||||
# NOTE: didn't end up using this script: the problem was sorted at the source,
# by a .py script that combines all dfs to output all_params

#################################################################
# TASK: Script to add all other dfs to merged_df2 and merged_df3
#################################################################
# Combine other dfs:
# dynamut_df, dynamut2_df, mcsm_na_df,
# perhaps: deepddg and mcsm ppi (for embb)
#################################################################

# read other files
# NOTE: `drug` and `gene` are not assigned in this script; they must already
# exist in the environment the script is run from.
infilename_dynamut <- paste0(
  "~/git/Data/", drug, "/output/dynamut_results/", gene,
  "_complex_dynamut_norm.csv"
)

infilename_dynamut2 <- paste0(
  "~/git/Data/", drug, "/output/dynamut_results/dynamut2/", gene,
  "_complex_dynamut2_norm.csv"
)

infilename_mcsm_na <- paste0(
  "~/git/Data/", drug, "/output/mcsm_na_results/", gene,
  "_complex_mcsm_na_norm.csv"
)

infilename_mcsm_f_snps <- paste0(
  "~/git/Data/", drug, "/output/", gene,
  "_mcsm_formatted_snps.csv"
)

dynamut_df <- read.csv(infilename_dynamut)
dynamut2_df <- read.csv(infilename_dynamut2)
mcsm_na_df <- read.csv(infilename_mcsm_na)

# This file is read without a header row, so the column name is set by hand.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned elsewhere in the session.
# NOTE(review): assigning a single name assumes a one-column file -- confirm.
mcsm_f_snps <- read.csv(infilename_mcsm_f_snps, header = FALSE)
names(mcsm_f_snps) <- "mutationinformation"

#=================================
# Check with intersect() how many columns each consecutive pair of dfs shares.
# Exactly one shared column per pair is expected; the intent is to use the
# mutationinformation column for merging, to be on the safe side.
c1 <- length(intersect(names(dynamut_df), names(dynamut2_df)))
c2 <- length(intersect(names(dynamut2_df), names(mcsm_na_df)))

if (c1 == 1 && c2 == 1) {
  n_common <- 1
} else {
  # This branch is reached when a pair shares several columns AND when it
  # shares none, so report the actual counts instead of assuming "more than one".
  cat("\nExpected exactly one common col between dfs, but found:"
      , "\ndynamut_df vs dynamut2_df:", c1
      , "\ndynamut2_df vs mcsm_na_df:", c2
      , "\nInspect before merging!")
}

# delete chain from dynamut2_df (currently disabled)
#dynamut2_df = subset(dynamut2_df, select = -chain)

# quick checks: column count, column names and row count of each df,
# evaluated at top level in that order
lapply(list(dynamut_df, dynamut2_df, mcsm_na_df), FUN = ncol)

lapply(list(dynamut_df, dynamut2_df, mcsm_na_df), FUN = colnames)

lapply(list(dynamut_df, dynamut2_df, mcsm_na_df), FUN = nrow)

# per-df column counts, kept as a list
ncols_comb <- lapply(list(dynamut_df, dynamut2_df, mcsm_na_df), FUN = ncol)

#---------------------------------
# Combine 1: all other params dfs
#---------------------------------
# Left-to-right chain of inner joins: (dynamut_df x dynamut2_df) x mcsm_na_df.
# NOTE(review): no `by` is given, so each join is keyed on whatever columns
# the two inputs share; consider by = "mutationinformation" once confirmed
# that all three dfs carry that column.
combined_dfs <- inner_join(
  inner_join(dynamut_df, dynamut2_df),
  mcsm_na_df
)
# Reduce("+", ncols_comb)

#-----------------------------------------
# Combine 2: combine1 result + merged_df2
#-----------------------------------------
# Columns present in both dfs would be duplicated by the merge, so drop them
# from combined_dfs -- except mutationinformation, which is the merge key.
drop_cols <- setdiff(
  intersect(names(combined_dfs), names(merged_df2)),
  "mutationinformation"
)

# drop = FALSE keeps the result a data frame even if only one column survives
combined_dfs_f <- combined_dfs[, !colnames(combined_dfs) %in% drop_cols, drop = FALSE]

# dimensions of both merge inputs, for eyeballing
nrow(combined_dfs_f); nrow(merged_df2)
ncol(combined_dfs_f); ncol(merged_df2)

#-----------------------------------------
# Combined merged_df2
#-----------------------------------------
# merge() keeps only rows whose mutationinformation occurs in both inputs
# (default all = FALSE); the row-count check below relies on that.
merged_df2_combined <- merge(merged_df2, combined_dfs_f, by = "mutationinformation")

# the key column appears once in the merged result, hence the -1
expected_ncols <- ncol(combined_dfs_f) + ncol(merged_df2) - 1

if (nrow(merged_df2_combined) == nrow(merged_df2) &&
    ncol(merged_df2_combined) == expected_ncols) {
  cat("\nPASS: merged_df2 combined with other parameters dfs."
      , "\nUse this for lineage distribution plots")
} else {
  cat("\nFAIL: merged_df2 didn't combine successfully with other parameters dfs")
  # Exit with a non-zero status so the failure is visible to the calling
  # shell/pipeline; a bare quit() would report success (status 0).
  quit(status = 1)
}

# intermediate dfs are no longer needed once the merge has been verified
rm(combined_dfs, combined_dfs_f)

#================================
# combined data
# short_df ps: ~ merged_df3
# TODO: later integrate properly
#================================
#-----------------------------------------
# Combined merged_df3
#-----------------------------------------
# One row per mutationinformation value: duplicated() flags every repeat
# after the first occurrence, and those flagged rows are dropped.
merged_df3_combined <- merged_df2_combined[
  !duplicated(merged_df2_combined[["mutationinformation"]]),
]
Loading…
Add table
Add a link
Reference in a new issue