playing with dm_om (other) plots data and graph on gid branch

This commit is contained in:
Tanushree Tunstall 2021-08-26 16:35:46 +01:00
parent 1e3670f935
commit e36e7736db
4 changed files with 502 additions and 410 deletions

View file

@ -25,8 +25,8 @@ source("../functions/bp_subcolours.R")
# ligand-distance settings
#====================
# Name of the ligand-distance column and the distance cutoff; both are
# handed to the plotting helpers as lig_dist_colname / lig_dist_cutoff.
LigDist_colname <- "ligand_distance"
LigDist_cutoff <- 10
#LigDist_colname = "ligand_distance"
#LigDist_cutoff = 10
#===========
# input
@ -54,10 +54,15 @@ pd_df = plotting_data(mcsm_df
, lig_dist_colname = LigDist_colname
, lig_dist_cutoff = LigDist_cutoff)
# Pull the individual elements out of pd_df (the result of plotting_data()).
my_df <- pd_df[[1]]
my_df_u <- pd_df[[2]] # this forms one of the input for combining_dfs_plotting()
my_df_u_lig <- pd_df[[3]]
dup_muts <- pd_df[[4]]

# Rounded largest / smallest value found in the ligand-distance column.
max_ang <- round(max(my_df_u[LigDist_colname]))
min_ang <- round(min(my_df_u[LigDist_colname]))

# Report the column in use and the observed distance range ("\u212b" is the
# angstrom sign).
cat(
  "\nLigand distance cut off, colname:", LigDist_colname,
  "\nThe max distance", gene, "structure df", ":", max_ang, "\u212b",
  "\nThe min distance", gene, "structure df", ":", min_ang, "\u212b"
)
#--------------------------------
# call: combining_dfs_plotting()
@ -81,14 +86,22 @@ all_plot_dfs = combining_dfs_plotting(my_df_u
, lig_dist_colname = LigDist_colname
, lig_dist_cutoff = LigDist_cutoff)
# Pull the individual elements out of all_plot_dfs (the result of
# combining_dfs_plotting()).
merged_df2 <- all_plot_dfs[[1]]
merged_df3 <- all_plot_dfs[[2]]
merged_df2_comp <- all_plot_dfs[[3]]
merged_df3_comp <- all_plot_dfs[[4]]
merged_df2_lig <- all_plot_dfs[[5]]
merged_df3_lig <- all_plot_dfs[[6]]
merged_df2_comp_lig <- all_plot_dfs[[7]]
merged_df3_comp_lig <- all_plot_dfs[[8]]
#======================================================================
# read other files
# Input paths are built from the drug and gene names.
infilename_dynamut <- paste0(
  "~/git/Data/", drug, "/output/dynamut_results/", gene,
  "_complex_dynamut_norm.csv"
)
infilename_dynamut2 <- paste0(
  "~/git/Data/", drug, "/output/dynamut_results/dynamut2/", gene,
  "_complex_dynamut2_norm.csv"
)
infilename_mcsm_na <- paste0(
  "~/git/Data/", drug, "/output/mcsm_na_results/", gene,
  "_complex_mcsm_na_norm.csv"
)

# Load each results file into its own object.
dynamut_df <- read.csv(file = infilename_dynamut)
dynamut2_df <- read.csv(file = infilename_dynamut2)
mcsm_na_df <- read.csv(file = infilename_mcsm_na)
####################################################################
# Data for subcols barplot (~heatmap)
@ -168,61 +181,6 @@ subcolsR_ps <- ColourPalleteMulti(subcols_df_ps, "duet_outcome", "my_grp_r")
print(paste0("Colour palette generated for my_grp: ", length(subcols_ps), " colours"))
print(paste0("Colour palette generated for my_grp_r: ", length(subcolsR_ps), " colours"))
#=======================
# Data for sub colours
# barplot: LIG
#=======================
# Subset the ligand data to the columns listed in cols_to_select and report
# how many unique positions it covers.
cat("\nNo. of cols to select:", length(cols_to_select))
subcols_df_lig <- merged_df3_lig[, cols_to_select]
cat("\nNo of unique positions for LIG:"
    , length(unique(subcols_df_lig$position)))

# ligand_outcome should be a factor: convert it when it is not, then tabulate.
if (is.factor(subcols_df_lig$ligand_outcome)) {
  cat("\nLigand_outcome is factor")
} else {
  cat("\nConverting ligand_outcome to factor")
  subcols_df_lig$ligand_outcome <- as.factor(subcols_df_lig$ligand_outcome)
}
table(subcols_df_lig$ligand_outcome)

# should be -1 and 1
min(subcols_df_lig$affinity_scaled)
max(subcols_df_lig$affinity_scaled)
# per-outcome extremes of the scaled affinity
tapply(subcols_df_lig$affinity_scaled, subcols_df_lig$ligand_outcome, min)
tapply(subcols_df_lig$affinity_scaled, subcols_df_lig$ligand_outcome, max)

# check unique values in normalised data
cat("\nNo. of unique values in affinity scaled, no rounding:"
    , length(unique(subcols_df_lig$affinity_scaled)))

# No rounding
my_grp_lig <- subcols_df_lig$affinity_scaled
length(my_grp_lig)

# Rounded variant, in case rounding is to be used
n <- 3
subcols_df_lig$affinity_scaledR <- round(subcols_df_lig$affinity_scaled, n)
# The message now names the column that is actually being counted
# (it previously said "duet scaled").
cat("\nNo. of unique values in affinity scaled", n, "places rounding:"
    , length(unique(subcols_df_lig$affinity_scaledR)))
my_grp_lig_r <- subcols_df_lig$affinity_scaledR # rounding

# Add grp cols: "<ligand_outcome>_<scaled affinity>", unrounded and rounded.
# paste0() has no `sep` argument, so the former `sep = ""` was only an extra
# empty string being concatenated; dropping it leaves the result unchanged.
subcols_df_lig$group_lig <- paste0(subcols_df_lig$ligand_outcome, "_", my_grp_lig)
subcols_df_lig$group_ligR <- paste0(subcols_df_lig$ligand_outcome, "_", my_grp_lig_r)

# Call the function to create the palette based on the group defined above
# NOTE(review): the third argument names the free-standing vectors
# my_grp_lig / my_grp_lig_r rather than the group_lig / group_ligR columns
# added above - confirm against ColourPalleteMulti() that this is intended.
subcols_lig <- ColourPalleteMulti(subcols_df_lig, "ligand_outcome", "my_grp_lig")
subcolsR_lig <- ColourPalleteMulti(subcols_df_lig, "ligand_outcome", "my_grp_lig_r")

print(paste0("Colour palette generated for my_grp: ", length(subcols_lig), " colours"))
print(paste0("Colour palette generated for my_grp_r: ", length(subcolsR_lig), " colours"))
####################################################################
# Data for logoplots
####################################################################
@ -472,113 +430,6 @@ if (nrow(corr_ps_df3) == nrow(merged_df3) && nrow(merged_df3_comp) == check1) {
, "\nGot: ", check1)
}
#=================================
# Data for Correlation plots: LIG
#=================================
cat("\n=========================================="
    , "\nCORR PLOTS data: LIG"
    , "\n===========================================")

df_lig <- merged_df2_lig
table(df_lig$ligand_outcome)

#--------------------
# adding log cols : NEW UNCOMMENT
#--------------------
# NOTE(review): with these lines commented out, log10_or_mychisq and
# neglog_pval_fisher must already be present in df_lig for the column
# selection below to succeed - confirm upstream.
#df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
#df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)

##df_lig$log10_or_kin = log10(df_lig$or_kin)
##df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)

#----------------------------
# columns for corr plots: LIG
#----------------------------
# subset data to generate pairwise correlations
cols_to_select <- c("mutationinformation"
                    , "affinity_scaled"
                    #, "mutation_info_labels"
                    , "asa"
                    , "rsa"
                    , "rd_values"
                    , "kd_values"
                    , "log10_or_mychisq"
                    , "neglog_pval_fisher"
                    ##, "or_kin"
                    ##, "neglog_pwald_kin"
                    , "af"
                    ##, "af_kin"
                    , "ligand_outcome"
                    , drug)

corr_data_lig <- df_lig[, cols_to_select]
dim(corr_data_lig)

#--------------------------------------
# assign nice colnames (for display)
#--------------------------------------
# Must stay in the same order (and length) as cols_to_select above.
my_corr_colnames <- c("Mutation"
                      , "Ligand Affinity"
                      #, "Mutation class"
                      , "ASA"
                      , "RSA"
                      , "RD"
                      , "KD"
                      , "Log (OR)"
                      , "-Log (P)"
                      ##, "Adjusted (OR)"
                      ##, "-Log (P wald)"
                      , "MAF"
                      ##, "MAF_kin"
                      , "ligand_outcome"
                      , drug)

length(my_corr_colnames)

colnames(corr_data_lig)
colnames(corr_data_lig) <- my_corr_colnames
colnames(corr_data_lig)

# Column index range used for the subset below.
start <- 1
end <- which(colnames(corr_data_lig) == drug); end # should be the last column
offset <- 1

#=============================
# Corr data for plots: LIG
# big_df lig: ~ merged_df2_lig
#==============================
#corr_lig_df2 = corr_data_lig[start:(end-offset)] # without drug
corr_lig_df2 <- corr_data_lig[start:end]
head(corr_lig_df2)

#=============================
# Corr data for plots: LIG
# short_df lig: ~ merged_df3_lig
#==============================
# Keep the first row for each mutation.
corr_lig_df3 <- corr_lig_df2[!duplicated(corr_lig_df2$Mutation), ]

# Number of rows with a non-missing Log (OR) value.
na_or_lig <- sum(is.na(corr_lig_df3$`Log (OR)`))
check1_lig <- nrow(corr_lig_df3) - na_or_lig

# Sanity check: row counts must agree with the merged LIG data.
if (nrow(corr_lig_df3) == nrow(merged_df3_lig) && nrow(merged_df3_comp_lig) == check1_lig) {
  cat("\nPASS: No. of rows for corr_lig_df3 match"
      , "\nPASS: No. of OR values checked: ", check1_lig)
} else {
  # Report the object that was actually checked; the earlier name
  # corr_ps_df3_lig is not defined, so the FAIL message itself would error.
  cat("\nFAIL: Numbers mismatch:"
      , "\nExpected nrows: ", nrow(merged_df3_lig)
      , "\nGot: ", nrow(corr_lig_df3)
      , "\nExpected OR values: ", nrow(merged_df3_comp_lig)
      , "\nGot: ", check1_lig)
}

# remove unnecessary columns
identical(corr_data_lig, corr_lig_df2)
identical(corr_data_ps, corr_ps_df2)

#rm(df_ps, df_lig, corr_data_ps, corr_data_lig)
########################################################################
# End of script
########################################################################