added maf column in appendix_tables

This commit is contained in:
Tanushree Tunstall 2022-08-14 12:18:12 +01:00
parent 6f354ab390
commit 2acea43bcf
3 changed files with 84 additions and 283 deletions

View file

@ -23,31 +23,82 @@ geneL_normal = c("pnca")
geneL_na = c("gid", "rpob") geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob")
# LigDist_colname # from globals used #from plotting_globals()
# ppi2Dist_colname #from globals used LigDist_colname
# naDist_colname #from globals used ppi2Dist_colname
naDist_colname
delta_symbol #delta_symbol = "\u0394"; delta_symbol
angstroms_symbol
#===========
# Data used
#===========
df3 = merged_df3 df3 = merged_df3
# Add adjusted P-values cols_to_output = c("mutationinformation"
df3$p_adj_fdr = p.adjust(df3$pval_fisher, method = "fdr") , "position"
df3$signif_fdr = df3$p_adj_fdr , affinity_dist_colnames[1]
df3 = dplyr::mutate(df3 , "ligand_affinity_change"
, "ligand_outcome"
, "mmcsm_lig"
, "mmcsm_lig_outcome"
, affinity_dist_colnames[2]
, "mcsm_ppi2_affinity"
, "mcsm_ppi2_outcome"
, "maf"
, "or_mychisq"
, "pval_fisher")
cols_to_output
df3_output = df3[, cols_to_output]
colnames(df3_output)
#===============================================
# Add COLS and rounding: adjusted P-values + MAF
#==============================================
#-----------------------------
# adjusted P-values
#-----------------------------
# add cols: p_adj_fdr and signif_fdr
df3_output$p_adj_fdr = p.adjust(df3_output$pval_fisher, method = "fdr")
df3_output$signif_fdr = df3_output$p_adj_fdr
df3_output = dplyr::mutate(df3_output
, signif_fdr = case_when(signif_fdr == 0.05 ~ "." , signif_fdr = case_when(signif_fdr == 0.05 ~ "."
, signif_fdr <=0.0001 ~ '****' , signif_fdr <=0.0001 ~ '****'
, signif_fdr <=0.001 ~ '***' , signif_fdr <=0.001 ~ '***'
, signif_fdr <=0.01 ~ '**' , signif_fdr <=0.01 ~ '**'
, signif_fdr <0.05 ~ '*' , signif_fdr <0.05 ~ '*'
, TRUE ~ 'ns')) , TRUE ~ 'ns'))
# rounding
df3_output$or_mychisq = round(df3_output$or_mychisq,2)
df3_output$p_adj_fdr = round(df3_output$p_adj_fdr,2)
head(df3_output)
#----------
# MAF (%)
#----------
# add col maf_percent
df3_output$maf_percent = df3_output$maf*100
# rounding
df3_output$maf_percent = round(df3_output$maf_percent,2)
head(df3_output$af); head(df3_output$maf);head(df3_output$maf_percent)
#----------
# P-value
#----------
df3_output$pval_fisher = round(df3_output$pval_fisher,2)
class(df3_output)
head(df3_output)
#################################### ####################################
# Appendix: ligand affinity # Appendix: ligand affinity
#################################### ####################################
df_lig = df3[df3[[LigDist_colname]]<DistCutOff,] df_lig = df3_output[df3_output[[LigDist_colname]]<DistCutOff,]
cols_to_output_lig = c("mutationinformation" cols_to_output_lig = c("mutationinformation"
, "position" , "position"
@ -56,20 +107,20 @@ cols_to_output_lig = c("mutationinformation"
, "ligand_outcome" , "ligand_outcome"
, "mmcsm_lig" , "mmcsm_lig"
, "mmcsm_lig_outcome" , "mmcsm_lig_outcome"
, "maf_percent"
, "or_mychisq" , "or_mychisq"
, "pval_fisher"
, "p_adj_fdr" , "p_adj_fdr"
, "signif_fdr") , "signif_fdr")
# select cols
# counting
Out_df_lig = df_lig[, cols_to_output_lig] Out_df_lig = df_lig[, cols_to_output_lig]
Out_df_ligS = Out_df_lig[order(Out_df_lig$or_mychisq, decreasing = T), ]
delta_symbol = "\u0394"; delta_symbol # sort df by OR and then MAF: highest OR and highest MAF
#Out_df_ligS1 = Out_df_lig[order(Out_df_lig$or_mychisq, decreasing = T), ]
Out_df_ligS = Out_df_lig[order(-Out_df_lig$or_mychisq, Out_df_lig$maf_percent), ]
Out_df_ligS$or_mychisq = round(Out_df_ligS$or_mychisq,2) #head(Out_df_ligS1); tail(Out_df_ligS1)
Out_df_ligS$p_adj_fdr = round(Out_df_ligS$p_adj_fdr,2) head(Out_df_ligS); tail(Out_df_ligS)
Out_df_ligS
colsNames_to_output_lig = c("Mutation" colsNames_to_output_lig = c("Mutation"
, "position" , "position"
@ -78,12 +129,15 @@ colsNames_to_output_lig = c("Mutation"
, "mCSM ligand_outcome" , "mCSM ligand_outcome"
, "mmCSM-ligand affinity" , "mmCSM-ligand affinity"
, "mmCSM ligand_outcome" , "mmCSM ligand_outcome"
, paste0("MAF ","(%)")
, "Odds Ratio" , "Odds Ratio"
, "Adj P-value" , "P-value"
, "Adjusted P-value"
, "P-value significance") , "P-value significance")
colnames(Out_df_ligS) = colsNames_to_output_lig colnames(Out_df_ligS) = colsNames_to_output_lig
Out_df_ligS head(Out_df_ligS)
#-------------------- #--------------------
# write output file: KS test within grpup # write output file: KS test within grpup
#---------------------- #----------------------
@ -99,27 +153,28 @@ write.csv(Out_df_ligS, Out_ligT, row.names = FALSE)
# Appendix: PPi2 affinity # Appendix: PPi2 affinity
#################################### ####################################
df_ppi2 = df3[df3[[ppi2Dist_colname]]<DistCutOff,] # Filtered data
df_ppi2 = df3_output[df3_output[[ppi2Dist_colname]]<DistCutOff,]
# select cols
cols_to_output_ppi2 = c("mutationinformation" cols_to_output_ppi2 = c("mutationinformation"
, "position" , "position"
, ppi2Dist_colname , ppi2Dist_colname
, "mcsm_ppi2_affinity" , "mcsm_ppi2_affinity"
, "mcsm_ppi2_outcome" , "mcsm_ppi2_outcome"
, "maf_percent"
, "or_mychisq" , "or_mychisq"
, "pval_fisher"
, "p_adj_fdr" , "p_adj_fdr"
, "signif_fdr") , "signif_fdr")
# extract output cols # extract output cols
Out_df_ppi2 = df_ppi2[, cols_to_output_ppi2] Out_df_ppi2 = df_ppi2[, cols_to_output_ppi2]
Out_df_ppi2S = Out_df_ppi2[order(Out_df_ppi2$or_mychisq, decreasing = T), ]
delta_symbol = "\u0394"; delta_symbol # sort df by OR and then MAF: Highest OR and Highest MAF
#Out_df_ppi2S = Out_df_ppi2[order(Out_df_ppi2$or_mychisq, decreasing = T), ]
Out_df_ppi2S = Out_df_ppi2[order(-Out_df_ppi2$or_mychisq, Out_df_ppi2$maf_percent), ]
Out_df_ppi2S$or_mychisq = round(Out_df_ppi2S$or_mychisq,2)
Out_df_ppi2S$p_adj_fdr = round(Out_df_ppi2S$p_adj_fdr,2)
Out_df_ppi2S
colsNames_to_output_ppi2 = c("Mutation" colsNames_to_output_ppi2 = c("Mutation"
, "position" , "position"
@ -127,7 +182,8 @@ colsNames_to_output_ppi2 = c("Mutation"
, paste0("mCSM-PPI2 (", delta_symbol, ")") , paste0("mCSM-PPI2 (", delta_symbol, ")")
, "mCSM-PPI2 outcome" , "mCSM-PPI2 outcome"
, "Odds Ratio" , "Odds Ratio"
, "Adj P-value" , "P-value"
, "Adjusted P-value"
, "P-value significance") , "P-value significance")
colnames(Out_df_ppi2S) = colsNames_to_output_ppi2 colnames(Out_df_ppi2S) = colsNames_to_output_ppi2

View file

@ -1,156 +0,0 @@
"colnames(merged_df3)"
"mutationinformation"
"id"
"sample"
"lineage"
"sublineage"
"country_code"
"drtype"
"ethambutol"
"mutation"
"drug_name"
"mutation_info"
"mutation_info_orig"
"mutation_info_v1"
"wild_type"
"mutant_type"
"position"
"Mut"
"Mut_copy"
"index_orig"
"index_orig_copy"
"snp_frequency"
"pos_count"
"wt_prop_water"
"mut_prop_water"
"wt_prop_polarity"
"mut_prop_polarity"
"wt_calcprop"
"mut_calcprop"
"total_id_ucount"
"maf"
"drtype_numeric"
"drtype_all_vals"
"drtype_all_names"
"drtype_multimode"
"drtype_mode"
"drtype_max"
"mutation_info_labels"
"dm_om_numeric"
"dm_om_numeric_orig"
"dst"
"dst_multimode"
"dst_mode"
"mutation_info_labels_v1"
"mutation_info_labels_orig"
"lineage_list_all"
"lineage_count_all"
"lineage_count_unique"
"lineage_list_unique"
"lineage_multimode"
"chain"
"ligand_id"
"ligand_distance"
"duet_stability_change"
"duet_outcome"
"ligand_affinity_change"
"ligand_outcome"
"duet_scaled"
"affinity_scaled"
"wild_pos"
"wild_chain_pos"
"ddg_foldx"
"contacts"
"electro_rr"
"electro_mm"
"electro_sm"
"electro_ss"
"disulfide_rr"
"disulfide_mm"
"disulfide_sm"
"disulfide_ss"
"hbonds_rr"
"hbonds_mm"
"hbonds_sm"
"hbonds_ss"
"partcov_rr"
"partcov_mm"
"partcov_sm"
"partcov_ss"
"vdwclashes_rr"
"vdwclashes_mm"
"vdwclashes_sm"
"vdwclashes_ss"
"volumetric_rr"
"volumetric_mm"
"volumetric_sm"
"volumetric_ss"
"foldx_scaled"
"foldx_outcome"
"deepddg"
"deepddg_outcome"
"deepddg_scaled"
"asa"
"rsa"
"ss"
"ss_class"
"kd_values"
"rd_values"
"wt_3upper"
"consurf_score"
"consurf_scaled"
"consurf_colour"
"consurf_colour_rev"
"consurf_ci_upper"
"consurf_ci_lower"
"consurf_ci_colour"
"consurf_msa_data"
"consurf_aa_variety"
"snap2_score"
"snap2_scaled"
"snap2_accuracy_pc"
"snap2_outcome"
"af"
"beta_logistic"
"or_logistic"
"pval_logistic"
"se_logistic"
"zval_logistic"
"ci_low_logistic"
"ci_hi_logistic"
"or_mychisq"
"log10_or_mychisq"
"or_fisher"
"pval_fisher"
"neglog_pval_fisher"
"ci_low_fisher"
"ci_hi_fisher"
"est_chisq"
"pval_chisq"
"ddg_dynamut2"
"ddg_dynamut2_scaled"
"ddg_dynamut2_outcome"
"mcsm_ppi2_affinity"
"mcsm_ppi2_scaled"
"mcsm_ppi2_outcome"
"interface_dist"
"mut_3upper"
"seq_offset4pdb"
"provean_score"
"provean_outcome"
"provean_scaled"
"mmcsm_lig"
"mmcsm_lig_scaled"
"mmcsm_lig_outcome"
"gene_name"
"pdb_file"
"lineage_labels"
"consurf_outcome"
"sensitivity"
"foldx_scaled_signC"
"avg_stability"
"avg_stability_outcome"
"avg_stability_scaled"
"avg_lig_affinity"
"avg_lig_affinity_outcome"
"avg_lig_affinity_scaled"
1 colnames(merged_df3)
2 mutationinformation
3 id
4 sample
5 lineage
6 sublineage
7 country_code
8 drtype
9 ethambutol
10 mutation
11 drug_name
12 mutation_info
13 mutation_info_orig
14 mutation_info_v1
15 wild_type
16 mutant_type
17 position
18 Mut
19 Mut_copy
20 index_orig
21 index_orig_copy
22 snp_frequency
23 pos_count
24 wt_prop_water
25 mut_prop_water
26 wt_prop_polarity
27 mut_prop_polarity
28 wt_calcprop
29 mut_calcprop
30 total_id_ucount
31 maf
32 drtype_numeric
33 drtype_all_vals
34 drtype_all_names
35 drtype_multimode
36 drtype_mode
37 drtype_max
38 mutation_info_labels
39 dm_om_numeric
40 dm_om_numeric_orig
41 dst
42 dst_multimode
43 dst_mode
44 mutation_info_labels_v1
45 mutation_info_labels_orig
46 lineage_list_all
47 lineage_count_all
48 lineage_count_unique
49 lineage_list_unique
50 lineage_multimode
51 chain
52 ligand_id
53 ligand_distance
54 duet_stability_change
55 duet_outcome
56 ligand_affinity_change
57 ligand_outcome
58 duet_scaled
59 affinity_scaled
60 wild_pos
61 wild_chain_pos
62 ddg_foldx
63 contacts
64 electro_rr
65 electro_mm
66 electro_sm
67 electro_ss
68 disulfide_rr
69 disulfide_mm
70 disulfide_sm
71 disulfide_ss
72 hbonds_rr
73 hbonds_mm
74 hbonds_sm
75 hbonds_ss
76 partcov_rr
77 partcov_mm
78 partcov_sm
79 partcov_ss
80 vdwclashes_rr
81 vdwclashes_mm
82 vdwclashes_sm
83 vdwclashes_ss
84 volumetric_rr
85 volumetric_mm
86 volumetric_sm
87 volumetric_ss
88 foldx_scaled
89 foldx_outcome
90 deepddg
91 deepddg_outcome
92 deepddg_scaled
93 asa
94 rsa
95 ss
96 ss_class
97 kd_values
98 rd_values
99 wt_3upper
100 consurf_score
101 consurf_scaled
102 consurf_colour
103 consurf_colour_rev
104 consurf_ci_upper
105 consurf_ci_lower
106 consurf_ci_colour
107 consurf_msa_data
108 consurf_aa_variety
109 snap2_score
110 snap2_scaled
111 snap2_accuracy_pc
112 snap2_outcome
113 af
114 beta_logistic
115 or_logistic
116 pval_logistic
117 se_logistic
118 zval_logistic
119 ci_low_logistic
120 ci_hi_logistic
121 or_mychisq
122 log10_or_mychisq
123 or_fisher
124 pval_fisher
125 neglog_pval_fisher
126 ci_low_fisher
127 ci_hi_fisher
128 est_chisq
129 pval_chisq
130 ddg_dynamut2
131 ddg_dynamut2_scaled
132 ddg_dynamut2_outcome
133 mcsm_ppi2_affinity
134 mcsm_ppi2_scaled
135 mcsm_ppi2_outcome
136 interface_dist
137 mut_3upper
138 seq_offset4pdb
139 provean_score
140 provean_outcome
141 provean_scaled
142 mmcsm_lig
143 mmcsm_lig_scaled
144 mmcsm_lig_outcome
145 gene_name
146 pdb_file
147 lineage_labels
148 consurf_outcome
149 sensitivity
150 foldx_scaled_signC
151 avg_stability
152 avg_stability_outcome
153 avg_stability_scaled
154 avg_lig_affinity
155 avg_lig_affinity_outcome
156 avg_lig_affinity_scaled

View file

@ -1,99 +0,0 @@
"mutationinformation"
"id"
"lineage"
"drtype"
drug
"mutation"
"drug_name"
"mutation_info"
#"mutation_info_orig"
#"mutation_info_v1"
#"wild_type"
#"mutant_type"
#"position"
#"Mut"
"snp_frequency"
"pos_count"
"total_id_ucount"
"maf"
"drtype_mode"
"drtype_max"
"mutation_info_labels"
"dst"
"dst_mode"
#"mutation_info_labels_v1"
#"mutation_info_labels_orig"
"lineage_count_all"
"lineage_count_unique"
"chain"
"ligand_id"
LigDist_colname
"duet_stability_change"
"duet_outcome"
"ligand_affinity_change"
"ligand_outcome"
"duet_scaled"
"affinity_scaled"
"wild_pos"
"wild_chain_pos"
"ddg_foldx"
"foldx_scaled"
"foldx_outcome"
"deepddg"
"deepddg_outcome"
"deepddg_scaled"
"asa"
"rsa"
"ss"
"ss_class"
"kd_values"
"rd_values"
"wt_3upper"
"consurf_score"
"consurf_scaled"
"consurf_colour"
"consurf_colour_rev"
"consurf_ci_upper"
"consurf_ci_lower"
"consurf_ci_colour"
"consurf_msa_data"
"consurf_aa_variety"
"snap2_score"
"snap2_scaled"
"snap2_accuracy_pc"
"snap2_outcome"
"af"
"or_logistic"
"pval_logistic"
"or_mychisq"
"log10_or_mychisq"
"or_fisher"
"pval_fisher"
"neglog_pval_fisher"
"ddg_dynamut2"
"ddg_dynamut2_scaled"
"ddg_dynamut2_outcome"
"mcsm_ppi2_affinity"
"mcsm_ppi2_scaled"
"mcsm_ppi2_outcome"
ppi2Dist_colname
"mut_3upper"
"seq_offset4pdb"
"provean_score"
"provean_outcome"
"provean_scaled"
"mmcsm_lig"
"mmcsm_lig_scaled"
"mmcsm_lig_outcome"
"gene_name"
"pdb_file"
"lineage_labels"
"consurf_outcome"
"sensitivity"
"foldx_scaled_signC"
"avg_stability"
"avg_stability_outcome"
"avg_stability_scaled"
"avg_lig_affinity"
"avg_lig_affinity_outcome"
"avg_lig_affinity_scaled"
Can't render this file because it contains an unexpected character in line 9 and column 2.