From 9784bc1729dc0cd76594a1a38f92972785e30e97 Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Wed, 24 Feb 2021 09:56:36 +0000 Subject: [PATCH] updated count.py with indel and stop codon count --- scripts/count.py | 30 ++++++++++++++++++++++--- scripts/plotting/other_plots_combined.R | 14 +----------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/scripts/count.py b/scripts/count.py index 444af46..9121cda 100755 --- a/scripts/count.py +++ b/scripts/count.py @@ -276,13 +276,37 @@ foo_count = foo.loc[foo[dr_muts_col].str.contains('del', na = False, regex = Tru bar = meta_data[other_muts_col].value_counts() bar = bar.reset_index(name = 'values') -bar.columns = [other_muts_col, 'dr_muts_count'] #64 +bar.columns = [other_muts_col, 'other_muts_count'] #64 bar_count = bar.loc[bar[other_muts_col].str.contains('del', na = False, regex = True, case = False)] tot = len(foo_count) + len(bar_count) n_del = tot/len(meta_data) -n_del*100 +n_del*100 #0.6 +#============================ +foo2 = meta_data[dr_muts_col].value_counts() +foo2 = foo2.reset_index(name = 'values') +foo2.columns = [dr_muts_col, 'dr_muts_count'] #171 +foo2_count = foo2.loc[foo2[dr_muts_col].str.contains('ins', na = False, regex = True, case = False) ] -baz = meta_data.loc[meta_data[dr_muts_col].str.contains(nssnp_match, na = False, regex = True, case = False) | meta_data[other_muts_col].str.contains(nssnp_match, na = False, regex = True, case = False) ] +bar2 = meta_data[other_muts_col].value_counts() +bar2 = bar2.reset_index(name = 'values') +bar2.columns = [other_muts_col, 'pther_muts_count'] #64 +bar2_count = bar2.loc[bar2[other_muts_col].str.contains('ins', na = False, regex = True, case = False)] + +tot2 = len(foo2_count) + len(bar2_count) +n_ins = tot2/len(meta_data) +n_ins*100 #0.5 + +#========= +del_ins_tot = n_del*100 + n_ins*100 +#========== +# stop codons +baz_count = foo.loc[foo[dr_muts_col].str.contains('\*', na = False, regex = True, case = False) ] +baz_count2 = bar.loc[bar[other_muts_col].str.contains('\*', na = False, regex = True, case = False)] +tot3 = len(baz_count) + len(baz_count2) +n_stop = tot3/len(meta_data) +n_stop*100 #0.11 + +all_tot = n_del*100 + n_ins*100 + n_stop*100 #1.2 diff --git a/scripts/plotting/other_plots_combined.R b/scripts/plotting/other_plots_combined.R index ca0632b..d927808 100644 --- a/scripts/plotting/other_plots_combined.R +++ b/scripts/plotting/other_plots_combined.R @@ -131,7 +131,6 @@ p2 = ggplot(df_lf_foldx, aes(x = mutation_info p2 - ########################################################################## #============== # Plot 3: LIG @@ -248,21 +247,10 @@ print(Outplot_labelled) dev.off() - #--------- # plot 3: legend #--------- #svg(plot_point_legend, width = 6, height = 7) #OutPlot3 = legend #print(OutPlot3) -#dev.off() - - - - - - - - - - +#dev.off() \ No newline at end of file