updating lineage_country.R with different data slices

2020-09-15 13:14:33 +01:00 · 2020-09-15 13:14:33 +01:00 · 44d1f64e88
commit 44d1f64e88
parent 645827570f
1 changed files with 32 additions and 57 deletions
--- a/scripts/plotting/ggridges_lineage_country.R
+++ b/scripts/plotting/ggridges_lineage_country.R
@ -23,7 +23,7 @@ source("combining_dfs_plotting.R")
 # Data for plot
 #########################
 df = merged_df2
-df = merged_df2_comp 
+#df = merged_df2_comp 
 #========================
@ -32,10 +32,6 @@ df = merged_df2_comp
 # col = Lineage
 # fill = lineage
 #========================
 # Coerce lineage to a factor. The is.factor() calls before and after the
 # conversion only report the class check (visible when run interactively).
 is.factor(df$lineage)
 df$lineage = as.factor(df$lineage)
 is.factor(df$lineage)
 # Row counts per lineage level.
 table(df$lineage)
 # subset only lineages 1-4
@ -57,6 +53,35 @@ table(df_lin$lineage)
 # Continue with the lineage-subsetted data, then keep only countries that
 # contribute more than 100 rows and have a non-empty country code.
 df <- df_lin
 #%%%%%%%%%%%%%%%%%%%%%%%%%
 #%%%%%%%%%%%%%%%%%%%%%%%%
 # REASSIGNMENT
 df2 = df
 #%%%%%%%%%%%%%%%%%%%%%%%%
 # add_count() appends a per-country row count; the code below locates that
 # column by its name "n" and renames it to "count_country".
 df2 = df2%>%
  add_count(country_code)
 str(df2$country_code); str(df2$n)
 n = which(colnames(df2) == "n")
 colnames(df2)[n] = "count_country"
 # Preview how many rows pass the filter before applying it.
 # NOTE(review): this condition reads country_code from df while the count
 # comes from df2; df2 was derived from df just above, so the rows presumably
 # still align, but the subset() on the next line uses df2 for both -- confirm
 # and consider using df2 here as well.
 table(df2$count_country>100 & df$country_code!= "")
 # subset() drops rows where the condition is FALSE or NA.
 df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
 #%%%%%%%%%%%%%%%%%%%%%%%%
 # REASSIGNMENT
 df = df3
 #%%%%%%%%%%%%%%%%%%%%%%%%
 # Number of distinct non-NA ids: unique() keeps each id once and table()
 # omits NA by default, so the summed table is the distinct-id count.
 # NOTE(review): the variable name "sample" is also the name of a base R
 # function; a more specific name would be clearer.
 sample = sum(table(unique(df$id))); sample
 # Rows per country, and the total number of rows with a non-NA country code.
 table(df$country_code)
 tab = sum(table(df$country_code)); tab
 # View() opens the data viewer, so these two lines are for interactive use.
 View(table(df$country_code))
 # NOTE(review): t1 is not assigned anywhere in the visible part of the
 # script -- confirm it exists before this line, otherwise this call errors.
 View(t1)
 ############## begin plot
 g = ggplot(df, aes(x = lineage))
 g + geom_bar(aes(fill = lineage)) +
@ -112,7 +137,7 @@ g + geom_point(aes(col = lineage
 ### begin plot
 table(df$lineage)
-g = ggplot(df, aes(x = lineage
+g = ggplot(df_lin, aes(x = lineage
                   , y = duet_scaled))
 g + geom_point(aes(col = lineage
                   , size = or_mychisq)) +
@ -131,60 +156,10 @@ g + geom_point(aes(col = lineage
       , y = "DUET (PS)")
 #========================
-# Plot 4-6: Distributions
+# Plot 4-5: Distributions
 # ggridges
 #========================
 # Keep a character copy of country_code, then run a diagnostic pass over the
 # per-country counts before filtering df itself.
 df$country_code_symbols = as.character(df$country_code)
 foo = df
 # add_count() appends a per-country row count; the column is located by its
 # name "n" and renamed to "count_country".
 foo = foo%>%
  add_count(country_code)
 n = which(colnames(foo) == "n")
 colnames(foo)[n] = "count_country"
 table(foo$count_country)
 # cbind() of a character vector and a numeric vector yields a character
 # matrix, so both columns of check (V1 = code, V2 = count) start as text and
 # V2 has to be converted back to numeric.
 # NOTE(review): if as.data.frame() turns the columns into factors (the
 # default before R 4.0), as.numeric() returns level codes rather than the
 # counts -- confirm the R version, or convert via as.character() first.
 check = as.data.frame(cbind(foo$country_code_symbols, foo$count_country))
 str(check)
 check$V2 = as.numeric(check$V2)
 min(check$V2); max(check$V2)
 table(check$V2)
 # Rows from countries with more than 100 rows.
 check2 = subset(check, check$V2>100)
 # NOTE(review): this keeps only rows whose V1 is NA; the later V1 != ""
 # test is NA for those rows and subset() drops NA conditions, so check3 would
 # be empty as written. Possibly !is.na() was intended -- confirm.
 check2 = subset(check2, is.na(check2$V1))
 min(check2$V2); max(check2$V2)
 check3 = subset(check2, check2$V1 != "")
 table(check3$V1 != "")
 # subset df to countries with more than 100 samples
 # and a non-empty country code
 #%%%%%%%%%%%%%%%%%%%%%%%%
 # REASSIGNMENT
 df2 = df
 #%%%%%%%%%%%%%%%%%%%%%%%%
 df2 = df2%>%
  add_count(country_code)
 str(df2$country_code); str(df2$n)
 n = which(colnames(df2) == "n")
 colnames(df2)[n] = "count_country"
 # Preview how many rows pass the filter before applying it.
 # NOTE(review): country_code is read from df here but from df2 in the
 # subset() below; df2 was copied from df just above, so the rows presumably
 # align -- confirm and consider using df2 in both places.
 table(df2$count_country>100 & df$country_code!= "")
 # subset() drops rows where the condition is FALSE or NA.
 df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
 #%%%%%%%%%%%%%%%%%%%%%%%%
 # REASSIGNMENT
 df = df3
 #%%%%%%%%%%%%%%%%%%%%%%%%
 #==================================================
 my_ats = 15 # axis text size