updating ggridges_lineage_country.R with different data slices

2020-09-15 13:14:33 +01:00 · 2020-09-15 13:14:33 +01:00 · 44d1f64e88
commit 44d1f64e88
parent 645827570f
1 changed file with 32 additions and 57 deletions
--- a/scripts/plotting/ggridges_lineage_country.R
+++ b/scripts/plotting/ggridges_lineage_country.R
@@ -23,7 +23,7 @@ source("combining_dfs_plotting.R")
 # Data for plot
 #########################
 df = merged_df2
-df = merged_df2_comp 
+#df = merged_df2_comp 


 #========================
@@ -32,10 +32,6 @@ df = merged_df2_comp
 # col = Lineage
 # fill = lineage
 #========================
-is.factor(df$lineage)
-df$lineage = as.factor(df$lineage)
-is.factor(df$lineage)
-
 table(df$lineage)

 # subset only lineages1-4
@@ -57,6 +53,35 @@ table(df_lin$lineage)
 df <- df_lin
 #%%%%%%%%%%%%%%%%%%%%%%%%%

+#%%%%%%%%%%%%%%%%%%%%%%%%
+# REASSIGNMENT
+df2 = df
+#%%%%%%%%%%%%%%%%%%%%%%%%
+df2 = df2%>%
+  add_count(country_code)
+
+str(df2$country_code); str(df2$n)
+
+n = which(colnames(df2) == "n")
+colnames(df2)[n] = "count_country"
+
+table(df2$count_country>100 & df$country_code!= "")
+df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
+
+
+#%%%%%%%%%%%%%%%%%%%%%%%%
+# REASSIGNMENT
+df = df3
+#%%%%%%%%%%%%%%%%%%%%%%%%
+
+sample = sum(table(unique(df$id))); sample
+table(df$country_code)
+tab = sum(table(df$country_code)); tab
+
+
+View(table(df$country_code))
+View(t1)
+
 ############## begin plot
 g = ggplot(df, aes(x = lineage))
 g + geom_bar(aes(fill = lineage)) +
@@ -112,7 +137,7 @@ g + geom_point(aes(col = lineage
 ### begin plot
 table(df$lineage)

-g = ggplot(df, aes(x = lineage
+g = ggplot(df_lin, aes(x = lineage
                   , y = duet_scaled))
 g + geom_point(aes(col = lineage
                   , size = or_mychisq)) +
@@ -131,60 +156,10 @@ g + geom_point(aes(col = lineage
       , y = "DUET (PS)")

 #========================
-# Plot 4-6: Distributions
+# Plot 4-5: Distributions
 # ggrdiges
 #========================

-df$country_code_symbols = as.character(df$country_code)
-foo = df
-
-foo = foo%>%
-  add_count(country_code)
-
-n = which(colnames(foo) == "n")
-colnames(foo)[n] = "count_country"
-
-
-table(foo$count_country)
-
-
-check = as.data.frame(cbind(foo$country_code_symbols, foo$count_country))
-str(check)
-check$V2 = as.numeric(check$V2)
-min(check$V2); max(check$V2)
-table(check$V2)
-
-check2 = subset(check, check$V2>100)
-check2 = subset(check2, is.na(check2$V1))
-min(check2$V2); max(check2$V2)
-
-check3 = subset(check2, check2$V1 != "")
-table(check3$V1 != "")
-
-
-# subset df with country containing >100 samples
-# and no missing 
-
-#%%%%%%%%%%%%%%%%%%%%%%%%
-# REASSIGNMENT
-df2 = df
-#%%%%%%%%%%%%%%%%%%%%%%%%
-df2 = df2%>%
-  add_count(country_code)
-
-str(df2$country_code); str(df2$n)
-
-n = which(colnames(df2) == "n")
-colnames(df2)[n] = "count_country"
-
-table(df2$count_country>100 & df$country_code!= "")
-df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
-
-
-#%%%%%%%%%%%%%%%%%%%%%%%%
-# REASSIGNMENT
-df = df3
-#%%%%%%%%%%%%%%%%%%%%%%%%

 #==================================================
 my_ats = 15 # axis text size