diff --git a/scripts/plotting/ggridges_lineage_country.R b/scripts/plotting/ggridges_lineage_country.R index a63cdd9..cc10c37 100644 --- a/scripts/plotting/ggridges_lineage_country.R +++ b/scripts/plotting/ggridges_lineage_country.R @@ -23,7 +23,7 @@ source("combining_dfs_plotting.R") # Data for plot ######################### df = merged_df2 -df = merged_df2_comp +#df = merged_df2_comp #======================== @@ -32,10 +32,6 @@ df = merged_df2_comp # col = Lineage # fill = lineage #======================== -is.factor(df$lineage) -df$lineage = as.factor(df$lineage) -is.factor(df$lineage) - table(df$lineage) # subset only lineages1-4 @@ -57,6 +53,35 @@ table(df_lin$lineage) df <- df_lin #%%%%%%%%%%%%%%%%%%%%%%%%% +#%%%%%%%%%%%%%%%%%%%%%%%% +# REASSIGNMENT +df2 = df +#%%%%%%%%%%%%%%%%%%%%%%%% +df2 = df2%>% + add_count(country_code) + +str(df2$country_code); str(df2$n) + +n = which(colnames(df2) == "n") +colnames(df2)[n] = "count_country" + +table(df2$count_country>100 & df$country_code!= "") +df3 = subset(df2, df2$count_country>100 & df2$country_code != "") + + +#%%%%%%%%%%%%%%%%%%%%%%%% +# REASSIGNMENT +df = df3 +#%%%%%%%%%%%%%%%%%%%%%%%% + +sample = sum(table(unique(df$id))); sample +table(df$country_code) +tab = sum(table(df$country_code)); tab + + +View(table(df$country_code)) +View(t1) + ############## begin plot g = ggplot(df, aes(x = lineage)) g + geom_bar(aes(fill = lineage)) + @@ -112,7 +137,7 @@ g + geom_point(aes(col = lineage ### begin plot table(df$lineage) -g = ggplot(df, aes(x = lineage +g = ggplot(df_lin, aes(x = lineage , y = duet_scaled)) g + geom_point(aes(col = lineage , size = or_mychisq)) + @@ -131,60 +156,10 @@ g + geom_point(aes(col = lineage , y = "DUET (PS)") #======================== -# Plot 4-6: Distributions +# Plot 4-5: Distributions # ggrdiges #======================== -df$country_code_symbols = as.character(df$country_code) -foo = df - -foo = foo%>% - add_count(country_code) - -n = which(colnames(foo) == "n") -colnames(foo)[n] = "count_country" - - -table(foo$count_country) - - -check = as.data.frame(cbind(foo$country_code_symbols, foo$count_country)) -str(check) -check$V2 = as.numeric(check$V2) -min(check$V2); max(check$V2) -table(check$V2) - -check2 = subset(check, check$V2>100) -check2 = subset(check2, is.na(check2$V1)) -min(check2$V2); max(check2$V2) - -check3 = subset(check2, check2$V1 != "") -table(check3$V1 != "") - - -# subset df with country containing >100 samples -# and no missing - -#%%%%%%%%%%%%%%%%%%%%%%%% -# REASSIGNMENT -df2 = df -#%%%%%%%%%%%%%%%%%%%%%%%% -df2 = df2%>% - add_count(country_code) - -str(df2$country_code); str(df2$n) - -n = which(colnames(df2) == "n") -colnames(df2)[n] = "count_country" - -table(df2$count_country>100 & df$country_code!= "") -df3 = subset(df2, df2$count_country>100 & df2$country_code != "") - - -#%%%%%%%%%%%%%%%%%%%%%%%% -# REASSIGNMENT -df = df3 -#%%%%%%%%%%%%%%%%%%%%%%%% #================================================== my_ats = 15 # axis text size