updating lineage_country.R with different data slices
This commit is contained in:
parent
645827570f
commit
44d1f64e88
1 changed file with 32 additions and 57 deletions
|
@ -23,7 +23,7 @@ source("combining_dfs_plotting.R")
|
|||
# Data for plot
|
||||
#########################
|
||||
df = merged_df2
|
||||
df = merged_df2_comp
|
||||
#df = merged_df2_comp
|
||||
|
||||
|
||||
#========================
|
||||
|
@ -32,10 +32,6 @@ df = merged_df2_comp
|
|||
# col = Lineage
|
||||
# fill = lineage
|
||||
#========================
|
||||
is.factor(df$lineage)
|
||||
df$lineage = as.factor(df$lineage)
|
||||
is.factor(df$lineage)
|
||||
|
||||
table(df$lineage)
|
||||
|
||||
# subset only lineages 1-4
|
||||
|
@ -57,6 +53,35 @@ table(df_lin$lineage)
|
|||
df <- df_lin
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
# REASSIGNMENT
|
||||
df2 = df
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
df2 = df2%>%
|
||||
add_count(country_code)
|
||||
|
||||
str(df2$country_code); str(df2$n)
|
||||
|
||||
n = which(colnames(df2) == "n")
|
||||
colnames(df2)[n] = "count_country"
|
||||
|
||||
table(df2$count_country>100 & df$country_code!= "")
|
||||
df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
|
||||
|
||||
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
# REASSIGNMENT
|
||||
df = df3
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
sample = sum(table(unique(df$id))); sample
|
||||
table(df$country_code)
|
||||
tab = sum(table(df$country_code)); tab
|
||||
|
||||
|
||||
View(table(df$country_code))
|
||||
View(t1)
|
||||
|
||||
############## begin plot
|
||||
g = ggplot(df, aes(x = lineage))
|
||||
g + geom_bar(aes(fill = lineage)) +
|
||||
|
@ -112,7 +137,7 @@ g + geom_point(aes(col = lineage
|
|||
### begin plot
|
||||
table(df$lineage)
|
||||
|
||||
g = ggplot(df, aes(x = lineage
|
||||
g = ggplot(df_lin, aes(x = lineage
|
||||
, y = duet_scaled))
|
||||
g + geom_point(aes(col = lineage
|
||||
, size = or_mychisq)) +
|
||||
|
@ -131,60 +156,10 @@ g + geom_point(aes(col = lineage
|
|||
, y = "DUET (PS)")
|
||||
|
||||
#========================
|
||||
# Plot 4-6: Distributions
|
||||
# Plot 4-5: Distributions
|
||||
# ggridges
|
||||
#========================
|
||||
|
||||
df$country_code_symbols = as.character(df$country_code)
|
||||
foo = df
|
||||
|
||||
foo = foo%>%
|
||||
add_count(country_code)
|
||||
|
||||
n = which(colnames(foo) == "n")
|
||||
colnames(foo)[n] = "count_country"
|
||||
|
||||
|
||||
table(foo$count_country)
|
||||
|
||||
|
||||
check = as.data.frame(cbind(foo$country_code_symbols, foo$count_country))
|
||||
str(check)
|
||||
check$V2 = as.numeric(check$V2)
|
||||
min(check$V2); max(check$V2)
|
||||
table(check$V2)
|
||||
|
||||
check2 = subset(check, check$V2>100)
|
||||
check2 = subset(check2, is.na(check2$V1))
|
||||
min(check2$V2); max(check2$V2)
|
||||
|
||||
check3 = subset(check2, check2$V1 != "")
|
||||
table(check3$V1 != "")
|
||||
|
||||
|
||||
# subset df with country containing >100 samples
|
||||
# and no missing (empty) country code
|
||||
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
# REASSIGNMENT
|
||||
df2 = df
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
df2 = df2%>%
|
||||
add_count(country_code)
|
||||
|
||||
str(df2$country_code); str(df2$n)
|
||||
|
||||
n = which(colnames(df2) == "n")
|
||||
colnames(df2)[n] = "count_country"
|
||||
|
||||
table(df2$count_country>100 & df$country_code!= "")
|
||||
df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
|
||||
|
||||
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
# REASSIGNMENT
|
||||
df = df3
|
||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#==================================================
|
||||
my_ats = 15 # axis text size
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue