updating lineage_country.R with different data slices
This commit is contained in:
parent
645827570f
commit
44d1f64e88
1 changed file with 32 additions and 57 deletions
|
@ -23,7 +23,7 @@ source("combining_dfs_plotting.R")
|
||||||
# Data for plot
|
# Data for plot
|
||||||
#########################
|
#########################
|
||||||
df = merged_df2
|
df = merged_df2
|
||||||
df = merged_df2_comp
|
#df = merged_df2_comp
|
||||||
|
|
||||||
|
|
||||||
#========================
|
#========================
|
||||||
|
@ -32,10 +32,6 @@ df = merged_df2_comp
|
||||||
# col = Lineage
|
# col = Lineage
|
||||||
# fill = lineage
|
# fill = lineage
|
||||||
#========================
|
#========================
|
||||||
is.factor(df$lineage)
|
|
||||||
df$lineage = as.factor(df$lineage)
|
|
||||||
is.factor(df$lineage)
|
|
||||||
|
|
||||||
table(df$lineage)
|
table(df$lineage)
|
||||||
|
|
||||||
# subset only lineages1-4
|
# subset only lineages1-4
|
||||||
|
@ -57,6 +53,35 @@ table(df_lin$lineage)
|
||||||
df <- df_lin
|
df <- df_lin
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
#%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
# REASSIGNMENT
|
||||||
|
df2 = df
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
df2 = df2%>%
|
||||||
|
add_count(country_code)
|
||||||
|
|
||||||
|
str(df2$country_code); str(df2$n)
|
||||||
|
|
||||||
|
n = which(colnames(df2) == "n")
|
||||||
|
colnames(df2)[n] = "count_country"
|
||||||
|
|
||||||
|
table(df2$count_country>100 & df$country_code!= "")
|
||||||
|
df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
|
||||||
|
|
||||||
|
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
# REASSIGNMENT
|
||||||
|
df = df3
|
||||||
|
#%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
sample = sum(table(unique(df$id))); sample
|
||||||
|
table(df$country_code)
|
||||||
|
tab = sum(table(df$country_code)); tab
|
||||||
|
|
||||||
|
|
||||||
|
View(table(df$country_code))
|
||||||
|
View(t1)
|
||||||
|
|
||||||
############## begin plot
|
############## begin plot
|
||||||
g = ggplot(df, aes(x = lineage))
|
g = ggplot(df, aes(x = lineage))
|
||||||
g + geom_bar(aes(fill = lineage)) +
|
g + geom_bar(aes(fill = lineage)) +
|
||||||
|
@ -112,7 +137,7 @@ g + geom_point(aes(col = lineage
|
||||||
### begin plot
|
### begin plot
|
||||||
table(df$lineage)
|
table(df$lineage)
|
||||||
|
|
||||||
g = ggplot(df, aes(x = lineage
|
g = ggplot(df_lin, aes(x = lineage
|
||||||
, y = duet_scaled))
|
, y = duet_scaled))
|
||||||
g + geom_point(aes(col = lineage
|
g + geom_point(aes(col = lineage
|
||||||
, size = or_mychisq)) +
|
, size = or_mychisq)) +
|
||||||
|
@ -131,60 +156,10 @@ g + geom_point(aes(col = lineage
|
||||||
, y = "DUET (PS)")
|
, y = "DUET (PS)")
|
||||||
|
|
||||||
#========================
|
#========================
|
||||||
# Plot 4-6: Distributions
|
# Plot 4-5: Distributions
|
||||||
# ggridges
|
# ggridges
|
||||||
#========================
|
#========================
|
||||||
|
|
||||||
df$country_code_symbols = as.character(df$country_code)
|
|
||||||
foo = df
|
|
||||||
|
|
||||||
foo = foo%>%
|
|
||||||
add_count(country_code)
|
|
||||||
|
|
||||||
n = which(colnames(foo) == "n")
|
|
||||||
colnames(foo)[n] = "count_country"
|
|
||||||
|
|
||||||
|
|
||||||
table(foo$count_country)
|
|
||||||
|
|
||||||
|
|
||||||
check = as.data.frame(cbind(foo$country_code_symbols, foo$count_country))
|
|
||||||
str(check)
|
|
||||||
check$V2 = as.numeric(check$V2)
|
|
||||||
min(check$V2); max(check$V2)
|
|
||||||
table(check$V2)
|
|
||||||
|
|
||||||
check2 = subset(check, check$V2>100)
|
|
||||||
check2 = subset(check2, is.na(check2$V1))
|
|
||||||
min(check2$V2); max(check2$V2)
|
|
||||||
|
|
||||||
check3 = subset(check2, check2$V1 != "")
|
|
||||||
table(check3$V1 != "")
|
|
||||||
|
|
||||||
|
|
||||||
# subset df with country containing >100 samples
|
|
||||||
# and no missing
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# REASSIGNMENT
|
|
||||||
df2 = df
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
df2 = df2%>%
|
|
||||||
add_count(country_code)
|
|
||||||
|
|
||||||
str(df2$country_code); str(df2$n)
|
|
||||||
|
|
||||||
n = which(colnames(df2) == "n")
|
|
||||||
colnames(df2)[n] = "count_country"
|
|
||||||
|
|
||||||
table(df2$count_country>100 & df$country_code!= "")
|
|
||||||
df3 = subset(df2, df2$count_country>100 & df2$country_code != "")
|
|
||||||
|
|
||||||
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
# REASSIGNMENT
|
|
||||||
df = df3
|
|
||||||
#%%%%%%%%%%%%%%%%%%%%%%%%
|
|
||||||
|
|
||||||
#==================================================
|
#==================================================
|
||||||
my_ats = 15 # axis text size
|
my_ats = 15 # axis text size
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue