buggy bugs that bug me
This commit is contained in:
parent
e1b8e103ea
commit
599cd7493f
3 changed files with 25 additions and 26 deletions
|
@ -1,11 +1,11 @@
|
||||||
#!/usr/bin/env Rscript
|
#!/usr/bin/env Rscript
|
||||||
#########################################################
|
#########################################################
|
||||||
# TASK: Script to format data for lineage plots
|
# TASK: Script to format data for lineage plots
|
||||||
# Called by get_plotting_dfs.R
|
# Called by get_plotting_dfs.R
|
||||||
|
|
||||||
# lineage_plot_data()
|
# lineage_plot_data()
|
||||||
# INPUT:
|
# INPUT:
|
||||||
# df : merged_df2 (data with 1:many relationship b/w snp and lineage)
|
# plot_df : merged_df2 (data with 1:many relationship b/w snp and lineage)
|
||||||
# NOTE*: DO NOT use merged_df3 as it loses the 1:many relationship)
|
# NOTE*: DO NOT use merged_df3 as it loses the 1:many relationship)
|
||||||
# lineage_column_name : Column name that contains lineage info
|
# lineage_column_name : Column name that contains lineage info
|
||||||
# remove_empty_lineage : where lineage info is missing, whether to omit those or not
|
# remove_empty_lineage : where lineage info is missing, whether to omit those or not
|
||||||
|
@ -24,7 +24,7 @@
|
||||||
#2) select lineages to display?
|
#2) select lineages to display?
|
||||||
#########################################################
|
#########################################################
|
||||||
|
|
||||||
lineage_plot_data <- function(df
|
lineage_plot_data <- function(plot_df
|
||||||
, lineage_column_name = "lineage"
|
, lineage_column_name = "lineage"
|
||||||
, remove_empty_lineage = T
|
, remove_empty_lineage = T
|
||||||
, lineage_label_col_name = "lineage_labels"
|
, lineage_label_col_name = "lineage_labels"
|
||||||
|
@ -35,13 +35,12 @@ lineage_plot_data <- function(df
|
||||||
# Get WF and LF data with lineage count, and snp diversity
|
# Get WF and LF data with lineage count, and snp diversity
|
||||||
################################################################
|
################################################################
|
||||||
|
|
||||||
df[lineage_column_name] =
|
|
||||||
# Initialise output list
|
# Initialise output list
|
||||||
lineage_dataL = list(
|
lineage_dataL = list(
|
||||||
lin_wf = data.frame()
|
lin_wf = data.frame()
|
||||||
, lin_lf = data.frame())
|
, lin_lf = data.frame())
|
||||||
|
|
||||||
table(df[[lineage_column_name]])
|
#table(plot_df[[lineage_column_name]])
|
||||||
|
|
||||||
#------------------------
|
#------------------------
|
||||||
# Check lineage counts
|
# Check lineage counts
|
||||||
|
@ -49,23 +48,23 @@ lineage_plot_data <- function(df
|
||||||
#------------------------
|
#------------------------
|
||||||
if (missing(remove_empty_lineage)){
|
if (missing(remove_empty_lineage)){
|
||||||
|
|
||||||
miss_ll = table(df[[lineage_column_name]] == "")[[2]]
|
miss_ll = table(plot_df[[lineage_column_name]] == "")[[2]]
|
||||||
rm_ll = which(df[[lineage_column_name]] == "")
|
rm_ll = which(plot_df[[lineage_column_name]] == "")
|
||||||
|
|
||||||
if (length(rm_ll) == miss_ll){
|
if (length(rm_ll) == miss_ll){
|
||||||
cat("\nNo. of samples with missing lineage classification:"
|
cat("\nNo. of samples with missing lineage classification:"
|
||||||
, miss_ll
|
, miss_ll
|
||||||
, "Removing these...")
|
, "Removing these...")
|
||||||
df = df[-rm_ll,]
|
plot_df = plot_df[-rm_ll,]
|
||||||
df = droplevels(df)
|
plot_df = droplevels(plot_df)
|
||||||
}else{
|
}else{
|
||||||
cat("\nSomething went wrong...numbers mismatch"
|
cat("\nSomething went wrong...numbers mismatch"
|
||||||
, "samples with missing lineages:", mis_all
|
, "samples with missing lineages:", mis_all
|
||||||
, "No. of corresponding indices to remove:", rm_ll)
|
, "No. of corresponding indices to remove:", rm_ll)
|
||||||
}
|
}
|
||||||
}else{
|
}else{
|
||||||
df = df
|
plot_df = plot_df
|
||||||
df = droplevels(df)
|
plot_df = droplevels(plot_df)
|
||||||
}
|
}
|
||||||
|
|
||||||
#------------------------
|
#------------------------
|
||||||
|
@ -77,9 +76,9 @@ lineage_plot_data <- function(df
|
||||||
lin_labels = lineage_column_name
|
lin_labels = lineage_column_name
|
||||||
|
|
||||||
#------------------------------------------
|
#------------------------------------------
|
||||||
if ( !is.factor((df[[lin_labels]])) ){
|
if ( !is.factor((plot_df[[lin_labels]])) ){
|
||||||
df[lin_labels] = as.factor(df[lin_labels])
|
plot_df[[lin_labels]] = as.factor(plot_df[[lin_labels]])
|
||||||
df[lin_labels] = factor()
|
cat("\nWARNING: Lineage label not a factor. Correcting.")
|
||||||
}else{
|
}else{
|
||||||
cat("\nLineage label column already factor")
|
cat("\nLineage label column already factor")
|
||||||
}
|
}
|
||||||
|
@ -90,8 +89,8 @@ lineage_plot_data <- function(df
|
||||||
cat("\nLineage label column present"
|
cat("\nLineage label column present"
|
||||||
, "\nUsing it, column name:", lin_labels)
|
, "\nUsing it, column name:", lin_labels)
|
||||||
#------------------------------------------
|
#------------------------------------------
|
||||||
if ( !is.factor((df[[lin_labels]])) ){
|
if ( !is.factor((plot_df[[lin_labels]])) ){
|
||||||
df[lin_labels] = as.factor(df[lin_labels])
|
plot_df[[lin_labels]] = as.factor(plot_df[[lin_labels]])
|
||||||
}else{
|
}else{
|
||||||
cat("\nLineage label already factor")
|
cat("\nLineage label already factor")
|
||||||
}
|
}
|
||||||
|
@ -100,11 +99,11 @@ lineage_plot_data <- function(df
|
||||||
|
|
||||||
# This is how lineage labels will appear
|
# This is how lineage labels will appear
|
||||||
cat("\nLineage labels will appear as below\n")
|
cat("\nLineage labels will appear as below\n")
|
||||||
print( table(df[[lin_labels]]) )
|
print( table(plot_df[[lin_labels]]) )
|
||||||
cat("\n")
|
cat("\n")
|
||||||
cat( "Class of", lin_labels, ":", class(df[[lin_labels]]) )
|
cat(paste0("Class of ", lin_labels, ": ", class(plot_df[[lin_labels]])) )
|
||||||
cat("\n")
|
cat("\n")
|
||||||
print( "No. of levels:", nlevels(df[[lin_labels]]) )
|
print(paste0("No. of levels: ", nlevels(plot_df[[lin_labels]])) )
|
||||||
|
|
||||||
#==========================================
|
#==========================================
|
||||||
# WF data: lineages with
|
# WF data: lineages with
|
||||||
|
@ -114,7 +113,7 @@ lineage_plot_data <- function(df
|
||||||
#==========================================
|
#==========================================
|
||||||
cat("\nCreating WF Lineage data...")
|
cat("\nCreating WF Lineage data...")
|
||||||
|
|
||||||
sel_lineages = levels(df[[lin_labels]])
|
sel_lineages = levels(plot_df[[lin_labels]])
|
||||||
|
|
||||||
lin_wf = data.frame(sel_lineages) #4, 1
|
lin_wf = data.frame(sel_lineages) #4, 1
|
||||||
total_snps_u = NULL
|
total_snps_u = NULL
|
||||||
|
@ -122,12 +121,12 @@ lineage_plot_data <- function(df
|
||||||
|
|
||||||
for (i in sel_lineages){
|
for (i in sel_lineages){
|
||||||
#print(i)
|
#print(i)
|
||||||
curr_total = length(unique(df[[id_colname]])[df[[lin_labels]]==i])
|
curr_total = length(unique(plot_df[[id_colname]])[plot_df[[lin_labels]]==i])
|
||||||
#print(curr_total)
|
#print(curr_total)
|
||||||
total_samples = c(total_samples, curr_total)
|
total_samples = c(total_samples, curr_total)
|
||||||
print(total_samples)
|
print(total_samples)
|
||||||
|
|
||||||
foo = df[df[[lin_labels]]==i,]
|
foo = plot_df[plot_df[[lin_labels]]==i,]
|
||||||
print(paste0(i, "=======\n"))
|
print(paste0(i, "=======\n"))
|
||||||
print(length(unique(foo[[snp_colname]])))
|
print(length(unique(foo[[snp_colname]])))
|
||||||
curr_count = length(unique(foo[[snp_colname]]))
|
curr_count = length(unique(foo[[snp_colname]]))
|
||||||
|
@ -137,7 +136,7 @@ lineage_plot_data <- function(df
|
||||||
|
|
||||||
lin_wf
|
lin_wf
|
||||||
|
|
||||||
# Add these counts as columns to the df
|
# Add these counts as columns to lin_wf
|
||||||
lin_wf$num_snps_u = total_snps_u
|
lin_wf$num_snps_u = total_snps_u
|
||||||
lin_wf$total_samples = total_samples
|
lin_wf$total_samples = total_samples
|
||||||
lin_wf
|
lin_wf
|
||||||
|
@ -181,7 +180,7 @@ lineage_plot_data <- function(df
|
||||||
|
|
||||||
expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n )
|
expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n )
|
||||||
|
|
||||||
lin_lf <- gather(lin_wf
|
lin_lf <- tidyr::gather(lin_wf
|
||||||
, count_categ
|
, count_categ
|
||||||
, p_count
|
, p_count
|
||||||
, num_snps_u:total_samples
|
, num_snps_u:total_samples
|
||||||
|
|
|
@ -124,7 +124,7 @@ cat(s2)
|
||||||
|
|
||||||
#source(paste0(plot_script_path, "lineage_data.R"))
|
#source(paste0(plot_script_path, "lineage_data.R"))
|
||||||
# converted to a function. Moved lineage_data.R to redundant/
|
# converted to a function. Moved lineage_data.R to redundant/
|
||||||
lineage_dfL = lineage_plot_data(df = merged_df2
|
lineage_dfL = lineage_plot_data(merged_df2
|
||||||
, lineage_column_name = "lineage"
|
, lineage_column_name = "lineage"
|
||||||
, remove_empty_lineage = F
|
, remove_empty_lineage = F
|
||||||
, lineage_label_col_name = "lineage_labels"
|
, lineage_label_col_name = "lineage_labels"
|
||||||
|
|
|
@ -25,7 +25,7 @@ class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)
|
||||||
# total_samples
|
# total_samples
|
||||||
# snp diversity (perc)
|
# snp diversity (perc)
|
||||||
#==========================================
|
#==========================================
|
||||||
sel_lineages = levels(merged_df2$lineage_labels)
|
sel_lineages = levels(as.factor(merged_df2$lineage_labels))
|
||||||
|
|
||||||
lin_wf = data.frame(sel_lineages) #4, 1
|
lin_wf = data.frame(sel_lineages) #4, 1
|
||||||
total_snps_u = NULL
|
total_snps_u = NULL
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue