buggy bugs that bug me

This commit is contained in:
Tanushree Tunstall 2022-08-04 15:18:23 +01:00
parent e1b8e103ea
commit 599cd7493f
3 changed files with 25 additions and 26 deletions

View file

@ -1,11 +1,11 @@
#!/usr/bin/env Rscript #!/usr/bin/env Rscript
######################################################### #########################################################
# TASK: Script to format data for lineage plots # TASK: Script to format data for lineage plots
# Called by get_plotting_dfs.R # Called by get_plotting_dfs.R
# lineage_plot_data() # lineage_plot_data()
# INPUT: # INPUT:
# df : merged_df2 (data with 1:many relationship b/w snp and lineage) # plot_df : merged_df2 (data with 1:many relationship b/w snp and lineage)
# NOTE*: DO NOT use merged_df3 as it loses the 1:many relationship # NOTE*: DO NOT use merged_df3 as it loses the 1:many relationship
# lineage_column_name : Column name that contains lineage info # lineage_column_name : Column name that contains lineage info
# remove_empty_lineage : where lineage info is missing, whether to omit those or not # remove_empty_lineage : where lineage info is missing, whether to omit those or not
@ -24,7 +24,7 @@
#2) select lineages to display? #2) select lineages to display?
######################################################### #########################################################
lineage_plot_data <- function(df lineage_plot_data <- function(plot_df
, lineage_column_name = "lineage" , lineage_column_name = "lineage"
, remove_empty_lineage = T , remove_empty_lineage = T
, lineage_label_col_name = "lineage_labels" , lineage_label_col_name = "lineage_labels"
@ -35,13 +35,12 @@ lineage_plot_data <- function(df
# Get WF and LF data with lineage count, and snp diversity # Get WF and LF data with lineage count, and snp diversity
################################################################ ################################################################
df[lineage_column_name] =
# Initialise output list # Initialise output list
lineage_dataL = list( lineage_dataL = list(
lin_wf = data.frame() lin_wf = data.frame()
, lin_lf = data.frame()) , lin_lf = data.frame())
table(df[[lineage_column_name]]) #table(plot_df[[lineage_column_name]])
#------------------------ #------------------------
# Check lineage counts # Check lineage counts
@ -49,23 +48,23 @@ lineage_plot_data <- function(df
#------------------------ #------------------------
if (missing(remove_empty_lineage)){ if (missing(remove_empty_lineage)){
miss_ll = table(df[[lineage_column_name]] == "")[[2]] miss_ll = table(plot_df[[lineage_column_name]] == "")[[2]]
rm_ll = which(df[[lineage_column_name]] == "") rm_ll = which(plot_df[[lineage_column_name]] == "")
if (length(rm_ll) == miss_ll){ if (length(rm_ll) == miss_ll){
cat("\nNo. of samples with missing lineage classification:" cat("\nNo. of samples with missing lineage classification:"
, miss_ll , miss_ll
, "Removing these...") , "Removing these...")
df = df[-rm_ll,] plot_df = plot_df[-rm_ll,]
df = droplevels(df) plot_df = droplevels(plot_df)
}else{ }else{
cat("\nSomething went wrong...numbers mismatch" cat("\nSomething went wrong...numbers mismatch"
, "samples with missing lineages:", mis_all , "samples with missing lineages:", mis_all
, "No. of corresponding indices to remove:", rm_ll) , "No. of corresponding indices to remove:", rm_ll)
} }
}else{ }else{
df = df plot_df = plot_df
df = droplevels(df) plot_df = droplevels(plot_df)
} }
#------------------------ #------------------------
@ -77,9 +76,9 @@ lineage_plot_data <- function(df
lin_labels = lineage_column_name lin_labels = lineage_column_name
#------------------------------------------ #------------------------------------------
if ( !is.factor((df[[lin_labels]])) ){ if ( !is.factor((plot_df[[lin_labels]])) ){
df[lin_labels] = as.factor(df[lin_labels]) plot_df[[lin_labels]] = as.factor(plot_df[[lin_labels]])
df[lin_labels] = factor() cat("\nWARNING: Lineage label not a factor. Correcting.")
}else{ }else{
cat("\nLineage label column already factor") cat("\nLineage label column already factor")
} }
@ -90,8 +89,8 @@ lineage_plot_data <- function(df
cat("\nLineage label column present" cat("\nLineage label column present"
, "\nUsing it, column name:", lin_labels) , "\nUsing it, column name:", lin_labels)
#------------------------------------------ #------------------------------------------
if ( !is.factor((df[[lin_labels]])) ){ if ( !is.factor((plot_df[[lin_labels]])) ){
df[lin_labels] = as.factor(df[lin_labels]) plot_df[[lin_labels]] = as.factor(plot_df[[lin_labels]])
}else{ }else{
cat("\nLineage label already factor") cat("\nLineage label already factor")
} }
@ -100,11 +99,11 @@ lineage_plot_data <- function(df
# This is how lineage labels will appear # This is how lineage labels will appear
cat("\nLineage labels will appear as below\n") cat("\nLineage labels will appear as below\n")
print( table(df[[lin_labels]]) ) print( table(plot_df[[lin_labels]]) )
cat("\n") cat("\n")
cat( "Class of", lin_labels, ":", class(df[[lin_labels]]) ) cat(paste0("Class of ", lin_labels, ": ", class(plot_df[[lin_labels]])) )
cat("\n") cat("\n")
print( "No. of levels:", nlevels(df[[lin_labels]]) ) print(paste0("No. of levels: ", nlevels(plot_df[[lin_labels]])) )
#========================================== #==========================================
# WF data: lineages with # WF data: lineages with
@ -114,7 +113,7 @@ lineage_plot_data <- function(df
#========================================== #==========================================
cat("\nCreating WF Lineage data...") cat("\nCreating WF Lineage data...")
sel_lineages = levels(df[[lin_labels]]) sel_lineages = levels(plot_df[[lin_labels]])
lin_wf = data.frame(sel_lineages) #4, 1 lin_wf = data.frame(sel_lineages) #4, 1
total_snps_u = NULL total_snps_u = NULL
@ -122,12 +121,12 @@ lineage_plot_data <- function(df
for (i in sel_lineages){ for (i in sel_lineages){
#print(i) #print(i)
curr_total = length(unique(df[[id_colname]])[df[[lin_labels]]==i]) curr_total = length(unique(plot_df[[id_colname]])[plot_df[[lin_labels]]==i])
#print(curr_total) #print(curr_total)
total_samples = c(total_samples, curr_total) total_samples = c(total_samples, curr_total)
print(total_samples) print(total_samples)
foo = df[df[[lin_labels]]==i,] foo = plot_df[plot_df[[lin_labels]]==i,]
print(paste0(i, "=======\n")) print(paste0(i, "=======\n"))
print(length(unique(foo[[snp_colname]]))) print(length(unique(foo[[snp_colname]])))
curr_count = length(unique(foo[[snp_colname]])) curr_count = length(unique(foo[[snp_colname]]))
@ -137,7 +136,7 @@ lineage_plot_data <- function(df
lin_wf lin_wf
# Add these counts as columns to the df # Add these counts as columns to lin_wf
lin_wf$num_snps_u = total_snps_u lin_wf$num_snps_u = total_snps_u
lin_wf$total_samples = total_samples lin_wf$total_samples = total_samples
lin_wf lin_wf
@ -181,7 +180,7 @@ lineage_plot_data <- function(df
expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n ) expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n )
lin_lf <- gather(lin_wf lin_lf <- tidyr::gather(lin_wf
, count_categ , count_categ
, p_count , p_count
, num_snps_u:total_samples , num_snps_u:total_samples

View file

@ -124,7 +124,7 @@ cat(s2)
#source(paste0(plot_script_path, "lineage_data.R")) #source(paste0(plot_script_path, "lineage_data.R"))
# converted to a function. Moved lineage_data.R to redundant/ # converted to a function. Moved lineage_data.R to redundant/
lineage_dfL = lineage_plot_data(df = merged_df2 lineage_dfL = lineage_plot_data(merged_df2
, lineage_column_name = "lineage" , lineage_column_name = "lineage"
, remove_empty_lineage = F , remove_empty_lineage = F
, lineage_label_col_name = "lineage_labels" , lineage_label_col_name = "lineage_labels"

View file

@ -25,7 +25,7 @@ class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)
# total_samples # total_samples
# snp diversity (perc) # snp diversity (perc)
#========================================== #==========================================
sel_lineages = levels(merged_df2$lineage_labels) sel_lineages = levels(as.factor(merged_df2$lineage_labels))
lin_wf = data.frame(sel_lineages) #4, 1 lin_wf = data.frame(sel_lineages) #4, 1
total_snps_u = NULL total_snps_u = NULL