buggy bugs that bug me

2022-08-04 15:18:23 +01:00 · 2022-08-04 15:18:23 +01:00 · 599cd7493f
commit 599cd7493f
parent e1b8e103ea
3 changed files with 25 additions and 26 deletions
--- a/scripts/functions/lineage_plot_data.R
+++ b/scripts/functions/lineage_plot_data.R
@ -1,11 +1,11 @@
 #!/usr/bin/env Rscript  
 #########################################################
 # TASK: Script to format data for lineage plots
-# Called by get_plotting_dfs.R
+# Called by get_plotting_dfs.R

 # lineage_plot_data()
 # INPUT: 
-  # df                    : merged_df2 (data with 1:many relationship b/w snp and lineage)
+  # plot_df               : merged_df2 (data with 1:many relationship b/w snp and lineage)
    # NOTE*: DO NOT use merged_df3 as it loses the 1:many relationship)
  # lineage_column_name   : Column name that contains lineage info
  # remove_empty_lineage  : where lineage info is missing, whether to omit those or not
@ -24,7 +24,7 @@
 #2) select lineages to display?
 #########################################################

-lineage_plot_data <- function(df
+lineage_plot_data <- function(plot_df
                            , lineage_column_name = "lineage"
                            , remove_empty_lineage = T
                            , lineage_label_col_name = "lineage_labels"
@ -35,13 +35,12 @@ lineage_plot_data <- function(df
    # Get WF and LF data with lineage count, and snp diversity
    ################################################################
  
-    df[lineage_column_name] = 
    # Initialise output list
    lineage_dataL = list(
        lin_wf = data.frame()
      , lin_lf = data.frame())
    
-    table(df[[lineage_column_name]])
+    #table(plot_df[[lineage_column_name]])
  
    #------------------------
    # Check lineage counts
@ -49,23 +48,23 @@ lineage_plot_data <- function(df
    #------------------------
    if (missing(remove_empty_lineage)){
      
-        miss_ll = table(df[[lineage_column_name]] == "")[[2]]    
-        rm_ll   = which(df[[lineage_column_name]] == "")
+        miss_ll = table(plot_df[[lineage_column_name]] == "")[[2]]    
+        rm_ll   = which(plot_df[[lineage_column_name]] == "")
      
        if (length(rm_ll) == miss_ll){
        cat("\nNo. of samples with missing lineage classification:"
            , miss_ll
            , "Removing these...")
-        df = df[-rm_ll,]
-        df = droplevels(df)
+        plot_df = plot_df[-rm_ll,]
+        plot_df = droplevels(plot_df)
        }else{
          cat("\nSomething went wrong...numbers mismatch"
            , "samples with missing lineages:", mis_all
            , "No. of corresponding indices to remove:", rm_ll)
            }
        }else{
-      df = df
-      df = droplevels(df)
+      plot_df = plot_df
+      plot_df = droplevels(plot_df)
      }

    #------------------------
@ -77,9 +76,9 @@ lineage_plot_data <- function(df
      lin_labels = lineage_column_name
      
        #------------------------------------------
-        if ( !is.factor((df[[lin_labels]])) ){
-          df[lin_labels] = as.factor(df[lin_labels])
-          df[lin_labels] = factor()
+        if ( !is.factor((plot_df[[lin_labels]])) ){
+          plot_df[[lin_labels]] = as.factor(plot_df[[lin_labels]])
+          cat("\nWARNING: Lineage label not a factor. Correcting.")
          }else{
           cat("\nLineage label column already factor")
           }
@ -90,8 +89,8 @@ lineage_plot_data <- function(df
      cat("\nLineage label column present"
          , "\nUsing it, column name:", lin_labels)
       #------------------------------------------
-       if ( !is.factor((df[[lin_labels]])) ){
-          df[lin_labels] = as.factor(df[lin_labels])
+       if ( !is.factor((plot_df[[lin_labels]])) ){
+          plot_df[[lin_labels]] = as.factor(plot_df[[lin_labels]])
          }else{
          cat("\nLineage label already factor")  
          }
@ -100,11 +99,11 @@ lineage_plot_data <- function(df
    
    # This is how lineage labels will appear
    cat("\nLineage labels will appear as below\n")
-    print( table(df[[lin_labels]]) )
+    print( table(plot_df[[lin_labels]]) )
    cat("\n")
-    cat( "Class of", lin_labels, ":", class(df[[lin_labels]]) )
+    cat(paste0("Class of ", lin_labels, ": ", class(plot_df[[lin_labels]])) )
    cat("\n")
-    print( "No. of levels:", nlevels(df[[lin_labels]]) )
+    print(paste0("No. of levels: ", nlevels(plot_df[[lin_labels]])) )

    #==========================================
    # WF data: lineages with 
@ -114,7 +113,7 @@ lineage_plot_data <- function(df
    #==========================================
    cat("\nCreating WF Lineage data...")
    
-    sel_lineages = levels(df[[lin_labels]])
+    sel_lineages = levels(plot_df[[lin_labels]])

    lin_wf = data.frame(sel_lineages) #4, 1
    total_snps_u = NULL
@ -122,12 +121,12 @@ lineage_plot_data <- function(df

    for (i in sel_lineages){
      #print(i)
-      curr_total = length(unique(df[[id_colname]])[df[[lin_labels]]==i])
+      curr_total = length(unique(plot_df[[id_colname]])[plot_df[[lin_labels]]==i])
      #print(curr_total)
      total_samples = c(total_samples, curr_total)
      print(total_samples)

-      foo = df[df[[lin_labels]]==i,]
+      foo = plot_df[plot_df[[lin_labels]]==i,]
      print(paste0(i, "=======\n"))
      print(length(unique(foo[[snp_colname]])))
      curr_count = length(unique(foo[[snp_colname]]))
@ -137,7 +136,7 @@ lineage_plot_data <- function(df
    
    lin_wf

-    # Add these counts as columns to the df
+    # Add these counts as columns to lin_wf
    lin_wf$num_snps_u = total_snps_u
    lin_wf$total_samples = total_samples
    lin_wf
@ -181,7 +180,7 @@ lineage_plot_data <- function(df
    
    expected_rows =  nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n )
    
-    lin_lf <- gather(lin_wf
+    lin_lf <- tidyr::gather(lin_wf
                     , count_categ
                     , p_count
                     , num_snps_u:total_samples
--- a/scripts/plotting/get_plotting_dfs.R
+++ b/scripts/plotting/get_plotting_dfs.R
@ -124,7 +124,7 @@ cat(s2)
 
 #source(paste0(plot_script_path, "lineage_data.R"))
 # converted to a function. Moved lineage_data.R to redundant/
-lineage_dfL = lineage_plot_data(df = merged_df2
+lineage_dfL = lineage_plot_data(merged_df2
                                , lineage_column_name = "lineage"
                                , remove_empty_lineage = F
                                , lineage_label_col_name = "lineage_labels"
--- a/scripts/plotting/lineage_data.R
+++ b/scripts/plotting/lineage_data.R
@ -25,7 +25,7 @@ class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)
 # total_samples
 # snp diversity (perc)
 #==========================================
-sel_lineages = levels(merged_df2$lineage_labels)
+sel_lineages = levels(as.factor(merged_df2$lineage_labels))

 lin_wf = data.frame(sel_lineages) #4, 1
 total_snps_u = NULL