diff --git a/data_extraction_formatting_clinical.R b/data_extraction_formatting_clinical.R
index b6ff2c3..8ee3f08 100644
--- a/data_extraction_formatting_clinical.R
+++ b/data_extraction_formatting_clinical.R
@@ -3,14 +3,30 @@ getwd()
 setwd('~/git/mosaic_2020/')
 getwd()
 ########################################################################
-# TASK: Extract relevant columns from mosaic adults data
-# npa
+# TASK: Extract clinical data columns and recode as required for analysis
+# corrects the asthma and copd status for patients 
+# creates age_bins and other intervals for clinical params
+# merges steroid ics data and outcome var based on T1 resp score
+# The steroid_ics data file is read from outdir and has been manually sourced
+
+# TODO: for extra caution add and run checks on the steroid_ics file
 ########################################################################
 #====================
-# Input: source data
+# Input: source data and the steroid ics file.
+# The steroid ics file contains steroid_ics data
+# and another outcome variable based on T1_resp score
+# (it is merged with clinical_df further down in this script).
 #====================
 source("read_data.R")
 source("colnames_clinical_meds.R")
+
+# Read the steroid_ics csv. NOTE(review): outdir is not defined in the visible lines - presumably set by a sourced script; confirm.
+infile_ics = paste0(outdir, "data_ics.csv")
+infile_ics  # bare expression: shows the resolved path when run at top level
+
+clinical_ics = read.csv(infile_ics)
+str(clinical_ics)  # show the structure of the data that was read
+
 ########################################################################
 # quick sanity checks
 table(adult_df$ia_exac_copd==1 & adult_df$asthma == 1) # check this is 4 
@@ -21,7 +37,7 @@ table(fp_adults$ia_exac_copd==1 & fp_adults$asthma == 1) # check this is 3
 rm(all_df, adult_df, metadata_all)
 
 ########################################################################
-# Clinical_data extraction
+#                           Clinical_data extraction
 ########################################################################
 cat("\nExtracting:", length(clinical_cols), "cols from fp_adults")
 
@@ -38,6 +54,11 @@ if ( sum(table(clinical_df$obesity)) & sum(table(clinical_df$age>=18)) & sum(tab
 table(clinical_df$ia_exac_copd)
 
 ########################################################################
+#==================================
+# asthma and copd status correction
+# for conflicting fields
+#=================================
+
 # Reassign the copd and asthma status and do some checks 
 table(clinical_df$ia_exac_copd); sum(is.na(clinical_df$ia_exac_copd))
 
@@ -58,6 +79,10 @@ foo<- subset(clinical_df, asthma==1 & ia_exac_copd ==1) # check that its 0
 
 rm(check_copd_and_asthma_1, foo)
 #=====================================================================
+#=================================
+# resp scores: In, max and t1 & t2
+#=================================
+
 # count the resp scores 
 max_resp_score_table<- table(clinical_df$max_resp_score)
 max_resp_score_table
@@ -297,15 +322,38 @@ table(clinical_df$asthma, clinical_df$smoking)
 #1  7 17 19
 
 ################################################################
-#==================
+
+#=========================
+# Merge: clinical_df and clinical_ics (steroid ics data)
+#=========================
+# Merge on every column name the two tables share; all = TRUE keeps unmatched rows from both sides.
+merging_cols = intersect( names(clinical_df), names(clinical_ics) )
+clinical_df_ics = merge(clinical_df, clinical_ics, by = merging_cols, all = TRUE); clinical_df_ics 
+
+# Unmatched or duplicated keys add rows, so the merged nrow must equal the nrow of BOTH inputs.
+if (nrow(clinical_df_ics) == nrow(clinical_df) && nrow(clinical_df_ics) == nrow(clinical_ics)){
+  cat("\nPASS: No. of rows match, nrow =", nrow(clinical_df_ics)
+      , "\nChecking ncols...")
+  if ( ncol(clinical_df_ics) == ncol(clinical_df) + ncol(clinical_ics)  - length(merging_cols) ){
+    cat("\nPASS: No. of cols match, ncol =", ncol(clinical_df_ics))
+  } else {
+    cat("\nFAIL: ncols mismatch"
+        , "\nExpected ncols:", ncol(clinical_df) + ncol(clinical_ics)  - length(merging_cols)
+        , "\nGot:", ncol(clinical_df_ics))
+  }
+} else {
+  cat("\nFAIL: nrows mismatch", "\nExpected nrows:", nrow(clinical_df), "\nGot:", nrow(clinical_df_ics))
+}
+
+#======================
 # writing output file
-#==================
+#======================
 outfile_name_reg = "clinical_df_recoded.csv"
 outfile_reg = paste0(outdir, outfile_name_reg)
 
 cat("\nWriting clinical file for regression:", outfile_reg)
 
-#write.csv(clinical_df, file = outfile_reg)
+#write.csv(clinical_df_ics, file = outfile_reg)
 ################################################################
 
 rm(age_bins, max_age_interval, max_in, min_in, o2_sat_bin, onset_initial_bin, tot_o2, tot_onset2ini, meta_data_cols)