diff --git a/rfecv_with_ohe.py b/rfecv_with_ohe.py
new file mode 100644
index 0000000..d2c45d9
--- /dev/null
+++ b/rfecv_with_ohe.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Tue May 24 06:29:06 2022
+
+@author: tanu
+"""
+
+#https://stackoverflow.com/questions/68345259/rfecv-with-a-pipeline-containing-columntransformer
+def rfecv(X, y, estimator,
+          min_features_to_select=3,
+          splits=3,
+          step=3,
+          scoring_metric="f1",
+          scoring_decimals=3,
+          random_state=None):
+    """
+    Run recursive feature elimination with cross-validation (RFECV).
+
+    This is an implementation of the recursive feature elimination algorithm,
+    which eliminates unnecessary features. As scikit-learn only provides an RFECV
+    version [1] that makes using Pipelines very difficult, we have implemented our
+    own version based on the original paper [2].
+
+    [1] https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html
+    [2] Guyon, Isabelle, et al. "Gene selection for cancer classification using support vector machines."
+        Machine learning 46.1 (2002): 389-422.
+
+    :X: a DataFrame containing the features.
+    :y: a Series containing the labels.
+    :estimator: a scikit-learn estimator or a Pipeline. If a pipeline is passed,
+        the last element of the pipeline is assumed to be a classifier providing
+        a feature_importances_ attribute with one entry per evaluated column.
+    :min_features_to_select: the minimum number of features to evaluate (>= 1).
+    :splits: number of splits to use for the stratified cross validation.
+    :step: the number of features to remove per round (at least one is removed).
+    :scoring_metric: the scoring metric to use for evaluation (e.g., "f1" or
+        a callable implementing the sklearn scoring interface).
+    :scoring_decimals: the scoring metric can be rounded to N decimals to avoid
+        the reduction from getting stuck with a larger number of features with
+        very small score gains. Defaults to 3 digits. If None is passed, full
+        scoring precision is used.
+    :random_state: if not None, this is the seed used to shuffle the CV folds.
+
+    :returns: best_features, best_score, ranks, scores; best_features is the
+        smallest evaluated feature set reaching the best score, best_score is the
+        mean score achieved with these features over the folds, ranks is the list of
+        eliminated features (from most relevant to most irrelevant), scores is the
+        list of mean scores for each round of the feature elimination.
+    :raises ValueError: if min_features_to_select is outside 1..len(X.columns) or
+        the importances do not line up with the evaluated columns.
+    """
+    # Imported locally because the module itself never imports these names.
+    from time import time
+    import numpy as np
+    from sklearn.model_selection import StratifiedKFold, cross_validate
+    from sklearn.pipeline import Pipeline
+
+    survivors = list(X.columns)
+    if not 1 <= min_features_to_select <= len(survivors):
+        raise ValueError("min_features_to_select must be in 1..%i." % len(survivors))
+    ranks = []
+    scores = []
+    evaluated = []  # evaluated[i] is the feature set that produced scores[i]
+
+    # The set of size min_features_to_select is evaluated too, hence ">=".
+    while len(survivors) >= min_features_to_select:
+        evaluated.append(list(survivors))
+
+        # cross_validate clones the model internally, so the estimator passed
+        # to this function is not modified; return_estimator=True keeps the
+        # fitted clones so that their importances can be read below.
+        cv_result = cross_validate(estimator, X[survivors], y,
+                                   cv=StratifiedKFold(n_splits=splits,
+                                                      shuffle=True,
+                                                      random_state=random_state),
+                                   scoring=scoring_metric,
+                                   return_estimator=True)
+
+        # Record the (optionally rounded) mean performance of this round.
+        score = np.mean(cv_result["test_score"])
+        if scoring_decimals is not None:
+            score = round(score, scoring_decimals)
+        scores.append(score)
+        print("[%.2f] ... score %f." % (time(), scores[-1]))
+
+        # Take the importances of the model fitted on the best fold (the last
+        # step if it is a Pipeline) and square them as described in the paper.
+        best_estimator = cv_result["estimator"][np.argmax(cv_result["test_score"])]
+        if isinstance(best_estimator, Pipeline):
+            best_estimator = best_estimator[-1]
+        weights = list(np.power(best_estimator.feature_importances_, 2))
+        if len(weights) != len(survivors):
+            raise ValueError("feature_importances_ does not match the evaluated columns.")
+
+        # Drop up to `step` of the weakest features, stopping at the minimum;
+        # the max(..., 1) guarantees progress so that the loop terminates.
+        for _ in range(max(min(step, len(survivors) - min_features_to_select), 1)):
+            idx = np.argmin(weights)
+            ranks.insert(0, survivors.pop(idx))
+            weights.pop(idx)
+
+    # Prefer the last (i.e. smallest) evaluated set among those with the top score.
+    last_max_idx = len(scores) - 1 - int(np.argmax(scores[::-1]))
+    return evaluated[last_max_idx], max(scores), ranks, scores
+
+from sklearn.datasets import load_breast_cancer
+from sklearn.tree import DecisionTreeClassifier
+# Smoke test: run rfecv on the breast-cancer data set with a single decision tree.
+test_data = load_breast_cancer(as_frame=True)
+clf = DecisionTreeClassifier(random_state=0)
+clf.fit(test_data.data, test_data.target)
+best_features, best_score, _, _ = rfecv(test_data.data, test_data.target, clf, step=1, min_features_to_select=1, random_state=0)