Feature selection wrapper
feature_selection(dataset, method, class_attr = NULL, exclude = NULL, ...)
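| Argument | Description |
|---|---|
| dataset | The dataset we want to do feature selection on |
| method | Selected method of feature selection |
| class_attr | Name of the class attribute in `dataset` (e.g. `"Species"`). Defaults to `NULL` |
| exclude | Character vector with the names of the attributes to exclude from the feature selection (e.g. `c("V1", "V2")`). Defaults to `NULL` |
| ... | Further arguments for the selected `method` (e.g. `num_features`, `eval_fun`) |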
The treated dataset, with the feature selection of the chosen method applied
# Load the package under demonstration, the tree learner used by the
# wrapper-search evaluator below, and the example datasets.
library("smartdata")
library("rpart")
data(ecoli1, package = "imbalance")
data(HouseVotes84, package = "mlbench")

# Adapted from the FSelector::best.first.search documentation.
#
# Scores a candidate feature subset by fitting an rpart tree on iris and
# averaging its accuracy over k random folds.
#
# subset: character vector with the names of the iris predictors to evaluate.
# Returns the mean accuracy across the k folds. As a side effect, prints the
# subset and its score so the progress of the search can be followed.
evaluator <- function(subset) {
  k <- 5
  # One uniform draw per row; fold i holds the rows whose draw falls in
  # [(i - 1) / k, i / k), so fold sizes vary from call to call.
  splits <- runif(nrow(iris))
  results <- vapply(seq_len(k), function(i) {
    test.idx <- (splits >= (i - 1) / k) & (splits < i / k)
    train.idx <- !test.idx
    test <- iris[test.idx, , drop = FALSE]
    train <- iris[train.idx, , drop = FALSE]
    tree <- rpart(FSelector::as.simple.formula(subset, "Species"), train)
    error.rate <- sum(test$Species != predict(tree, test, type = "class")) /
      nrow(test)
    1 - error.rate
  }, numeric(1))
  mean_accuracy <- mean(results)
  print(subset)
  print(mean_accuracy)
  mean_accuracy
}

super_iris <- feature_selection(iris, "Boruta", class_attr = "Species")
super_iris <- feature_selection(iris, "chi_squared",
                                class_attr = "Species", num_features = 3)
# Pick 3 attributes from the continuous ones
super_ecoli <- feature_selection(ecoli1, "information_gain",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "gain_ratio",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "sym_uncertainty",
                                 class_attr = "Class", num_features = 3)
super_votes <- feature_selection(HouseVotes84, "oneR",
                                 exclude = c("V1", "V2"),
                                 class_attr = "Class", num_features = 3)
super_iris <- feature_selection(iris, "RF_importance",
                                class_attr = "Species", num_features = 3,
                                type = 2)

# The wrapper searches below call `evaluator` once per candidate subset. The
# `#>` lines are sample output; exact values differ between runs because the
# folds are drawn at random.
# \donttest{
super_iris <- feature_selection(iris, "best_first_search",
                                exclude = "Species", eval_fun = evaluator)
#> [1] "Sepal.Length"
#> [1] 0.674
#> [1] "Sepal.Width"
#> [1] 0.491
#> [1] "Petal.Length"
#> [1] 0.944
#> [1] "Petal.Width"
#> [1] 0.959
#> [1] "Sepal.Length" "Petal.Width"
#> [1] 0.952
#> [1] "Sepal.Width" "Petal.Width"
#> [1] 0.951
#> [1] "Petal.Length" "Petal.Width"
#> [1] 0.942
super_iris <- feature_selection(iris, "forward_search",
                                exclude = "Species", eval_fun = evaluator)
#> [1] "Sepal.Length"
#> [1] 0.694
#> [1] "Sepal.Width"
#> [1] 0.505
#> [1] "Petal.Length"
#> [1] 0.927
#> [1] "Petal.Width"
#> [1] 0.954
#> [1] "Sepal.Length" "Petal.Width"
#> [1] 0.957
#> [1] "Sepal.Width" "Petal.Width"
#> [1] 0.951
#> [1] "Petal.Length" "Petal.Width"
#> [1] 0.936
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Width"
#> [1] 0.951
#> [1] "Sepal.Length" "Petal.Length" "Petal.Width"
#> [1] 0.934
super_iris <- feature_selection(iris, "backward_search",
                                exclude = "Species", eval_fun = evaluator)
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
#> [1] 0.939
#> [1] "Sepal.Width" "Petal.Length" "Petal.Width"
#> [1] 0.937
#> [1] "Sepal.Length" "Petal.Length" "Petal.Width"
#> [1] 0.926
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Width"
#> [1] 0.96
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Length"
#> [1] 0.944
#> [1] "Sepal.Width" "Petal.Width"
#> [1] 0.958
#> [1] "Sepal.Length" "Petal.Width"
#> [1] 0.951
#> [1] "Sepal.Length" "Sepal.Width"
#> [1] 0.721
# }

super_iris <- feature_selection(iris, "cfs", class_attr = "Species")
super_iris <- feature_selection(iris, "consistency", class_attr = "Species")