Feature selection wrapper

feature_selection(dataset, method, class_attr = NULL, exclude = NULL, ...)

Arguments

dataset

The dataset we want to do feature selection on.

method

character. Name of the selected feature selection method (e.g. "Boruta", "chi_squared", "cfs"; see Examples)

class_attr

character. Indicates the class attribute or attributes from dataset. Must exist in it.

exclude

character. Vector of attributes to exclude from the feature selection process

...

Further arguments for method

Value

The treated dataset, with the attributes discarded by the feature selection method removed

Examples

library("smartdata")
library("rpart")
data(ecoli1, package = "imbalance")
data(HouseVotes84, package = "mlbench")

# Extracted from FSelector::best.first.search documentation.
# Scores a subset of iris attributes: fits an rpart tree on each of k random
# train/test splits and returns the mean accuracy (1 - error rate).
# Prints the subset and its score as a side effect so the search can be traced.
evaluator <- function(subset) {
  k <- 5
  # One uniform draw per row; row i falls in fold j when its draw lies in
  # [(j - 1) / k, j / k)
  splits <- runif(nrow(iris))
  results <- vapply(seq_len(k), function(i) {
    test.idx <- (splits >= (i - 1) / k) & (splits < i / k)
    train.idx <- !test.idx
    test <- iris[test.idx, , drop = FALSE]
    train <- iris[train.idx, , drop = FALSE]
    tree <- rpart(FSelector::as.simple.formula(subset, "Species"), train)
    predicted <- predict(tree, test, type = "class")
    error.rate <- sum(test$Species != predicted) / nrow(test)
    1 - error.rate
  }, numeric(1))
  print(subset)
  print(mean(results))
  mean(results)
}

super_iris <- feature_selection(iris, "Boruta", class_attr = "Species")
super_iris <- feature_selection(iris, "chi_squared",
                                class_attr = "Species", num_features = 3)
# Pick 3 attributes from the continuous ones
super_ecoli <- feature_selection(ecoli1, "information_gain",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "gain_ratio",
                                 class_attr = "Class", num_features = 3)
super_ecoli <- feature_selection(ecoli1, "sym_uncertainty",
                                 class_attr = "Class", num_features = 3)
super_votes <- feature_selection(HouseVotes84, "oneR",
                                 exclude = c("V1", "V2"),
                                 class_attr = "Class", num_features = 3)
super_iris <- feature_selection(iris, "RF_importance",
                                class_attr = "Species", num_features = 3,
                                type = 2)

# The wrapper-search examples below print one subset/score pair per evaluated
# candidate; scores vary between runs because the folds are drawn at random.
# \donttest{
super_iris <- feature_selection(iris, "best_first_search",
                                exclude = "Species", eval_fun = evaluator)
#> [1] "Sepal.Length"
#> [1] 0.674
#> [1] "Sepal.Width"
#> [1] 0.491
#> [1] "Petal.Length"
#> [1] 0.944
#> [1] "Petal.Width"
#> [1] 0.959
#> [1] "Sepal.Length" "Petal.Width"
#> [1] 0.952
#> [1] "Sepal.Width" "Petal.Width"
#> [1] 0.951
#> [1] "Petal.Length" "Petal.Width"
#> [1] 0.942
super_iris <- feature_selection(iris, "forward_search",
                                exclude = "Species", eval_fun = evaluator)
#> [1] "Sepal.Length"
#> [1] 0.694
#> [1] "Sepal.Width"
#> [1] 0.505
#> [1] "Petal.Length"
#> [1] 0.927
#> [1] "Petal.Width"
#> [1] 0.954
#> [1] "Sepal.Length" "Petal.Width"
#> [1] 0.957
#> [1] "Sepal.Width" "Petal.Width"
#> [1] 0.951
#> [1] "Petal.Length" "Petal.Width"
#> [1] 0.936
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Width"
#> [1] 0.951
#> [1] "Sepal.Length" "Petal.Length" "Petal.Width"
#> [1] 0.934
super_iris <- feature_selection(iris, "backward_search",
                                exclude = "Species", eval_fun = evaluator)
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
#> [1] 0.939
#> [1] "Sepal.Width" "Petal.Length" "Petal.Width"
#> [1] 0.937
#> [1] "Sepal.Length" "Petal.Length" "Petal.Width"
#> [1] 0.926
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Width"
#> [1] 0.96
#> [1] "Sepal.Length" "Sepal.Width" "Petal.Length"
#> [1] 0.944
#> [1] "Sepal.Width" "Petal.Width"
#> [1] 0.958
#> [1] "Sepal.Length" "Petal.Width"
#> [1] 0.951
#> [1] "Sepal.Length" "Sepal.Width"
#> [1] 0.721
# }
super_iris <- feature_selection(iris, "cfs", class_attr = "Species")
super_iris <- feature_selection(iris, "consistency", class_attr = "Species")