Filtering DataFrames in R Using Base R and Dplyr
Filtering DataFrames in R
In this example, we will show you how to filter dataframes in R using base R functions and dplyr.
Base R Method
We start by putting our dataframes into a list using mget. Then we use lapply to apply an anonymous function to each dataframe in the list. This function returns the row with the minimum value for the RMSE column.
# Example data: each data frame holds a block-length column (`*_lb`) in
# position 1 and the matching RMSE column (`*_RMSE`) in position 2.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563,
               1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368,
               0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943,
               1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988,
                1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357,
                2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Fetch the data frames by explicit name, already in the order we want to
# report them. This avoids both the alphabetical order imposed by ls() and
# accidentally picking up unrelated objects whose names end in "bb".
df_names <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
df_list <- mget(df_names)

# For each data frame keep only the row with the smallest RMSE (column 2).
# which.min() returns integer(0) when there is nothing to compare (no rows,
# or all values NA); such frames yield NULL.
tmp <- lapply(df_list, function(x) {
  i <- which.min(x[[2]])
  if (length(i) > 0L) {
    data.frame(lb = x[i, 1], RMSE = x[i, 2])
  } else {
    NULL
  }
})

# Drop NULL results before binding so the name labels stay aligned with the
# rows that actually remain.
tmp <- Filter(Negate(is.null), tmp)
res <- do.call(rbind, tmp)
res <- cbind.data.frame(df = names(tmp), res)
res
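This returns one row per dataframe, in the order nbb, mbb, cbb, tmbb, tcbb, showing the lb value at which that dataframe's RMSE is smallest together with the RMSE itself.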
Dplyr Method
Alternatively, we can use dplyr to achieve the same result with more concise code:
library(dplyr)

# Example data: each data frame holds a block-length column (`*_lb`) in
# position 1 and the matching RMSE column (`*_RMSE`) in position 2.
nbb <- data.frame(
  nbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  nbb_RMSE = c(1.0152338, 0.7199394, 0.7990978, 0.9045563,
               1.6514406, 0.5160516, 0.4964024, 0.2617795)
)
mbb <- data.frame(
  mbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  mbb_RMSE = c(0.8324074, 0.9278236, 1.9817984, 0.9567368,
               0.2814623, 0.1129459, 0.1233126, 0.4222578)
)
cbb <- data.frame(
  cbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  cbb_RMSE = c(1.27782499, 1.96332220, 0.74704997, 0.46579943,
               1.10850563, 0.40456698, 0.26027359, 0.02452239)
)
tmbb <- data.frame(
  tmbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tmbb_RMSE = c(0.83240742, 1.05126826, 0.08290467, 0.76397988,
                1.23772208, 0.57628337, 0.56437185, 0.46460279)
)
tcbb <- data.frame(
  tcbb_lb = c(2, 3, 4, 5, 6, 7, 8, 9),
  tcbb_RMSE = c(0.9328510, 0.8312332, 0.9402116, 1.6029357,
                2.0001519, 0.4387557, 0.5965582, 0.4148854)
)

# Fetch the data frames by explicit name so the result follows this order
# rather than the alphabetical order that ls() would give.
df_names <- c("nbb", "mbb", "cbb", "tmbb", "tcbb")
df_list <- mget(df_names)

# For each data frame: give the two columns common names (selected by
# position, since every frame uses its own prefix), keep the single row with
# the smallest RMSE, then stack the results. `.id = "df"` records which list
# element each row came from; `with_ties = FALSE` keeps only the first
# minimum if several rows tie.
res <- df_list %>%
  lapply(function(x) {
    x %>%
      rename(lb = 1, RMSE = 2) %>%
      slice_min(RMSE, n = 1, with_ties = FALSE)
  }) %>%
  bind_rows(.id = "df")
res
This produces the same result as the base R method: one row per dataframe, holding the lb value and RMSE of its minimum-RMSE entry.
Last modified on 2024-01-16