% Conference paper in the IWBBIO 2018 proceedings (Springer, Lecture Notes in Computer Science).
% Acknowledgment/funding and copyright text live in the non-standard fields
% "funding" and "copyright" so that styles do not print them in the bibliography.
@inproceedings{1ec0e0405c9243ac898adfd30a02e75d,
  author     = {Guan, Xin and Liu, Li},
  title      = {{Know-GRRF}: Domain-Knowledge Informed Biomarker Discovery with Random Forests},
  booktitle  = {Bioinformatics and Biomedical Engineering -- 6th International Work-Conference, {IWBBIO} 2018, Proceedings},
  editor     = {Rojas, Ignacio and Ortuno, Francisco},
  series     = {Lecture Notes in Computer Science},
  publisher  = {Springer International Publishing},
  year       = {2018},
  pages      = {3--14},
  doi        = {10.1007/978-3-319-78759-6_1},
  isbn       = {9783319787589},
  eventtitle = {6th International Work-Conference on Bioinformatics and Biomedical Engineering, IWBBIO 2018},
  eventdate  = {2018-04-25/2018-04-27},
  langid     = {american},
  language   = {english},
  keywords   = {Biomarker discovery, Domain knowledge, Feature selection, Regularized random forest},
  abstract   = {Due to its robustness and built-in feature selection capability, random forest is frequently employed in omics studies for biomarker discovery and predictive modeling. However, random forest assumes equal importance of all features, while in reality domain knowledge may justify the prioritization of more relevant features. Furthermore, it has been shown that an antecedent feature selection step can improve the performance of random forest by reducing noises and search space. In this paper, we present a novel Know-guided regularized random forest (Know-GRRF) method that incorporates domain knowledge in a random forest framework for feature selection. Via rigorous simulations, we show that Know-GRRF outperforms existing methods by correctly identifying informative features and improving the accuracy of subsequent predictive models. Know-GRRF is responsive to a wide range of tuning parameters that help to better differentiate candidate features. Know-GRRF is also stable from run to run, making it robust to noises. We further proved that Know-GRRF is a generalized form of existing methods, RRF and GRRF. We applied Know-GRRF to a real world radiation biodosimetry study that uses non-human primate data to discover biomarkers for human applications. By using cross-species correlation as domain knowledge, Know-GRRF was able to identify three gene markers that significantly improved the cross-species prediction accuracy. We implemented Know-GRRF as an R package that is available through the CRAN archive.},
  funding    = {Acknowledgments. We thank George Runger, Kristin Gillis, Vel Murugan, Jin Park and Garrick Wallstrom for insightful discussions. This project has been funded in part with federal funds from the Biomedical Advanced Research and Development Authority, Office of the Assistant Secretary for Preparedness and Response, Office of the Secretary, Department of Health and Human Services under Contract No. HHS01201000008C.},
  copyright  = {Publisher Copyright: {\textcopyright} Springer International Publishing AG, part of Springer Nature 2018. Copyright: Copyright 2018 Elsevier B.V., All rights reserved.},
}