% Conference paper (GECCO 2006) by Patton and Potok; the citation key is the exporter's opaque ID and is kept so existing \cite commands still resolve.
% The former `series` field repeated `booktitle` verbatim and was dropped; the conference name and dates remain in `booktitle` and `note`.
@inproceedings{0c5bebf489a04c1eb99184c960eedd67,
  author    = {Patton, Robert M. and Potok, Thomas E.},
  title     = {Characterizing large text corpora using a maximum variation sampling genetic algorithm},
  booktitle = {{GECCO} 2006 - Genetic and Evolutionary Computation Conference},
  pages     = {1877--1878},
  publisher = {Association for Computing Machinery (ACM)},
  year      = {2006},
  doi       = {10.1145/1143997.1144308},
  isbn      = {1595931864},
  language  = {English},
  keywords  = {Intelligent agents, Parallel genetic algorithm, Text analysis},
  abstract  = {There exists an enormous amount of information available via the Internet. Much of this data is in the form of text-based documents. These documents cover a variety of topics that are vitally important to the scientific, business, and defense/security communities. Currently, there are a many techniques for processing and analyzing such data. However, the ability to quickly characterize a large set of documents still proves challenging. Previous work has successfully demonstrated the use of a genetic algorithm for providing a representative subset for text documents via adaptive sampling. In this work, we further expand and explore this approach on much larger data sets using a parallel Genetic Algorithm (GA) with adaptive parameter control. Experimental results are presented and discussed.},
  note      = {8th Annual Genetic and Evolutionary Computation Conference 2006 ; Conference date: 08-07-2006 Through 12-07-2006},
}