triebeard/0000755000176200001440000000000014400752177012222 5ustar liggesuserstriebeard/NAMESPACE0000644000176200001440000000266114400735273013444 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(as.data.frame,trie) S3method(as.list,trie) S3method(dim,trie) S3method(get_keys,integer_trie) S3method(get_keys,logical_trie) S3method(get_keys,numeric_trie) S3method(get_keys,string_trie) S3method(get_values,integer_trie) S3method(get_values,logical_trie) S3method(get_values,numeric_trie) S3method(get_values,string_trie) S3method(greedy_match,integer_trie) S3method(greedy_match,logical_trie) S3method(greedy_match,numeric_trie) S3method(greedy_match,string_trie) S3method(length,integer_trie) S3method(length,logical_trie) S3method(length,numeric_trie) S3method(length,string_trie) S3method(longest_match,integer_trie) S3method(longest_match,logical_trie) S3method(longest_match,numeric_trie) S3method(longest_match,string_trie) S3method(prefix_match,integer_trie) S3method(prefix_match,logical_trie) S3method(prefix_match,numeric_trie) S3method(prefix_match,string_trie) S3method(print,trie) S3method(str,trie) S3method(trie_add,integer_trie) S3method(trie_add,logical_trie) S3method(trie_add,numeric_trie) S3method(trie_add,string_trie) S3method(trie_remove,integer_trie) S3method(trie_remove,logical_trie) S3method(trie_remove,numeric_trie) S3method(trie_remove,string_trie) export(get_keys) export(get_values) export(greedy_match) export(longest_match) export(prefix_match) export(trie) export(trie_add) export(trie_remove) importFrom(Rcpp,sourceCpp) useDynLib(triebeard, .registration = TRUE) triebeard/LICENSE0000644000176200001440000000005214377436506013234 0ustar liggesusersYEAR: 2016 COPYRIGHT HOLDER: Oliver Keyes triebeard/README.md0000644000176200001440000000307514400735243013501 0ustar liggesusers## triebeard Fast key-value matching in R and Rcpp __Author:__ Os Keyes, Drew Schmidt, Yuuki Takano
__License:__ [MIT](https://opensource.org/license/mit/)
__Status:__ Stable [![Travis-CI Build Status](https://travis-ci.org/Ironholds/triebeard.svg?branch=master)](https://travis-ci.org/Ironholds/triebeard) ![downloads](http://cranlogs.r-pkg.org/badges/grand-total/triebeard) ### Description Tries, or [radix trees](https://en.wikipedia.org/wiki/Radix_tree), are key-value data structures optimised for very, very fast matching of the keys against user-provided data (and then the return of the associated values!) This is pretty useful in data cleaning and value extraction, and tries let you do it *really* efficiently. `triebeard` contains an implementation that can be used both when writing R, and when writing Rcpp (and imported and linked against, to boot). For more information see: 1. The [vignette on Rcpp usage](https://CRAN.R-project.org/package=triebeard/vignettes/rcpp_radix.html); 2. The [vignette on R usage](https://CRAN.R-project.org/package=triebeard/vignettes/r_radix.html). Please note that this project is released with a [Contributor Code of Conduct](https://github.com/Ironholds/triebeard/blob/master/CONDUCT.md). By participating in this project you agree to abide by its terms. ### Installation The stable, CRAN-ready version can be retrieved with: install.packages("triebeard") The latest version can be obtained via: devtools::install_github("ironholds/triebeard") ### Dependencies * R. * [Rcpp](https://cran.r-project.org/package=Rcpp) triebeard/man/0000755000176200001440000000000014377436506013005 5ustar liggesuserstriebeard/man/alter.Rd0000644000176200001440000000171514377436506014407 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/alter.R \name{alter} \alias{alter} \alias{trie_add} \alias{trie_remove} \title{Add or remove trie entries} \usage{ trie_add(trie, keys, values) trie_remove(trie, keys) } \arguments{ \item{trie}{a trie object created with \code{\link{trie}}} \item{keys}{a character vector containing the keys of the entries to add (or remove). 
Entries with NA keys will not be added.} \item{values}{an atomic vector, matching the type of the trie, containing the values of the entries to add. Entries with NA values will not be added.} } \value{ nothing; the trie is modified in-place } \description{ \code{trie_add} and \code{trie_remove} allow you to add or remove entries from tries, respectively. } \examples{ trie <- trie("foo", "bar") length(trie) trie_add(trie, "baz", "qux") length(trie) trie_remove(trie, "baz") length(trie) } \seealso{ \code{\link{trie}} for creating tries in the first place. } triebeard/man/greedy_match.Rd0000644000176200001440000000272714377436506015737 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match.R \name{greedy_match} \alias{greedy_match} \title{Greedily match against a tree} \usage{ greedy_match(trie, to_match, include_keys = FALSE) } \arguments{ \item{trie}{a trie object, created with \code{\link{trie}}} \item{to_match}{a character vector containing the strings to check against the trie's keys.} \item{include_keys}{a logical value indicating whether to include the keys in the returned results or not. If TRUE (\emph{not} the default) the returned object will be a list of data.frames, rather than of vectors.} } \value{ a list, the length of \code{to_match}, with each entry containing any trie values where the \code{to_match} element greedily matches the associated key. In the case that nothing was found, the entry will contain \code{NA}. In the case that \code{include_keys} is TRUE, the matching keys will also be included } \description{ \code{greedy_match} accepts a trie and a character vector and returns the values associated with any key that is "greedily" (read: fuzzily) matched against one of the character vector entries. 
} \examples{ trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c("afford", "affair", "available", "binary", "bind", "blind")) greedy_match(trie, c("avoid", "bring", "attack")) } \seealso{ \code{\link{longest_match}} and \code{\link{prefix_match}} for longest and prefix matching, respectively. } triebeard/man/getters.Rd0000644000176200001440000000074514377436506014757 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get.R \name{getters} \alias{getters} \alias{get_keys} \alias{get_values} \title{Trie Getters} \usage{ get_keys(trie) get_values(trie) } \arguments{ \item{trie}{A trie object, created with \code{\link{trie}}.} } \value{ An atomic vector of keys or values stored in the trie. } \description{ "Getters" for the data stored in a trie object. \code{get_keys} gets the keys, \code{get_values} gets the values. } triebeard/man/triebeard.Rd0000644000176200001440000000053214377436506015235 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/triebeard.R \docType{package} \name{triebeard} \alias{triebeard} \alias{triebeard-package} \title{Radix trees in Rcpp} \description{ This package provides access to Radix tree (or "trie") structures in Rcpp. At a later date it will hopefully provide them in R, too. } triebeard/man/trie.Rd0000644000176200001440000000175514377436506014247 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/create.R \name{trie} \alias{trie} \title{Create a Trie} \usage{ trie(keys, values) } \arguments{ \item{keys}{a character vector containing the keys for the trie.} \item{values}{an atomic vector of any type, containing the values to pair with \code{keys}. Must be the same length as \code{keys}.} } \value{ a `trie` object. 
} \description{ \code{trie} creates a trie (a key-value store optimised for matching) out of a provided character vector of keys, and a numeric, character, logical or integer vector of values (both the same length). } \examples{ # An integer trie int_trie <- trie(keys = "foo", values = 1) # A string trie str_trie <- trie(keys = "foo", values = "bar") } \seealso{ \code{\link{trie_add}} and \code{\link{trie_remove}} for adding to and removing from tries after their creation, and \code{\link{longest_match}} and other match functions for matching values against the keys of a created trie. } triebeard/man/longest_match.Rd0000644000176200001440000000232714377436506016127 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match.R \name{longest_match} \alias{longest_match} \title{Find the longest match in a trie} \usage{ longest_match(trie, to_match, include_keys = FALSE) } \arguments{ \item{trie}{a trie object, created with \code{\link{trie}}} \item{to_match}{a character vector containing the strings to match against the trie's keys.} \item{include_keys}{a logical value indicating whether to include the keys in the returned results or not. If TRUE (\emph{not} the default) the returned object will be a data.frame, rather than a vector.} } \description{ \code{longest_match} accepts a trie and a character vector and returns the value associated with whichever key had the \emph{longest match} to each entry in the character vector. A trie of "binary" and "bind", for example, with an entry-to-compare of "binder", will match to "bind". } \examples{ trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c("afford", "affair", "available", "binary", "bind", "blind")) longest_match(trie, "binder") } \seealso{ \code{\link{prefix_match}} and \code{\link{greedy_match}} for prefix and greedy matching, respectively. 
} triebeard/man/prefix_match.Rd0000644000176200001440000000254014377436506015746 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/match.R \name{prefix_match} \alias{prefix_match} \title{Find the prefix matches in a trie} \usage{ prefix_match(trie, to_match, include_keys = FALSE) } \arguments{ \item{trie}{a trie object, created with \code{\link{trie}}} \item{to_match}{a character vector containing the strings to check against the trie's keys.} \item{include_keys}{a logical value indicating whether to include the keys in the returned results or not. If TRUE (\emph{not} the default) the returned object will be a list of data.frames, rather than of vectors.} } \value{ a list, the length of \code{to_match}, with each entry containing any trie values where the \code{to_match} element was a prefix of the associated key. In the case that nothing was found, the entry will contain \code{NA}. } \description{ \code{prefix_match} accepts a trie and a character vector and returns the values associated with any key that has a particular character vector entry as a prefix (see the examples). } \examples{ trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c("afford", "affair", "available", "binary", "bind", "blind")) prefix_match(trie, "aff") } \seealso{ \code{\link{longest_match}} and \code{\link{greedy_match}} for longest and greedy matching, respectively. } triebeard/DESCRIPTION0000644000176200001440000000155214400752177013733 0ustar liggesusersPackage: triebeard Type: Package Title: 'Radix' Trees in 'Rcpp' Version: 0.4.1 Author: Os Keyes [aut, cre], Drew Schmidt [aut], Yuuki Takano [cph] Maintainer: Os Keyes Description: 'Radix trees', or 'tries', are key-value data structures optimised for efficient lookups, similar in purpose to hash tables. 'triebeard' provides an implementation of 'radix trees' for use in R programming and in developing packages with 'Rcpp'. 
License: MIT + file LICENSE LinkingTo: Rcpp Encoding: UTF-8 Imports: Rcpp RoxygenNote: 7.1.2 Suggests: knitr, rmarkdown, testthat VignetteBuilder: knitr URL: https://github.com/Ironholds/triebeard/ BugReports: https://github.com/Ironholds/triebeard/issues Date: 2023-03-04 NeedsCompilation: yes Packaged: 2023-03-04 21:40:31 UTC; ironholds Repository: CRAN Date/Publication: 2023-03-04 23:30:07 UTC triebeard/build/0000755000176200001440000000000014400735317013316 5ustar liggesuserstriebeard/build/vignette.rds0000644000176200001440000000033714400735317015660 0ustar liggesusersu0EC||_Aظ1ą;@Mx%Aw~98N;gz޻CщiD7ixP,8ȉ ^<M;I$U5KI~[sX׫3dـ{7혅mϣ?왆6͙ġЌxԇƸ>5fvwa/lQ62;ɾPk׉JHA'5 е?triebeard/tests/0000755000176200001440000000000014377436506013374 5ustar liggesuserstriebeard/tests/testthat/0000755000176200001440000000000014400752177015224 5ustar liggesuserstriebeard/tests/testthat/test_greedy.R0000644000176200001440000000423114377436506017675 0ustar liggesuserstestthat::context("Test that greedy-matching works") testthat::test_that("greedy matching works for string tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c("afford", "affair", "available", "binary", "bind", "blind")) output <- greedy_match(trie, "avoid") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(all(output[[1]] == "available")) }) testthat::test_that("greedy matching works for integer tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c(1, 2, 3, 4, 5, 6)) output <- greedy_match(trie, "avoid") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(all(output[[1]] == 3)) }) testthat::test_that("greedy matching works for numeric tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = as.numeric(c(1, 2, 3, 4, 5, 6))) output <- greedy_match(trie, "avoid") 
testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(all(output[[1]] == 3)) }) testthat::test_that("greedy matching works for logical tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)) output <- greedy_match(trie, "avoid") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(output[[1]]) }) testthat::test_that("greedy matching works with include_keys", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)) output <- greedy_match(trie, "avoid", TRUE) holding = output[[1]] testthat::expect_true(is.data.frame(holding)) testthat::expect_true(holding$match_value[1]) testthat::expect_equal(holding$match_key[1], "available") }) testthat::test_that("greedy matching objects to non-trie objects", { expect_error(greedy_match("foo", "bar")) })triebeard/tests/testthat/test_create.R0000644000176200001440000000201514377436506017657 0ustar liggesuserscontext("Test trie creation") testthat::test_that("String tries can be created and safely avoid collection", { string_trie <- trie(LETTERS, LETTERS) testthat::expect_true(any(class(string_trie) == "string_trie")) testthat::expect_true(any(class(string_trie) == "trie")) }) testthat::test_that("Integer tries can be created", { int_trie <- trie(LETTERS, 1:length(LETTERS)) testthat::expect_true(any(class(int_trie) == "integer_trie")) testthat::expect_true(any(class(int_trie) == "trie")) }) testthat::test_that("Double tries can be created", { vals <- as.double(1:length(LETTERS)) double_trie <- trie(LETTERS, vals) testthat::expect_true(any(class(double_trie) == "numeric_trie")) testthat::expect_true(any(class(double_trie) == "trie")) }) testthat::test_that("Logical tries can be created", { vals <- as.logical(rep(c(0,1), (length(LETTERS)/2))) bool_trie <- trie(LETTERS, 
vals) testthat::expect_true(any(class(bool_trie) == "logical_trie")) testthat::expect_true(any(class(bool_trie) == "trie")) }) triebeard/tests/testthat/test_prefix.R0000644000176200001440000000422414377436506017715 0ustar liggesuserstestthat::context("Test that prefix-matching works") testthat::test_that("prefix matching works for string tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c("afford", "affair", "available", "binary", "bind", "blind")) output <- prefix_match(trie, "bin") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(all(output[[1]] == c("binary", "bind"))) }) testthat::test_that("prefix matching works for integer tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c(1, 2, 3, 4, 5, 6)) output <- prefix_match(trie, "bin") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(all(output[[1]] == c(4, 5))) }) testthat::test_that("prefix matching works for numeric tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = as.numeric(c(1, 2, 3, 4, 5, 6))) output <- prefix_match(trie, "bin") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(all(output[[1]] == c(4, 5))) }) testthat::test_that("prefix matching works for logical tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)) output <- prefix_match(trie, "bin") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(all(output[[1]] == c(FALSE, TRUE))) }) testthat::test_that("prefix matching objects to non-trie objects", { expect_error(prefix_match("foo", "bar")) }) testthat::test_that("prefix matching produces NAs with impossibilities", { trie <- trie(keys = c("afford", 
"affair", "available", "binary", "bind", "blind"), values = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)) output <- prefix_match(trie, "bingo") testthat::expect_equal(length(output), 1) testthat::expect_true(is.list(output)) testthat::expect_true(is.na(output[[1]])) }) triebeard/tests/testthat/test_longest.R0000644000176200001440000000335514377436506020077 0ustar liggesuserstestthat::context("Test that longest-matching works") testthat::test_that("Longest matching works for string tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c("afford", "affair", "available", "binary", "bind", "blind")) testthat::expect_equal(longest_match(trie, "binder"), "bind") }) testthat::test_that("Longest matching works for integer tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c(1, 2, 3, 4, 5, 6)) testthat::expect_equal(longest_match(trie, "binder"), 5) }) testthat::test_that("Longest matching works for numeric tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = as.numeric(c(1, 2, 3, 4, 5, 6))) testthat::expect_equal(longest_match(trie, "binder"), 5.0) }) testthat::test_that("Longest matching works for logical tries", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)) testthat::expect_true(longest_match(trie, "binder")) }) testthat::test_that("Longest matching understands the new include_keys param", { trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), values = c("afford", "affair", "available", "muffin", "bind", "frog")) result <- longest_match(trie, "binaryness", TRUE) testthat::expect_true(is.data.frame(result)) testthat::expect_equal(result$match_key, "binary") testthat::expect_equal(result$match_value, "muffin") }) testthat::test_that("Longest matching rejects non-trie objects", { expect_error(longest_match("foo", 
"bar")) })triebeard/tests/testthat/test_get.R0000644000176200001440000000200714377436506017174 0ustar liggesuserscontext("Test key and value retrieval") testthat::test_that("String keys and values can be retrieved", { string_trie <- trie(LETTERS, LETTERS) testthat::expect_equal(LETTERS, get_values(string_trie)) testthat::expect_equal(LETTERS, get_keys(string_trie)) }) testthat::test_that("Integer keys and values can be retrieved", { int_trie <- trie(LETTERS, 1:length(LETTERS)) testthat::expect_equal(1:length(LETTERS), get_values(int_trie)) testthat::expect_equal(LETTERS, get_keys(int_trie)) }) testthat::test_that("Numeric keys and values can be retrieved", { vals <- as.double(1:length(LETTERS)) double_trie <- trie(LETTERS, vals) testthat::expect_equal(vals, get_values(double_trie)) testthat::expect_equal(LETTERS, get_keys(double_trie)) }) testthat::test_that("Boolean keys and values can be retrieved", { vals <- as.logical(rep(c(0,1), (length(LETTERS)/2))) bool_trie <- trie(LETTERS, vals) testthat::expect_equal(vals, get_values(bool_trie)) testthat::expect_equal(LETTERS, get_keys(bool_trie)) })triebeard/tests/testthat/test_alter.R0000644000176200001440000000324214377436506017526 0ustar liggesuserscontext("Test trie alteration") testthat::test_that("String tries can be altered", { trie <- trie("foo", "bar") original_length <- length(trie) trie_add(trie, "baz", "qux") increased_length <- length(trie) trie_remove(trie, "baz") testthat::expect_true(original_length < increased_length) testthat::expect_true(length(trie) == original_length) }) testthat::test_that("String tries can be altered", { trie <- trie("foo", "bar") original_length <- length(trie) trie_add(trie, "baz", "qux") increased_length <- length(trie) trie_remove(trie, "baz") testthat::expect_true(original_length < increased_length) testthat::expect_true(length(trie) == original_length) }) testthat::test_that("Integer tries can be altered", { trie <- trie("foo", 1) original_length <- length(trie) 
trie_add(trie, "baz", 2) increased_length <- length(trie) trie_remove(trie, "baz") testthat::expect_true(original_length < increased_length) testthat::expect_true(length(trie) == original_length) }) testthat::test_that("Numeric tries can be altered", { trie <- trie("foo", as.numeric(1)) original_length <- length(trie) trie_add(trie, "baz", as.numeric(2)) increased_length <- length(trie) trie_remove(trie, "baz") testthat::expect_true(original_length < increased_length) testthat::expect_true(length(trie) == original_length) }) testthat::test_that("Logical tries can be altered", { trie <- trie("foo", FALSE) original_length <- length(trie) trie_add(trie, "baz", TRUE) increased_length <- length(trie) trie_remove(trie, "baz") testthat::expect_true(original_length < increased_length) testthat::expect_true(length(trie) == original_length) }) triebeard/tests/testthat/test_convert.R0000644000176200001440000000144214377436506020077 0ustar liggesuserstestthat::context("Test conversion of tries into other R objects") testthat::test_that("Tries can be turned into lists", { trie_inst <- trie("foo", "bar") trlist <- as.list(trie_inst) testthat::expect_true(is.list(trlist)) testthat::expect_equal(length(trlist), 2) testthat::expect_equal(names(trlist), c("keys", "values")) testthat::expect_equal(trlist$values, "bar") testthat::expect_equal(trlist$keys, "foo") }) testthat::test_that("Tries can be turned into lists", { trie_inst <- trie("foo", "bar") trlist <- as.data.frame(trie_inst) testthat::expect_true(is.data.frame(trlist)) testthat::expect_equal(ncol(trlist), 2) testthat::expect_equal(names(trlist), c("keys", "values")) testthat::expect_equal(trlist$values, "bar") testthat::expect_equal(trlist$keys, "foo") })triebeard/tests/testthat.R0000644000176200001440000000007614377436506015362 0ustar liggesuserslibrary(testthat) library(triebeard) test_check("triebeard") triebeard/src/0000755000176200001440000000000014400735317013006 5ustar 
liggesuserstriebeard/src/str.cpp0000644000176200001440000000506214377436506014340 0ustar liggesusers#include "r_trie.h" #define PRINTMAX 75 template static inline int numlen(T num){ return ((int)std::log10(num))+1; } // TODO NA's static inline int printsize(std::string x){ return x.length(); } static inline int printsize(int x){ if (x == NA_INTEGER) return 2; else return numlen(x); } static inline int printsize(double x){ if (ISNA(x)) return 2; else return numlen(x); } static inline int printsize(bool x){ if (x == NA_LOGICAL) return 2; else return 1; } // TODO NA's static inline void valprinter(std::string val){ Rcout << "\""; Rcout << val; Rcout << "\"" << " "; } static inline void valprinter(int val){ if (val == NA_INTEGER) Rcout << "NA"; else Rcout << val; } static inline void valprinter(double val){ if (ISNA(val)) Rcout << "NA"; else Rcout << val; } static inline void valprinter(bool val){ if (val == NA_INTEGER) Rcout << "NA"; else { if (val) Rcout << "TRUE"; else Rcout << "FALSE"; } } template static inline void trie_str_generic(SEXP radix, std::string type_str){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); int input_size = rt_ptr->size(); int iter; int printed = 0; Rcout << " Keys: chr [1:" << input_size << "] "; printed += 19 + numlen(input_size); typename radix_tree< std::string, T >::iterator it; iter = 0; for (it = rt_ptr->radix.begin(); it != rt_ptr->radix.end() && printed < PRINTMAX; ++it) { printed += it->first.length(); if (iter > 0 && printed > PRINTMAX) break; Rcout << "\"" << it->first << "\"" << " "; iter++; } if (iter < input_size) Rcout << "..."; Rcout << std::endl; printed = 0; Rcout << " Values: " << type_str << " [1:" << input_size << "] "; printed += 15 + type_str.length() + numlen(input_size); iter = 0; for (it = rt_ptr->radix.begin(); it != rt_ptr->radix.end() && iter < 5; ++it) { printed += printsize(it->second); if (iter > 0 && printed > PRINTMAX) break; valprinter(it->second); Rcout << " "; iter++; } if 
(iter < input_size) Rcout << "..."; Rcout << std::endl; } //[[Rcpp::export]] void trie_str_string(SEXP radix){ trie_str_generic(radix, "chr"); } //[[Rcpp::export]] void trie_str_integer(SEXP radix){ trie_str_generic(radix, "int"); } //[[Rcpp::export]] void trie_str_numeric(SEXP radix){ trie_str_generic(radix, "num"); } //[[Rcpp::export]] void trie_str_logical(SEXP radix){ trie_str_generic(radix, "logi"); } triebeard/src/alter.cpp0000644000176200001440000000557714377436506014652 0ustar liggesusers#include "r_trie.h" //[[Rcpp::export]] void add_trie_string(SEXP trie, CharacterVector keys, CharacterVector values){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); unsigned int in_size = keys.size(); for(unsigned int i = 0; i < in_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(keys[i] != NA_STRING && values[i] != NA_STRING){ rt_ptr->insert_value(Rcpp::as(keys[i]), Rcpp::as(values[i])); } } rt_ptr->radix_size = rt_ptr->size(); } //[[Rcpp::export]] void add_trie_integer(SEXP trie, CharacterVector keys, IntegerVector values){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); unsigned int in_size = keys.size(); for(unsigned int i = 0; i < in_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(keys[i] != NA_STRING && values[i] != NA_INTEGER){ rt_ptr->insert_value(Rcpp::as(keys[i]), values[i]); } } rt_ptr->radix_size = rt_ptr->size(); } //[[Rcpp::export]] void add_trie_numeric(SEXP trie, CharacterVector keys, NumericVector values){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); unsigned int in_size = keys.size(); for(unsigned int i = 0; i < in_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(keys[i] != NA_STRING && values[i] != NA_REAL){ rt_ptr->insert_value(Rcpp::as(keys[i]), values[i]); } } rt_ptr->radix_size = rt_ptr->size(); } //[[Rcpp::export]] void add_trie_logical(SEXP trie, CharacterVector keys, LogicalVector values){ r_trie * rt_ptr = (r_trie 
*) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); unsigned int in_size = keys.size(); for(unsigned int i = 0; i < in_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(keys[i] != NA_STRING && values[i] != NA_LOGICAL){ rt_ptr->insert_value(Rcpp::as(keys[i]), values[i]); } } rt_ptr->radix_size = rt_ptr->size(); } //[[Rcpp::export]] void remove_trie_string(SEXP trie, CharacterVector keys){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); rt_ptr->remove_values(keys); } //[[Rcpp::export]] void remove_trie_integer(SEXP trie, CharacterVector keys){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); rt_ptr->remove_values(keys); } //[[Rcpp::export]] void remove_trie_numeric(SEXP trie, CharacterVector keys){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); rt_ptr->remove_values(keys); } //[[Rcpp::export]] void remove_trie_logical(SEXP trie, CharacterVector keys){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(trie); ptr_check(rt_ptr); rt_ptr->remove_values(keys); } triebeard/src/get.cpp0000644000176200001440000000260314377436506014305 0ustar liggesusers#include "r_trie.h" template static inline std::vector < std::string > get_keys_generic(SEXP radix){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); return rt_ptr->get_keys(); } //[[Rcpp::export]] std::vector < std::string > get_keys_string(SEXP radix){ return get_keys_generic(radix); } //[[Rcpp::export]] std::vector < std::string > get_keys_integer(SEXP radix){ return get_keys_generic(radix); } //[[Rcpp::export]] std::vector < std::string > get_keys_numeric(SEXP radix){ return get_keys_generic(radix); } //[[Rcpp::export]] std::vector < std::string > get_keys_logical(SEXP radix){ return get_keys_generic(radix); } template static inline std::vector < T > get_values_generic(SEXP radix){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); return rt_ptr->get_values(); } //[[Rcpp::export]] std::vector < 
std::string > get_values_string(SEXP radix){ return get_values_generic(radix); } //[[Rcpp::export]] std::vector < int > get_values_integer(SEXP radix){ return get_values_generic(radix); } //[[Rcpp::export]] std::vector < double > get_values_numeric(SEXP radix){ return get_values_generic(radix); } //[[Rcpp::export]] std::vector < bool > get_values_logical(SEXP radix){ return get_values_generic(radix); } triebeard/src/create.cpp0000644000176200001440000000160314377436506014770 0ustar liggesusers#include #include "typedef.h" using namespace Rcpp; //[[Rcpp::export]] SEXP radix_create_string(std::vector < std::string > keys, std::vector < std::string > values){ r_trie * radix = new r_trie (keys, values); return XPtrRadixStr(radix); } //[[Rcpp::export]] SEXP radix_create_integer(std::vector < std::string > keys, std::vector < int > values){ r_trie * radix = new r_trie (keys, values); return XPtrRadixInt(radix); } //[[Rcpp::export]] SEXP radix_create_numeric(std::vector < std::string > keys, std::vector < double > values){ r_trie * radix = new r_trie (keys, values); XPtrRadixDouble ptr(radix); return ptr; } //[[Rcpp::export]] SEXP radix_create_logical(std::vector < std::string > keys, std::vector < bool > values){ r_trie * radix = new r_trie (keys, values); return XPtrRadixBool(radix); } triebeard/src/typedef.h0000644000176200001440000000101414377436506014626 0ustar liggesusers#include "r_trie.h" #ifndef __RTRIE_TYPES__ #define __RTRIE_TYPES__ template void finaliseRadix(r_trie * radix_inst){ delete radix_inst; } typedef Rcpp::XPtr, Rcpp::PreserveStorage, finaliseRadix> XPtrRadixStr; typedef Rcpp::XPtr, Rcpp::PreserveStorage, finaliseRadix> XPtrRadixInt; typedef Rcpp::XPtr, Rcpp::PreserveStorage, finaliseRadix> XPtrRadixBool; typedef Rcpp::XPtr, Rcpp::PreserveStorage, finaliseRadix> XPtrRadixDouble; #endif triebeard/src/greedy_match.cpp0000644000176200001440000000671214377436506016166 0ustar liggesusers#include #include "r_trie.h" using namespace Rcpp; template List 
greedy_generic(SEXP radix, CharacterVector to_match, Y non_match_val){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); unsigned int input_size = to_match.size(); List output(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } X holding; std::vector::iterator> vec; typename std::vector::iterator>::iterator it; if(to_match[i] == NA_STRING){ holding.push_back(non_match_val); } else { rt_ptr->radix.greedy_match(Rcpp::as(to_match[i]), vec); for (it = vec.begin(); it != vec.end(); ++it) { holding.push_back((*it)->second); } if(holding.size() == 0){ holding.push_back(non_match_val); } } output[i] = holding; } return output; } template List greedy_generic_df(SEXP radix, CharacterVector to_match, Y non_match_val){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); unsigned int input_size = to_match.size(); List output(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } X holding; CharacterVector holding_keys; std::vector::iterator> vec; typename std::vector::iterator>::iterator it; if(to_match[i] == NA_STRING){ holding.push_back(non_match_val); holding_keys.push_back(NA_STRING); } else { rt_ptr->radix.greedy_match(Rcpp::as(to_match[i]), vec); for (it = vec.begin(); it != vec.end(); ++it) { holding.push_back((*it)->second); holding_keys.push_back((*it)->first); } if(holding.size() == 0){ holding.push_back(non_match_val); holding_keys.push_back(NA_STRING); } } output[i] = Rcpp::DataFrame::create(_["match_key"] = holding_keys, _["match_value"] = holding, _["stringsAsFactors"] = false); } return output; } //[[Rcpp::export]] List greedy_string(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return greedy_generic_df(radix, to_match, NA_STRING); } return greedy_generic(radix, to_match, NA_STRING); } //[[Rcpp::export]] List greedy_integer(SEXP radix, CharacterVector to_match, bool 
include_keys){ if(include_keys){ return greedy_generic_df(radix, to_match, NA_INTEGER); } return greedy_generic(radix, to_match, NA_INTEGER); } //[[Rcpp::export]] List greedy_numeric(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return greedy_generic_df(radix, to_match, NA_REAL); } return greedy_generic(radix, to_match, NA_REAL); } //[[Rcpp::export]] List greedy_logical(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return greedy_generic_df(radix, to_match, NA_INTEGER); } return greedy_generic(radix, to_match, NA_INTEGER); } triebeard/src/Makevars0000644000176200001440000000003714377436506014515 0ustar liggesusersPKG_CXXFLAGS=-I../inst/include triebeard/src/prefix_match.cpp0000644000176200001440000000710114377436506016175 0ustar liggesusers#include #include "r_trie.h" using namespace Rcpp; template List prefix_generic(SEXP radix, CharacterVector to_match, Z missing_val){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); typename radix_tree::iterator it; unsigned int input_size = to_match.size(); List output(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } X holding; std::vector::iterator> vec; typename std::vector::iterator>::iterator it; if(to_match[i] == NA_STRING){ holding.push_back(missing_val); } else { rt_ptr->radix.prefix_match(Rcpp::as(to_match[i]), vec); for (it = vec.begin(); it != vec.end(); ++it) { holding.push_back((*it)->second); } if(holding.size() == 0){ holding.push_back(missing_val); } } output[i] = holding; } return output; } template List prefix_generic_df(SEXP radix, CharacterVector to_match, Z missing_val){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); typename radix_tree::iterator it; unsigned int input_size = to_match.size(); List output(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } X holding_vals; 
CharacterVector holding_keys; std::vector::iterator> vec; typename std::vector::iterator>::iterator it; if(to_match[i] == NA_STRING){ holding_vals.push_back(missing_val); holding_keys.push_back(NA_STRING); } else { rt_ptr->radix.prefix_match(Rcpp::as(to_match[i]), vec); for (it = vec.begin(); it != vec.end(); ++it) { holding_vals.push_back((*it)->second); holding_keys.push_back((*it)->first); } if(holding_vals.size() == 0){ holding_vals.push_back(missing_val); holding_keys.push_back(NA_STRING); } } output[i] = Rcpp::DataFrame::create(_["match_key"] = holding_keys, _["match_value"] = holding_vals, _["stringsAsFactors"] = false); } return output; } //[[Rcpp::export]] List prefix_string(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return prefix_generic_df(radix, to_match, NA_STRING); } return prefix_generic(radix, to_match, NA_STRING); } //[[Rcpp::export]] List prefix_integer(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return prefix_generic_df(radix, to_match, NA_INTEGER); } return prefix_generic(radix, to_match, NA_INTEGER); } //[[Rcpp::export]] List prefix_numeric(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return prefix_generic_df(radix, to_match, NA_REAL); } return prefix_generic(radix, to_match, NA_REAL); } //[[Rcpp::export]] List prefix_logical(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return prefix_generic_df(radix, to_match, NA_INTEGER); } return prefix_generic(radix, to_match, NA_INTEGER); } triebeard/src/length.cpp0000644000176200001440000000107114377436506015005 0ustar liggesusers#include "r_trie.h" template static inline int radix_len(SEXP radix){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); return rt_ptr->radix_size; } //[[Rcpp::export]] int radix_len_string(SEXP radix){ return radix_len(radix); } //[[Rcpp::export]] int radix_len_integer(SEXP radix){ return radix_len(radix); } //[[Rcpp::export]] int 
radix_len_numeric(SEXP radix){ return radix_len(radix); } //[[Rcpp::export]] int radix_len_logical(SEXP radix){ return radix_len(radix); } triebeard/src/RcppExports.cpp0000644000176200001440000005516314400725565016021 0ustar liggesusers// Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 #include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // add_trie_string void add_trie_string(SEXP trie, CharacterVector keys, CharacterVector values); RcppExport SEXP _triebeard_add_trie_string(SEXP trieSEXP, SEXP keysSEXP, SEXP valuesSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); Rcpp::traits::input_parameter< CharacterVector >::type values(valuesSEXP); add_trie_string(trie, keys, values); return R_NilValue; END_RCPP } // add_trie_integer void add_trie_integer(SEXP trie, CharacterVector keys, IntegerVector values); RcppExport SEXP _triebeard_add_trie_integer(SEXP trieSEXP, SEXP keysSEXP, SEXP valuesSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); Rcpp::traits::input_parameter< IntegerVector >::type values(valuesSEXP); add_trie_integer(trie, keys, values); return R_NilValue; END_RCPP } // add_trie_numeric void add_trie_numeric(SEXP trie, CharacterVector keys, NumericVector values); RcppExport SEXP _triebeard_add_trie_numeric(SEXP trieSEXP, SEXP keysSEXP, SEXP valuesSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); Rcpp::traits::input_parameter< NumericVector >::type values(valuesSEXP); 
add_trie_numeric(trie, keys, values); return R_NilValue; END_RCPP } // add_trie_logical void add_trie_logical(SEXP trie, CharacterVector keys, LogicalVector values); RcppExport SEXP _triebeard_add_trie_logical(SEXP trieSEXP, SEXP keysSEXP, SEXP valuesSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); Rcpp::traits::input_parameter< LogicalVector >::type values(valuesSEXP); add_trie_logical(trie, keys, values); return R_NilValue; END_RCPP } // remove_trie_string void remove_trie_string(SEXP trie, CharacterVector keys); RcppExport SEXP _triebeard_remove_trie_string(SEXP trieSEXP, SEXP keysSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); remove_trie_string(trie, keys); return R_NilValue; END_RCPP } // remove_trie_integer void remove_trie_integer(SEXP trie, CharacterVector keys); RcppExport SEXP _triebeard_remove_trie_integer(SEXP trieSEXP, SEXP keysSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); remove_trie_integer(trie, keys); return R_NilValue; END_RCPP } // remove_trie_numeric void remove_trie_numeric(SEXP trie, CharacterVector keys); RcppExport SEXP _triebeard_remove_trie_numeric(SEXP trieSEXP, SEXP keysSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); remove_trie_numeric(trie, keys); return R_NilValue; END_RCPP } // remove_trie_logical void remove_trie_logical(SEXP trie, CharacterVector keys); RcppExport SEXP _triebeard_remove_trie_logical(SEXP trieSEXP, SEXP keysSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; 
Rcpp::traits::input_parameter< SEXP >::type trie(trieSEXP); Rcpp::traits::input_parameter< CharacterVector >::type keys(keysSEXP); remove_trie_logical(trie, keys); return R_NilValue; END_RCPP } // radix_create_string SEXP radix_create_string(std::vector < std::string > keys, std::vector < std::string > values); RcppExport SEXP _triebeard_radix_create_string(SEXP keysSEXP, SEXP valuesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< std::vector < std::string > >::type keys(keysSEXP); Rcpp::traits::input_parameter< std::vector < std::string > >::type values(valuesSEXP); rcpp_result_gen = Rcpp::wrap(radix_create_string(keys, values)); return rcpp_result_gen; END_RCPP } // radix_create_integer SEXP radix_create_integer(std::vector < std::string > keys, std::vector < int > values); RcppExport SEXP _triebeard_radix_create_integer(SEXP keysSEXP, SEXP valuesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< std::vector < std::string > >::type keys(keysSEXP); Rcpp::traits::input_parameter< std::vector < int > >::type values(valuesSEXP); rcpp_result_gen = Rcpp::wrap(radix_create_integer(keys, values)); return rcpp_result_gen; END_RCPP } // radix_create_numeric SEXP radix_create_numeric(std::vector < std::string > keys, std::vector < double > values); RcppExport SEXP _triebeard_radix_create_numeric(SEXP keysSEXP, SEXP valuesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< std::vector < std::string > >::type keys(keysSEXP); Rcpp::traits::input_parameter< std::vector < double > >::type values(valuesSEXP); rcpp_result_gen = Rcpp::wrap(radix_create_numeric(keys, values)); return rcpp_result_gen; END_RCPP } // radix_create_logical SEXP radix_create_logical(std::vector < std::string > keys, std::vector < bool > values); RcppExport SEXP _triebeard_radix_create_logical(SEXP keysSEXP, SEXP 
valuesSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< std::vector < std::string > >::type keys(keysSEXP); Rcpp::traits::input_parameter< std::vector < bool > >::type values(valuesSEXP); rcpp_result_gen = Rcpp::wrap(radix_create_logical(keys, values)); return rcpp_result_gen; END_RCPP } // get_keys_string std::vector < std::string > get_keys_string(SEXP radix); RcppExport SEXP _triebeard_get_keys_string(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_keys_string(radix)); return rcpp_result_gen; END_RCPP } // get_keys_integer std::vector < std::string > get_keys_integer(SEXP radix); RcppExport SEXP _triebeard_get_keys_integer(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_keys_integer(radix)); return rcpp_result_gen; END_RCPP } // get_keys_numeric std::vector < std::string > get_keys_numeric(SEXP radix); RcppExport SEXP _triebeard_get_keys_numeric(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_keys_numeric(radix)); return rcpp_result_gen; END_RCPP } // get_keys_logical std::vector < std::string > get_keys_logical(SEXP radix); RcppExport SEXP _triebeard_get_keys_logical(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_keys_logical(radix)); return rcpp_result_gen; END_RCPP } // get_values_string std::vector < std::string > get_values_string(SEXP radix); RcppExport SEXP _triebeard_get_values_string(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject 
rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_values_string(radix)); return rcpp_result_gen; END_RCPP } // get_values_integer std::vector < int > get_values_integer(SEXP radix); RcppExport SEXP _triebeard_get_values_integer(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_values_integer(radix)); return rcpp_result_gen; END_RCPP } // get_values_numeric std::vector < double > get_values_numeric(SEXP radix); RcppExport SEXP _triebeard_get_values_numeric(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_values_numeric(radix)); return rcpp_result_gen; END_RCPP } // get_values_logical std::vector < bool > get_values_logical(SEXP radix); RcppExport SEXP _triebeard_get_values_logical(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(get_values_logical(radix)); return rcpp_result_gen; END_RCPP } // greedy_string List greedy_string(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_greedy_string(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(greedy_string(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // greedy_integer List greedy_integer(SEXP radix, CharacterVector to_match, bool 
include_keys); RcppExport SEXP _triebeard_greedy_integer(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(greedy_integer(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // greedy_numeric List greedy_numeric(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_greedy_numeric(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(greedy_numeric(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // greedy_logical List greedy_logical(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_greedy_logical(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(greedy_logical(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // radix_len_string int radix_len_string(SEXP radix); RcppExport SEXP _triebeard_radix_len_string(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = 
Rcpp::wrap(radix_len_string(radix)); return rcpp_result_gen; END_RCPP } // radix_len_integer int radix_len_integer(SEXP radix); RcppExport SEXP _triebeard_radix_len_integer(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(radix_len_integer(radix)); return rcpp_result_gen; END_RCPP } // radix_len_numeric int radix_len_numeric(SEXP radix); RcppExport SEXP _triebeard_radix_len_numeric(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(radix_len_numeric(radix)); return rcpp_result_gen; END_RCPP } // radix_len_logical int radix_len_logical(SEXP radix); RcppExport SEXP _triebeard_radix_len_logical(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); rcpp_result_gen = Rcpp::wrap(radix_len_logical(radix)); return rcpp_result_gen; END_RCPP } // longest_string SEXP longest_string(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_longest_string(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(longest_string(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // longest_integer SEXP longest_integer(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_longest_integer(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; 
Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(longest_integer(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // longest_numeric SEXP longest_numeric(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_longest_numeric(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(longest_numeric(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // longest_logical SEXP longest_logical(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_longest_logical(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(longest_logical(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // prefix_string List prefix_string(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_prefix_string(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type 
include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(prefix_string(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // prefix_integer List prefix_integer(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_prefix_integer(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(prefix_integer(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // prefix_numeric List prefix_numeric(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_prefix_numeric(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(prefix_numeric(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // prefix_logical List prefix_logical(SEXP radix, CharacterVector to_match, bool include_keys); RcppExport SEXP _triebeard_prefix_logical(SEXP radixSEXP, SEXP to_matchSEXP, SEXP include_keysSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); Rcpp::traits::input_parameter< CharacterVector >::type to_match(to_matchSEXP); Rcpp::traits::input_parameter< bool >::type include_keys(include_keysSEXP); rcpp_result_gen = Rcpp::wrap(prefix_logical(radix, to_match, include_keys)); return rcpp_result_gen; END_RCPP } // trie_str_string void trie_str_string(SEXP 
radix); RcppExport SEXP _triebeard_trie_str_string(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); trie_str_string(radix); return R_NilValue; END_RCPP } // trie_str_integer void trie_str_integer(SEXP radix); RcppExport SEXP _triebeard_trie_str_integer(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); trie_str_integer(radix); return R_NilValue; END_RCPP } // trie_str_numeric void trie_str_numeric(SEXP radix); RcppExport SEXP _triebeard_trie_str_numeric(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); trie_str_numeric(radix); return R_NilValue; END_RCPP } // trie_str_logical void trie_str_logical(SEXP radix); RcppExport SEXP _triebeard_trie_str_logical(SEXP radixSEXP) { BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< SEXP >::type radix(radixSEXP); trie_str_logical(radix); return R_NilValue; END_RCPP } static const R_CallMethodDef CallEntries[] = { {"_triebeard_add_trie_string", (DL_FUNC) &_triebeard_add_trie_string, 3}, {"_triebeard_add_trie_integer", (DL_FUNC) &_triebeard_add_trie_integer, 3}, {"_triebeard_add_trie_numeric", (DL_FUNC) &_triebeard_add_trie_numeric, 3}, {"_triebeard_add_trie_logical", (DL_FUNC) &_triebeard_add_trie_logical, 3}, {"_triebeard_remove_trie_string", (DL_FUNC) &_triebeard_remove_trie_string, 2}, {"_triebeard_remove_trie_integer", (DL_FUNC) &_triebeard_remove_trie_integer, 2}, {"_triebeard_remove_trie_numeric", (DL_FUNC) &_triebeard_remove_trie_numeric, 2}, {"_triebeard_remove_trie_logical", (DL_FUNC) &_triebeard_remove_trie_logical, 2}, {"_triebeard_radix_create_string", (DL_FUNC) &_triebeard_radix_create_string, 2}, {"_triebeard_radix_create_integer", (DL_FUNC) &_triebeard_radix_create_integer, 2}, {"_triebeard_radix_create_numeric", (DL_FUNC) &_triebeard_radix_create_numeric, 2}, 
{"_triebeard_radix_create_logical", (DL_FUNC) &_triebeard_radix_create_logical, 2}, {"_triebeard_get_keys_string", (DL_FUNC) &_triebeard_get_keys_string, 1}, {"_triebeard_get_keys_integer", (DL_FUNC) &_triebeard_get_keys_integer, 1}, {"_triebeard_get_keys_numeric", (DL_FUNC) &_triebeard_get_keys_numeric, 1}, {"_triebeard_get_keys_logical", (DL_FUNC) &_triebeard_get_keys_logical, 1}, {"_triebeard_get_values_string", (DL_FUNC) &_triebeard_get_values_string, 1}, {"_triebeard_get_values_integer", (DL_FUNC) &_triebeard_get_values_integer, 1}, {"_triebeard_get_values_numeric", (DL_FUNC) &_triebeard_get_values_numeric, 1}, {"_triebeard_get_values_logical", (DL_FUNC) &_triebeard_get_values_logical, 1}, {"_triebeard_greedy_string", (DL_FUNC) &_triebeard_greedy_string, 3}, {"_triebeard_greedy_integer", (DL_FUNC) &_triebeard_greedy_integer, 3}, {"_triebeard_greedy_numeric", (DL_FUNC) &_triebeard_greedy_numeric, 3}, {"_triebeard_greedy_logical", (DL_FUNC) &_triebeard_greedy_logical, 3}, {"_triebeard_radix_len_string", (DL_FUNC) &_triebeard_radix_len_string, 1}, {"_triebeard_radix_len_integer", (DL_FUNC) &_triebeard_radix_len_integer, 1}, {"_triebeard_radix_len_numeric", (DL_FUNC) &_triebeard_radix_len_numeric, 1}, {"_triebeard_radix_len_logical", (DL_FUNC) &_triebeard_radix_len_logical, 1}, {"_triebeard_longest_string", (DL_FUNC) &_triebeard_longest_string, 3}, {"_triebeard_longest_integer", (DL_FUNC) &_triebeard_longest_integer, 3}, {"_triebeard_longest_numeric", (DL_FUNC) &_triebeard_longest_numeric, 3}, {"_triebeard_longest_logical", (DL_FUNC) &_triebeard_longest_logical, 3}, {"_triebeard_prefix_string", (DL_FUNC) &_triebeard_prefix_string, 3}, {"_triebeard_prefix_integer", (DL_FUNC) &_triebeard_prefix_integer, 3}, {"_triebeard_prefix_numeric", (DL_FUNC) &_triebeard_prefix_numeric, 3}, {"_triebeard_prefix_logical", (DL_FUNC) &_triebeard_prefix_logical, 3}, {"_triebeard_trie_str_string", (DL_FUNC) &_triebeard_trie_str_string, 1}, {"_triebeard_trie_str_integer", (DL_FUNC) 
&_triebeard_trie_str_integer, 1}, {"_triebeard_trie_str_numeric", (DL_FUNC) &_triebeard_trie_str_numeric, 1}, {"_triebeard_trie_str_logical", (DL_FUNC) &_triebeard_trie_str_logical, 1}, {NULL, NULL, 0} }; RcppExport void R_init_triebeard(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } triebeard/src/r_trie.h0000644000176200001440000000346214377436506014463 0ustar liggesusers#include #include using namespace Rcpp; #ifndef __RTRIE_CORE__ #define __RTRIE_CORE__ static inline void ptr_check(void *ptr){ if (ptr == NULL){ stop("invalid trie object; pointer is NULL"); } } template class r_trie { public: int size(){ return radix.size(); } radix_tree radix; int radix_size; r_trie(std::vector < std::string > keys, std::vector < T > values){ unsigned int in_size = keys.size(); for(unsigned int i = 0; i < in_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } radix[keys[i]] = values[i]; } radix_size = size(); } std::vector < std::string > get_keys(){ int input_size = size(); int iter = 0; std::vector < std::string > output(input_size); typename radix_tree< std::string, T >::iterator it; for (it = radix.begin(); it != radix.end(); ++it) { output[iter] = it->first; iter++; } return output; } std::vector < T > get_values(){ int input_size = size(); int iter = 0; std::vector < T > output(input_size); typename radix_tree< std::string, T >::iterator it; for (it = radix.begin(); it != radix.end(); ++it) { output[iter] = it->second; iter++; } return output; } void insert_value(std::string key, T value){ radix[key] = value; } void remove_values(CharacterVector keys){ unsigned int in_size = keys.size(); for(unsigned int i = 0; i < in_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(keys[i] != NA_STRING){ radix.erase(Rcpp::as(keys[i])); } } radix_size = size(); } }; #endif triebeard/src/longest_match.cpp0000644000176200001440000000615014377436506016356 0ustar liggesusers#include #include "r_trie.h" 
using namespace Rcpp; template X longest_generic(SEXP radix, CharacterVector to_match, Z missing_val){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); typename radix_tree::iterator it; unsigned int input_size = to_match.size(); X output(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(to_match[i] == NA_STRING){ output[i] = missing_val; } else { it = rt_ptr->radix.longest_match(Rcpp::as(to_match[i])); if(it != rt_ptr->radix.end()){ output[i] = it->second; } else { output[i] = missing_val; } } } return output; } template DataFrame longest_generic_df(SEXP radix, CharacterVector to_match, Z missing_val){ r_trie * rt_ptr = (r_trie *) R_ExternalPtrAddr(radix); ptr_check(rt_ptr); typename radix_tree::iterator it; unsigned int input_size = to_match.size(); X output(input_size); CharacterVector output_keys(input_size); for(unsigned int i = 0; i < input_size; i++){ if((i % 10000) == 0){ Rcpp::checkUserInterrupt(); } if(to_match[i] == NA_STRING){ output[i] = missing_val; output_keys[i] = NA_STRING; } else { it = rt_ptr->radix.longest_match(Rcpp::as(to_match[i])); if(it != rt_ptr->radix.end()){ output[i] = it->second; output_keys[i] = it->first; } else { output[i] = missing_val; output_keys[i] = NA_STRING; } } } return Rcpp::DataFrame::create(_["match_key"] = output_keys, _["match_value"] = output, _["stringsAsFactors"] = false); } //[[Rcpp::export]] SEXP longest_string(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return Rcpp::wrap(longest_generic_df(radix, to_match, NA_STRING)); } return Rcpp::wrap(longest_generic(radix, to_match, NA_STRING)); } //[[Rcpp::export]] SEXP longest_integer(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return Rcpp::wrap(longest_generic_df(radix, to_match, NA_INTEGER)); } return Rcpp::wrap(longest_generic(radix, to_match, NA_INTEGER)); } //[[Rcpp::export]] SEXP longest_numeric(SEXP radix, 
CharacterVector to_match, bool include_keys){ if(include_keys){ return Rcpp::wrap(longest_generic_df(radix, to_match, NA_REAL)); } return Rcpp::wrap(longest_generic(radix, to_match, NA_REAL)); } //[[Rcpp::export]] SEXP longest_logical(SEXP radix, CharacterVector to_match, bool include_keys){ if(include_keys){ return Rcpp::wrap(longest_generic_df(radix, to_match, NA_INTEGER)); } return Rcpp::wrap(longest_generic(radix, to_match, NA_INTEGER)); } triebeard/vignettes/0000755000176200001440000000000014400735317014227 5ustar liggesuserstriebeard/vignettes/rcpp_radix.Rmd0000644000176200001440000000636014377436506017046 0ustar liggesusers--- title: "Radix trees in Rcpp" author: "Oliver Keyes" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Radix trees in Rcpp} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- A **radix tree** is a data structure optimised for storing key-value pairs in a way optimised for searching. This makes them very, very good for efficiently matching data against keys, and retrieving the values *associated* with those keys. `triebeard` provides an implementation of radix trees for Rcpp (and also for use directly in R). To start using radix trees in your Rcpp development, simply modify your C++ file to include at the top: ```{Rcpp, eval=FALSE} //[[Rcpp::depends(triebeard)]] #include <radix.h> ``` ## Constructing trees Trees are constructed using the syntax: ```{Rcpp, eval=FALSE} radix_tree<type, type2> radix; ``` Where `type` represents the type of the keys (for example, `std::string`) and `type2` the type of the values. Radix trees can have any scalar type as keys, although strings are most typical; they can also have any scalar type for values. Once you've constructed a tree, new entries can be added in a very R-like way: `radix[new_key] = new_value;`. Entries can also be removed, with `radix.erase(key)`. ## Matching against trees We then move on to the fun bit: matching! 
As mentioned, radix trees are really good for matching arbitrary values against keys (well, keys of the same type) and retrieving the associated values. There are three types of supported matching; longest, prefix, and greedy. Longest does exactly what it says on the tin: it finds the key-value pair where the longest initial part of the key matches the arbitrary value: ```{Rcpp, eval=FALSE} radix_tree<std::string, std::string> radix; radix["turnin"] = "entry the first"; radix["turin"] = "entry the second"; radix_tree<std::string, std::string>::iterator it; it = radix.longest_match("turing"); if(it == radix.end()){ printf("No match was found :("); } else { std::string result = "Key of longest match: " + it->first + " , value of longest match: " + it->second; } ``` Prefix matching provides all trie entries where the value-to-match is a *prefix* of the key: ```{Rcpp, eval=FALSE} radix_tree<std::string, std::string> radix; radix["turnin"] = "entry the first"; radix["turin"] = "entry the second"; std::vector<radix_tree<std::string, std::string>::iterator> vec; std::vector<radix_tree<std::string, std::string>::iterator>::iterator it; radix.prefix_match("tur", vec); if(vec.empty()){ printf("No match was found :("); } else { for (it = vec.begin(); it != vec.end(); ++it) { std::string result = "Key of a prefix match: " + (*it)->first + " , value of a prefix match: " + (*it)->second; } } ``` Greedy matching matches very, very fuzzily (a value of 'bring', for example, will match 'blind', 'bind' and 'binary') and, syntactically, looks exactly the same as prefix-matching, albeit with `radix.greedy_match()` instead of `radix.prefix_match()`. ### Other trie things If you have ideas for other trie-like structures, or functions that would be useful with *these* tries, the best approach is to either [request it](https://github.com/Ironholds/triebeard/issues) or [add it](https://github.com/Ironholds/triebeard/pulls)! 
triebeard/vignettes/r_radix.Rmd0000644000176200001440000001173714377436506016347 0ustar liggesusers--- title: "Radix trees in R" author: "Oliver Keyes" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Radix trees in R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- A **radix tree**, or **trie**, is a data structure optimised for storing key-value pairs in a way optimised for searching. This makes them very, very good for efficiently matching data against keys, and retrieving the values *associated* with those keys. `triebeard` provides an implementation of tries for R (and one that can be used in Rcpp development, too, if that's your thing) so that useRs can take advantage of the fast, efficient and user-friendly matching that they allow. ## Radix usage Suppose we have observations in a dataset that are labelled, with a 2-3 letter code that identifies the facility the sample came from: ```{r, eval=FALSE} labels <- c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901", "AO-1099", "AFT-1101", "QZ-4933") ``` We know the facility each code maps to, and we want to be able to map the labels to that - not over 10 entries but over hundreds, or thousands, or hundreds *of* thousands. Tries are a great way of doing that: we treat the codes as *keys* and the full facility names as *values*. So let's make a trie to do this matching, and then, well, match: ```{r, eval=FALSE} library(triebeard) trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) longest_match(trie = trie, to_match = labels) [1] "Audobon" "Atlanta" "Ann Arbor" "Austin" "Queensland" "Queensland" "Raleigh" "Audobon" "Austin" [10] "Queensland" ``` This pulls out, for each label, the trie value where the associated key has the longest prefix-match to the label. 
We can also just grab all the values where the key starts with, say, A: ```{r, eval=FALSE} prefix_match(trie = trie, to_match = "A") [[1]] [1] "Ann Arbor" "Atlanta" "Austin" "Audobon" ``` And finally if we want we can match very, very fuzzily using "greedy" matching: ```{r, eval=FALSE} greedy_match(trie = trie, to_match = "AO") [[1]] [1] "Ann Arbor" "Atlanta" "Austin" "Audobon" ``` These operations are very, very efficient. If we use longest-match as an example, since that's the most useful thing, with a one-million element vector of things to match against: ```{r, eval=FALSE} library(triebeard) library(microbenchmark) trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) labels <- rep(c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901", "AO-1099", "AFT-1101", "QZ-4933"), 100000) microbenchmark({longest_match(trie = trie, to_match = labels)}) Unit: milliseconds expr min lq mean median uq max neval { longest_match(trie = trie, to_match = labels) } 284.6457 285.5902 289.5342 286.8775 288.4564 327.3878 100 ``` I think we can call <300 milliseconds for a million matches against an entire set of possible values pretty fast. ## Radix modification There's always the possibility that (horror of horrors) you'll have to add or remove entries from the trie. 
Fear not; you can do just that with `trie_add` and `trie_remove` respectively, both of which silently modify the trie they're provided with to add or remove whatever key-value pairs you provide: ```{r, eval=FALSE} to_match = "198.0.0.1" trie_inst <- trie(keys = "197", values = "fake range") longest_match(trie_inst, to_match) [1] NA trie_add(trie_inst, keys = "198", values = "home range") longest_match(trie_inst, to_match) [1] "home range" trie_remove(trie_inst, keys = "198") longest_match(trie_inst, to_match) [1] NA ``` ## Metadata and coercion You can also extract information from tries without using them. `dim`, `str`, `print` and `length` all work for tries, and you can use `get_keys(trie)` and `get_values(trie)` to extract, respectively, the keys and values from a trie object. In addition, you can also coerce tries into other R data structures, specifically lists and data.frames: ```{r, eval=FALSE} trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) str(as.data.frame(trie)) 'data.frame': 6 obs. of 2 variables: $ keys : chr "AAI" "AEO" "AFT" "AO" ... $ values: chr "Ann Arbor" "Atlanta" "Austin" "Audobon" ... str(as.list(trie)) List of 2 $ keys : chr [1:6] "AAI" "AEO" "AFT" "AO" ... $ values: chr [1:6] "Ann Arbor" "Atlanta" "Austin" "Audobon" ... ``` ### Other trie operations If you have ideas for other trie-like structures, or functions that would be useful with *these* tries, the best approach is to either [request it](https://github.com/Ironholds/triebeard/issues) or [add it](https://github.com/Ironholds/triebeard/pulls)! 
triebeard/NEWS0000644000176200001440000000203614400725742012720 0ustar liggesusers Version 0.4.1 ================================= DEVELOPMENT * Replace C++17-deprecated features Version 0.3.1 ================================= MINOR * Accommodated knitr breaking change * Updated maintainer DEVELOPMENT * Internal refactoring using templates has drastically reduced the size of the codebase * Options to include the keys, as well as values, of matches have been incorporated (#13) Version 0.2.1 ================================= BUG FIXES * Fixed segfault when `trie_remove()` resulted in a 0 element trie. Version 0.2.0 ================================= FEATURES * tries can now be converted into lists and data.frames * We now have str() and print() methods! That's nice. * create_trie() renamed trie() BUGS * Haven't found any. Probably means they're lurking around and particularly nasty. DEVELOPMENT * greedy and prefix matching should now be (slightly) faster. * Installed size has been slightly reduced, and the C++ code simplified. Version 0.1.0 ================================= * Initial, GitHub-centred release triebeard/R/0000755000176200001440000000000014400735272012420 5ustar liggesuserstriebeard/R/create.R0000644000176200001440000000337514377436506014031 0ustar liggesusers#'@title Create a Trie #'@description \code{trie} creates a trie (a key-value store optimised #'for matching) out of a provided character vector of keys, and a numeric, #'character, logical or integer vector of values (both the same length). #' #'@param keys a character vector containing the keys for the trie. #' #'@param values an atomic vector of any type, containing the values to pair with #'\code{keys}. Must be the same length as \code{keys}. #' #'@return a `trie` object. 
#' #'@seealso \code{\link{trie_add}} and \code{\link{trie_remove}} for adding to and removing #'from tries after their creation, and \code{\link{longest_match}} and other match functions #'for matching values against the keys of a created trie. #' #'@examples #'# An integer trie #'int_trie <- trie(keys = "foo", values = 1) #' #'# A string trie #'str_trie <- trie(keys = "foo", values = "bar") #' #'@export trie <- function(keys, values){ stopifnot(length(keys) == length(values)) stopifnot(is.character(keys)) output <- NULL output_classes <- c("trie", NA) switch(class(values)[1], "character" = { output <- radix_create_string(keys, values) output_classes[2] <- "string_trie" }, "integer" = { output <- radix_create_integer(keys, values) output_classes[2] <- "integer_trie" }, "numeric" = { output <- radix_create_numeric(keys, values) output_classes[2] <- "numeric_trie" }, "logical" = { output <- radix_create_logical(keys, values) output_classes[2] <- "logical_trie" }, stop("'values' must be a numeric, integer, character or logical vector")) class(output) <- c(class(output), output_classes) return(output) }triebeard/R/metadata.R0000644000176200001440000000162714377436506014344 0ustar liggesusers#'@export length.string_trie <- function(x){ return(radix_len_string(x)) } #'@export length.integer_trie <- function(x){ return(radix_len_integer(x)) } #'@export length.numeric_trie <- function(x){ return(radix_len_numeric(x)) } #'@export length.logical_trie <- function(x){ return(radix_len_logical(x)) } #'@export dim.trie <- function(x){ return(length(x)) } #'@export str.trie <- function(object, ...){ type <- class(object)[3] cat(paste0(type, "\n")) switch(type, "string_trie" = {trie_str_string(object)}, "integer_trie" = {trie_str_integer(object)}, "numeric_trie" = {trie_str_numeric(object)}, "logical_trie" = {trie_str_logical(object)} ) return(invisible()) } #'@export print.trie <- function(x, ...){ len <- length(x) entry_word <- ifelse(len != 1, "entries", "entry") cat("A", 
class(x)[3], "object with", len, entry_word, "\n") } triebeard/R/triebeard.R0000644000176200001440000000053014377436506014515 0ustar liggesusers#' @title Radix trees in Rcpp #' @name triebeard #' @description This package provides access to Radix tree (or "trie") structures in both R #' and Rcpp. #' #' @docType package #' @aliases triebeard triebeard-package #' @useDynLib triebeard, .registration = TRUE #' @importFrom Rcpp sourceCpp NULLtriebeard/R/alter.R0000644000176200001440000000426114377436506013670 0ustar liggesusers#'@title Add or remove trie entries #' #'@description \code{trie_add} and \code{trie_remove} allow you to #'add or remove entries from tries, respectively. #' #'@param trie a trie object created with \code{\link{trie}} #' #'@param keys a character vector containing the keys of the entries to #'add (or remove). Entries with NA keys will not be added. #' #'@param values an atomic vector, matching the type of the trie, containing #'the values of the entries to add. Entries with NA values will not be added. #' #'@return nothing; the trie is modified in-place #' #'@examples #'trie <- trie("foo", "bar") #'length(trie) #' #'trie_add(trie, "baz", "qux") #'length(trie) #' #'trie_remove(trie, "baz") #'length(trie) #' #'@seealso \code{\link{trie}} for creating tries in the first place. 
#'@name alter #'@rdname alter #'@export trie_add <- function(trie, keys, values){ stopifnot(length(keys) == length(values)) stopifnot(is.character(keys)) UseMethod("trie_add", trie) } #'@export trie_add.string_trie <- function(trie, keys, values){ stopifnot(is.character(values)) add_trie_string(trie, keys, values) return(invisible()) } #'@export trie_add.integer_trie <- function(trie, keys, values){ stopifnot(is.integer(values)) add_trie_integer(trie, keys, values) return(invisible()) } #'@export trie_add.numeric_trie <- function(trie, keys, values){ stopifnot(is.numeric(values)) add_trie_numeric(trie, keys, values) return(invisible()) } #'@export trie_add.logical_trie <- function(trie, keys, values){ stopifnot(is.logical(values)) add_trie_logical(trie, keys, values) return(invisible()) } #'@rdname alter #'@export trie_remove <- function(trie, keys){ stopifnot(is.character(keys)) UseMethod("trie_remove", trie) } #'@export trie_remove.string_trie <- function(trie, keys){ remove_trie_string(trie, keys) return(invisible()) } #'@export trie_remove.integer_trie <- function(trie, keys){ remove_trie_integer(trie, keys) return(invisible()) } #'@export trie_remove.numeric_trie <- function(trie, keys){ remove_trie_numeric(trie, keys) return(invisible()) } #'@export trie_remove.logical_trie <- function(trie, keys){ remove_trie_logical(trie, keys) return(invisible()) } triebeard/R/RcppExports.R0000644000176200001440000001100714400735272015033 0ustar liggesusers# Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 add_trie_string <- function(trie, keys, values) { invisible(.Call(`_triebeard_add_trie_string`, trie, keys, values)) } add_trie_integer <- function(trie, keys, values) { invisible(.Call(`_triebeard_add_trie_integer`, trie, keys, values)) } add_trie_numeric <- function(trie, keys, values) { invisible(.Call(`_triebeard_add_trie_numeric`, trie, keys, values)) } add_trie_logical <- function(trie, 
keys, values) { invisible(.Call(`_triebeard_add_trie_logical`, trie, keys, values)) } remove_trie_string <- function(trie, keys) { invisible(.Call(`_triebeard_remove_trie_string`, trie, keys)) } remove_trie_integer <- function(trie, keys) { invisible(.Call(`_triebeard_remove_trie_integer`, trie, keys)) } remove_trie_numeric <- function(trie, keys) { invisible(.Call(`_triebeard_remove_trie_numeric`, trie, keys)) } remove_trie_logical <- function(trie, keys) { invisible(.Call(`_triebeard_remove_trie_logical`, trie, keys)) } radix_create_string <- function(keys, values) { .Call(`_triebeard_radix_create_string`, keys, values) } radix_create_integer <- function(keys, values) { .Call(`_triebeard_radix_create_integer`, keys, values) } radix_create_numeric <- function(keys, values) { .Call(`_triebeard_radix_create_numeric`, keys, values) } radix_create_logical <- function(keys, values) { .Call(`_triebeard_radix_create_logical`, keys, values) } get_keys_string <- function(radix) { .Call(`_triebeard_get_keys_string`, radix) } get_keys_integer <- function(radix) { .Call(`_triebeard_get_keys_integer`, radix) } get_keys_numeric <- function(radix) { .Call(`_triebeard_get_keys_numeric`, radix) } get_keys_logical <- function(radix) { .Call(`_triebeard_get_keys_logical`, radix) } get_values_string <- function(radix) { .Call(`_triebeard_get_values_string`, radix) } get_values_integer <- function(radix) { .Call(`_triebeard_get_values_integer`, radix) } get_values_numeric <- function(radix) { .Call(`_triebeard_get_values_numeric`, radix) } get_values_logical <- function(radix) { .Call(`_triebeard_get_values_logical`, radix) } greedy_string <- function(radix, to_match, include_keys) { .Call(`_triebeard_greedy_string`, radix, to_match, include_keys) } greedy_integer <- function(radix, to_match, include_keys) { .Call(`_triebeard_greedy_integer`, radix, to_match, include_keys) } greedy_numeric <- function(radix, to_match, include_keys) { .Call(`_triebeard_greedy_numeric`, radix, to_match, 
include_keys) } greedy_logical <- function(radix, to_match, include_keys) { .Call(`_triebeard_greedy_logical`, radix, to_match, include_keys) } radix_len_string <- function(radix) { .Call(`_triebeard_radix_len_string`, radix) } radix_len_integer <- function(radix) { .Call(`_triebeard_radix_len_integer`, radix) } radix_len_numeric <- function(radix) { .Call(`_triebeard_radix_len_numeric`, radix) } radix_len_logical <- function(radix) { .Call(`_triebeard_radix_len_logical`, radix) } longest_string <- function(radix, to_match, include_keys) { .Call(`_triebeard_longest_string`, radix, to_match, include_keys) } longest_integer <- function(radix, to_match, include_keys) { .Call(`_triebeard_longest_integer`, radix, to_match, include_keys) } longest_numeric <- function(radix, to_match, include_keys) { .Call(`_triebeard_longest_numeric`, radix, to_match, include_keys) } longest_logical <- function(radix, to_match, include_keys) { .Call(`_triebeard_longest_logical`, radix, to_match, include_keys) } prefix_string <- function(radix, to_match, include_keys) { .Call(`_triebeard_prefix_string`, radix, to_match, include_keys) } prefix_integer <- function(radix, to_match, include_keys) { .Call(`_triebeard_prefix_integer`, radix, to_match, include_keys) } prefix_numeric <- function(radix, to_match, include_keys) { .Call(`_triebeard_prefix_numeric`, radix, to_match, include_keys) } prefix_logical <- function(radix, to_match, include_keys) { .Call(`_triebeard_prefix_logical`, radix, to_match, include_keys) } trie_str_string <- function(radix) { invisible(.Call(`_triebeard_trie_str_string`, radix)) } trie_str_integer <- function(radix) { invisible(.Call(`_triebeard_trie_str_integer`, radix)) } trie_str_numeric <- function(radix) { invisible(.Call(`_triebeard_trie_str_numeric`, radix)) } trie_str_logical <- function(radix) { invisible(.Call(`_triebeard_trie_str_logical`, radix)) } triebeard/R/match.R0000644000176200001440000001343214377436506013655 0ustar liggesusers#'@title Find the 
longest match in a trie #'@description \code{longest_match} accepts a trie and a character vector #'and returns the value associated with whichever key had the \emph{longest match} #'to each entry in the character vector. A trie of "binary" and "bind", for example, #'with an entry-to-compare of "binder", will match to "bind". #' #'@param trie a trie object, created with \code{\link{trie}} #' #'@param to_match a character vector containing the strings to match against the #'trie's keys. #' #'@param include_keys a logical value indicating whether to include the keys in the #'returned results or not. If TRUE (\emph{not} the default) the returned object will #'be a data.frame, rather than a vector. #' #'@examples #'trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), #' values = c("afford", "affair", "available", "binary", "bind", "blind")) #'longest_match(trie, "binder") #' #'@seealso \code{\link{prefix_match}} and \code{\link{greedy_match}} #'for prefix and greedy matching, respectively. 
#'
#'@export
longest_match <- function(trie, to_match, include_keys = FALSE){
  # Validate before dispatching: the object must carry the "trie" class and
  # include_keys must not be NA. The S3 method for the concrete trie class
  # then does the actual matching.
  stopifnot("trie" %in% class(trie),
            !is.na(include_keys))
  UseMethod("longest_match", trie)
}

# Each method below forwards to the wrapper for its value type
# (longest_string, longest_integer, longest_numeric, longest_logical).

#'@export
longest_match.string_trie <- function(trie, to_match, include_keys = FALSE){
  longest_string(trie, to_match, include_keys)
}

#'@export
longest_match.integer_trie <- function(trie, to_match, include_keys = FALSE){
  longest_integer(trie, to_match, include_keys)
}

#'@export
longest_match.numeric_trie <- function(trie, to_match, include_keys = FALSE){
  longest_numeric(trie, to_match, include_keys)
}

#'@export
longest_match.logical_trie <- function(trie, to_match, include_keys = FALSE){
  longest_logical(trie, to_match, include_keys)
}

#'@title Find the prefix matches in a trie
#'@description \code{prefix_match} accepts a trie and a character vector
#'and returns the values associated with any key that has a particular
#'character vector entry as a prefix (see the examples).
#'
#'@param trie a trie object, created with \code{\link{trie}}
#'
#'@param to_match a character vector containing the strings to check against the
#'trie's keys.
#'
#'@param include_keys a logical value indicating whether to include the keys in the
#'returned results or not. If TRUE (\emph{not} the default) the returned object will
#'be a list of data.frames, rather than of vectors.
#'
#'@return a list, the length of \code{to_match}, with each entry containing any trie values
#'where the \code{to_match} element was a prefix of the associated key. In the case that
#'nothing was found, the entry will contain \code{NA}.
#'
#'@examples
#'trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"),
#'             values = c("afford", "affair", "available", "binary", "bind", "blind"))
#'prefix_match(trie, "aff")
#'
#'@seealso \code{\link{longest_match}} and \code{\link{greedy_match}}
#'for longest and greedy matching, respectively.
#'
#'@export
prefix_match <- function(trie, to_match, include_keys = FALSE){
  # Validate before dispatching: the object must carry the "trie" class and
  # include_keys must not be NA. The S3 method for the concrete trie class
  # then does the actual matching.
  stopifnot("trie" %in% class(trie),
            !is.na(include_keys))
  UseMethod("prefix_match", trie)
}

# Each method below forwards to the wrapper for its value type
# (prefix_numeric, prefix_integer, prefix_string, prefix_logical).

#'@export
prefix_match.numeric_trie <- function(trie, to_match, include_keys = FALSE){
  prefix_numeric(trie, to_match, include_keys)
}

#'@export
prefix_match.integer_trie <- function(trie, to_match, include_keys = FALSE){
  prefix_integer(trie, to_match, include_keys)
}

#'@export
prefix_match.string_trie <- function(trie, to_match, include_keys = FALSE){
  prefix_string(trie, to_match, include_keys)
}

#'@export
prefix_match.logical_trie <- function(trie, to_match, include_keys = FALSE){
  prefix_logical(trie, to_match, include_keys)
}

#'@title Greedily match against a trie
#'@description \code{greedy_match} accepts a trie and a character vector
#'and returns the values associated with any key that is "greedily" (read: fuzzily)
#'matched against one of the character vector entries.
#'
#'@param trie a trie object, created with \code{\link{trie}}
#'
#'@param to_match a character vector containing the strings to check against the
#'trie's keys.
#'
#'@param include_keys a logical value indicating whether to include the keys in the
#'returned results or not. If TRUE (\emph{not} the default) the returned object will
#'be a list of data.frames, rather than of vectors.
#'
#'@return a list, the length of \code{to_match}, with each entry containing any trie values
#'where the \code{to_match} element greedily matches the associated key. In the case that
#'nothing was found, the entry will contain \code{NA}.
In the case that \code{include_keys} #'is TRUE, the matching keys will also be included #' #'@examples #'trie <- trie(keys = c("afford", "affair", "available", "binary", "bind", "blind"), #' values = c("afford", "affair", "available", "binary", "bind", "blind")) #'greedy_match(trie, c("avoid", "bring", "attack")) #' #'@seealso \code{\link{longest_match}} and \code{\link{prefix_match}} #'for longest and prefix matching, respectively. #' #'@export greedy_match <- function(trie, to_match, include_keys = FALSE){ stopifnot("trie" %in% class(trie)) stopifnot(!is.na(include_keys)) UseMethod("greedy_match", trie) } #'@export greedy_match.numeric_trie <- function(trie, to_match, include_keys = FALSE){ return(greedy_numeric(trie, to_match, include_keys)) } #'@export greedy_match.integer_trie <- function(trie, to_match, include_keys = FALSE){ return(greedy_integer(trie, to_match, include_keys)) } #'@export greedy_match.string_trie <- function(trie, to_match, include_keys = FALSE){ return(greedy_string(trie, to_match, include_keys)) } #'@export greedy_match.logical_trie <- function(trie, to_match, include_keys = FALSE){ return(greedy_logical(trie, to_match, include_keys)) } triebeard/R/get.R0000644000176200001440000000241514377436506013337 0ustar liggesusers#'@title Trie Getters #'@description "Getters" for the data stored in a trie object. \code{get_keys} #' gets the keys, \code{get_values} gets the values. #' #'@param trie A trie object, created with \code{\link{trie}}. #' #'@return An atomic vector of keys or values stored in the trie. 
#' #'@name getters #'@rdname getters NULL #'@rdname getters #'@export get_keys <- function(trie){ stopifnot("trie" %in% class(trie)) UseMethod("get_keys", trie) } #'@rdname getters #'@export get_values <- function(trie){ stopifnot("trie" %in% class(trie)) UseMethod("get_values", trie) } #'@export get_keys.string_trie <- function(trie){ return(get_keys_string(trie)) } #'@export get_keys.integer_trie <- function(trie){ return(get_keys_integer(trie)) } #'@export get_keys.numeric_trie <- function(trie){ return(get_keys_numeric(trie)) } #'@export get_keys.logical_trie <- function(trie){ return(get_keys_logical(trie)) } #'@export get_values.string_trie <- function(trie){ return(get_values_string(trie)) } #'@export get_values.integer_trie <- function(trie){ return(get_values_integer(trie)) } #'@export get_values.numeric_trie <- function(trie){ return(get_values_numeric(trie)) } #'@export get_values.logical_trie <- function(trie){ return(get_values_logical(trie)) } triebeard/R/as.R0000644000176200001440000000077714377436506013174 0ustar liggesusers#'@export as.list.trie <- function(x, ...){ return(list(keys = get_keys(x), values = get_values(x))) } #'@export as.data.frame.trie <- function(x, row.names = NULL, optional = FALSE, stringsAsFactors = FALSE, ...){ output <- data.frame(keys = get_keys(x), values = get_values(x), stringsAsFactors = stringsAsFactors, ...) 
if(!is.null(row.names)){ rownames(output) <- row.names } return(output) }triebeard/MD50000644000176200001440000000535214400752177012537 0ustar liggesusers3b0b0892f98408ed02e95b67f5a97394 *DESCRIPTION f94cca6b956b73bb59133806bf8630ed *LICENSE 3a04f227caeb649683174cb405e95a3d *NAMESPACE 28a5a61592dc5e0add26c19f4656a0e7 *NEWS fc8f6b2fdbda9832ff840e354e465b06 *R/RcppExports.R 6b8c5820078e558fed74ac9abde562d4 *R/alter.R 896e6c1354e06b438613b8bb0102aa82 *R/as.R 697b7a8fdaff92effd3d888bc8488548 *R/create.R da099d8106bd5e288145f294938b44b1 *R/get.R 64087a51fab2ca9aa3ec12665de32c04 *R/match.R ffbddee925213dc0f96f435e1ac57e37 *R/metadata.R 8d7d096ddc2873eb9bac4d598ba28be2 *R/triebeard.R e45cb2d59dfc34e81e3632ba655dc493 *README.md 13b4e760528e73b3a4c75324e407f9c2 *build/vignette.rds cb1096c81b056c87bdb37af2eb1ea36b *inst/doc/r_radix.R 890d7c27076eb26857ec09f9a6d88aae *inst/doc/r_radix.Rmd 695e0f946a175e4e36da5f90f052db0b *inst/doc/r_radix.html baf9e2a03a7a52b1305058dc23b2a8e1 *inst/doc/rcpp_radix.R 0c0cb935a3f5e8c8d89f47aaa9da4857 *inst/doc/rcpp_radix.Rmd 7af8c04924ccc503ca996038973c1eb1 *inst/doc/rcpp_radix.html 5d7986ce0d8af2be83ef21f2567aec01 *inst/include/radix.h 88d53ac341ef6cd3de95d9fc08c300d4 *inst/include/radix/radix_tree.hpp 9aaa8a32a444925f517b245e62a023d1 *inst/include/radix/radix_tree_it.hpp 51cc663dd5e66c53629dd472f3010343 *inst/include/radix/radix_tree_node.hpp a0c46197159a55e88a461e05e75a9e86 *man/alter.Rd bc320d34845aab66e91890c43077c8df *man/getters.Rd 1797c67e3567c7fae7350df4958b340a *man/greedy_match.Rd 64b08ed4266a48c9b012789cffde9331 *man/longest_match.Rd 283591e17060ce0f3b579be0779b3c07 *man/prefix_match.Rd 5c9c80d025b3d52cf723d005d0e1aaca *man/trie.Rd 84f57e19479f06884369145783a9f0bd *man/triebeard.Rd a4791667d570aa373d3accd2c0342f70 *src/Makevars ca620b20c8a3e90103c1993e06413ef9 *src/RcppExports.cpp 5ee232743bdd76ee685f76d63a87f29c *src/alter.cpp 03602e2613ad2a39164a3ebb546160fe *src/create.cpp 27d4379b5184abfdeed8bbf4517f5390 *src/get.cpp 
8b0a2e3835ce59cdac6b06f7315b489e *src/greedy_match.cpp 289a0cb64a67563d44a3043f13bd976b *src/length.cpp 3ca84acb8714b3f6c5ac0c2fef5e25ec *src/longest_match.cpp a05a053e08f20cf52df9c8cdbeb5ce70 *src/prefix_match.cpp ff3819918299a03be89550c3d0216dbd *src/r_trie.h 16f6214218ab59001935c1a6c9eded89 *src/str.cpp 0ce9673389a6629661d5cb3c771ce283 *src/typedef.h 1924dd7d9fc0a96bd905e1505fed3c17 *tests/testthat.R e97eaea72cb061e52763587e46a40cf2 *tests/testthat/test_alter.R 4625183c39080e1110621d2a69dca991 *tests/testthat/test_convert.R cc491a0f239228a9767f802806e68ac3 *tests/testthat/test_create.R b7aea82f87079307342cd8207b08177e *tests/testthat/test_get.R bb692f046d7a6a93b750d54b4bdd4287 *tests/testthat/test_greedy.R a0c7315725307063bfbd06f23175e632 *tests/testthat/test_longest.R d754bb22a652c8ba92de00764268b19b *tests/testthat/test_prefix.R 890d7c27076eb26857ec09f9a6d88aae *vignettes/r_radix.Rmd 0c0cb935a3f5e8c8d89f47aaa9da4857 *vignettes/rcpp_radix.Rmd triebeard/inst/0000755000176200001440000000000014400735317013174 5ustar liggesuserstriebeard/inst/doc/0000755000176200001440000000000014400735317013741 5ustar liggesuserstriebeard/inst/doc/rcpp_radix.Rmd0000644000176200001440000000636014377436506016560 0ustar liggesusers--- title: "Radix trees in Rcpp" author: "Oliver Keyes" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Radix trees in Rcpp} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- A **radix tree** is a data structure optimised for storing key-value pairs in a way optimised for searching. This makes them very, very good for efficiently matching data against keys, and retrieving the values *associated* with those keys. `triebeard` provides an implementation of radix trees for Rcpp (and also for use directly in R). 
To start using radix trees in your Rcpp development, simply modify your C++ file to include at the top:

```{Rcpp, eval=FALSE}
//[[Rcpp::depends(triebeard)]]
#include <radix.h>
```

## Constructing trees

Trees are constructed using the syntax:

```{Rcpp, eval=FALSE}
radix_tree<type1, type2> radix;
```

Where `type1` represents the type of the keys (for example, `std::string`) and `type2` the type of the values.

Radix trees can have any scalar type as keys, although strings are most typical; they can also have any scalar type for values. Once you've constructed a tree, new entries can be added in a very R-like way: `radix[new_key] = new_value;`. Entries can also be removed, with `radix.erase(key)`.

## Matching against trees

We then move on to the fun bit: matching! As mentioned, radix trees are really good for matching arbitrary values against keys (well, keys of the same type) and retrieving the associated values.

There are three types of supported matching; longest, prefix, and greedy. Longest does exactly what it says on the tin: it finds the key-value pair where the longest initial part of the key matches the arbitrary value:

```{Rcpp, eval=FALSE}
radix_tree<std::string, std::string> radix;
radix["turnin"] = "entry the first";
radix["turin"] = "entry the second";

radix_tree<std::string, std::string>::iterator it;

it = radix.longest_match("turing");

if(it == radix.end()){
  printf("No match was found :(");
} else {
  std::string result = "Key of longest match: " + it->first + " , value of longest match: " + it->second;
}
```

Prefix matching provides all trie entries where the value-to-match is a *prefix* of the key. The matches are written into a vector of iterators that you pass in:

```{Rcpp, eval=FALSE}
radix_tree<std::string, std::string> radix;
radix["turnin"] = "entry the first";
radix["turin"] = "entry the second";

std::vector<radix_tree<std::string, std::string>::iterator> vec;
std::vector<radix_tree<std::string, std::string>::iterator>::iterator it;

radix.prefix_match("tur", vec);

if(vec.empty()){
  printf("No match was found :(");
} else {
  for (it = vec.begin(); it != vec.end(); ++it) {
    std::string result = "Key of a prefix match: " + (*it)->first + " , value of a prefix match: " + (*it)->second;
  }
}
```

Greedy matching matches very, very fuzzily (a value of 'bring', for example, will match 'blind', 'bind' and 'binary') and, syntactically, looks exactly the same as prefix-matching, albeit with `radix.greedy_match()` instead of `radix.prefix_match()`. ### Other trie things If you have ideas for other trie-like structures, or functions that would be useful with *these* tries, the best approach is to either [request it](https://github.com/Ironholds/triebeard/issues) or [add it](https://github.com/Ironholds/triebeard/pulls)! triebeard/inst/doc/rcpp_radix.R0000644000176200001440000000213014400735317016213 0ustar liggesusers## //[[Rcpp::depends(triebeard)]] ## #include ## radix_tree radix; ## radix_tree radix; ## radix["turnin"] = "entry the first"; ## radix["turin"] = "entry the second"; ## ## radix_tree::iterator it; ## ## it = radix.longest_match("turing"); ## ## if(it = radix.end()){ ## printf("No match was found :("); ## } else { ## std::string result = "Key of longest match: " + it->first + " , value of longest match: " + it->second; ## } ## radix_tree radix; ## radix["turnin"] = "entry the first"; ## radix["turin"] = "entry the second"; ## ## std::vector::iterator> vec; ## std::vector::iterator>::iterator it; ## ## it = radix.prefix_match("tur"); ## ## if(it == vec.end()){ ## printf("No match was found :("); ## } else { ## for (it = vec.begin(); it != vec.end(); ++it) { ## std::string result = "Key of a prefix match: " + it->first + " , value of a prefix match: " + it->second; ## } ## } triebeard/inst/doc/r_radix.R0000644000176200001440000000552214400735316015517 0ustar liggesusers## ---- eval=FALSE-------------------------------------------------------------- # labels <- c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901", # "AO-1099", "AFT-1101", "QZ-4933") ## ---- eval=FALSE-------------------------------------------------------------- # library(triebeard) # trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), # values = 
c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) # # longest_match(trie = trie, to_match = labels) # # [1] "Audobon" "Atlanta" "Ann Arbor" "Austin" "Queensland" "Queensland" "Raleigh" "Audobon" "Austin" # [10] "Queensland" ## ---- eval=FALSE-------------------------------------------------------------- # prefix_match(trie = trie, to_match = "A") # # [[1]] # [1] "Ann Arbor" "Atlanta" "Austin" "Audobon" ## ---- eval=FALSE-------------------------------------------------------------- # greedy_match(trie = trie, to_match = "AO") # # [[1]] # [1] "Ann Arbor" "Atlanta" "Austin" "Audobon" ## ---- eval=FALSE-------------------------------------------------------------- # library(triebeard) # library(microbenchmark) # # trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), # values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) # # labels <- rep(c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901", # "AO-1099", "AFT-1101", "QZ-4933"), 100000) # # microbenchmark({longest_match(trie = trie, to_match = labels)}) # # Unit: milliseconds # expr min lq mean median uq max neval # { longest_match(trie = trie, to_match = labels) } 284.6457 285.5902 289.5342 286.8775 288.4564 327.3878 100 ## ---- eval=FALSE-------------------------------------------------------------- # to_match = "198.0.0.1" # trie_inst <- trie(keys = "197", values = "fake range") # # longest_match(trie_inst, to_match) # [1] NA # # trie_add(trie_inst, keys = "198", values = "home range") # longest_match(trie_inst, to_match) # [1] "home range" # # trie_remove(trie_inst, keys = "198") # longest_match(trie_inst, to_match) # [1] NA ## ---- eval=FALSE-------------------------------------------------------------- # trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), # values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) # # str(as.data.frame(trie)) # 'data.frame': 6 obs. 
of 2 variables: # $ keys : chr "AAI" "AEO" "AFT" "AO" ... # $ values: chr "Ann Arbor" "Atlanta" "Austin" "Audobon" ... # # str(as.list(trie)) # # List of 2 # $ keys : chr [1:6] "AAI" "AEO" "AFT" "AO" ... # $ values: chr [1:6] "Ann Arbor" "Atlanta" "Austin" "Audobon" ... triebeard/inst/doc/rcpp_radix.html0000644000176200001440000004350714400735317016773 0ustar liggesusers Radix trees in Rcpp

Radix trees in Rcpp

Oliver Keyes

2023-03-04

A radix tree is a data structure that stores key-value pairs in a way optimised for searching. This makes radix trees very, very good for efficiently matching data against keys, and retrieving the values associated with those keys.

triebeard provides an implementation of radix trees for Rcpp (and also for use directly in R). To start using radix trees in your Rcpp development, simply modify your C++ file to include at the top:

//[[Rcpp::depends(triebeard)]]
#include <radix.h>

Constructing trees

Trees are constructed using the syntax:

radix_tree<type1, type2> radix;

Where type1 represents the type of the keys (for example, std::string) and type2 the type of the values.

Radix trees can have any scalar type as keys, although strings are most typical; they can also have any scalar type for values. Once you’ve constructed a tree, new entries can be added in a very R-like way: radix[new_key] = new_value;. Entries can also be removed, with radix.erase(key).

Matching against trees

We then move on to the fun bit: matching! As mentioned, radix trees are really good for matching arbitrary values against keys (well, keys of the same type) and retrieving the associated values.

There are three types of supported matching; longest, prefix, and greedy. Longest does exactly what it says on the tin: it finds the key-value pair where the longest initial part of the key matches the arbitrary value:

radix_tree<std::string, std::string> radix;
radix["turnin"] = "entry the first";
radix["turin"] = "entry the second";

radix_tree<std::string, std::string>::iterator it;

it = radix.longest_match("turing");

if(it == radix.end()){
  printf("No match was found :(");
} else {
  std::string result = "Key of longest match: " + it->first + " , value of longest match: " + it->second;
}

Prefix matching provides all trie entries where the value-to-match is a prefix of the key:

radix_tree<std::string, std::string> radix;
radix["turnin"] = "entry the first";
radix["turin"] = "entry the second";

std::vector<radix_tree<std::string, std::string>::iterator> vec;
std::vector<radix_tree<std::string, std::string>::iterator>::iterator it;

radix.prefix_match("tur", vec);

if(vec.empty()){
  printf("No match was found :(");
} else {
  for (it = vec.begin(); it != vec.end(); ++it) {
    std::string result = "Key of a prefix match: " + (*it)->first + " , value of a prefix match: " + (*it)->second;
  }
}

Greedy matching matches very, very fuzzily (a value of ‘bring’, for example, will match ‘blind’, ‘bind’ and ‘binary’) and, syntactically, looks exactly the same as prefix-matching, albeit with radix.greedy_match() instead of radix.prefix_match().

Other trie things

If you have ideas for other trie-like structures, or functions that would be useful with these tries, the best approach is to either request it or add it!

triebeard/inst/doc/r_radix.html0000644000176200001440000006020614400735316016262 0ustar liggesusers Radix trees in R

Radix trees in R

Oliver Keyes

2023-03-04

A radix tree, or trie, is a data structure that stores key-value pairs in a way optimised for searching. This makes tries very, very good for efficiently matching data against keys, and retrieving the values associated with those keys.

triebeard provides an implementation of tries for R (and one that can be used in Rcpp development, too, if that’s your thing) so that useRs can take advantage of the fast, efficient and user-friendly matching that they allow.

Radix usage

Suppose we have observations in a dataset that are labelled, with a 2-3 letter code that identifies the facility the sample came from:

labels <- c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901",
            "AO-1099", "AFT-1101", "QZ-4933")

We know the facility each code maps to, and we want to be able to map the labels to that - not over 10 entries but over hundreds, or thousands, or hundreds of thousands. Tries are a great way of doing that: we treat the codes as keys and the full facility names as values. So let’s make a trie to do this matching, and then, well, match:

library(triebeard)
trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"),
             values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh"))

longest_match(trie = trie, to_match = labels)

 [1] "Audobon"    "Atlanta"    "Ann Arbor"  "Austin"     "Queensland" "Queensland" "Raleigh"    "Audobon"    "Austin"    
[10] "Queensland"

This pulls out, for each label, the trie value where the associated key has the longest prefix-match to the label. We can also just grab all the values where the key starts with, say, A:

prefix_match(trie = trie, to_match = "A")

[[1]]
[1] "Ann Arbor" "Atlanta"   "Austin"    "Audobon"  

And finally if we want we can match very, very fuzzily using “greedy” matching:

greedy_match(trie = trie, to_match = "AO")

[[1]]
[1] "Ann Arbor" "Atlanta"   "Austin"    "Audobon"  

These operations are very, very efficient. If we use longest-match as an example, since that’s the most useful thing, with a one-million element vector of things to match against:

library(triebeard)
library(microbenchmark)

trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"),
             values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh"))

labels <- rep(c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901",
                "AO-1099", "AFT-1101", "QZ-4933"), 100000)

microbenchmark({longest_match(trie = trie, to_match = labels)})

Unit: milliseconds
                                                  expr      min       lq     mean   median       uq      max neval
 {     longest_match(trie = trie, to_match = labels) } 284.6457 285.5902 289.5342 286.8775 288.4564 327.3878   100

I think we can call <300 milliseconds for a million matches against an entire set of possible values pretty fast.

Radix modification

There’s always the possibility that (horror of horrors) you’ll have to add or remove entries from the trie. Fear not; you can do just that with trie_add and trie_remove respectively, both of which silently modify the trie they’re provided with to add or remove whatever key-value pairs you provide:

to_match = "198.0.0.1"
trie_inst <- trie(keys = "197", values = "fake range")

longest_match(trie_inst, to_match)
[1] NA

trie_add(trie_inst, keys = "198", values = "home range")
longest_match(trie_inst, to_match)
[1] "home range"

trie_remove(trie_inst, keys = "198")
longest_match(trie_inst, to_match)
[1] NA

Metadata and coercion

You can also extract information from tries without using them. dim, str, print and length all work for tries, and you can use get_keys(trie) and get_values(trie) to extract, respectively, the keys and values from a trie object.

In addition, you can also coerce tries into other R data structures, specifically lists and data.frames:

trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"),
             values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh"))

str(as.data.frame(trie))
'data.frame':   6 obs. of  2 variables:
 $ keys  : chr  "AAI" "AEO" "AFT" "AO" ...
 $ values: chr  "Ann Arbor" "Atlanta" "Austin" "Audobon" ...

str(as.list(trie))

List of 2
 $ keys  : chr [1:6] "AAI" "AEO" "AFT" "AO" ...
 $ values: chr [1:6] "Ann Arbor" "Atlanta" "Austin" "Audobon" ...

Other trie operations

If you have ideas for other trie-like structures, or functions that would be useful with these tries, the best approach is to either request it or add it!

triebeard/inst/doc/r_radix.Rmd0000644000176200001440000001173714377436506016061 0ustar liggesusers--- title: "Radix trees in R" author: "Oliver Keyes" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Radix trees in R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- A **radix tree**, or **trie**, is a data structure optimised for storing key-value pairs in a way optimised for searching. This makes them very, very good for efficiently matching data against keys, and retrieving the values *associated* with those keys. `triebeard` provides an implementation of tries for R (and one that can be used in Rcpp development, too, if that's your thing) so that useRs can take advantage of the fast, efficient and user-friendly matching that they allow. ## Radix usage Suppose we have observations in a dataset that are labelled, with a 2-3 letter code that identifies the facility the sample came from: ```{r, eval=FALSE} labels <- c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901", "AO-1099", "AFT-1101", "QZ-4933") ``` We know the facility each code maps to, and we want to be able to map the labels to that - not over 10 entries but over hundreds, or thousands, or hundreds *of* thousands. Tries are a great way of doing that: we treat the codes as *keys* and the full facility names as *values*. So let's make a trie to do this matching, and then, well, match: ```{r, eval=FALSE} library(triebeard) trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) longest_match(trie = trie, to_match = labels) [1] "Audobon" "Atlanta" "Ann Arbor" "Austin" "Queensland" "Queensland" "Raleigh" "Audobon" "Austin" [10] "Queensland" ``` This pulls out, for each label, the trie value where the associated key has the longest prefix-match to the label. 
We can also just grab all the values where the key starts with, say, A: ```{r, eval=FALSE} prefix_match(trie = trie, to_match = "A") [[1]] [1] "Ann Arbor" "Atlanta" "Austin" "Audobon" ``` And finally if we want we can match very, very fuzzily using "greedy" matching: ```{r, eval=FALSE} greedy_match(trie = trie, to_match = "AO") [[1]] [1] "Ann Arbor" "Atlanta" "Austin" "Audobon" ``` These operations are very, very efficient. If we use longest-match as an example, since that's the most useful thing, with a one-million element vector of things to match against: ```{r, eval=FALSE} library(triebeard) library(microbenchmark) trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) labels <- rep(c("AO-1002", "AEO-1004", "AAI-1009", "AFT-1403", "QZ-9065", "QZ-1021", "RF-0901", "AO-1099", "AFT-1101", "QZ-4933"), 100000) microbenchmark({longest_match(trie = trie, to_match = labels)}) Unit: milliseconds expr min lq mean median uq max neval { longest_match(trie = trie, to_match = labels) } 284.6457 285.5902 289.5342 286.8775 288.4564 327.3878 100 ``` I think we can call <300 milliseconds for a million matches against an entire set of possible values pretty fast. ## Radix modification There's always the possibility that (horror of horrors) you'll have to add or remove entries from the trie. 
Fear not; you can do just that with `trie_add` and `trie_remove` respectively, both of which silently modify the trie they're provided with to add or remove whatever key-value pairs you provide: ```{r, eval=FALSE} to_match = "198.0.0.1" trie_inst <- trie(keys = "197", values = "fake range") longest_match(trie_inst, to_match) [1] NA trie_add(trie_inst, keys = "198", values = "home range") longest_match(trie_inst, to_match) [1] "home range" trie_remove(trie_inst, keys = "198") longest_match(trie_inst, to_match) [1] NA ``` ## Metadata and coercion You can also extract information from tries without using them. `dim`, `str`, `print` and `length` all work for tries, and you can use `get_keys(trie)` and `get_values(trie)` to extract, respectively, the keys and values from a trie object. In addition, you can also coerce tries into other R data structures, specifically lists and data.frames: ```{r, eval=FALSE} trie <- trie(keys = c("AO", "AEO", "AAI", "AFT", "QZ", "RF"), values = c("Audobon", "Atlanta", "Ann Arbor", "Austin", "Queensland", "Raleigh")) str(as.data.frame(trie)) 'data.frame': 6 obs. of 2 variables: $ keys : chr "AAI" "AEO" "AFT" "AO" ... $ values: chr "Ann Arbor" "Atlanta" "Austin" "Audobon" ... str(as.list(trie)) List of 2 $ keys : chr [1:6] "AAI" "AEO" "AFT" "AO" ... $ values: chr [1:6] "Ann Arbor" "Atlanta" "Austin" "Audobon" ... ``` ### Other trie operations If you have ideas for other trie-like structures, or functions that would be useful with *these* tries, the best approach is to either [request it](https://github.com/Ironholds/triebeard/issues) or [add it](https://github.com/Ironholds/triebeard/pulls)! 
triebeard/inst/include/0000755000176200001440000000000014377436506014632 5ustar liggesuserstriebeard/inst/include/radix/0000755000176200001440000000000014400752177015731 5ustar liggesuserstriebeard/inst/include/radix/radix_tree.hpp0000644000176200001440000003017414377436506020605 0ustar liggesusers#ifndef RADIX_TREE_HPP #define RADIX_TREE_HPP #include #include #include #include #include "radix_tree_it.hpp" #include "radix_tree_node.hpp" template K radix_substr(const K &key, int begin, int num); template<> inline std::string radix_substr(const std::string &key, int begin, int num) { return key.substr(begin, num); } template K radix_join(const K &key1, const K &key2); template<> inline std::string radix_join(const std::string &key1, const std::string &key2) { return key1 + key2; } template int radix_length(const K &key); template<> inline int radix_length(const std::string &key) { return key.size(); } template class radix_tree { public: typedef K key_type; typedef T mapped_type; typedef std::pair value_type; typedef radix_tree_it iterator; typedef std::size_t size_type; radix_tree() : m_size(0), m_root(NULL) { } ~radix_tree() { delete m_root; } size_type size() const { return m_size; } bool empty() const { return m_size == 0; } void clear() { delete m_root; m_root = NULL; m_size = 0; } iterator find(const K &key); iterator begin(); iterator end(); std::pair insert(const value_type &val); bool erase(const K &key); void erase(iterator it); void prefix_match(const K &key, std::vector &vec); void greedy_match(const K &key, std::vector &vec); iterator longest_match(const K &key); T& operator[] (const K &lhs); private: size_type m_size; radix_tree_node* m_root; radix_tree_node* begin(radix_tree_node *node); radix_tree_node* find_node(const K &key, radix_tree_node *node, int depth); radix_tree_node* append(radix_tree_node *parent, const value_type &val); radix_tree_node* prepend(radix_tree_node *node, const value_type &val); void greedy_match(radix_tree_node *node, 
std::vector &vec); radix_tree(const radix_tree& other); // delete radix_tree& operator =(const radix_tree other); // delete }; template void radix_tree::prefix_match(const K &key, std::vector &vec) { vec.clear(); if (m_root == NULL) return; radix_tree_node *node; K key_sub1, key_sub2; node = find_node(key, m_root, 0); if (node->m_is_leaf) node = node->m_parent; int len = radix_length(key) - node->m_depth; key_sub1 = radix_substr(key, node->m_depth, len); key_sub2 = radix_substr(node->m_key, 0, len); if (key_sub1 != key_sub2) return; greedy_match(node, vec); } template typename radix_tree::iterator radix_tree::longest_match(const K &key) { if (m_root == NULL) return iterator(NULL); radix_tree_node *node; K key_sub; node = find_node(key, m_root, 0); if (node->m_is_leaf) return iterator(node); key_sub = radix_substr(key, node->m_depth, radix_length(node->m_key)); if (! (key_sub == node->m_key)) node = node->m_parent; K nul = radix_substr(key, 0, 0); while (node != NULL) { typename radix_tree_node::it_child it; it = node->m_children.find(nul); if (it != node->m_children.end() && it->second->m_is_leaf) return iterator(it->second); node = node->m_parent; } return iterator(NULL); } template typename radix_tree::iterator radix_tree::end() { return iterator(NULL); } template typename radix_tree::iterator radix_tree::begin() { radix_tree_node *node; if (m_root == NULL) node = NULL; else node = begin(m_root); return iterator(node); } template radix_tree_node* radix_tree::begin(radix_tree_node *node) { if (node->m_is_leaf) return node; assert(!node->m_children.empty()); return begin(node->m_children.begin()->second); } template T& radix_tree::operator[] (const K &lhs) { iterator it = find(lhs); if (it == end()) { std::pair val; val.first = lhs; std::pair ret; ret = insert(val); assert(ret.second == true); it = ret.first; } return it->second; } template void radix_tree::greedy_match(const K &key, std::vector &vec) { radix_tree_node *node; vec.clear(); if (m_root == NULL) 
return; node = find_node(key, m_root, 0); if (node->m_is_leaf) node = node->m_parent; greedy_match(node, vec); } template void radix_tree::greedy_match(radix_tree_node *node, std::vector &vec) { if (node->m_is_leaf) { vec.push_back(iterator(node)); return; } typename std::map*>::iterator it; for (it = node->m_children.begin(); it != node->m_children.end(); ++it) { greedy_match(it->second, vec); } } template void radix_tree::erase(iterator it) { erase(it->first); } template bool radix_tree::erase(const K &key) { if (m_root == NULL) return 0; radix_tree_node *child; radix_tree_node *parent; radix_tree_node *grandparent; K nul = radix_substr(key, 0, 0); child = find_node(key, m_root, 0); if (! child->m_is_leaf) return 0; parent = child->m_parent; parent->m_children.erase(nul); delete child; m_size--; if (parent == m_root) return 1; if (parent->m_children.size() > 1) return 1; if (parent->m_children.empty()) { grandparent = parent->m_parent; grandparent->m_children.erase(parent->m_key); delete parent; } else { grandparent = parent; } if (grandparent == m_root) { return 1; } if (grandparent->m_children.size() == 1) { // merge grandparent with the uncle typename std::map*>::iterator it; it = grandparent->m_children.begin(); radix_tree_node *uncle = it->second; if (uncle->m_is_leaf) return 1; uncle->m_depth = grandparent->m_depth; uncle->m_key = radix_join(grandparent->m_key, uncle->m_key); uncle->m_parent = grandparent->m_parent; grandparent->m_children.erase(it); grandparent->m_parent->m_children.erase(grandparent->m_key); grandparent->m_parent->m_children[uncle->m_key] = uncle; delete grandparent; } return 1; } template radix_tree_node* radix_tree::append(radix_tree_node *parent, const value_type &val) { int depth; int len; K nul = radix_substr(val.first, 0, 0); radix_tree_node *node_c, *node_cc; depth = parent->m_depth + radix_length(parent->m_key); len = radix_length(val.first) - depth; if (len == 0) { node_c = new radix_tree_node(val); node_c->m_depth = depth; 
node_c->m_parent = parent; node_c->m_key = nul; node_c->m_is_leaf = true; parent->m_children[nul] = node_c; return node_c; } else { node_c = new radix_tree_node(val); K key_sub = radix_substr(val.first, depth, len); parent->m_children[key_sub] = node_c; node_c->m_depth = depth; node_c->m_parent = parent; node_c->m_key = key_sub; node_cc = new radix_tree_node(val); node_c->m_children[nul] = node_cc; node_cc->m_depth = depth + len; node_cc->m_parent = node_c; node_cc->m_key = nul; node_cc->m_is_leaf = true; return node_cc; } } template radix_tree_node* radix_tree::prepend(radix_tree_node *node, const value_type &val) { int count; int len1, len2; len1 = radix_length(node->m_key); len2 = radix_length(val.first) - node->m_depth; for (count = 0; count < len1 && count < len2; count++) { if (! (node->m_key[count] == val.first[count + node->m_depth]) ) break; } assert(count != 0); node->m_parent->m_children.erase(node->m_key); radix_tree_node *node_a = new radix_tree_node; node_a->m_parent = node->m_parent; node_a->m_key = radix_substr(node->m_key, 0, count); node_a->m_depth = node->m_depth; node_a->m_parent->m_children[node_a->m_key] = node_a; node->m_depth += count; node->m_parent = node_a; node->m_key = radix_substr(node->m_key, count, len1 - count); node->m_parent->m_children[node->m_key] = node; K nul = radix_substr(val.first, 0, 0); if (count == len2) { radix_tree_node *node_b; node_b = new radix_tree_node(val); node_b->m_parent = node_a; node_b->m_key = nul; node_b->m_depth = node_a->m_depth + count; node_b->m_is_leaf = true; node_b->m_parent->m_children[nul] = node_b; return node_b; } else { radix_tree_node *node_b, *node_c; node_b = new radix_tree_node; node_b->m_parent = node_a; node_b->m_depth = node->m_depth; node_b->m_key = radix_substr(val.first, node_b->m_depth, len2 - count); node_b->m_parent->m_children[node_b->m_key] = node_b; node_c = new radix_tree_node(val); node_c->m_parent = node_b; node_c->m_depth = radix_length(val.first); node_c->m_key = nul; 
node_c->m_is_leaf = true; node_c->m_parent->m_children[nul] = node_c; return node_c; } } template std::pair::iterator, bool> radix_tree::insert(const value_type &val) { if (m_root == NULL) { K nul = radix_substr(val.first, 0, 0); m_root = new radix_tree_node; m_root->m_key = nul; } radix_tree_node *node = find_node(val.first, m_root, 0); if (node->m_is_leaf) { return std::pair(node, false); } else if (node == m_root) { m_size++; return std::pair(append(m_root, val), true); } else { m_size++; int len = radix_length(node->m_key); K key_sub = radix_substr(val.first, node->m_depth, len); if (key_sub == node->m_key) { return std::pair(append(node, val), true); } else { return std::pair(prepend(node, val), true); } } } template typename radix_tree::iterator radix_tree::find(const K &key) { if (m_root == NULL) return iterator(NULL); radix_tree_node *node = find_node(key, m_root, 0); // if the node is a internal node, return NULL if (! node->m_is_leaf) return iterator(NULL); return iterator(node); } template radix_tree_node* radix_tree::find_node(const K &key, radix_tree_node *node, int depth) { if (node->m_children.empty()) return node; typename radix_tree_node::it_child it; int len_key = radix_length(key) - depth; for (it = node->m_children.begin(); it != node->m_children.end(); ++it) { if (len_key == 0) { if (it->second->m_is_leaf) return it->second; else continue; } if (! 
it->second->m_is_leaf && key[depth] == it->first[0] ) { int len_node = radix_length(it->first); K key_sub = radix_substr(key, depth, len_node); if (key_sub == it->first) { return find_node(key, it->second, depth+len_node); } else { return it->second; } } } return node; } #endif // RADIX_TREE_HPP triebeard/inst/include/radix/radix_tree_it.hpp0000644000176200001440000000613614400725634021270 0ustar liggesusers#ifndef RADIX_TREE_IT #define RADIX_TREE_IT #include // forward declaration template class radix_tree; template class radix_tree_node; template class radix_tree_it { public: using iterator_category = std::forward_iterator_tag; using value_type = std::pair; using difference_type = std::pair; using pointer = std::pair*; using reference = std::pair&; radix_tree_it() : m_pointee(0) { } radix_tree_it(const radix_tree_it& r) : m_pointee(r.m_pointee) { } radix_tree_it& operator=(const radix_tree_it& r) { m_pointee = r.m_pointee; return *this; } ~radix_tree_it() { } std::pair& operator* () const; std::pair* operator-> () const; const radix_tree_it& operator++ (); radix_tree_it operator++ (int); // const radix_tree_it& operator-- (); bool operator!= (const radix_tree_it &lhs) const; bool operator== (const radix_tree_it &lhs) const; radix_tree_node *m_pointee; radix_tree_it(radix_tree_node *p) : m_pointee(p) { } radix_tree_node* increment(radix_tree_node* node) const; radix_tree_node* descend(radix_tree_node* node) const; }; template radix_tree_node* radix_tree_it::increment(radix_tree_node* node) const { radix_tree_node* parent = node->m_parent; if (parent == NULL) return NULL; typename radix_tree_node::it_child it = parent->m_children.find(node->m_key); assert(it != parent->m_children.end()); ++it; if (it == parent->m_children.end()) return increment(parent); else return descend(it->second); } template radix_tree_node* radix_tree_it::descend(radix_tree_node* node) const { if (node->m_is_leaf) return node; typename radix_tree_node::it_child it = node->m_children.begin(); 
assert(it != node->m_children.end()); return descend(it->second); } template std::pair& radix_tree_it::operator* () const { return *m_pointee->m_value; } template std::pair* radix_tree_it::operator-> () const { return m_pointee->m_value; } template bool radix_tree_it::operator!= (const radix_tree_it &lhs) const { return m_pointee != lhs.m_pointee; } template bool radix_tree_it::operator== (const radix_tree_it &lhs) const { return m_pointee == lhs.m_pointee; } template const radix_tree_it& radix_tree_it::operator++ () { if (m_pointee != NULL) // it is undefined behaviour to dereference iterator that is out of bounds... m_pointee = increment(m_pointee); return *this; } template radix_tree_it radix_tree_it::operator++ (int) { radix_tree_it copy(*this); ++(*this); return copy; } #endif // RADIX_TREE_IT triebeard/inst/include/radix/radix_tree_node.hpp0000644000176200001440000000242614377436506021611 0ustar liggesusers#ifndef RADIX_TREE_NODE_HPP #define RADIX_TREE_NODE_HPP #include template class radix_tree_node { friend class radix_tree; friend class radix_tree_it; typedef std::pair value_type; typedef typename std::map* >::iterator it_child; private: radix_tree_node() : m_children(), m_parent(NULL), m_value(NULL), m_depth(0), m_is_leaf(false), m_key() { } radix_tree_node(const value_type &val); radix_tree_node(const radix_tree_node&); // delete radix_tree_node& operator=(const radix_tree_node&); // delete ~radix_tree_node(); std::map*> m_children; radix_tree_node *m_parent; value_type *m_value; int m_depth; bool m_is_leaf; K m_key; }; template radix_tree_node::radix_tree_node(const value_type &val) : m_children(), m_parent(NULL), m_value(NULL), m_depth(0), m_is_leaf(false), m_key() { m_value = new value_type(val); } template radix_tree_node::~radix_tree_node() { it_child it; for (it = m_children.begin(); it != m_children.end(); ++it) { delete it->second; } delete m_value; } #endif // RADIX_TREE_NODE_HPP 
triebeard/inst/include/radix.h0000644000176200001440000000004014377436506016104 0ustar liggesusers#include "radix/radix_tree.hpp"