
COS 226 Lecture 10: Radix trees and tries

(Text below is de-interleaved from a two-slides-per-row PDF extraction. Code is
reproduced as it appears on the slides. Figures that could not be redrawn are
given as their extracted labels.)

----------------------------------------------------------------------
10.1  Symbol Table, Dictionary

  Symbol Table, Dictionary
    - records with keys
    - INSERT
    - SEARCH
  Balanced trees, randomized trees
    - use O(lgN) comparisons
  Hashing
    - uses O(1) probes
    - but time proportional to key length
  Are comparisons necessary? (no)
  Is time proportional to key length required? (no)
  Best possible: examine lgN BITS

----------------------------------------------------------------------
10.2  Digital search trees (DSTs)

  Easy way to balance tree: use bits of key to direct search
  Otherwise identical to BST

      #define bit(A, B) digit(A, B)
      Item searchR(link h, Key v, int w)
        { Key t = key(h->item);
          if (h == z) return NULLitem;
          if eq(v, t) return h->item;
          if (bit(v, w) == 0)
               return searchR(h->l, v, w+1);
          else return searchR(h->r, v, w+1);
        }
      Item STsearch(Key v)
        { return searchR(head, v, 0); }

  "digit" macro extracts Bth bit from A (see lecture 6)

----------------------------------------------------------------------
10.3  Digital tree insertion

  Keys and their 5-bit codes (in insertion order):

      A 00001
      S 10011
      E 00101
      R 10010
      C 00011
      H 01000
      I 01001
      N 01110
      G 00111
      X 11000
      M 01101
      P 10000
      L 01100

  [Figure: digital search tree built from these keys; links are labelled
   0 and 1. Nodes by level:]

      A
      E S
      C H R X
      G I N P
      M
      L

  [Figure: second tree, which additionally contains Z. Nodes by level:]

      A
      E S
      C H R X
      G I N P Z
      M
      L

  Trees NOT ordered
    - does not support SORT and SELECT

----------------------------------------------------------------------
10.4  DST insertion code

      link insertR(link h, Item item, int w)
        { Key v = key(item), t = key(h->item);
          if (h == z) return NEW(item, z, z, 1);
          if (bit(v, w) == 0)
               h->l = insertR(h->l, item, w+1);
          else h->r = insertR(h->r, item, w+1);
          return h;
        }
      void STinsert(Item item)
        { head = insertR(head, item, 0); }

----------------------------------------------------------------------
10.5  Tries

  Branch according to bits, as in DST
    - No keys in internal nodes
    - Records/keys in external nodes
  Structure depends on keys, not insertion order
  Examine lgN BITS to distinguish key
    - independent of key length!
  Problems:
    - 44% space waste from 1-way branching
    - multiple node types

----------------------------------------------------------------------
10.6  Trie example

  [Figure: sequence of tries. Node labels as extracted, row by row:]

      H
      E E A C A C R S H I R S
      E N N A C A C E G
      H I R S H I R S
      X X
      N A C E G A C E G M N
      H I R S H I R S
      X X
      A C E G M N P A C E G N P
      H I R S H I L M R S

----------------------------------------------------------------------
10.7  Trie search implementation

  Branch according to bits in keys
  Three outcomes:
    - null link
    - key in external node matches search key
    - key in external node differs from search key

      #define external(A) ((h->l == z) && (h->r == z))
      Item searchR(link h, Key v, int w)
        { Key t = key(h->item);
          if (h == z) return NULLitem;
          if (external(h))
            return eq(v, t) ? h->item : NULLitem;
          if (digit(v, w) == 0)
               return searchR(h->l, v, w+1);
          else return searchR(h->r, v, w+1);
        }
      Item STsearch(Key v)
        { return searchR(head, v, 0); }

----------------------------------------------------------------------
10.8  Trie insertion implementation

  Two possible search miss outcomes
    - null link: replace with link to new node
    - external node: recursive split to distinguish new key

      void STinit()
        { head = (z = NEW(NULLitem, 0, 0, 0)); }
      link insertR(link h, Item item, int w)
        { Key v = key(item), t = key(h->item);
          if (h == z) return NEW(item, z, z, 1);
          if (external(h))
            { return split(NEW(item, z, z, 1), h, w); }
          if (digit(v, w) == 0)
               h->l = insertR(h->l, item, w+1);
          else h->r = insertR(h->r, item, w+1);
          return h;
        }
      void STinsert(Item item)
        { head = insertR(head, item, 0); }

----------------------------------------------------------------------
10.9  Trie insertion recursive node split

      link split(link p, link q, int w)
        { link t = NEW(NULLitem, z, z, 2);
          switch(bit(p->item, w)*2+bit(q->item, w))
            {
              case 0: t->l = split(p, q, w+1); break;
              case 1: t->l = p; t->r = q; break;
              case 2: t->r = p; t->l = q; break;
              case 3: t->r = split(p, q, w+1); break;
            }
          return t;
        }

----------------------------------------------------------------------
10.10  Patricia tries

  Digression: cute nomenclature
    - Practical Algorithm To Retrieve Information Coded In Alphanumeric
      (initial letters spell PATRICIA)
    - information reTRIEval (but pronounced "try")
  Patricia: collapse one-way branches in tries
  "thread" tree to eliminate multiple node types
  Quintessential search algorithm

----------------------------------------------------------------------
10.11  Patricia trie search and insertion

  SEARCH: branch according to specified bit
      [Figure labels as extracted: 0 1 S 4 2 H R 3 E 4 C A]

  INSERT: search, then find leftmost bit that distinguishes new key
  from key found

  Easy (external) case
      [Figure labels as extracted: 0 1 S 4 2 H 4 R 3 E I 4 C A]

  Harder (internal) case
      [Figure labels as extracted: 0 1 S 4 2 H 2 R 3 E 4 N 4 C I A]

----------------------------------------------------------------------
10.12  R-way digital tree

  Generalize DIGITAL TREE to R links per node
  R-way branching gives fast search

      struct STnode
        { Item item; link next[26]; };
      Item searchR(link h, Key v, int w)
        { int i = digit(v, w);
          if (h == z) return NULLitem;
          if eq(v, key(h->item)) return h->item;
          return searchR(h->next[i], v, w+1);
        }
      Item STsearch(Key v)
        { return searchR(head, v, 0); }

  Simple, fast (but uses a lot of space)

----------------------------------------------------------------------
10.13  R-way digital tree example

  [Figure: first tree, nodes by level:]

      she
      by  sells  the
      sea  shells

  [Figure: second tree, nodes by level:]

      she
      by  sells  the
      sea  shells
      shore

----------------------------------------------------------------------
10.14  R-way digital tree analysis

  N keys: N internal nodes, R links per node
  Space: N*R
  Time: lgN/lgR comparisons
  Ex: R=26, N=20000
    - 500,000 links
    - tree height 3-4
  Ex: R=16, N=1M
    - 16M links
    - tree height 5
  Plus: one node type, easy implementation
  Minus:
    - full comparison at each node
    - does not support SORT and SELECT

----------------------------------------------------------------------
10.15  R-way trie

  Generalize TRIE to R-way branching
  Nodes contain characters
  R-way branching on next character
  End-of-key options
    - fixed-length keys
    - prefix match
    - prefix-free keys
    - end-of-key character
    - suffix trie
  Maintain order in tree to support SORT and SELECT

----------------------------------------------------------------------
10.16  R-way trie search code

      typedef struct STnode* link;
      struct STnode
        { Item item; link next[R]; };
      Item searchR(link h, Key v, int w)
        { int i = digit(v, w);
          if (h == z) return NULLitem;
          if (internal(h))
            return searchR(h->next[i], v, w+1);
          if eq(v, key(h->item)) return h->item;
          return NULLitem;
        }
      Item STsearch(Key v)
        { return searchR(head, v, 0); }

----------------------------------------------------------------------
10.17  R-way trie example

  [Figure: first trie, characters by level:]

      b s t
      y e h h
      a l e e
      l l
      s l
      s

  [Figure: second trie, characters by level:]

      b s t
      y e h h
      a l e o e
      l l r
      s l e
      s

----------------------------------------------------------------------
10.18  R-way trie insert code

      link split(link p, link q, int w)
        { link t = NEW(NULLitem);
          int pd = digit(p->item, w),
              qd = digit(q->item, w);
          if (pd == qd)
            { t->next[pd] = split(p, q, w+1); }
          else
            { t->next[pd] = p; t->next[qd] = q; }
          return t;
        }
      link insertR(link h, Item item, int w)
        { Key v = key(item);
          int i = digit(v, w);
          if (h == z) return NEW(item);
          if (!internal(h))
            { return split(NEW(item), h, w); }
          h->next[i] = insertR(h->next[i], v, w+1);
          return h;
        }
      void STinsert(Item item)
        { head = insertR(head, item, 0); }

----------------------------------------------------------------------
10.19  R-way trie existence table

  EXISTENCE TABLE
    - no data in trie
    - keys encoded in trie structure
  Easy to implement, but may use excessive space

      Item searchR(link h, Key v, int w)
        { int i = digit(v, w);
          if (h == z) return NULLitem;
          if (i == NULLdigit) return v;
          return searchR(h->next[i], v, w+1);
        }
      Item STsearch(Key v)
        { return searchR(head, v, 0); }

  Extend digit(v, w) implementation to return NULLdigit
  if v has fewer than w digits

----------------------------------------------------------------------
10.20  R-way trie example (without null links)

  [Figure: first trie, characters by level:]

      b s t
      y e h h
      a l e e
      l l
      s l
      s

  [Figure: second trie, characters by level:]

      b s t
      y e h h
      a l e o e
      l l r
      s l e
      s

----------------------------------------------------------------------
10.21  R-way trie analysis

  Assumptions
    - recursive structure
    - one-way links collapsed
    - link to remainder of key in external nodes
  N keys, total of C characters in keys
    - approx. N internal nodes
    - R links per node
  Space: N*R + C
  Time: lgN/lgR CHARACTER comparisons
  Ex: R=26, N=20000
    - 520,000 links
    - tree height 3-4
  Ex: R=16, N=1M
    - 16M links
    - tree height 5

----------------------------------------------------------------------
10.22  R-way trie summary

  Faster than hashing
    - successful search: no arithmetic
    - unsuccessful search: don’t need to examine whole key
  Supports SORT, SELECT, other ADT operations
  Problems
    - eliminating 1-way branches
    - multiple node types
    - too much space for null links

----------------------------------------------------------------------
10.23  Correspondence with sorting algs

  BSTs correspond to Quicksort
  Roles of TIME and SPACE interchanged
    TIME:
      - path in tree
      - size of subfile in sort
    SPACE:
      - stack size in sort
      - branching factor in tree
  Tries correspond to MSD radix sort

  [Figure: trie, characters as extracted, row by row:]

      a b c d e f g h i j m n o r s t w
      c g n e a u i u g e o i u l a o e o w a
      k o a i a e e o d t b w e m g g e w r g t k m y t y n b w l p y b g p r p d s e

  3-way tries correspond to 3-way Quicksort

----------------------------------------------------------------------
10.24  Three-way radix tries

  Nodes contain characters
  three-way branching on next character
    - left: key character less
    - middle: key character equal
    - right: key character greater
  Equivalent to replacing trie node with BST on character

  [Figure: characters as extracted, row by row:]

      a c f g h i j m n o r s t w
      a o j f r a c h m w o a g i n t o s

----------------------------------------------------------------------
Ternary search tries (TSTs)
[slide number not present in the extracted text; slide is cut off;
 assignment of the fragments below to this slide is inferred]

  Existence trie implementation OK
  Adds factor of 2lnM to search cost
  constant no. [text truncated in source]

----------------------------------------------------------------------
TST search implementation
[slide number not present in the extracted text; slide is cut off;
 assignment of the fragments below to this slide is inferred]

  Code writes itself

      Item searchR(link h, Key v, int w)
        { int i = digit(v, w);
          [code truncated in source]
Details
-
File Type: pdf
-
Upload Time: -
-
Content Languages: English
-
Upload User: Anonymous/Not logged-in
-
File Pages: 8 pages
-
File Size: -