hashtable.c 18 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721
  1. /*
  2. * This implementation is based on code from uClibc-0.9.30.3 but was
  3. * modified and extended for use within U-Boot.
  4. *
  5. * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
  6. *
  7. * Original license header:
  8. *
  9. * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
  10. * This file is part of the GNU C Library.
  11. * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
  12. *
  13. * The GNU C Library is free software; you can redistribute it and/or
  14. * modify it under the terms of the GNU Lesser General Public
  15. * License as published by the Free Software Foundation; either
  16. * version 2.1 of the License, or (at your option) any later version.
  17. *
  18. * The GNU C Library is distributed in the hope that it will be useful,
  19. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  21. * Lesser General Public License for more details.
  22. *
  23. * You should have received a copy of the GNU Lesser General Public
  24. * License along with the GNU C Library; if not, write to the Free
  25. * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  26. * 02111-1307 USA.
  27. */
  28. #include <errno.h>
  29. #include <malloc.h>
  30. #ifdef USE_HOSTCC /* HOST build */
  31. # include <string.h>
  32. # include <assert.h>
  33. # ifndef debug
  34. # ifdef DEBUG
  35. # define debug(fmt,args...) printf(fmt ,##args)
  36. # else
  37. # define debug(fmt,args...)
  38. # endif
  39. # endif
  40. #else /* U-Boot build */
  41. # include <common.h>
  42. # include <linux/string.h>
  43. #endif
  44. #include "search.h"
  45. /*
  46. * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
  47. * [Knuth] The Art of Computer Programming, part 3 (6.4)
  48. */
  /*
   * The non-reentrant functions in this file (hcreate(), hdestroy(),
   * hsearch(), hdelete(), hexport(), himport()) all operate on this single
   * file-local hash table by passing its address to their *_r() counterpart.
   */
  static struct hsearch_data htab;
  53. /*
  54. * The reentrant version has no static variables to maintain the state.
  55. * Instead the interface of all functions is extended to take an argument
  56. * which describes the current status.
  57. */
  /*
   * One slot of the hash table.
   */
  struct _ENTRY {
      /*
       * 0 while the slot is free; otherwise the (non-zero) first hash
       * value of the stored key, as written by hsearch_r().
       */
      unsigned int used;
      /* the key/data pair stored in this slot */
      ENTRY entry;
  };
  typedef struct _ENTRY _ENTRY;
  62. /*
  63. * hcreate()
  64. */
  /*
   * The double hashing scheme used here requires the table size to be a
   * prime, so the size requested by the user is rounded up with the help
   * of this trial-division test.  The trivial algorithm is adequate because
   *   a) it is (most probably) called only a few times per program run and
   *   b) the number is small because the table must fit into memory.
   *
   * Only odd numbers are expected; even numbers are not tested for
   * divisibility by 2.
   *
   * Returns 1 if no odd divisor d with 3 <= d <= sqrt(number) exists,
   * 0 otherwise.
   */
  static int isprime(unsigned int number)
  {
      unsigned int div;

      /*
       * "div <= number / div" is the overflow-free form of
       * "div * div <= number": the product could wrap around for large
       * numbers and keep the loop running far beyond sqrt(number).
       */
      for (div = 3; div <= number / div; div += 2) {
          if (number % div == 0)
              return 0;
      }

      return 1;
  }
  80. int hcreate(size_t nel)
  81. {
  82. return hcreate_r(nel, &htab);
  83. }
  84. /*
  85. * Before using the hash table we must allocate memory for it.
  86. * Test for an existing table are done. We allocate one element
  87. * more as the found prime number says. This is done for more effective
  88. * indexing as explained in the comment for the hsearch function.
  89. * The contents of the table is zeroed, especially the field used
  90. * becomes zero.
  91. */
  92. int hcreate_r(size_t nel, struct hsearch_data *htab)
  93. {
  94. /* Test for correct arguments. */
  95. if (htab == NULL) {
  96. __set_errno(EINVAL);
  97. return 0;
  98. }
  99. /* There is still another table active. Return with error. */
  100. if (htab->table != NULL)
  101. return 0;
  102. /* Change nel to the first prime number not smaller as nel. */
  103. nel |= 1; /* make odd */
  104. while (!isprime(nel))
  105. nel += 2;
  106. htab->size = nel;
  107. htab->filled = 0;
  108. /* allocate memory and zero out */
  109. htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
  110. if (htab->table == NULL)
  111. return 0;
  112. /* everything went alright */
  113. return 1;
  114. }
  115. /*
  116. * hdestroy()
  117. */
  118. void hdestroy(void)
  119. {
  120. hdestroy_r(&htab);
  121. }
  122. /*
  123. * After using the hash table it has to be destroyed. The used memory can
  124. * be freed and the local static variable can be marked as not used.
  125. */
  126. void hdestroy_r(struct hsearch_data *htab)
  127. {
  128. int i;
  129. /* Test for correct arguments. */
  130. if (htab == NULL) {
  131. __set_errno(EINVAL);
  132. return;
  133. }
  134. /* free used memory */
  135. for (i = 1; i <= htab->size; ++i) {
  136. if (htab->table[i].used) {
  137. ENTRY *ep = &htab->table[i].entry;
  138. free(ep->key);
  139. free(ep->data);
  140. }
  141. }
  142. free(htab->table);
  143. /* the sign for an existing table is an value != NULL in htable */
  144. htab->table = NULL;
  145. }
  146. /*
  147. * hsearch()
  148. */
  /*
   * This is the search function. It uses double hashing with open addressing.
   * The argument item.key has to be a pointer to a zero-terminated string
   * of chars. The function for generating a number from the string is
   * simple but fast. It can be replaced by a more complex function
   * like ajw (see [Aho,Sethi,Ullman]) if the need is shown.
   *
   * We use a trick to speed up the lookup. The table is created by hcreate
   * with one more element available. This enables us to treat index zero
   * specially. This index will never be used because we store the first
   * hash index in the field "used", where zero means not used. Every other
   * value means used. The "used" field can be used as a first fast
   * comparison for equality of the stored and the parameter value. This
   * helps to prevent unnecessary expensive calls of strcmp.
   *
   * This implementation differs from the standard library version of
   * this function in a number of ways:
   *
   * - While the standard version does not make any assumptions about
   *   the type of the stored data objects at all, this implementation
   *   works with NUL terminated strings only.
   * - Instead of storing just pointers to the original objects, we
   *   create local copies so the caller does not need to care about the
   *   data any more.
   * - The standard implementation does not provide a way to update an
   *   existing entry. This version will create a new entry or update an
   *   existing one when both "action == ENTER" and "item.data != NULL".
   * - Instead of returning 1 on success, we return the index into the
   *   internal hash table, which is also guaranteed to be positive.
   *   This allows us direct access to the found hash table slot, for
   *   example for functions like hdelete().
   */
  181. ENTRY *hsearch(ENTRY item, ACTION action)
  182. {
  183. ENTRY *result;
  184. (void) hsearch_r(item, action, &result, &htab);
  185. return result;
  186. }
  187. int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
  188. struct hsearch_data *htab)
  189. {
  190. unsigned int hval;
  191. unsigned int count;
  192. unsigned int len = strlen(item.key);
  193. unsigned int idx;
  194. /* Compute an value for the given string. Perhaps use a better method. */
  195. hval = len;
  196. count = len;
  197. while (count-- > 0) {
  198. hval <<= 4;
  199. hval += item.key[count];
  200. }
  201. /*
  202. * First hash function:
  203. * simply take the modul but prevent zero.
  204. */
  205. hval %= htab->size;
  206. if (hval == 0)
  207. ++hval;
  208. /* The first index tried. */
  209. idx = hval;
  210. if (htab->table[idx].used) {
  211. /*
  212. * Further action might be required according to the
  213. * action value.
  214. */
  215. unsigned hval2;
  216. if (htab->table[idx].used == hval
  217. && strcmp(item.key, htab->table[idx].entry.key) == 0) {
  218. /* Overwrite existing value? */
  219. if ((action == ENTER) && (item.data != NULL)) {
  220. free(htab->table[idx].entry.data);
  221. htab->table[idx].entry.data =
  222. strdup(item.data);
  223. if (!htab->table[idx].entry.data) {
  224. __set_errno(ENOMEM);
  225. *retval = NULL;
  226. return 0;
  227. }
  228. }
  229. /* return found entry */
  230. *retval = &htab->table[idx].entry;
  231. return idx;
  232. }
  233. /*
  234. * Second hash function:
  235. * as suggested in [Knuth]
  236. */
  237. hval2 = 1 + hval % (htab->size - 2);
  238. do {
  239. /*
  240. * Because SIZE is prime this guarantees to
  241. * step through all available indices.
  242. */
  243. if (idx <= hval2)
  244. idx = htab->size + idx - hval2;
  245. else
  246. idx -= hval2;
  247. /*
  248. * If we visited all entries leave the loop
  249. * unsuccessfully.
  250. */
  251. if (idx == hval)
  252. break;
  253. /* If entry is found use it. */
  254. if ((htab->table[idx].used == hval)
  255. && strcmp(item.key, htab->table[idx].entry.key) == 0) {
  256. /* Overwrite existing value? */
  257. if ((action == ENTER) && (item.data != NULL)) {
  258. free(htab->table[idx].entry.data);
  259. htab->table[idx].entry.data =
  260. strdup(item.data);
  261. if (!htab->table[idx].entry.data) {
  262. __set_errno(ENOMEM);
  263. *retval = NULL;
  264. return 0;
  265. }
  266. }
  267. /* return found entry */
  268. *retval = &htab->table[idx].entry;
  269. return idx;
  270. }
  271. }
  272. while (htab->table[idx].used);
  273. }
  274. /* An empty bucket has been found. */
  275. if (action == ENTER) {
  276. /*
  277. * If table is full and another entry should be
  278. * entered return with error.
  279. */
  280. if (htab->filled == htab->size) {
  281. __set_errno(ENOMEM);
  282. *retval = NULL;
  283. return 0;
  284. }
  285. /*
  286. * Create new entry;
  287. * create copies of item.key and item.data
  288. */
  289. htab->table[idx].used = hval;
  290. htab->table[idx].entry.key = strdup(item.key);
  291. htab->table[idx].entry.data = strdup(item.data);
  292. if (!htab->table[idx].entry.key ||
  293. !htab->table[idx].entry.data) {
  294. __set_errno(ENOMEM);
  295. *retval = NULL;
  296. return 0;
  297. }
  298. ++htab->filled;
  299. /* return new entry */
  300. *retval = &htab->table[idx].entry;
  301. return 1;
  302. }
  303. __set_errno(ESRCH);
  304. *retval = NULL;
  305. return 0;
  306. }
  307. /*
  308. * hdelete()
  309. */
  310. /*
  311. * The standard implementation of hsearch(3) does not provide any way
  312. * to delete any entries from the hash table. We extend the code to
  313. * do that.
  314. */
  315. int hdelete(const char *key)
  316. {
  317. return hdelete_r(key, &htab);
  318. }
  319. int hdelete_r(const char *key, struct hsearch_data *htab)
  320. {
  321. ENTRY e, *ep;
  322. int idx;
  323. debug("hdelete: DELETE key \"%s\"\n", key);
  324. e.key = (char *)key;
  325. if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
  326. __set_errno(ESRCH);
  327. return 0; /* not found */
  328. }
  329. /* free used ENTRY */
  330. debug("hdelete: DELETING key \"%s\"\n", key);
  331. free(ep->key);
  332. free(ep->data);
  333. htab->table[idx].used = 0;
  334. --htab->filled;
  335. return 1;
  336. }
  337. /*
  338. * hexport()
  339. */
  /*
   * Export the data stored in the hash table in linearized form.
   *
   * Entries are exported as "name=value" strings, separated by an
   * arbitrary separator character (which may be NUL). This allows to
   * use this function both when formatting the U-Boot environment for
   * external storage (using '\0' as separator), but also when using it
   * for the "printenv" command to print all variables, simply by using
   * '\n' as separator. This can also be used for new features like
   * exporting the environment data as text file, including the option
   * for later re-import.
   *
   * The entries in the result list will be sorted by ascending key
   * values.
   *
   * If the separator character is different from NUL, then any
   * separator characters and backslash characters in the values will
   * be escaped by a preceding backslash in output. This is needed for
   * example to enable multi-line values, especially when the output
   * shall later be parsed (for example, for re-import).
   *
   * There are several options how the result buffer is handled:
   *
   * *resp  size
   * -----------
   *  NULL    0   A string of sufficient length will be allocated.
   *  NULL   >0   A string of the size given will be
   *              allocated. An error will be returned if the size is
   *              not sufficient. Any unused bytes in the string will
   *              be '\0'-padded.
   * !NULL    0   The user-supplied buffer will be used. No length
   *              checking will be performed, i. e. it is assumed that
   *              the buffer size will always be big enough. DANGEROUS.
   * !NULL   >0   The user-supplied buffer will be used. An error will
   *              be returned if the size is not sufficient. Any unused
   *              bytes in the string will be '\0'-padded.
   */
  377. ssize_t hexport(const char sep, char **resp, size_t size)
  378. {
  379. return hexport_r(&htab, sep, resp, size);
  380. }
  /*
   * qsort() comparison callback for an array of ENTRY pointers:
   * orders the entries by their key strings, as strcmp() does.
   */
  static int cmpkey(const void *p1, const void *p2)
  {
      ENTRY *const *lhs = p1;
      ENTRY *const *rhs = p2;

      return strcmp((*lhs)->key, (*rhs)->key);
  }
  387. ssize_t hexport_r(struct hsearch_data *htab, const char sep,
  388. char **resp, size_t size)
  389. {
  390. ENTRY *list[htab->size];
  391. char *res, *p;
  392. size_t totlen;
  393. int i, n;
  394. /* Test for correct arguments. */
  395. if ((resp == NULL) || (htab == NULL)) {
  396. __set_errno(EINVAL);
  397. return (-1);
  398. }
  399. debug("EXPORT table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
  400. htab, htab->size, htab->filled, size);
  401. /*
  402. * Pass 1:
  403. * search used entries,
  404. * save addresses and compute total length
  405. */
  406. for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
  407. if (htab->table[i].used) {
  408. ENTRY *ep = &htab->table[i].entry;
  409. list[n++] = ep;
  410. totlen += strlen(ep->key) + 2;
  411. if (sep == '\0') {
  412. totlen += strlen(ep->data);
  413. } else { /* check if escapes are needed */
  414. char *s = ep->data;
  415. while (*s) {
  416. ++totlen;
  417. /* add room for needed escape chars */
  418. if ((*s == sep) || (*s == '\\'))
  419. ++totlen;
  420. ++s;
  421. }
  422. }
  423. totlen += 2; /* for '=' and 'sep' char */
  424. }
  425. }
  426. #ifdef DEBUG
  427. /* Pass 1a: print unsorted list */
  428. printf("Unsorted: n=%d\n", n);
  429. for (i = 0; i < n; ++i) {
  430. printf("\t%3d: %p ==> %-10s => %s\n",
  431. i, list[i], list[i]->key, list[i]->data);
  432. }
  433. #endif
  434. /* Sort list by keys */
  435. qsort(list, n, sizeof(ENTRY *), cmpkey);
  436. /* Check if the user supplied buffer size is sufficient */
  437. if (size) {
  438. if (size < totlen + 1) { /* provided buffer too small */
  439. debug("### buffer too small: %d, but need %d\n",
  440. size, totlen + 1);
  441. __set_errno(ENOMEM);
  442. return (-1);
  443. }
  444. } else {
  445. size = totlen + 1;
  446. }
  447. /* Check if the user provided a buffer */
  448. if (*resp) {
  449. /* yes; clear it */
  450. res = *resp;
  451. memset(res, '\0', size);
  452. } else {
  453. /* no, allocate and clear one */
  454. *resp = res = calloc(1, size);
  455. if (res == NULL) {
  456. __set_errno(ENOMEM);
  457. return (-1);
  458. }
  459. }
  460. /*
  461. * Pass 2:
  462. * export sorted list of result data
  463. */
  464. for (i = 0, p = res; i < n; ++i) {
  465. char *s;
  466. s = list[i]->key;
  467. while (*s)
  468. *p++ = *s++;
  469. *p++ = '=';
  470. s = list[i]->data;
  471. while (*s) {
  472. if ((*s == sep) || (*s == '\\'))
  473. *p++ = '\\'; /* escape */
  474. *p++ = *s++;
  475. }
  476. *p++ = sep;
  477. }
  478. *p = '\0'; /* terminate result */
  479. return size;
  480. }
  481. /*
  482. * himport()
  483. */
  484. /*
  485. * Import linearized data into hash table.
  486. *
  487. * This is the inverse function to hexport(): it takes a linear list
  488. * of "name=value" pairs and creates hash table entries from it.
  489. *
  490. * Entries without "value", i. e. consisting of only "name" or
  491. * "name=", will cause this entry to be deleted from the hash table.
  492. *
  493. * The "flag" argument can be used to control the behaviour: when the
  494. * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
  495. * new data will be added to an existing hash table; otherwise, old
  496. * data will be discarded and a new hash table will be created.
  497. *
  498. * The separator character for the "name=value" pairs can be selected,
  499. * so we both support importing from externally stored environment
  500. * data (separated by NUL characters) and from plain text files
  501. * (entries separated by newline characters).
  502. *
  503. * To allow for nicely formatted text input, leading white space
  504. * (sequences of SPACE and TAB chars) is ignored, and entries starting
  505. * (after removal of any leading white space) with a '#' character are
  506. * considered comments and ignored.
  507. *
  508. * [NOTE: this means that a variable name cannot start with a '#'
  509. * character.]
  510. *
  511. * When using a non-NUL separator character, backslash is used as
  512. * escape character in the value part, allowing for example for
  513. * multi-line values.
  514. *
  515. * In theory, arbitrary separator characters can be used, but only
  516. * '\0' and '\n' have really been tested.
  517. */
  518. int himport(const char *env, size_t size, const char sep, int flag)
  519. {
  520. return himport_r(&htab, env, size, sep, flag);
  521. }
  522. int himport_r(struct hsearch_data *htab,
  523. const char *env, size_t size, const char sep, int flag)
  524. {
  525. char *data, *sp, *dp, *name, *value;
  526. /* Test for correct arguments. */
  527. if (htab == NULL) {
  528. __set_errno(EINVAL);
  529. return 0;
  530. }
  531. /* we allocate new space to make sure we can write to the array */
  532. if ((data = malloc(size)) == NULL) {
  533. debug("himport_r: can't malloc %d bytes\n", size);
  534. __set_errno(ENOMEM);
  535. return 0;
  536. }
  537. memcpy(data, env, size);
  538. dp = data;
  539. if ((flag & H_NOCLEAR) == 0) {
  540. /* Destroy old hash table if one exists */
  541. debug("Destroy Hash Table: %p table = %p\n", htab,
  542. htab->table);
  543. if (htab->table)
  544. hdestroy_r(htab);
  545. }
  546. /*
  547. * Create new hash table (if needed). The computation of the hash
  548. * table size is based on heuristics: in a sample of some 70+
  549. * existing systems we found an average size of 39+ bytes per entry
  550. * in the environment (for the whole key=value pair). Assuming a
  551. * size of 7 per entry (= safety factor of >5) should provide enough
  552. * safety margin for any existing environment definitons and still
  553. * allow for more than enough dynamic additions. Note that the
  554. * "size" argument is supposed to give the maximum enviroment size
  555. * (CONFIG_ENV_SIZE).
  556. */
  557. if (!htab->table) {
  558. int nent = size / 7;
  559. debug("Create Hash Table: N=%d\n", nent);
  560. if (hcreate_r(nent, htab) == 0) {
  561. free(data);
  562. return 0;
  563. }
  564. }
  565. /* Parse environment; allow for '\0' and 'sep' as separators */
  566. do {
  567. ENTRY e, *rv;
  568. /* skip leading white space */
  569. while ((*dp == ' ') || (*dp == '\t'))
  570. ++dp;
  571. /* skip comment lines */
  572. if (*dp == '#') {
  573. while (*dp && (*dp != sep))
  574. ++dp;
  575. ++dp;
  576. continue;
  577. }
  578. /* parse name */
  579. for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
  580. ;
  581. /* deal with "name" and "name=" entries (delete var) */
  582. if (*dp == '\0' || *(dp + 1) == '\0' ||
  583. *dp == sep || *(dp + 1) == sep) {
  584. if (*dp == '=')
  585. *dp++ = '\0';
  586. *dp++ = '\0'; /* terminate name */
  587. debug("DELETE CANDIDATE: \"%s\"\n", name);
  588. if (hdelete_r(name, htab) == 0)
  589. debug("DELETE ERROR ##############################\n");
  590. continue;
  591. }
  592. *dp++ = '\0'; /* terminate name */
  593. /* parse value; deal with escapes */
  594. for (value = sp = dp; *dp && (*dp != sep); ++dp) {
  595. if ((*dp == '\\') && *(dp + 1))
  596. ++dp;
  597. *sp++ = *dp;
  598. }
  599. *sp++ = '\0'; /* terminate value */
  600. ++dp;
  601. /* enter into hash table */
  602. e.key = name;
  603. e.data = value;
  604. hsearch_r(e, ENTER, &rv, htab);
  605. if (rv == NULL) {
  606. printf("himport_r: can't insert \"%s=%s\" into hash table\n", name, value);
  607. return 0;
  608. }
  609. debug("INSERT: %p ==> name=\"%s\" value=\"%s\"\n", rv, name,
  610. value);
  611. debug(" table = %p, size = %d, filled = %d\n", htab,
  612. htab->size, htab->filled);
  613. } while ((dp < data + size) && *dp); /* size check needed for text */
  614. /* without '\0' termination */
  615. free(data);
  616. return 1; /* everything OK */
  617. }