├── .gdbinit
├── .gitignore
├── .gitmodules
├── .travis.yml
├── .whitesource
├── Changes
├── Hash.xs
├── MANIFEST
├── Makefile.PL
├── README.md
├── cmph-2.0.tar.gz
├── cmph-2.0
    ├── AUTHORS
    ├── CMPH.xs
    ├── COPYING
    ├── ChangeLog
    ├── INSTALL
    ├── LGPL-2
    ├── MPL-1.1
    ├── Makefile.PL
    ├── Makefile.am
    ├── Makefile.in
    ├── NEWS
    ├── README
    ├── acinclude.m4
    ├── aclocal.m4
    ├── cmph.pc.in
    ├── cmph.spec
    ├── compile
    ├── config.guess
    ├── config.h.in
    ├── config.sub
    ├── configure
    ├── configure.ac
    ├── cxxmph.pc.in
    ├── cxxmph
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── MurmurHash3.cpp
    │   ├── MurmurHash3.h
    │   ├── benchmark.cc
    │   ├── benchmark.h
    │   ├── bm_common.cc
    │   ├── bm_common.h
    │   ├── bm_map.cc
    │   ├── cxxmph.cc
    │   ├── dense_hash_map_test.cc
    │   ├── hollow_iterator.h
    │   ├── hollow_iterator_test.cc
    │   ├── map_tester.cc
    │   ├── map_tester.h
    │   ├── map_tester_test.cc
    │   ├── mph_bits.cc
    │   ├── mph_bits.h
    │   ├── mph_bits_test.cc
    │   ├── mph_index.cc
    │   ├── mph_index.h
    │   ├── mph_index_test.cc
    │   ├── mph_map.h
    │   ├── mph_map_test.cc
    │   ├── seeded_hash.h
    │   ├── seeded_hash_test.cc
    │   ├── string_util.cc
    │   ├── string_util.h
    │   ├── string_util_test.cc
    │   ├── stringpiece.h
    │   ├── test.cc
    │   ├── test.h
    │   ├── test_test.cc
    │   ├── trigraph.cc
    │   ├── trigraph.h
    │   └── trigraph_test.cc
    ├── depcomp
    ├── examples
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── file_adapter_ex2.c
    │   ├── struct_vector_adapter_ex3.c
    │   └── vector_adapter_ex1.c
    ├── install-sh
    ├── ltmain.sh
    ├── m4
    │   ├── libtool.m4
    │   ├── ltoptions.m4
    │   ├── ltsugar.m4
    │   ├── ltversion.m4
    │   └── lt~obsolete.m4
    ├── man
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   └── cmph.1
    ├── missing
    ├── src
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── bdz.c
    │   ├── bdz.h
    │   ├── bdz_ph.c
    │   ├── bdz_ph.h
    │   ├── bdz_structs.h
    │   ├── bdz_structs_ph.h
    │   ├── bitbool.h
    │   ├── bm_numbers.c
    │   ├── bmz.c
    │   ├── bmz.h
    │   ├── bmz8.c
    │   ├── bmz8.h
    │   ├── bmz8_structs.h
    │   ├── bmz_structs.h
    │   ├── brz.c
    │   ├── brz.h
    │   ├── brz_structs.h
    │   ├── buffer_entry.c
    │   ├── buffer_entry.h
    │   ├── buffer_manager.c
    │   ├── buffer_manager.h
    │   ├── chd.c
    │   ├── chd.h
    │   ├── chd_ph.c
    │   ├── chd_ph.h
    │   ├── chd_structs.h
    │   ├── chd_structs_ph.h
    │   ├── chm.c
    │   ├── chm.h
    │   ├── chm_structs.h
    │   ├── cmph.c
    │   ├── cmph.h
    │   ├── cmph_benchmark.c
    │   ├── cmph_benchmark.h
    │   ├── cmph_structs.c
    │   ├── cmph_structs.h
    │   ├── cmph_time.h
    │   ├── cmph_types.h
    │   ├── compressed_rank.c
    │   ├── compressed_rank.h
    │   ├── compressed_seq.c
    │   ├── compressed_seq.h
    │   ├── debug.h
    │   ├── fch.c
    │   ├── fch.h
    │   ├── fch_buckets.c
    │   ├── fch_buckets.h
    │   ├── fch_structs.h
    │   ├── graph.c
    │   ├── graph.h
    │   ├── hash.c
    │   ├── hash.h
    │   ├── hash_state.h
    │   ├── jenkins_hash.c
    │   ├── jenkins_hash.h
    │   ├── linear_string_map.c
    │   ├── linear_string_map.h
    │   ├── main.c
    │   ├── miller_rabin.c
    │   ├── miller_rabin.h
    │   ├── select.c
    │   ├── select.h
    │   ├── select_lookup_tables.h
    │   ├── vqueue.c
    │   ├── vqueue.h
    │   ├── vstack.c
    │   ├── vstack.h
    │   ├── wingetopt.c
    │   └── wingetopt.h
    ├── test-driver
    └── tests
    │   ├── Makefile.am
    │   ├── Makefile.in
    │   ├── cmph_benchmark_test.c
    │   ├── compressed_rank_tests.c
    │   ├── compressed_seq_tests.c
    │   ├── graph_tests.c
    │   ├── mphf_tests.c
    │   ├── packed_mphf_tests.c
    │   └── select_tests.c
├── examples
    ├── all-bench.sh
    ├── bench.pl
    ├── epmh.py
    ├── utf8
    ├── words20
    └── words500
├── inc
    └── Devel
    │   └── CheckLib.pm
├── lib
    └── Perfect
    │   ├── Hash.pm
    │   ├── Hash
    │       ├── Bob.pm
    │       ├── C.pm
    │       ├── CMPH.pm
    │       ├── CMPH
    │       │   ├── BDZ.pm
    │       │   ├── BDZ_PH.pm
    │       │   ├── BMZ.pm
    │       │   ├── BMZ8.pm
    │       │   ├── BRZ.pm
    │       │   ├── CHD.pm
    │       │   ├── CHD_PH.pm
    │       │   ├── CHM.pm
    │       │   └── FCH.pm
    │       ├── Cuckoo.pm
    │       ├── Gperf.pm
    │       ├── Hanov.pm
    │       ├── HanovPP.pm
    │       ├── MoreHashes.pm
    │       ├── Pearson.pm
    │       ├── Pearson16.pm
    │       ├── Pearson32.pm
    │       ├── Pearson8.pm
    │       ├── PearsonNP.pm
    │       ├── Switch.pm
    │       ├── Urban.pm
    │       └── XS.pm
    │   └── PerfectHashInt.pm
├── ppport.h
├── script
    └── pperf.PL
└── t
    ├── 00basic.t
    ├── 01words.t
    ├── 02sysdict.t
    ├── 03false-positives.t
    ├── 04save_c.t
    ├── 05save_c_nul.t
    ├── 06save_c_utf8.t
    ├── 07save_c_pic.t
    ├── test.pl
    ├── z_kwalitee.t
    ├── z_meta.t
    ├── z_pod-coverage.t
    └── z_pod.t


/.gdbinit:
--------------------------------------------------------------------------------
 1 | add-auto-load-safe-path /lib/x86_64-linux-gnu/libthread_db-1.0.so
 2 | 
 3 | define sdump
 4 |   p/x *$arg0
 5 |   call Perl_sv_dump($arg0)
 6 | end
 7 | document sdump
 8 | sdump sv => p/x *sv; Perl_sv_dump(sv)
 9 | see `help tsdump`
10 | end
11 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | *.bs
 3 | *.o
 4 | .coveralls.yml
 5 | /Debian_CPANTS.txt
 6 | /Perfect-Hash-*.tar.gz
 7 | /cmph-*.tar.gz
 8 | cmph-2.0/CMPH.c
 9 | cmph-2.0/Makefile.orig
10 | cmph-2.0/TAGS
11 | cmph-2.0/autom4te.cache/
12 | examples/words
13 | script/pperf
14 | META.json
15 | META.yml
16 | MYMETA.json
17 | MYMETA.yml
18 | Makefile
19 | Makefile.old
20 | Hash.c
21 | Hash.i
22 | Hash.o
23 | Hash.obj
24 | Hash.c.gcov
25 | Hash.xs.gcov
26 | CMPH.c.gcov
27 | CMPH.xs.gcov
28 | Hash.gcda
29 | Hash.gcno
30 | pm_to_blib
31 | /blib
32 | /cover_db
33 | /perf.data
34 | /log.test-*
35 | /log.bench-*
36 | cmph-2.0/CMPH.gcda
37 | cmph-2.0/CMPH.gcno
38 | cmph-2.0/bin/
39 | cmph-2.0/blib/
40 | cmph-2.0/cmph.pc
41 | cmph-2.0/config.h
42 | cmph-2.0/config.log
43 | cmph-2.0/config.status
44 | cmph-2.0/cxxmph.pc
45 | cmph-2.0/cxxmph/.deps/
46 | cmph-2.0/examples/.deps/
47 | cmph-2.0/examples/.libs/
48 | cmph-2.0/examples/file_adapter_ex2
49 | cmph-2.0/examples/struct_vector_adapter_ex3
50 | cmph-2.0/examples/vector_adapter_ex1
51 | cmph-2.0/include/
52 | cmph-2.0/lib/
53 | cmph-2.0/libtool
54 | cmph-2.0/share/
55 | cmph-2.0/src/.deps/
56 | cmph-2.0/src/.libs/
57 | cmph-2.0/src/bdz.lo
58 | cmph-2.0/src/bdz_ph.lo
59 | cmph-2.0/src/bm_numbers
60 | cmph-2.0/src/bmz.lo
61 | cmph-2.0/src/bmz8.lo
62 | cmph-2.0/src/brz.lo
63 | cmph-2.0/src/buffer_entry.lo
64 | cmph-2.0/src/buffer_manager.lo
65 | cmph-2.0/src/chd.lo
66 | cmph-2.0/src/chd_ph.lo
67 | cmph-2.0/src/chm.lo
68 | cmph-2.0/src/cmph
69 | cmph-2.0/src/cmph.lo
70 | cmph-2.0/src/cmph_benchmark.lo
71 | cmph-2.0/src/cmph_structs.lo
72 | cmph-2.0/src/compressed_rank.lo
73 | cmph-2.0/src/compressed_seq.lo
74 | cmph-2.0/src/fch.lo
75 | cmph-2.0/src/fch_buckets.lo
76 | cmph-2.0/src/graph.lo
77 | cmph-2.0/src/hash.lo
78 | cmph-2.0/src/jenkins_hash.lo
79 | cmph-2.0/src/libcmph.la
80 | cmph-2.0/src/linear_string_map.lo
81 | cmph-2.0/src/miller_rabin.lo
82 | cmph-2.0/src/select.lo
83 | cmph-2.0/src/vqueue.lo
84 | cmph-2.0/src/vstack.lo
85 | cmph-2.0/stamp-h1
86 | cmph-2.0/tests/.deps/
87 | cmph-2.0/tests/.libs/
88 | cmph-2.0/tests/cmph_benchmark_test
89 | cmph-2.0/tests/compressed_rank_tests
90 | cmph-2.0/tests/compressed_seq_tests
91 | cmph-2.0/tests/graph_tests
92 | cmph-2.0/tests/mphf_tests
93 | cmph-2.0/tests/packed_mphf_tests
94 | cmph-2.0/tests/select_tests
95 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "jenkins-minimal-perfect-hash"]
2 | 	path = bob
3 | 	url = git://github.com/rurban/jenkins-minimal-perfect-hash.git
4 | [submodule "nbperf"]
5 | 	path = nbperf
6 | 	url = https://github.com/rurban/nbperf
7 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: "perl"
 2 | sudo: false
 3 | perl:
 4 | # - "5.6.2"
 5 |   - "5.8"
 6 |   - "5.10"
 7 |   - "5.12"
 8 |   - "5.14"
 9 |   - "5.16"
10 |   - "5.18"
11 |   - "5.20"
12 |   - "5.22"
13 |   - "5.22-thr"
14 |   - "5.22-dbg"
15 |   - "5.22-thr-dbg"
16 |   - "5.22-mb"
17 |   - "dev"
18 |   - "blead"
19 | 
20 | addons:
21 |   apt:
22 |     packages:
23 |     - gperf
24 | 
25 | # blead and 5.6 stumble over YAML and more missing dependencies
26 | # for Devel::Cover::Report::Coveralls
27 | # cpanm does not do 5.6
28 | before_install:
29 |   - mkdir /home/travis/bin || true
30 |   - ln -s `which true` /home/travis/bin/cpansign
31 |   - eval $(curl https://travis-perl.github.io/init) --auto
32 | install:
33 |   - export AUTOMATED_TESTING=1 HARNESS_TIMER=1 AUTHOR_TESTING=0 RELEASE_TESTING=0
34 |   - cpan-install --deps       # installs prereqs, including recommends
35 |   - cpan-install --coverage   # installs coverage prereqs, if enabled
36 | 
37 | before_script:
38 |   - gperf --version
39 |   - coverage-setup
40 | 
41 | notifications:
42 |   email:
43 |     on_success: change
44 |     on_failure: always
45 | 
46 | matrix:
47 |   fast_finish: true
48 |   allow_failures:
49 |     - perl: "dev"
50 |     - perl: "blead"
51 |   include:
52 |     - perl: 5.18
53 |       env: COVERAGE=1   # enables coverage+coveralls reporting
54 | 
55 | # Hack to not run on tag pushes:
56 | branches:
57 |   except:
58 |   - /^v?[0-9]+\.[0-9]+/
59 | 


--------------------------------------------------------------------------------
/.whitesource:
--------------------------------------------------------------------------------
1 | {
2 |   "generalSettings": {
3 |     "shouldScanRepo": true
4 |   },
5 |   "checkRunSettings": {
6 |     "vulnerableCheckRunConclusionLevel": "failure"
7 |   }
8 | }


--------------------------------------------------------------------------------
/Changes:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rurban/Perfect-Hash/074531b94ef68883014b15bdbe7c80bd85c45e6f/Changes


--------------------------------------------------------------------------------
/MANIFEST:
--------------------------------------------------------------------------------
 1 | bob/Makefile.PL
 2 | bob/Bob.xs
 3 | Changes
 4 | cmph-2.0.tar.gz
 5 | cmph-2.0/Makefile.PL
 6 | cmph-2.0/CMPH.xs
 7 | examples/all-bench.sh
 8 | examples/bench.pl
 9 | examples/epmh.py
10 | examples/utf8
11 | examples/words20
12 | examples/words500
13 | inc/Devel/CheckLib.pm
14 | lib/Perfect/Hash.pm
15 | lib/Perfect/Hash/Bob.pm
16 | lib/Perfect/Hash/C.pm
17 | lib/Perfect/Hash/CMPH.pm
18 | lib/Perfect/Hash/CMPH/CHM.pm
19 | lib/Perfect/Hash/CMPH/BDZ_PH.pm
20 | lib/Perfect/Hash/CMPH/BDZ.pm
21 | lib/Perfect/Hash/CMPH/BMZ.pm
22 | lib/Perfect/Hash/CMPH/BMZ8.pm
23 | lib/Perfect/Hash/CMPH/BRZ.pm
24 | lib/Perfect/Hash/CMPH/CHD.pm
25 | lib/Perfect/Hash/CMPH/CHD_PH.pm
26 | lib/Perfect/Hash/CMPH/FCH.pm
27 | lib/Perfect/Hash/Cuckoo.pm
28 | lib/Perfect/Hash/Gperf.pm
29 | lib/Perfect/Hash/Hanov.pm
30 | lib/Perfect/Hash/HanovPP.pm
31 | lib/Perfect/Hash/MoreHashes.pm
32 | lib/Perfect/Hash/Pearson.pm
33 | lib/Perfect/Hash/PearsonNP.pm
34 | lib/Perfect/Hash/Pearson16.pm
35 | lib/Perfect/Hash/Pearson32.pm
36 | lib/Perfect/Hash/Pearson8.pm
37 | lib/Perfect/Hash/Switch.pm
38 | lib/Perfect/Hash/Urban.pm
39 | lib/Perfect/Hash/XS.pm
40 | Makefile.PL
41 | MANIFEST
42 | META.json
43 | META.yml
44 | ppport.h
45 | README.md
46 | script/pperf.PL
47 | t/00basic.t
48 | t/01words.t
49 | t/02sysdict.t
50 | t/03false-positives.t
51 | t/04save_c.t
52 | t/05save_c_nul.t
53 | t/06save_c_utf8.t
54 | t/07save_c_pic.t
55 | t/z_kwalitee.t
56 | t/z_meta.t
57 | t/z_pod-coverage.t
58 | t/z_pod.t
59 | 


--------------------------------------------------------------------------------
/cmph-2.0.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rurban/Perfect-Hash/074531b94ef68883014b15bdbe7c80bd85c45e6f/cmph-2.0.tar.gz


--------------------------------------------------------------------------------
/cmph-2.0/AUTHORS:
--------------------------------------------------------------------------------
1 | Davi de Castro Reis davi@users.sourceforge.net
2 | Djamel Belazzougui db8192@users.sourceforge.net
3 | Fabiano Cupertino Botelho fc_botelho@users.sourceforge.net
4 | Nivio Ziviani nivio@dcc.ufmg.br
5 | 


--------------------------------------------------------------------------------
/cmph-2.0/CMPH.xs:
--------------------------------------------------------------------------------
  1 | /* -*- mode:C tab-width:4 -*- */
  2 | #define PERL_NO_GET_CONTEXT
  3 | #include "EXTERN.h"
  4 | #include "perl.h"
  5 | #include "XSUB.h"
  6 | 
  7 | #include "cmph.h"
  8 | 
  9 | #if PERL_VERSION < 10
 10 | #  define USE_PPPORT_H
 11 | #endif
 12 | 
 13 | #ifdef USE_PPPORT_H
 14 | #  include "../ppport.h"
 15 | #endif
 16 | 
 17 | MODULE = Perfect::Hash::CMPH	PACKAGE = Perfect::Hash::CMPH
 18 | 
 19 | SV*
 20 | _new(class, keyfile, ...)
 21 |     SV*  class
 22 |     SV*  keyfile
 23 |   CODE:
 24 |   {
 25 |     /* Build a minimal perfect hash with cmph from a file of
 26 |        newline-separated keys and return it as a blessed arrayref:
 27 |          [0] the cmph_t pointer stored as an IV,
 28 |          [1] the packed hash function as a byte string,
 29 |          [2] a hashref with every extra argument as a key set to 1.
 30 |        The algorithm is chosen from the class name, CMPH_CHM being the
 31 |        default.  Returns undef when the hash cannot be built. */
 32 |     int i;
 33 |     UV size;
 34 |     AV *result;
 35 |     HV *options;
 36 |     FILE * keys_fd = NULL;
 37 |     cmph_io_adapter_t *key_source;
 38 |     cmph_config_t *mph;
 39 |     cmph_t *mphf;
 40 |     unsigned char *packed;
 41 |     CMPH_ALGO algo = CMPH_CHM;
 42 |     /* SvPV_nolen stringifies, so a class argument without a valid
 43 |        string buffer is not read through a stale SvPVX pointer */
 44 |     const char *classname = SvPV_nolen(class);
 45 | 
 46 |     if (!SvPOK(keyfile)) {
 47 |       /* XXX support arrayrefs at least, probably created via nvecset
 48 |          and use the io_vector or io_byte_vector adapter */
 49 |       warn("CMPH only accepts filenames yet\n");
 50 |       XSRETURN_UNDEF;
 51 |     }
 52 |     keys_fd = fopen(SvPVX(keyfile), "r");
 53 |     if (!keys_fd) {
 54 |       /* never hand a NULL stream to cmph_io_nlfile_adapter */
 55 |       warn("CMPH cannot open keyfile %s\n", SvPVX(keyfile));
 56 |       XSRETURN_UNDEF;
 57 |     }
 58 |     key_source = cmph_io_nlfile_adapter(keys_fd);
 59 |     if (!strcmp(classname, "Perfect::Hash::CMPH::CHM"))         algo = CMPH_CHM;
 60 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::BMZ"))    algo = CMPH_BMZ;
 61 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::BMZ8"))   algo = CMPH_BMZ8;
 62 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::BRZ"))    algo = CMPH_BRZ;
 63 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::FCH"))    algo = CMPH_FCH;
 64 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::BDZ"))    algo = CMPH_BDZ;
 65 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::BDZ_PH")) algo = CMPH_BDZ_PH;
 66 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::CHD"))    algo = CMPH_CHD;
 67 |     else if (!strcmp(classname, "Perfect::Hash::CMPH::CHD_PH")) algo = CMPH_CHD_PH;
 68 |     mph = cmph_config_new(key_source);
 69 |     if (algo != CMPH_CHM)
 70 |       cmph_config_set_algo(mph, algo);
 71 |     mphf = cmph_new(mph);
 72 |     /* Config, adapter and stream are inputs of the construction only;
 73 |        release them on every path so that nothing leaks per object.
 74 |        NOTE(review): assumes cmph_search/cmph_pack/cmph_destroy never
 75 |        read the key source again - confirm against the cmph sources. */
 76 |     cmph_config_destroy(mph);
 77 |     cmph_io_nlfile_adapter_destroy(key_source);
 78 |     fclose(keys_fd);
 79 |     if (!mphf) {
 80 |       fprintf(stderr, "Failed to create mphf for algorithm %s", classname);
 81 |       XSRETURN_UNDEF;
 82 |     }
 83 |     size = cmph_packed_size(mphf);
 84 |     if (!size) {
 85 |       fprintf(stderr, "Failed to calculate cmph_packed_size for algorithm %s", classname);
 86 |       cmph_destroy(mphf);              /* do not leak the unusable hash */
 87 |       XSRETURN_UNDEF;
 88 |     }
 89 |     Newx(packed, size, unsigned char);
 90 |     cmph_pack(mphf, packed);
 91 |     result = newAV();
 92 |     av_push(result, newSViv(PTR2IV(mphf)));                  /* mphf in [0] */
 93 |     av_push(result, newSVpvn((char *)packed, size));       /* packed in [1] */
 94 |     Safefree(packed);                  /* newSVpvn made its own copy */
 95 |     options = newHV();
 96 |     for (i=2; i<items; i++) { /* CHECKME */
 97 |       SV *flag = newSViv(1);
 98 |       if (!hv_store_ent(options, ST(i), flag, 0))
 99 |         SvREFCNT_dec(flag);            /* not stored, so still ours */
100 |     }
101 |     /* newRV_noinc: the array takes over the only reference to options */
102 |     av_push(result, newRV_noinc((SV*)options));           /* options at [2] */
103 |     RETVAL = sv_bless(newRV_noinc((SV*)result), gv_stashpv(classname, GV_ADDWARN));
104 |   }
105 | OUTPUT:
106 |     RETVAL
107 | 
108 | IV
109 | perfecthash(ph, key)
110 |     SV*  ph
111 |     SV*  key
112 | PREINIT:
113 |     SV **slot;
114 |     cmph_t *mphf;
115 |     STRLEN keylen;
116 |     char *keystr;
117 | CODE:
118 |     /* Return the value cmph_search computes for key, using the cmph_t
119 |        pointer kept in element [0] of the object array.  Dies when ph is
120 |        not an array reference or holds no cmph_t pointer. */
121 |     if (!SvROK(ph) || SvTYPE(SvRV(ph)) != SVt_PVAV)
122 |       croak("Not a Perfect::Hash::CMPH object");
123 |     slot = av_fetch((AV*)SvRV(ph), 0, 0);
124 |     mphf = slot ? INT2PTR(cmph_t *, SvIV(*slot)) : NULL;
125 |     if (!mphf) die ("Empty cmph");
126 |     /* SvPV stringifies the key and yields its real length; SvPVX/SvCUR
127 |        are only valid for scalars that already hold a string */
128 |     keystr = SvPV(key, keylen);
129 |     RETVAL = cmph_search(mphf, keystr, (cmph_uint32)keylen);
130 | OUTPUT:
131 |     RETVAL
132 | 
133 | void
134 | DESTROY(ph)
135 |     SV*  ph
136 | PREINIT:
137 |     SV **slot;
138 |     cmph_t *mphf;
139 | CODE:
140 |     /* Free the cmph_t whose pointer is kept in element [0] of the object
141 |        array.  Anything that is not an array reference is ignored, since
142 |        a destructor should not throw. */
143 |     if (!SvROK(ph) || SvTYPE(SvRV(ph)) != SVt_PVAV)
144 |       XSRETURN_EMPTY;
145 |     slot = av_fetch((AV*)SvRV(ph), 0, 0);
146 |     if (!slot)
147 |       XSRETURN_EMPTY;
148 |     mphf = INT2PTR(cmph_t *, SvIV(*slot));
149 |     if (mphf) {
150 |       cmph_destroy(mphf);
151 |       sv_setiv(*slot, 0);              /* a repeated DESTROY must not free twice */
152 |     }
153 | 


--------------------------------------------------------------------------------
/cmph-2.0/COPYING:
--------------------------------------------------------------------------------
1 | The code of the cmph library is dual licensed under the LGPL version 2 and MPL
2 | 1.1 licenses. Please refer to the LGPL-2 and MPL-1.1 files in the repository
3 | for the full description of each of the licenses.
4 | 
5 | For cxxmph, the files stringpiece.h and MurmurHash2 are covered by the BSD and MIT licenses, respectively.
6 | 


--------------------------------------------------------------------------------
/cmph-2.0/Makefile.PL:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | use ExtUtils::MakeMaker;
 4 | use Config;
 5 | 
 6 | # Build script for the Perfect::Hash::CMPH XS glue.  It first builds the
 7 | # bundled cmph-2.0 library into this directory (bin/, lib/, include/) and
 8 | # then writes the Makefile for CMPH.xs, linked against that local build.
 9 | 
10 | # EUMM versions are compared numerically; developer releases such as
11 | # "6.57_01" carry an underscore that has to be removed first.
12 | (my $EUMM_VER = $ExtUtils::MakeMaker::VERSION) =~ tr/_//d;
13 | 
14 | my @extras = ();
15 | # $] and $^O are used directly: the long names $PERL_VERSION and $OSNAME
16 | # only exist after "use English" and would be undef here.
17 | push(@extras, CAPI => 'TRUE')
18 |   if (($] >= 5.005) and ($^O eq 'MSWin32')
19 |       and ($Config{archname} =~ /-object\b/i));
20 | 
21 | push(@extras,
22 |      ABSTRACT => "XS interface to cmph",
23 |      AUTHOR   => 'Reini Urban <rurban@cpan.org>')
24 |   if $EUMM_VER > 5.4301;
25 | push(@extras,
26 |      META_MERGE => {
27 |        license => [ 'mozilla_1_1', 'lgpl_2_1' ]
28 |      })
29 |   if $EUMM_VER > 6.46;
30 | 
31 | # Run one build command; report a failure and return false instead of dying.
32 | sub run_step {
33 |   my @cmd = @_;
34 |   return 1 if system(@cmd) == 0;
35 |   warn "'@cmd' failed (status $?)\n";
36 |   return 0;
37 | }
38 | 
39 | #TODO: integrate into proper build and depend rules
40 | if (! -e "bin/cmph".$Config{exe_ext}) {
41 |   print "building cmph-2.0\n";
42 |   # A failing autoreconf is tolerated: a configure script is shipped.
43 |   run_step("autoreconf -i");
44 |   # Stop at the first failing step; later steps depend on earlier ones.
45 |   run_step("./configure --prefix=`pwd`")
46 |     and run_step($Config{'make'}, "-s")
47 |     and run_step($Config{'make'}, "-s", "install");
48 |   # Keep the autotools Makefile aside; WriteMakefile creates its own.
49 |   rename "Makefile", "Makefile.orig" if -e "Makefile";
50 | }
51 | 
52 | WriteMakefile(
53 |   NAME         => 'Perfect::Hash::CMPH',
54 |   XS           => { 'CMPH.xs' => 'CMPH.c' },
55 |   XSPROTOARG   => '-noprototypes',
56 |   INC          => '-Iinclude',
57 |   LIBS         => '-Llib -lcmph',
58 |   VERSION_FROM => '../lib/Perfect/Hash/CMPH.pm',
59 |   clean => { FILES => "*.gcov *.gcda *.gcno" },
60 |   @extras
61 | );
62 | 


--------------------------------------------------------------------------------
/cmph-2.0/Makefile.am:
--------------------------------------------------------------------------------
1 | SUBDIRS = src tests examples man $(CXXMPH)
2 | EXTRA_DIST = cmph.spec configure.ac cmph.pc.in cxxmph.pc.in LGPL-2 MPL-1.1
3 | pkgconfig_DATA = cmph.pc
4 | if USE_CXXMPH
5 |   pkgconfig_DATA += cxxmph.pc
6 | endif
7 | 
8 | pkgconfigdir = $(libdir)/pkgconfig
9 | 


--------------------------------------------------------------------------------
/cmph-2.0/NEWS:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rurban/Perfect-Hash/074531b94ef68883014b15bdbe7c80bd85c45e6f/cmph-2.0/NEWS


--------------------------------------------------------------------------------
/cmph-2.0/cmph.pc.in:
--------------------------------------------------------------------------------
 1 | url=http://cmph.sourceforge.net/
 2 | prefix=@prefix@
 3 | exec_prefix=@exec_prefix@
 4 | libdir=@libdir@
 5 | includedir=@includedir@
 6 | 
 7 | Name: cmph
 8 | Description: minimal perfect hashing library
 9 | Version: @VERSION@
10 | Libs: -L${libdir} -lcmph
11 | Cflags: -I${includedir} 
12 | URL: ${url}
13 | 


--------------------------------------------------------------------------------
/cmph-2.0/cmph.spec:
--------------------------------------------------------------------------------
 1 | %define name cmph
 2 | %define version 0.4
 3 | %define release 3
 4 | 
 5 | Name: %{name}
 6 | Version: %{version}
 7 | Release: %{release}
 8 | Summary: C Minimal perfect hash library
 9 | Source: %{name}-%{version}.tar.gz
10 | License: Proprietary
11 | URL: http://www.akwan.com.br
12 | BuildArch: i386
13 | Group: Sitesearch
14 | BuildRoot: %{_tmppath}/%{name}-root
15 | 
16 | %description
17 | C Minimal perfect hash library
18 | 
19 | %prep
20 | rm -Rf $RPM_BUILD_ROOT
21 | rm -rf $RPM_BUILD_ROOT
22 | %setup
23 | mkdir $RPM_BUILD_ROOT
24 | mkdir $RPM_BUILD_ROOT/usr
25 | CXXFLAGS="-O2" ./configure --prefix=/usr/
26 | 
27 | %build
28 | make
29 | 
30 | %install
31 | DESTDIR=$RPM_BUILD_ROOT make install
32 | 
33 | %files
34 | %defattr(755,root,root)
35 | /
36 | 
37 | %changelog
38 | * Tue Jun 1 2004 Davi de Castro Reis <davi@akwan.com.br>
39 | + Initial build
40 | 


--------------------------------------------------------------------------------
/cmph-2.0/config.h.in:
--------------------------------------------------------------------------------
 1 | /* config.h.in.  Generated from configure.ac by autoheader.  */
 2 | 
 3 | /* Define to 1 if you have the <dlfcn.h> header file. */
 4 | #undef HAVE_DLFCN_H
 5 | 
 6 | /* Define to 1 if you have the <getopt.h> header file. */
 7 | #undef HAVE_GETOPT_H
 8 | 
 9 | /* Define to 1 if you have the <inttypes.h> header file. */
10 | #undef HAVE_INTTYPES_H
11 | 
12 | /* Define to 1 if you have the `check' library (-lcheck). */
13 | #undef HAVE_LIBCHECK
14 | 
15 | /* Define to 1 if you have the <math.h> header file. */
16 | #undef HAVE_MATH_H
17 | 
18 | /* Define to 1 if you have the <memory.h> header file. */
19 | #undef HAVE_MEMORY_H
20 | 
21 | /* Define if g++ supports C++0x features. */
22 | #undef HAVE_STDCXX_0X
23 | 
24 | /* Define to 1 if you have the <stdint.h> header file. */
25 | #undef HAVE_STDINT_H
26 | 
27 | /* Define to 1 if you have the <stdlib.h> header file. */
28 | #undef HAVE_STDLIB_H
29 | 
30 | /* Define to 1 if you have the <strings.h> header file. */
31 | #undef HAVE_STRINGS_H
32 | 
33 | /* Define to 1 if you have the <string.h> header file. */
34 | #undef HAVE_STRING_H
35 | 
36 | /* Define to 1 if you have the <sys/stat.h> header file. */
37 | #undef HAVE_SYS_STAT_H
38 | 
39 | /* Define to 1 if you have the <sys/types.h> header file. */
40 | #undef HAVE_SYS_TYPES_H
41 | 
42 | /* Define to 1 if you have the <unistd.h> header file. */
43 | #undef HAVE_UNISTD_H
44 | 
45 | /* Define to the sub-directory where libtool stores uninstalled libraries. */
46 | #undef LT_OBJDIR
47 | 
48 | /* Name of package */
49 | #undef PACKAGE
50 | 
51 | /* Define to the address where bug reports for this package should be sent. */
52 | #undef PACKAGE_BUGREPORT
53 | 
54 | /* Define to the full name of this package. */
55 | #undef PACKAGE_NAME
56 | 
57 | /* Define to the full name and version of this package. */
58 | #undef PACKAGE_STRING
59 | 
60 | /* Define to the one symbol short name of this package. */
61 | #undef PACKAGE_TARNAME
62 | 
63 | /* Define to the home page for this package. */
64 | #undef PACKAGE_URL
65 | 
66 | /* Define to the version of this package. */
67 | #undef PACKAGE_VERSION
68 | 
69 | /* Define to 1 if you have the ANSI C header files. */
70 | #undef STDC_HEADERS
71 | 
72 | /* Version number of package */
73 | #undef VERSION
74 | 
75 | /* Number of bits in a file offset, on hosts where this is settable. */
76 | #undef _FILE_OFFSET_BITS
77 | 
78 | /* Define to make fseeko etc. visible, on some hosts. */
79 | #undef _LARGEFILE_SOURCE
80 | 
81 | /* Define for large files, on AIX-style hosts. */
82 | #undef _LARGE_FILES
83 | 


--------------------------------------------------------------------------------
/cmph-2.0/configure.ac:
--------------------------------------------------------------------------------
 1 | dnl Process this file with autoconf to produce a configure script.
 2 | AC_INIT
 3 | AC_CONFIG_SRCDIR([Makefile.am])
 4 | AM_INIT_AUTOMAKE(cmph, 2.0)
 5 | AC_CONFIG_HEADERS([config.h])
 6 | AC_CONFIG_MACRO_DIR([m4])
 7 | 
 8 | dnl Checks for programs.
 9 | AC_PROG_AWK
10 | AC_PROG_CC
11 | AC_PROG_INSTALL
12 | AC_PROG_LN_S
13 | LT_INIT
14 | AC_SYS_EXTRA_LARGEFILE
15 | if test "x$ac_cv_sys_largefile_CFLAGS" = "xno" ; then
16 | 	ac_cv_sys_largefile_CFLAGS=""
17 | fi
18 | if test "x$ac_cv_sys_largefile_LDFLAGS" = "xno" ; then
19 | 	ac_cv_sys_largefile_LDFLAGS=""
20 | fi
21 | if test "x$ac_cv_sys_largefile_LIBS" = "xno" ; then
22 | 	ac_cv_sys_largefile_LIBS=""
23 | fi
24 | CFLAGS="$ac_cv_sys_largefile_CFLAGS $CFLAGS"
25 | LDFLAGS="$ac_cv_sys_largefile_LDFLAGS $LDFLAGS"
26 | LIBS="$LIBS $ac_cv_sys_largefile_LIBS"
27 | 
28 | dnl Checks for headers
29 | AC_CHECK_HEADERS([getopt.h math.h])
30 | 
31 | dnl Checks for libraries.
32 | LT_LIB_M  
33 | LDFLAGS="$LIBM $LDFLAGS"
34 | CFLAGS="-Wall"
35 | 
36 | AC_PROG_CXX
37 | CXXFLAGS="-Wall -Wno-unused-function -DNDEBUG -O3 -fomit-frame-pointer $CXXFLAGS"
38 | AC_ENABLE_CXXMPH
39 | if test x$cxxmph = xtrue; then
40 |   AC_COMPILE_STDCXX_0X
41 |   if test x$ac_cv_cxx_compile_cxx0x_native = "xno"; then
42 |     if test x$ac_cv_cxx_compile_cxx0x_cxx = "xyes"; then
43 |       CXXFLAGS="$CXXFLAGS -std=c++0x"
44 |     elif test x$ac_cv_cxx_compile_cxx0x_gxx = "xyes"; then
45 |       CXXFLAGS="$CXXFLAGS -std=gnu++0x"
46 |     else
47 |       AC_MSG_ERROR("cxxmph demands a working c++0x compiler.")
48 |     fi
49 |   fi
50 |   AC_SUBST([CXXMPH], "cxxmph")
51 | fi
52 | AM_CONDITIONAL([USE_CXXMPH], [test "$cxxmph" = true])
53 | 
54 | # Unit tests based on the check library. Disabled by default.
55 | # We do not use pkg-config because it is inconvenient for all developers to
56 | # have check library installed.
57 | AC_ARG_ENABLE(check, AS_HELP_STRING(
58 |      [--enable-check],
59 |      [Build unit tests depending on check library (default: disabled)]))
60 | AS_IF([test "x$enable_check" = "xyes"],
61 |   [ AC_CHECK_LIB([check], [tcase_create])
62 |     AS_IF([test "$ac_cv_lib_check_tcase_create" = yes], [CHECK_LIBS="-lcheck"],
63 |           [AC_MSG_ERROR("Failed to find check library (http://check.sf.net).")])
64 |     AC_CHECK_HEADER(check.h,[],
65 |           [AC_MSG_ERROR("Failed to find check library header (http://check.sf.net).")])
66 |   ])
67 | AM_CONDITIONAL([USE_LIBCHECK], [test "$ac_cv_lib_check_tcase_create" = yes])
68 | AC_SUBST(CHECK_LIBS)
69 | AC_SUBST(CHECK_CFLAGS)
70 | 
71 | AC_CHECK_SPOON
72 | AC_CONFIG_FILES([Makefile src/Makefile cxxmph/Makefile tests/Makefile examples/Makefile man/Makefile cmph.pc cxxmph.pc])
73 | AC_OUTPUT
74 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph.pc.in:
--------------------------------------------------------------------------------
 1 | url=http://cmph.sourceforge.net/
 2 | prefix=@prefix@
 3 | exec_prefix=@exec_prefix@
 4 | libdir=@libdir@
 5 | includedir=@includedir@
 6 | 
 7 | Name: cxxmph
 8 | Description: minimal perfect hashing c++11 library
 9 | Version: @VERSION@
10 | Libs: -L${libdir} -lcxxmph
11 | Cflags: -std=c++0x -I${includedir}
12 | URL: ${url}
13 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/Makefile.am:
--------------------------------------------------------------------------------
 1 | TESTS = $(check_PROGRAMS)
 2 | check_PROGRAMS = seeded_hash_test mph_bits_test hollow_iterator_test mph_index_test trigraph_test
 3 | if USE_LIBCHECK
 4 |   check_PROGRAMS += test_test map_tester_test mph_map_test dense_hash_map_test string_util_test
 5 |   check_LTLIBRARIES = libcxxmph_test.la
 6 | endif
 7 | 
 8 | noinst_PROGRAMS = bm_map # bm_index - disabled because of cmph dependency
 9 | bin_PROGRAMS = cxxmph
10 | 
11 | cxxmph_includedir = $(includedir)/cxxmph/
12 | cxxmph_include_HEADERS = mph_bits.h mph_map.h mph_index.h MurmurHash3.h trigraph.h seeded_hash.h stringpiece.h hollow_iterator.h string_util.h
13 | 
14 | noinst_LTLIBRARIES = libcxxmph_bm.la
15 | lib_LTLIBRARIES = libcxxmph.la
16 | libcxxmph_la_SOURCES = MurmurHash3.cpp trigraph.cc mph_bits.cc mph_index.cc benchmark.h benchmark.cc string_util.cc
17 | libcxxmph_la_LDFLAGS = -version-info 0:0:0
18 | libcxxmph_test_la_SOURCES = test.h test.cc
19 | libcxxmph_test_la_LIBADD = libcxxmph.la
20 | libcxxmph_bm_la_SOURCES = benchmark.h benchmark.cc bm_common.h bm_common.cc
21 | libcxxmph_bm_la_LIBADD = libcxxmph.la
22 | 
23 | test_test_SOURCES = test_test.cc
24 | test_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)
25 | 
26 | mph_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)
27 | mph_map_test_SOURCES = mph_map_test.cc
28 | dense_hash_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS)
29 | dense_hash_map_test_SOURCES = dense_hash_map_test.cc
30 | 
31 | mph_index_test_LDADD   = libcxxmph.la
32 | mph_index_test_SOURCES = mph_index_test.cc
33 | 
34 | trigraph_test_LDADD   = libcxxmph.la
35 | trigraph_test_SOURCES = trigraph_test.cc
36 | 
37 | # Bad dependency, do not compile by default.
38 | # bm_index_LDADD = libcxxmph_bm.la -lcmph
39 | # bm_index_SOURCES = bm_index.cc 
40 | 
41 | bm_map_LDADD = libcxxmph_bm.la
42 | bm_map_SOURCES = bm_map.cc
43 | 
44 | cxxmph_LDADD   = libcxxmph.la
45 | cxxmph_SOURCES = cxxmph.cc
46 | 
47 | hollow_iterator_test_SOURCES = hollow_iterator_test.cc
48 | 
49 | seeded_hash_test_SOURCES = seeded_hash_test.cc
50 | seeded_hash_test_LDADD   = libcxxmph.la
51 | 
52 | mph_bits_test_SOURCES = mph_bits_test.cc
53 | mph_bits_test_LDADD   = libcxxmph.la
54 | 
55 | string_util_test_SOURCES = string_util_test.cc
56 | string_util_test_LDADD   = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS)
57 | 
58 | map_tester_test_SOURCES = map_tester.h map_tester.cc map_tester_test.cc
59 | map_tester_test_LDADD = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS)
60 | 
61 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/MurmurHash3.h:
--------------------------------------------------------------------------------
 1 | //-----------------------------------------------------------------------------
 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
 3 | // domain. The author hereby disclaims copyright to this source code.
 4 | 
 5 | #ifndef _MURMURHASH3_H_
 6 | #define _MURMURHASH3_H_
 7 | 
 8 | //-----------------------------------------------------------------------------
 9 | // Platform-specific functions and macros
10 | 
11 | // Microsoft Visual Studio
12 | 
13 | #if defined(_MSC_VER)
14 | 
15 | typedef unsigned char uint8_t;
16 | typedef unsigned long uint32_t;
17 | typedef unsigned __int64 uint64_t;
18 | 
19 | // Other compilers
20 | 
21 | #else	// defined(_MSC_VER)
22 | 
23 | #include <stdint.h>
24 | 
25 | #endif // !defined(_MSC_VER)
26 | 
27 | //-----------------------------------------------------------------------------
28 | 
29 | void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
30 | 
31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
32 | 
33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
34 | 
35 | //-----------------------------------------------------------------------------
36 | 
37 | #endif // _MURMURHASH3_H_
38 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/benchmark.cc:
--------------------------------------------------------------------------------
#include "benchmark.h"

#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <memory>
#include <sys/time.h>
#include <sys/resource.h>

#ifdef HAVE_CXA_DEMANGLE
#include <cxxabi.h>
#endif

#include <iomanip>
#include <iostream>
#include <sstream>
#include <vector>
 14 | 
 15 | using std::cerr;
 16 | using std::cout;
 17 | using std::endl;
 18 | using std::setfill;
 19 | using std::setw;
 20 | using std::string;
 21 | using std::ostringstream;
 22 | using std::vector;
 23 | 
 24 | namespace {
 25 | 
 26 | /* Subtract the `struct timeval' values X and Y,
 27 |    storing the result in RESULT.
 28 |    Return 1 if the difference is negative, otherwise 0.  */
 29 | int timeval_subtract ( 
 30 |     struct timeval *result, struct timeval *x, struct timeval* y) {
 31 |   /* Perform the carry for the later subtraction by updating y. */
 32 |   if (x->tv_usec < y->tv_usec) {
 33 |     int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
 34 |     y->tv_usec -= 1000000 * nsec;
 35 |     y->tv_sec += nsec;
 36 |   }
 37 |   if (x->tv_usec - y->tv_usec > 1000000) {
 38 |     int nsec = (x->tv_usec - y->tv_usec) / 1000000;
 39 |     y->tv_usec += 1000000 * nsec;
 40 |     y->tv_sec -= nsec;
 41 |   }
 42 | 
 43 |   /* Compute the time remaining to wait.
 44 |      tv_usec is certainly positive. */
 45 |   result->tv_sec = x->tv_sec - y->tv_sec;
 46 |   result->tv_usec = x->tv_usec - y->tv_usec;
 47 | 
 48 |   /* Return 1 if result is negative. */
 49 |   return x->tv_sec < y->tv_sec;
 50 | }
 51 | 
 52 | // C++ iostream is terrible for formatting.
 53 | string timeval_to_string(timeval tv) {
 54 |   ostringstream out;
 55 |   out << setfill(' ') << setw(3) << tv.tv_sec << '.';
 56 |   out << setfill('0') << setw(6) << tv.tv_usec;
 57 |   return out.str();
 58 | }
 59 | 
 60 | struct rusage getrusage_or_die() {
 61 |   struct rusage rs;
 62 |   int ret = getrusage(RUSAGE_SELF, &rs);
 63 |   if (ret != 0) {
 64 |     cerr << "rusage failed: " << strerror(errno) << endl;
 65 |     exit(-1);
 66 |   }
 67 |   return rs;
 68 | }
 69 | 
 70 | struct timeval gettimeofday_or_die() {
 71 |   struct timeval tv;
 72 |   int ret = gettimeofday(&tv, NULL); 
 73 |   if (ret != 0) {
 74 |     cerr << "gettimeofday failed: " << strerror(errno) << endl;
 75 |     exit(-1);
 76 |   }
 77 |   return tv;
 78 | }
 79 | 
 80 | #ifdef HAVE_CXA_DEMANGLE
 81 | string demangle(const string& name) {
 82 |   char buf[1024];
 83 |   unsigned int size = 1024;
 84 |   int status;
 85 |   char* res = abi::__cxa_demangle(
 86 |      name.c_str(), buf, &size, &status);
 87 |   return res;
 88 | }
 89 | #else
 90 | string demangle(const string& name) { return name; }
 91 | #endif
 92 |  
 93 | 
 94 | static vector<cxxmph::Benchmark*> g_benchmarks;
 95 | 
 96 | }  // anonymous namespace
 97 | 
 98 | namespace cxxmph {
 99 | 
100 | /* static */ void Benchmark::Register(Benchmark* bm) {
101 |   if (bm->name().empty()) {
102 |     string name = demangle(typeid(*bm).name());
103 |     bm->set_name(name);
104 |   }
105 |   g_benchmarks.push_back(bm);
106 | }
107 | 
108 | /* static */ void Benchmark::RunAll() {
109 |   for (uint32_t i = 0; i < g_benchmarks.size(); ++i) {
110 |     std::auto_ptr<Benchmark> bm(g_benchmarks[i]);
111 |     if (!bm->SetUp()) {
112 |       cerr << "Set up phase for benchmark "
113 |            << bm->name() << " failed." << endl;
114 |       continue;
115 |     }
116 |     bm->MeasureRun();
117 |     bm->TearDown(); 
118 |   }
119 | }
120 | 
121 | void Benchmark::MeasureRun() {
122 |   struct timeval walltime_begin = gettimeofday_or_die();
123 |   struct rusage begin = getrusage_or_die();
124 |   Run();
125 |   struct rusage end = getrusage_or_die();
126 |   struct timeval walltime_end = gettimeofday_or_die();
127 | 
128 |   struct timeval utime;
129 |   timeval_subtract(&utime, &end.ru_utime, &begin.ru_utime);
130 |   struct timeval stime;
131 |   timeval_subtract(&stime, &end.ru_stime, &begin.ru_stime);
132 |   struct timeval wtime;
133 |   timeval_subtract(&wtime, &walltime_end, &walltime_begin);
134 | 
135 |   cout << "Benchmark: " << name_ << endl;
136 |   cout << "CPU User time  : " << timeval_to_string(utime) << endl;
137 |   cout << "CPU System time: " << timeval_to_string(stime) << endl;
138 |   cout << "Wall clock time: " << timeval_to_string(wtime) << endl;
139 |   cout << endl;
140 | }
141 | 
142 | }  // namespace cxxmph
143 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/benchmark.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CXXMPH_BENCHMARK_H__
 2 | #define __CXXMPH_BENCHMARK_H__
 3 | 
 4 | #include <string>
 5 | #include <typeinfo>
 6 | 
 7 | namespace cxxmph {
 8 | 
 9 | class Benchmark {
10 |  public:
11 |   Benchmark() {}
12 |   virtual ~Benchmark() {}
13 | 
14 |   const std::string& name() { return name_; }
15 |   void set_name(const std::string& name) { name_ = name; }
16 | 
17 |   static void Register(Benchmark* bm);
18 |   static void RunAll();
19 | 
20 |  protected:
21 |   virtual bool SetUp() { return true; }; 
22 |   virtual void Run() = 0;
23 |   virtual bool TearDown() { return true; };
24 | 
25 |  private:
26 |   std::string name_;
27 |   void MeasureRun();
28 | };
29 | 
30 | }  // namespace cxxmph
31 | 
32 | #endif
33 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/bm_common.cc:
--------------------------------------------------------------------------------
 1 | #include <cmath>
 2 | #include <fstream>
 3 | #include <limits>
 4 | #include <iostream>
 5 | #include <set>
 6 | 
 7 | #include "bm_common.h"
 8 | 
 9 | using std::cerr;
10 | using std::endl;
11 | using std::set;
12 | using std::string;
13 | using std::vector;
14 | 
15 | namespace cxxmph {
16 |   
17 | UrlsBenchmark::~UrlsBenchmark() {}
18 | bool UrlsBenchmark::SetUp() {
19 |   vector<string> urls;
20 |   std::ifstream f(urls_file_.c_str());
21 |   if (!f.is_open()) {
22 |     cerr << "Failed to open urls file " << urls_file_ << endl;
23 |     return false;
24 |   }
25 |   string buffer;
26 |   while(std::getline(f, buffer)) urls.push_back(buffer);
27 |   set<string> unique(urls.begin(), urls.end());
28 |   if (unique.size() != urls.size()) {
29 |     cerr << "Input file has repeated keys." << endl;
30 |     return false;
31 |   }
32 |   urls.swap(urls_);
33 |   return true;
34 | }
35 | 
36 | SearchUrlsBenchmark::~SearchUrlsBenchmark() {}
37 | bool SearchUrlsBenchmark::SetUp() {
38 |   if (!UrlsBenchmark::SetUp()) return false;
39 |   int32_t miss_ratio_int32 = std::numeric_limits<int32_t>::max() * miss_ratio_;
40 |   forced_miss_urls_.resize(nsearches_);
41 |   random_.resize(nsearches_);
42 |   for (uint32_t i = 0; i < nsearches_; ++i) {
43 |     random_[i] = urls_[random() % urls_.size()];
44 |     if (random() < miss_ratio_int32) {
45 |       forced_miss_urls_[i] = random_[i].as_string() + ".force_miss";
46 |       random_[i] = forced_miss_urls_[i];
47 |     }
48 |   }
49 |   return true;
50 | }
51 | 
52 | Uint64Benchmark::~Uint64Benchmark() {}
53 | bool Uint64Benchmark::SetUp() {
54 |   set<uint64_t> unique;
55 |   for (uint32_t i = 0; i < count_; ++i) {
56 |     uint64_t v;
57 |     do { v = random(); } while (unique.find(v) != unique.end());
58 |     values_.push_back(v);
59 |     unique.insert(v);
60 |   }
61 |   return true;
62 | }
63 | 
64 | SearchUint64Benchmark::~SearchUint64Benchmark() {}
65 | bool SearchUint64Benchmark::SetUp() {
66 |   if (!Uint64Benchmark::SetUp()) return false;
67 |   random_.resize(nsearches_);
68 |   for (uint32_t i = 0; i < nsearches_; ++i) {
69 |     uint32_t pos = random() % values_.size();
70 |     random_[i] = values_[pos];
71 |   }
72 |   return true;
73 | }
74 | 
75 | }  // namespace cxxmph 
76 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/bm_common.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CXXMPH_BM_COMMON_H__
 2 | #define __CXXMPH_BM_COMMON_H__
 3 | 
 4 | #include "stringpiece.h"
 5 | 
 6 | #include <string>
 7 | #include <vector>
 8 | #include <unordered_map>  // std::hash
 9 | #include "MurmurHash3.h"
10 | 
11 | #include "benchmark.h"
12 | 
13 | namespace std {
14 | template <> struct hash<cxxmph::StringPiece> {
15 |   uint32_t operator()(const cxxmph::StringPiece& k) const {
16 |     uint32_t out;
17 |     MurmurHash3_x86_32(k.data(), k.length(), 1, &out);
18 |     return out;
19 |   }
20 | };
21 | }  // namespace std
22 | 
23 | namespace cxxmph {
24 | 
25 | class UrlsBenchmark : public Benchmark {
26 |  public:
27 |   UrlsBenchmark(const std::string& urls_file) : urls_file_(urls_file) { }
28 |   virtual  ~UrlsBenchmark();
29 |  protected:
30 |   virtual bool SetUp();
31 |   const std::string urls_file_;
32 |   std::vector<std::string> urls_;
33 | };
34 | 
35 | class SearchUrlsBenchmark : public UrlsBenchmark {
36 |  public:
37 |   SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches, float miss_ratio)
38 |       : UrlsBenchmark(urls_file), nsearches_(nsearches), miss_ratio_(miss_ratio) {}
39 |  virtual ~SearchUrlsBenchmark();
40 |  protected:
41 |   virtual bool SetUp();
42 |   const uint32_t nsearches_; 
43 |   float miss_ratio_;
44 |   std::vector<std::string> forced_miss_urls_;
45 |   std::vector<StringPiece> random_;
46 | };
47 | 
48 | class Uint64Benchmark : public Benchmark {
49 |  public:
50 |   Uint64Benchmark(uint32_t count) : count_(count) { }
51 |   virtual ~Uint64Benchmark();
52 |   virtual void Run() {}
53 |  protected:
54 |   virtual bool SetUp();
55 |   const uint32_t count_;
56 |   std::vector<uint64_t> values_;
57 | };
58 | 
59 | class SearchUint64Benchmark : public Uint64Benchmark {
60 |  public:
61 |   SearchUint64Benchmark(uint32_t count, uint32_t nsearches)
62 |       : Uint64Benchmark(count), nsearches_(nsearches) { }
63 |   virtual ~SearchUint64Benchmark();
64 |   virtual void Run() {};
65 |  protected:
66 |   virtual bool SetUp();
67 |   const uint32_t nsearches_;
68 |   std::vector<uint64_t> random_;
69 | };
70 | 
71 | }  // namespace cxxmph
72 | 
73 | #endif  // __CXXMPH_BM_COMMON_H__
74 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/cxxmph.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2010 Google Inc. All Rights Reserved.
 2 | // Author: davi@google.com (Davi Reis)
 3 | 
 4 | #include <getopt.h>
 5 | 
 6 | #include <fstream>
 7 | #include <iostream>
 8 | #include <string>
 9 | #include <vector>
10 | 
11 | #include "mph_map.h"
12 | #include "config.h"
13 | 
14 | using std::cerr;
15 | using std::cout;
16 | using std::endl;
17 | using std::getline;
18 | using std::ifstream;
19 | using std::string;
20 | using std::vector;
21 | 
22 | using cxxmph::mph_map;
23 | 
// Writes the one-line command synopsis for program `prg` to stderr.
void usage(const char* prg) {
  static const char kArguments[] = " [-v] [-h] [-V] <keys.txt>";
  cerr << "usage: " << prg << kArguments << endl;
}
27 | void usage_long(const char* prg) {
28 |   usage(prg);
29 |   cerr << "   -h\t print this help message" << endl;
30 |   cerr << "   -V\t print version number and exit" << endl;
31 |   cerr << "   -v\t increase verbosity (may be used multiple times)" << endl;
32 | }
33 | 
34 | int main(int argc, char** argv) {
35 | 
36 |   int verbosity = 0;
37 |   while (1) {
38 |     char ch = (char)getopt(argc, argv, "hvV");
39 |     if (ch == -1) break;
40 |     switch (ch) {
41 |       case 'h':
42 |         usage_long(argv[0]);
43 |         return 0;
44 |       case 'V':
45 |         std::cout << VERSION << std::endl;
46 |         return 0;
47 |       case 'v':
48 |         ++verbosity;
49 |         break;
50 |     }
51 |   }
52 |   if (optind != argc - 1) {
53 |     usage(argv[0]);
54 |     return 1;
55 |   }
56 |   vector<string> keys;
57 |   ifstream f(argv[optind]);
58 |   if (!f.is_open()) {
59 |     std::cerr << "Failed to open " << argv[optind] << std::endl;
60 |     exit(-1);
61 |   }
62 |   string buffer;
63 |   while (!getline(f, buffer).eof()) keys.push_back(buffer);
64 |   for (uint32_t i = 0; i < keys.size(); ++i) string s = keys[i];
65 |   mph_map<string, string> table;
66 | 
67 |   for (uint32_t i = 0; i < keys.size(); ++i) table[keys[i]] = keys[i];
68 |   mph_map<string, string>::const_iterator it = table.begin();
69 |   mph_map<string, string>::const_iterator end = table.end();
70 |   for (int i = 0; it != end; ++it, ++i) {
71 |     cout << i << ": " << it->first
72 |          <<" -> " << it->second << endl;
73 |   }
74 | }
75 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/dense_hash_map_test.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdio>
 2 | #include <cstdlib>
 3 | #include <iostream>
 4 | #include <string>
 5 | 
 6 | #include "mph_map.h"
 7 | #include "map_tester.h"
 8 | #include "test.h"
 9 | 
10 | using namespace cxxmph;
11 | 
12 | typedef MapTester<dense_hash_map> Tester;
13 | 
14 | CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
15 | CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
16 | CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
17 | CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
18 | CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
19 | CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
20 | CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
21 | CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
22 | CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
23 | CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);
24 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/hollow_iterator.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CXXMPH_HOLLOW_ITERATOR_H__
 2 | #define __CXXMPH_HOLLOW_ITERATOR_H__
 3 | 
 4 | #include <vector>
 5 | 
 6 | namespace cxxmph {
 7 | 
 8 | using std::vector;
 9 | 
// Predicate telling whether the slot an iterator points at is unoccupied.
// `c` is the container being iterated and `p` a parallel bit vector in which
// a set bit marks the slot at the same position as occupied. The end
// iterator is never reported as empty.
template <typename container_type>
struct is_empty {
 public:
  is_empty() : c_(NULL), p_(NULL) {}
  is_empty(const container_type* c, const vector<bool>* p) : c_(c), p_(p) {}
  bool operator()(typename container_type::const_iterator it) const {
    const bool at_end = (it == c_->end());
    // The position of `it` in the container indexes the occupancy bits.
    return !at_end && !(*p_)[it - c_->begin()];
  }
 private:
  const container_type* c_;
  const vector<bool>* p_;
};
23 | 
// Forward iterator adaptor that walks an underlying iterator while skipping
// every position for which the `is_empty` predicate returns true. The
// predicate must return false for the end position, otherwise advancing runs
// past the end.
template <typename iterator, typename is_empty>
struct hollow_iterator_base
    : public std::iterator<std::forward_iterator_tag,
                           typename iterator::value_type> {
 public:
  typedef hollow_iterator_base<iterator, is_empty> self_type;
  typedef self_type& self_reference;
  typedef typename iterator::reference reference;
  typedef typename iterator::pointer pointer;
  inline hollow_iterator_base() : it_(), empty_() { }
  // With solid == false, `it` may point at an empty position and is moved
  // forward to the first non-empty one.
  inline hollow_iterator_base(iterator it, is_empty empty, bool solid) : it_(it), empty_(empty) {
    if (!solid) advance();
  }
  // Same as above, assumes solid==true.
  inline hollow_iterator_base(iterator it, is_empty empty) : it_(it), empty_(empty) {}
  inline hollow_iterator_base(const self_type& rhs) : it_(rhs.it_), empty_(rhs.empty_) {}
  // Converting constructor, e.g. from the mutable to the const flavor.
  template <typename const_iterator>
  hollow_iterator_base(const hollow_iterator_base<const_iterator, is_empty>& rhs) : it_(rhs.it_), empty_(rhs.empty_) {}

  // Access and comparison do not modify the iterator, so they are usable on
  // const iterators as well.
  reference operator*() const { return *it_;  }
  pointer operator->() const { return &(*it_); }
  self_reference operator++() { ++it_; advance(); return *this; }
  self_type operator++(int) { self_type tmp(*this); ++(*this); return tmp; }

  template <typename const_iterator>
  bool operator==(const hollow_iterator_base<const_iterator, is_empty>& rhs) const { return rhs.it_ == it_; }
  template <typename const_iterator>
  bool operator!=(const hollow_iterator_base<const_iterator, is_empty>& rhs) const { return rhs.it_ != it_; }

  // should be friend
  iterator it_;
  is_empty empty_;

 private:
  // Moves it_ forward until it no longer points at an empty position.
  void advance() {
    while (empty_(it_)) ++it_;
  }
};
62 | 
63 | template <typename container_type, typename iterator>
64 | inline auto make_solid(
65 |    container_type* v, const vector<bool>* p, iterator it) ->
66 |        hollow_iterator_base<iterator, is_empty<const container_type>> {
67 |   return hollow_iterator_base<iterator, is_empty<const container_type>>(
68 |       it, is_empty<const container_type>(v, p));
69 | }
70 | 
71 | template <typename container_type, typename iterator>
72 | inline auto make_hollow(
73 |    container_type* v, const vector<bool>* p, iterator it) ->
74 |        hollow_iterator_base<iterator, is_empty<const container_type>> {
75 |   return hollow_iterator_base<iterator, is_empty<const container_type>>(
76 |       it, is_empty<const container_type>(v, p), false);
77 | }
78 | 
79 | }  // namespace cxxmph
80 | 
81 | #endif  // __CXXMPH_HOLLOW_ITERATOR_H__
82 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/hollow_iterator_test.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>
 2 | #include <cstdio>
 3 | #include <vector>
 4 | #include <iostream>
 5 | 
 6 | 
 7 | using std::cerr;
 8 | using std::endl;
 9 | using std::vector;
10 | #include "hollow_iterator.h"
11 | using cxxmph::hollow_iterator_base;
12 | using cxxmph::make_hollow;
13 | using cxxmph::is_empty;
14 | 
15 | int main(int argc, char** argv) {
16 |   vector<int> v;
17 |   vector<bool> p;
18 |   for (int i = 0; i < 100; ++i) {
19 |     v.push_back(i);
20 |     p.push_back(i % 2 == 0);
21 |   }
22 |   auto begin = make_hollow(&v, &p, v.begin());
23 |   auto end = make_hollow(&v, &p, v.end());
24 |   for (auto it = begin; it != end; ++it) {
25 |     if (((*it) % 2) != 0) exit(-1);
26 |   }
27 |   const vector<int>* cv(&v);
28 |   auto cbegin(make_hollow(cv, &p, cv->begin()));
29 |   auto cend(make_hollow(cv, &p, cv->begin()));
30 |   for (auto it = cbegin; it != cend; ++it) {
31 |     if (((*it) % 2) != 0) exit(-1);
32 |   }
33 |   const vector<bool>* cp(&p);
34 |   cbegin = make_hollow(cv, cp, v.begin());
35 |   cend = make_hollow(cv, cp, cv->end());
36 | 
37 |   vector<int>::iterator vit1 = v.begin();
38 |   vector<int>::const_iterator vit2 = v.begin();
39 |   if (vit1 != vit2) exit(-1);
40 |   auto it1 = make_hollow(&v, &p, vit1);
41 |   auto it2 = make_hollow(&v, &p, vit2);
42 |   if (it1 != it2) exit(-1);
43 | 
44 |   typedef is_empty<const vector<int>> iev;
45 |   hollow_iterator_base<vector<int>::iterator, iev> default_constructed;
46 |   default_constructed = make_hollow(&v, &p, v.begin());
47 |   return 0;
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/map_tester.cc:
--------------------------------------------------------------------------------
1 | #include "map_tester.h"
2 | 
// Intentionally empty: this file adds no definitions of its own to the
// project namespace.
namespace cxxmph {
}  // namespace cxxmph
5 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/map_tester_test.cc:
--------------------------------------------------------------------------------
 1 | #include "map_tester.h"
 2 | #include "test.h"
 3 | 
 4 | using namespace cxxmph;
 5 | 
 6 | typedef MapTester<std::unordered_map> Tester;
 7 | 
 8 | CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
 9 | CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
10 | CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
11 | CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
12 | CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
13 | CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
14 | CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
15 | CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
16 | CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
17 | CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);
18 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/mph_bits.cc:
--------------------------------------------------------------------------------
 1 | #include "mph_bits.h"
 2 | 
 3 | namespace cxxmph {
 4 | 
 5 | const uint8_t dynamic_2bitset::vmask[] = { 0xfc, 0xf3, 0xcf, 0x3f};
 6 | dynamic_2bitset::dynamic_2bitset() : size_(0), fill_(false) {}
 7 | dynamic_2bitset::dynamic_2bitset(uint32_t size, bool fill)
 8 |     : size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {}
 9 | dynamic_2bitset::~dynamic_2bitset() {}
10 | 
11 | }
12 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/mph_bits.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CXXMPH_MPH_BITS_H__
 2 | #define __CXXMPH_MPH_BITS_H__
 3 | 
 4 | #include <stdint.h>  // for uint32_t and friends
 5 | 
 6 | #include <array>
 7 | #include <cassert>
 8 | #include <climits>
 9 | #include <cmath>
10 | #include <cstdio>
11 | #include <cstring>
12 | #include <limits>
13 | #include <vector>
14 | #include <utility>
15 | 
16 | namespace cxxmph {
17 | 
// Resizable array of 2-bit values (0..3), packed four to a byte. Entry i is
// stored in byte i / 4 at bit offset 2 * (i % 4).
class dynamic_2bitset {
 public:
  dynamic_2bitset();
  ~dynamic_2bitset();
  // Creates `size` entries; all bits are set when `fill` is true (every entry
  // reads 3), cleared otherwise.
  dynamic_2bitset(uint32_t size, bool fill = false);

  uint8_t operator[](uint32_t i) const { return get(i); }
  // Returns entry i; i must be smaller than size().
  uint8_t get(uint32_t i) const {
    assert(i < size());
    assert((i >> 2) < data_.size());
    return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3);
  }
  // Stores v (0..3) in entry i; i must be smaller than size().
  void set(uint32_t i, uint8_t v) {
    // Validate the arguments before they are used: a larger v would spill
    // into the neighbouring entries.
    assert(v <= 3);
    assert(i < size());
    assert((i >> 2) < data_.size());
    // First set both bits of the entry, then clear the ones that are zero in
    // v; vmask keeps the other three entries of the byte untouched.
    data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3];
    data_[(i >> 2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]);
    assert(get(i) == v);
  }
  // Changes the number of entries. Newly added bytes get the fill value
  // chosen at construction.
  void resize(uint32_t size) {
    size_ = size;
    // Round the byte count up, as the constructor does; rounding down would
    // leave the last (size % 4) entries without storage.
    data_.resize(bytes_for(size), static_cast<uint8_t>(fill_ * ones()));
  }
  void swap(dynamic_2bitset& other) {
    std::swap(other.size_, size_);
    std::swap(other.fill_, fill_);
    other.data_.swap(data_);
  }
  void clear() { data_.clear(); size_ = 0; }

  uint32_t size() const { return size_; }
  static const uint8_t vmask[];
  const std::vector<uint8_t>& data() const { return data_; }
 private:
  uint32_t size_;
  bool fill_;
  std::vector<uint8_t> data_;
  // A byte with all bits set.
  static uint8_t ones() { return std::numeric_limits<uint8_t>::max(); }
  // Number of bytes needed to hold `size` 2-bit entries.
  static std::vector<uint8_t>::size_type bytes_for(uint32_t size) {
    return static_cast<std::vector<uint8_t>::size_type>(size >> 2) +
           ((size & 3) != 0 ? 1 : 0);
  }
};
57 | 
// Returns the smallest power of two that is greater than or equal to k
// (1 for k == 0). The result wraps around to 0 when k exceeds 2^31.
static uint32_t nextpoweroftwo(uint32_t k) {
  if (k == 0) return 1;
  // Copy the highest set bit of k - 1 into every lower position, which yields
  // 2^n - 1; adding one gives the power of two.
  uint32_t smeared = k - 1;
  smeared |= smeared >> 1;
  smeared |= smeared >> 2;
  smeared |= smeared >> 4;
  smeared |= smeared >> 8;
  smeared |= smeared >> 16;
  return smeared + 1;
}
64 | // Interesting bit tricks that might end up here:
65 | // http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
66 | // Fast a % (k*2^t)
67 | // http://www.azillionmonkeys.com/qed/adiv.html
68 | // rank and select:
69 | // http://vigna.dsi.unimi.it/ftp/papers/Broadword.pdf
70 | 
71 | }  // namespace cxxmph
72 | 
73 | #endif
74 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/mph_bits_test.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdio>
 2 | #include <cstdlib>
 3 | 
 4 | #include "mph_bits.h"
 5 | 
 6 | using cxxmph::dynamic_2bitset;
 7 | using cxxmph::nextpoweroftwo;
 8 | 
 9 | int main(int argc, char** argv) {
10 |   dynamic_2bitset small(256, true);
11 |   for (uint32_t i = 0; i < small.size(); ++i) small.set(i, i % 4);
12 |   for (uint32_t i = 0; i < small.size(); ++i) {
13 |     if (small[i] != i % 4) {
14 |       fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4);
15 |       exit(-1);
16 |     }
17 |   }
18 | 
19 |   uint32_t size = 256;
20 |   dynamic_2bitset bits(size, true /* fill with ones */);
21 |   for (uint32_t i = 0; i < size; ++i) {
22 |     if (bits[i] != 3)  {
23 |       fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 3);
24 |       exit(-1);
25 |     }
26 |   }
27 |   for (uint32_t i = 0; i < size; ++i) bits.set(i, 0);
28 |   for (uint32_t i = 0; i < size; ++i) {
29 |     if (bits[i] != 0)  {
30 |       fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 0);
31 |       exit(-1);
32 |     }
33 |   }
34 |   for (uint32_t i = 0; i < size; ++i) bits.set(i, i % 4);
35 |   for (uint32_t i = 0; i < size; ++i) {
36 |     if (bits[i] != i % 4) {
37 |       fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, i % 4);
38 |       exit(-1);
39 |     }
40 |   }
41 |   dynamic_2bitset size_corner1(1);
42 |   if (size_corner1.size() != 1) exit(-1);
43 |   dynamic_2bitset size_corner2(2);
44 |   if (size_corner2.size() != 2) exit(-1);
45 |   (dynamic_2bitset(4, true)).swap(size_corner2);
46 |   if (size_corner2.size() != 4) exit(-1);
47 |   for (uint32_t i = 0; i < size_corner2.size(); ++i) {
48 |     if (size_corner2[i] != 3) exit(-1);
49 |   }
50 |   size_corner2.clear();
51 |   if (size_corner2.size() != 0) exit(-1);
52 | 
53 |   dynamic_2bitset empty;
54 |   empty.clear();
55 |   dynamic_2bitset large(1000, true);
56 |   empty.swap(large);
57 | 
58 |   if (nextpoweroftwo(3) != 4) exit(-1);
59 | }
60 |   
61 |   
62 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/mph_index_test.cc:
--------------------------------------------------------------------------------
 1 | #include <algorithm>
 2 | #include <cassert>
 3 | #include <string>
 4 | #include <vector>
 5 | 
 6 | #include "mph_index.h"
 7 | 
 8 | using std::string;
 9 | using std::vector;
10 | using namespace cxxmph;
11 | 
12 | int main(int argc, char** argv) {
13 | 
14 |   srand(1);
15 |   vector<string> keys;
16 |   keys.push_back("davi");
17 |   keys.push_back("paulo");
18 |   keys.push_back("joao");
19 |   keys.push_back("maria");
20 |   keys.push_back("bruno");
21 |   keys.push_back("paula");
22 |   keys.push_back("diego");
23 |   keys.push_back("diogo");
24 |   keys.push_back("algume");
25 | 
26 |   SimpleMPHIndex<string> mph_index;
27 |   if (!mph_index.Reset(keys.begin(), keys.end(), keys.size())) { exit(-1); }
28 |   vector<int> ids;
29 |   for (vector<int>::size_type i = 0; i < keys.size(); ++i) {
30 |      ids.push_back(mph_index.index(keys[i]));
31 |      cerr << " " << *(ids.end() - 1);
32 |   }
33 |   cerr << endl;
34 |   sort(ids.begin(), ids.end());
35 |   for (vector<int>::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast<vector<int>::value_type>(i));
36 | 
37 |   FlexibleMPHIndex<false, true, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> square_empty;
38 |   auto id = square_empty.index(1);
39 |   FlexibleMPHIndex<false, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> unordered_empty;
40 |   id ^= unordered_empty.index(1);
41 |   FlexibleMPHIndex<true, false, int64_t, seeded_hash<std::hash<int64_t>>::hash_function> minimal_empty;
42 |   id ^= minimal_empty.index(1);
43 | }
44 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/mph_map_test.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdio>
 2 | #include <cstdlib>
 3 | #include <iostream>
 4 | #include <string>
 5 | 
 6 | #include "mph_map.h"
 7 | #include "map_tester.h"
 8 | #include "test.h"
 9 | 
10 | using namespace cxxmph;
11 | 
12 | typedef MapTester<mph_map> Tester;
13 | 
14 | /*
15 | CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert);
16 | CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert);
17 | CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search);
18 | CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search);
19 | CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search);
20 | CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search);
21 | CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero);
22 | CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size);
23 | CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value);
24 | */
25 | CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator);
26 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/seeded_hash_test.cc:
--------------------------------------------------------------------------------
 1 | #include "seeded_hash.h"
 2 | 
 3 | #include <unordered_map>
 4 | #include <string>
 5 | #include <iostream>
 6 | 
 7 | using std::cerr;
 8 | using std::endl;
 9 | using std::string;
10 | using std::unordered_map;
11 | using namespace cxxmph;
12 | 
13 | int main(int argc, char** argv) {
14 |   auto hasher = seeded_hash_function<Murmur3StringPiece>();
15 |   string key1("0");
16 |   string key2("1");
17 |   auto h1 = hasher.hash128(key1, 1);
18 |   auto h2 = hasher.hash128(key2, 1);
19 |   if (h1 == h2) {
20 |     fprintf(stderr, "unexpected murmur collision\n");
21 |     exit(-1);
22 |   }
23 | 
24 |   unordered_map<uint64_t, int> g;
25 |   for (int i = 0; i < 1000; ++i) g[i] = i;
26 |   for (int i = 0; i < 1000; ++i) if (g[i] != i) exit(-1);
27 | 
28 |   auto inthasher = seeded_hash_function<std::hash<uint64_t>>();
29 |   unordered_map<h128, uint64_t, h128::hash32> g2;
30 |   for (uint64_t i = 0; i < 1000; ++i) {
31 |     auto h = inthasher.hash128(i, 0);
32 |     if (g2.find(h) != g2.end()) {
33 |       std::cerr << "Incorrectly found " << i << std::endl;
34 |       exit(-1);
35 |     }
36 |     if (h128::hash32()(h) != h[3]) {
37 |       cerr << "Buggy hash method." << endl;
38 |       exit(-1);
39 |     }
40 |     auto h2 = inthasher.hash128(i, 0);
41 |     if (!(h == h2)) {
42 |       cerr << "h 64(0) " << h.get64(0) << " h 64(1) " << h.get64(1) << endl;
43 |       cerr << " h2 64(0) " << h2.get64(0) << " h2 64(1) " << h2.get64(1) << endl;
44 |       cerr << "Broken equality for h128" << endl;
45 |       exit(-1);
46 |     }
47 |     if (h128::hash32()(h) != h128::hash32()(h2)) {
48 |       cerr << "Inconsistent hash method." << endl;
49 |       exit(-1);
50 |     }
51 |     g2[h] = i;
52 |     if (g2.find(h) == g2.end()) {
53 |       std::cerr << "Incorrectly missed " << i << std::endl;
54 |       exit(-1);
55 |     }
56 |   }
57 |     
58 |   for (uint64_t i = 0; i < 1000; ++i) if (g2[inthasher.hash128(i, 0)] != i) exit(-1);
59 | }
60 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/string_util.cc:
--------------------------------------------------------------------------------
 1 | #include "string_util.h"
 2 | 
 3 | #include <cassert>
 4 | #include <cstdint>
 5 | #include <iostream>
 6 | #include <string>
 7 | 
 8 | using namespace std;
 9 | 
10 | namespace cxxmph {
11 | 
// Writes the part of `format_string` that starts at `offset` to `out`
// verbatim, without interpreting any format directives. Returns true on
// success, including the case where nothing is left to write, and false when
// `offset` lies beyond the end of the string.
bool stream_printf(
    const std::string& format_string, uint32_t offset, std::ostream* out) {
  if (offset == format_string.length()) return true;
  assert(offset < format_string.length());
  // In builds without assertions an out-of-range offset is reported to the
  // caller instead of letting substr() throw.
  if (offset > format_string.length()) return false;
  *out << format_string.substr(offset);
  return true;
}
22 | 
23 | }  // namespace cxxmph
24 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/string_util_test.cc:
--------------------------------------------------------------------------------
 1 | #include "string_util.h"
 2 | #include "test.h"
 3 | 
 4 | using namespace cxxmph;
 5 | // Checks that format() turns "%%%%" into "%%", prints each %v argument, and prints 10 as "0A" for %.2X.
 6 | bool test_format() {
 7 |   string expected = " %% 4 foo 0x0A bar ";
 8 |   string foo = "foo";
 9 |   string fmt = format(" %%%% %v %v 0x%.2X bar ", 4, foo, 10);
10 |   fail_unless(fmt == expected, "expected\n-%s-\n got \n-%s-", expected.c_str(), fmt.c_str());
11 |   return true;
12 | }
13 | // Calls infoln() with a format string and two arguments; nothing about the output is asserted.
14 | bool test_infoln() {
15 |   infoln(string("%s:%d: MY INFO LINE"), __FILE__, __LINE__);
16 |   return true;
17 | }
18 | 
19 | // Invokes CXXMPH_DEBUGLN with a message and no arguments; nothing about the output is asserted.
20 | bool test_macro() {
21 |   CXXMPH_DEBUGLN("here i am")();
22 |   return true;
23 | }
24 | 
25 | CXXMPH_TEST_CASE(test_format)
26 | CXXMPH_TEST_CASE(test_infoln)
27 | CXXMPH_TEST_CASE(test_macro)
28 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/test.cc:
--------------------------------------------------------------------------------
 1 | #include <cstdlib>  // For EXIT_SUCCESS, EXIT_FAILURE
 2 | 
 3 | #include "test.h"
 4 | 
 5 | Suite* global_suite = suite_create("cxxmph_test_suite");
 6 | TCase* global_tc_core = tcase_create("Core");
 7 | // Runs every test attached to global_tc_core and maps the failure count to the exit status.
 8 | int main (void) {
 9 |   suite_add_tcase(global_suite, global_tc_core);
10 |   SRunner* const runner = srunner_create(global_suite);
11 |   srunner_run_all(runner, CK_NORMAL);
12 |   const int failed_count = srunner_ntests_failed(runner);
13 |   srunner_free(runner);
14 |   if (failed_count != 0) return EXIT_FAILURE;
15 |   return EXIT_SUCCESS;
16 | }
17 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/test.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CXXMPH_TEST_H__
 2 | #define __CXXMPH_TEST_H__
 3 | 
 4 | // Thin wrapper on top of check.h to get rid of boilerplate in tests. Assumes a
 5 | // single test suite and test case per file, with each fixture represented by a
 6 | // parameter-less boolean function.
 7 | //
 8 | // The check.h header macro-clashes with c++ libraries so this file needs to be
 9 | // included last.
10 | 
11 | #include <check.h>
12 | 
13 | extern Suite* global_suite;
14 | extern TCase* global_tc_core;
15 | 
16 | // Creates a new test case calling boolean_function. Name must be a valid,
17 | // unique c identifier when prefixed with tc_.
18 | #define CXXMPH_CXX_TEST_CASE(name, boolean_function) \
19 |   START_TEST(tc_ ## name) \
20 |        { fail_unless(boolean_function()); } END_TEST \
21 |   static TestCase global_cxxmph_tc_ ## name(tc_ ## name);
22 | 
23 | #define CXXMPH_TEST_CASE(name) CXXMPH_CXX_TEST_CASE(name, name)
24 | // NOTE(review): CXXMPH_CXX_TEST_CASE creates static TestCase objects, so the constructor runs during static initialization and uses global_tc_core, which test.cc also initializes dynamically; order across translation units is unspecified - confirm.
25 | struct TestCase {  // registers a check test function on construction
26 |   TestCase(void (*f)(int)) {
27 |     tcase_add_test(global_tc_core, f);
28 |   }
29 | };
30 | 
31 | #endif //  __CXXMPH_TEST_H__
32 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/test_test.cc:
--------------------------------------------------------------------------------
1 | #include "test.h"
2 | // Trivial always-passing fixture registered through CXXMPH_TEST_CASE.
3 | bool tautology() { return true; }
4 | CXXMPH_TEST_CASE(tautology)
5 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/trigraph.cc:
--------------------------------------------------------------------------------
 1 | #include <cassert>
 2 | #include <limits>
 3 | #include <iostream>
 4 | 
 5 | #include "trigraph.h"
 6 | 
 7 | using std::cerr;
 8 | using std::endl;
 9 | using std::vector;
10 | 
11 | namespace {
12 | static const uint32_t kInvalidEdge = std::numeric_limits<uint32_t>::max();
13 | } 
14 | 
15 | namespace cxxmph {
16 | // Sizes the edge arrays for nedges edges and the per-vertex arrays for nvertices vertices; starts with zero edges, every first_edge_ entry at kInvalidEdge and every degree at 0.
17 | TriGraph::TriGraph(uint32_t nvertices, uint32_t nedges)
18 |       : nedges_(0),
19 |         edges_(nedges),
20 |         next_edge_(nedges),
21 |         first_edge_(nvertices, kInvalidEdge),
22 |         vertex_degree_(nvertices, 0) { }
23 | TriGraph::~TriGraph() {}
24 | // Hands the edge array to the caller through *edges and drops the other graph storage.
25 | void TriGraph::ExtractEdgesAndClear(vector<Edge>* edges) {
26 |   edges->swap(edges_);  // edges_ now holds whatever *edges held before
27 |   nedges_ = 0;
28 |   vector<uint8_t>().swap(vertex_degree_);  // swapping with an empty temporary frees the buffer
29 |   vector<uint32_t>().swap(first_edge_);
30 |   vector<Edge>().swap(next_edge_);
31 | }
32 | void TriGraph::AddEdge(const Edge& edge) {
33 |   // Stores edge in slot nedges_ and links it at the head of the edge list of
34 |   // each of its three vertices. Everything is validated before any write.
35 |   assert(edges_.size() > nedges_);
36 |   assert(next_edge_.size() > nedges_);
37 |   assert(first_edge_.size() > edge[0]);
38 |   assert(first_edge_.size() > edge[1]);
39 |   assert(first_edge_.size() > edge[2]);
40 |   edges_[nedges_] = edge;
41 |   // next_edge_ remembers the previous list heads, which may be kInvalidEdge.
42 |   next_edge_[nedges_] = Edge(
43 |       first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]);
44 |   first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_;
45 |   for (int i = 0; i < 3; ++i) ++vertex_degree_[edge[i]];
46 |   ++nedges_;
47 | }
48 | // Unlinks edge current_edge from the edge lists of its three vertices and decrements their degrees; edges_ and nedges_ are not modified.
49 | void TriGraph::RemoveEdge(uint32_t current_edge) {
50 |   // cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl;
51 |   for (int i = 0; i < 3; ++i) {
52 |     uint32_t vertex = edges_[current_edge][i];
53 |     uint32_t edge1 = first_edge_[vertex];  // cursor walking vertex's edge list
54 |     uint32_t edge2 = kInvalidEdge;  // edge visited just before edge1, if any
55 |     uint32_t j = 0;  // position of vertex inside edge2
56 |     while (edge1 != current_edge && edge1 != kInvalidEdge) {
57 |       edge2 = edge1;
58 |       if (edges_[edge1][0] == vertex) j = 0;
59 |       else if (edges_[edge1][1] == vertex) j = 1;
60 |       else j = 2;
61 |       edge1 = next_edge_[edge1][j];
62 |     }
63 |     assert(edge1 != kInvalidEdge);  // current_edge must be present in vertex's list
64 |     if (edge2 != kInvalidEdge) next_edge_[edge2][j] = next_edge_[edge1][i];  // bypass current_edge
65 |     else first_edge_[vertex] = next_edge_[edge1][i];  // current_edge was the list head
66 |     --vertex_degree_[vertex];
67 |   }
68 | }
69 | // Prints every edge slot with its successor links, then each vertex's first edge, to std::cerr.
70 | void TriGraph::DebugGraph() const {
71 |   for (uint32_t e = 0; e < edges_.size(); ++e) {
72 |     const Edge& edge = edges_[e];
73 |     const Edge& next = next_edge_[e];
74 |     cerr << e << "  " << edge[0] << " " << edge[1] << " " << edge[2]
75 |          << " nexts " << next[0] << " " << next[1] << " " << next[2] << endl;
76 |   }
77 |   for (uint32_t v = 0; v < first_edge_.size(); ++v)
78 |     cerr << "first for vertice " << v << " " << first_edge_[v] << endl;
79 | }
80 | 
81 |      
82 | }  // namespace cxxmph
83 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/trigraph.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CXXMPH_TRIGRAPH_H__
 2 | #define __CXXMPH_TRIGRAPH_H__
 3 | // Build a trigraph using a memory efficient representation.
 4 | //
 5 | // Prior knowledge of the number of edges and vertices for the graph is
 6 | // required. For each vertex, we store how many edges touch it (degree) and the
 7 | // index of the first edge in the vector of triples representing the edges.
 8 | 
 9 | #include <stdint.h>  // for uint32_t and friends
10 | 
11 | #include <vector>
12 | 
13 | namespace cxxmph {
14 | 
15 | class TriGraph {
16 |  public:
17 |   struct Edge {  // three uint32_t slots, indexed 0..2 through operator[]
18 |     Edge() { }  // leaves the slots uninitialized
19 |     Edge(uint32_t v0, uint32_t v1, uint32_t v2) {
20 |       vertices[0] = v0;
21 |       vertices[1] = v1;
22 |       vertices[2] = v2;
23 |     }
24 |     uint32_t& operator[](uint8_t v) { return vertices[v]; }
25 |     const uint32_t& operator[](uint8_t v) const { return vertices[v]; }
26 |     uint32_t vertices[3];
27 |   };
28 |   TriGraph(uint32_t nvertices, uint32_t nedges);  // same parameter order as the definition in trigraph.cc
29 |   ~TriGraph();
30 |   void AddEdge(const Edge& edge);
31 |   void RemoveEdge(uint32_t edge_id);
32 |   void ExtractEdgesAndClear(std::vector<Edge>* edges);
33 |   void DebugGraph() const;
34 |   // Read-only views of the internal arrays.
35 |   const std::vector<Edge>& edges() const { return edges_; }
36 |   const std::vector<uint8_t>& vertex_degree() const { return vertex_degree_; }
37 |   const std::vector<uint32_t>& first_edge() const { return first_edge_; }
38 | 
39 |  private:
40 |   uint32_t nedges_;  // number of edges added so far
41 |   std::vector<Edge> edges_;
42 |   std::vector<Edge> next_edge_;  // for implementing removal
43 |   std::vector<uint32_t> first_edge_;  // the first edge for this vertex
44 |   std::vector<uint8_t> vertex_degree_;  // number of edges for this vertex
45 | };
46 | 
47 | }  // namespace cxxmph
48 | 
49 | #endif  // __CXXMPH_TRIGRAPH_H__
50 | 


--------------------------------------------------------------------------------
/cmph-2.0/cxxmph/trigraph_test.cc:
--------------------------------------------------------------------------------
 1 | #include <cassert>
 2 | 
 3 | #include "trigraph.h"
 4 | 
 5 | using cxxmph::TriGraph;
 6 | // Builds a 4-vertex graph with two edges and checks vertex degrees before and after removing edge 0.
 7 | int main(int argc, char** argv) {
 8 |   TriGraph g(4, 2);  // 4 vertices, 2 edges (argument order of the definition in trigraph.cc)
 9 |   g.AddEdge(TriGraph::Edge(0, 1, 2));
10 |   g.AddEdge(TriGraph::Edge(1, 3, 2));
11 |   assert(g.vertex_degree()[0] == 1);
12 |   assert(g.vertex_degree()[1] == 2);
13 |   assert(g.vertex_degree()[2] == 2);
14 |   assert(g.vertex_degree()[3] == 1);
15 |   g.RemoveEdge(0);  // leaves only edge (1, 3, 2) linked
16 |   assert(g.vertex_degree()[0] == 0);
17 |   assert(g.vertex_degree()[1] == 1);
18 |   assert(g.vertex_degree()[2] == 1);
19 |   assert(g.vertex_degree()[3] == 1);
20 |   std::vector<TriGraph::Edge> edges;
21 |   g.ExtractEdgesAndClear(&edges);
22 | }
23 | 


--------------------------------------------------------------------------------
/cmph-2.0/examples/Makefile.am:
--------------------------------------------------------------------------------
 1 | noinst_PROGRAMS = vector_adapter_ex1 file_adapter_ex2 struct_vector_adapter_ex3
 2 | # AM_CPPFLAGS replaces the deprecated INCLUDES; top_srcdir keeps out-of-tree builds working.
 3 | AM_CPPFLAGS = -I$(top_srcdir)/src
 4 | 
 5 | vector_adapter_ex1_LDADD   = ../src/libcmph.la
 6 | vector_adapter_ex1_SOURCES = vector_adapter_ex1.c
 7 | 
 8 | file_adapter_ex2_LDADD     = ../src/libcmph.la
 9 | file_adapter_ex2_SOURCES   = file_adapter_ex2.c
10 | 
11 | struct_vector_adapter_ex3_LDADD     = ../src/libcmph.la
12 | struct_vector_adapter_ex3_SOURCES   = struct_vector_adapter_ex3.c
13 | 


--------------------------------------------------------------------------------
/cmph-2.0/examples/file_adapter_ex2.c:
--------------------------------------------------------------------------------
 1 | #include <cmph.h>
 2 | #include <stdio.h>
 3 | #include <string.h>
 4 | // Create minimal perfect hash function from in-disk keys using BDZ algorithm
 5 | int main(int argc, char **argv)
 6 | {
 7 | 	// Open file with newline separated list of keys
 8 | 	FILE *keys_fd = fopen("keys.txt", "r");
 9 | 	cmph_t *hash = NULL;
10 | 	if (keys_fd == NULL)
11 | 	{
12 | 		fprintf(stderr, "File \"keys.txt\" not found\n");
13 | 		return 1;
14 | 	}
15 | 	// Source of keys
16 | 	cmph_io_adapter_t *source = cmph_io_nlfile_adapter(keys_fd);
17 | 	cmph_config_t *config = cmph_config_new(source);
18 | 	cmph_config_set_algo(config, CMPH_BDZ);
19 | 	hash = cmph_new(config);
20 | 	cmph_config_destroy(config);
21 | 	int failed = (hash == NULL);  // a NULL hash must be neither searched nor destroyed
22 | 	if (failed) fprintf(stderr, "Unable to create minimal perfect hash function\n");
23 | 	else {
24 | 		const char *key = "jjjjjjjjjj";  // key to look up
25 | 		unsigned int id = cmph_search(hash, key, (cmph_uint32)strlen(key));
26 | 		fprintf(stderr, "Id:%u\n", id);
27 | 		cmph_destroy(hash);
28 | 	}
29 | 	cmph_io_nlfile_adapter_destroy(source);
30 | 	fclose(keys_fd);
31 | 	return failed;
32 | }
33 | 


--------------------------------------------------------------------------------
/cmph-2.0/examples/struct_vector_adapter_ex3.c:
--------------------------------------------------------------------------------
 1 | #include <cmph.h>
 2 | #include <string.h>
 3 | // Create minimal perfect hash function from in-memory vector
 4 | 
 5 | #pragma pack(1)
 6 | typedef struct {
 7 | 	cmph_uint32 id;
 8 | 	char key[11];     // 10 characters plus the terminating NUL
 9 | 	cmph_uint32 year;
10 | } rec_t;              // packed to 1 byte; the empty pack() below restores the default alignment
11 | #pragma pack()
12 | // Builds a BDZ function over the key field of an in-memory array of records, dumps it to a file, reloads it and looks every key up.
13 | int main(int argc, char **argv)
14 | {
15 |     // Creating a filled vector
16 |     unsigned int i = 0;
17 |     rec_t vector[10] = {{1, "aaaaaaaaaa", 1999}, {2, "bbbbbbbbbb", 2000}, {3, "cccccccccc", 2001},
18 |                         {4, "dddddddddd", 2002}, {5, "eeeeeeeeee", 2003}, {6, "ffffffffff", 2004},
19 |                         {7, "gggggggggg", 2005}, {8, "hhhhhhhhhh", 2006}, {9, "iiiiiiiiii", 2007},
20 |                         {10,"jjjjjjjjjj", 2008}};
21 |     unsigned int nkeys = 10;
22 |     // Binary mode: the dumped function is raw bytes, which text mode may alter on some platforms.
23 |     FILE* mphf_fd = fopen("temp_struct_vector.mph", "wb");
24 |     if (mphf_fd == NULL) { perror("temp_struct_vector.mph"); return 1; }
25 |     // Source of keys: 11-byte keys found sizeof(cmph_uint32) bytes into each record
26 |     cmph_io_adapter_t *source = cmph_io_struct_vector_adapter(vector, (cmph_uint32)sizeof(rec_t), (cmph_uint32)sizeof(cmph_uint32), 11, nkeys);
27 |     //Create minimal perfect hash function using the BDZ algorithm.
28 |     cmph_config_t *config = cmph_config_new(source);
29 |     cmph_config_set_algo(config, CMPH_BDZ);
30 |     cmph_config_set_mphf_fd(config, mphf_fd);
31 |     cmph_t *hash = cmph_new(config);
32 |     cmph_config_destroy(config);
33 |     cmph_dump(hash, mphf_fd);
34 |     cmph_destroy(hash);
35 |     fclose(mphf_fd);
36 |     //Find key
37 |     mphf_fd = fopen("temp_struct_vector.mph", "rb");
38 |     if (mphf_fd == NULL) { perror("temp_struct_vector.mph"); cmph_io_struct_vector_adapter_destroy(source); return 1; }
39 |     hash = cmph_load(mphf_fd);
40 |     for (i = 0; i < nkeys; i++) {
41 |       const char *key = vector[i].key;
42 |       unsigned int id = cmph_search(hash, key, 11);
43 |       fprintf(stderr, "key:%s -- hash:%u\n", key, id);
44 |     }
45 |     //Destroy hash
46 |     cmph_destroy(hash);
47 |     // The source came from cmph_io_struct_vector_adapter(), so release it with the matching destructor.
48 |     cmph_io_struct_vector_adapter_destroy(source);
49 |     fclose(mphf_fd);
50 |     return 0;
51 | }
52 | 


--------------------------------------------------------------------------------
/cmph-2.0/examples/vector_adapter_ex1.c:
--------------------------------------------------------------------------------
 1 | #include <cmph.h>
 2 | #include <string.h>
 3 | // Create minimal perfect hash function from in-memory vector
 4 | int main(int argc, char **argv)
 5 | {
 6 |     // Creating a filled vector
 7 |     unsigned int i = 0;
 8 |     const char *vector[] = {"aaaaaaaaaa", "bbbbbbbbbb", "cccccccccc", "dddddddddd", "eeeeeeeeee",
 9 |         "ffffffffff", "gggggggggg", "hhhhhhhhhh", "iiiiiiiiii", "jjjjjjjjjj"};
10 |     unsigned int nkeys = 10;
11 |     // Binary mode: the dumped function is raw bytes, which text mode may alter on some platforms.
12 |     FILE* mphf_fd = fopen("temp.mph", "wb");
13 |     if (mphf_fd == NULL) { perror("temp.mph"); return 1; }
14 |     // Source of keys
15 |     cmph_io_adapter_t *source = cmph_io_vector_adapter((char **)vector, nkeys);
16 |     //Create minimal perfect hash function using the brz algorithm.
17 |     cmph_config_t *config = cmph_config_new(source);
18 |     cmph_config_set_algo(config, CMPH_BRZ);
19 |     cmph_config_set_mphf_fd(config, mphf_fd);
20 |     cmph_t *hash = cmph_new(config);
21 |     cmph_config_destroy(config);
22 |     cmph_dump(hash, mphf_fd);
23 |     cmph_destroy(hash);
24 |     fclose(mphf_fd);
25 | 
26 |     //Find key
27 |     mphf_fd = fopen("temp.mph", "rb");
28 |     if (mphf_fd == NULL) { perror("temp.mph"); cmph_io_vector_adapter_destroy(source); return 1; }
29 |     hash = cmph_load(mphf_fd);
30 |     for (i = 0; i < nkeys; i++) {
31 |         const char *key = vector[i];
32 |         unsigned int id = cmph_search(hash, key, (cmph_uint32)strlen(key));
33 |         fprintf(stderr, "key:%s -- hash:%u\n", key, id);
34 |     }
35 | 
36 |     //Destroy hash
37 |     cmph_destroy(hash);
38 |     cmph_io_vector_adapter_destroy(source);
39 |     fclose(mphf_fd);
40 |     return 0;
41 | }
42 | 


--------------------------------------------------------------------------------
/cmph-2.0/m4/ltversion.m4:
--------------------------------------------------------------------------------
 1 | # ltversion.m4 -- version numbers			-*- Autoconf -*-
 2 | #
 3 | #   Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc.
 4 | #   Written by Scott James Remnant, 2004
 5 | #
 6 | # This file is free software; the Free Software Foundation gives
 7 | # unlimited permission to copy and/or distribute it, with or without
 8 | # modifications, as long as this notice is preserved.
 9 | 
10 | # @configure_input@
11 | 
12 | # serial 4179 ltversion.m4
13 | # This file is part of GNU Libtool
14 | 
15 | m4_define([LT_PACKAGE_VERSION], [2.4.6])
16 | m4_define([LT_PACKAGE_REVISION], [2.4.6])
17 | 
18 | AC_DEFUN([LTVERSION_VERSION],
19 | [macro_version='2.4.6'
20 | macro_revision='2.4.6'
21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
22 | _LT_DECL(, macro_revision, 0)
23 | ])
24 | 


--------------------------------------------------------------------------------
/cmph-2.0/man/Makefile.am:
--------------------------------------------------------------------------------
1 | man_MANS = cmph.1
2 | EXTRA_DIST=cmph.1
3 | 


--------------------------------------------------------------------------------
/cmph-2.0/man/cmph.1:
--------------------------------------------------------------------------------
 1 | .TH CMPH "1" "October 2007" "cmph 0.6" "User Commands"
 2 | .SH NAME
 3 | cmph \- minimum perfect hashing tool
 4 | .SH SYNOPSIS
 5 | .B cmph
 6 | [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b BRZ_parameter] [-d tmp_dir] [-m file.mph] keysfile
 7 | .SH DESCRIPTION
 8 | .PP
 9 | Command line tool to generate and query minimal perfect hash functions.
10 | .PP
11 | Please refer to http://cmph.sf.net for full documentation.
12 | .TP
13 | \fB\-h\fR
14 | Print a help message
15 | .TP
16 | \fB\-c\fR
17 | This value determines: the number of vertices in the graph for the algorithms BMZ and CHM; the number of bits per key required in the FCH algorithm
18 | .TP
19 | \fB\-a\fR
20 | Algorithm. Valid values are: bmz, bmz8, chm, brz, fch, bdz, bdz_ph, chd_ph, chd
21 | .TP
22 | \fB\-f\fR
23 | hash function (may be used multiple times). valid values are: djb2, fnv, jenkins, sdbm
24 | .TP
25 | \fB\-V\fR	
26 | Print version number and exit
27 | .TP
28 | \fB\-v\fR
29 | Increase verbosity (may be used multiple times)
30 | .TP
31 | \fB\-k\fR
32 | Number of keys
33 | .TP
34 | \fB\-g\fR
35 | Generation mode
36 | .TP
37 | \fB\-s\fR
38 | Random seed
39 | .TP
40 | \fB\-m\fR
41 | Minimum perfect hash function file 
42 | .TP
43 | \fB\-M\fR
44 | Main memory availability (in MB)
45 | .TP
46 | \fB\-d\fR
47 | Temporary directory used in brz algorithm 
48 | .TP
49 | \fB\-b\fR
50 | Parameter of BRZ algorithm to make the maximal number of keys in a bucket lower than 256
51 | .TP
52 | \fBkeysfile\fR
53 | Line separated file with keys
54 | .SH EXAMPLE
55 | $ # Using the default algorithm (chm) for constructing a mphf 
56 | .br
57 | $ # for keys in file keys_file. Lines in keys_file _must_ be unique.
58 | .br
59 | $ ./cmph -v -g keys_file
60 | .br
61 | $ # Query id of keys in the file keys_query
62 | .br
63 | $ ./cmph -v -m keys_file.mph keys_query
64 | .SH AUTHOR
65 | This manual page was written by Enrico Tassi <gareuselesinge@users.sourceforge.net>,
66 | for the Debian project (but may be used by others).
67 | 
68 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/Makefile.am:
--------------------------------------------------------------------------------
 1 | bin_PROGRAMS = cmph
 2 | noinst_PROGRAMS = bm_numbers
 3 | lib_LTLIBRARIES = libcmph.la
 4 | include_HEADERS = cmph.h cmph_types.h cmph_time.h chd_ph.h
 5 | libcmph_la_SOURCES =  hash.h hash.c \
 6 | 		      jenkins_hash.h jenkins_hash.c \
 7 | 		      hash_state.h debug.h \
 8 | 		      vstack.h vstack.c vqueue.h vqueue.c\
 9 | 		      graph.h graph.c bitbool.h \
10 | 		      cmph.h cmph.c cmph_structs.h cmph_structs.c\
11 | 		      chm.h chm.c chm_structs.h \
12 | 		      bmz.h bmz.c bmz_structs.h \
13 |                       bmz8.h bmz8.c bmz8_structs.h \
14 | 		      bdz.h bdz.c bdz_structs.h \
15 | 		      bdz_ph.h bdz_ph.c bdz_structs_ph.h \
16 | 		      brz.h brz.c brz_structs.h \
17 | 		      fch.h fch.c fch_structs.h \
18 | 		      fch_buckets.h fch_buckets.c \
19 | 		      chd.h chd.c chd_structs.h \
20 | 		      chd_ph.h chd_ph.c chd_structs_ph.h \
21 | 		      miller_rabin.h miller_rabin.c \
22 | 		      buffer_manager.h buffer_manager.c \
23 | 		      buffer_entry.h buffer_entry.c\
24 | 		      select.h select.c select_lookup_tables.h \
25 | 		      compressed_seq.h compressed_seq.c \
26 | 		      compressed_rank.h compressed_rank.c \
27 |                       linear_string_map.h linear_string_map.c \
28 | 		      cmph_benchmark.h cmph_benchmark.c \
29 | 		      cmph_time.h
30 | 
31 | libcmph_la_LDFLAGS = -version-info 0:0:0
32 | 
33 | cmph_SOURCES = 	main.c wingetopt.h wingetopt.c
34 | cmph_LDADD = libcmph.la
35 | 
36 | bm_numbers_SOURCES = bm_numbers.c
37 | bm_numbers_LDADD = libcmph.la
38 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bdz.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BDZ_H__
 2 | #define __CMPH_BDZ_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __bdz_data_t bdz_data_t;
 7 | typedef struct __bdz_config_data_t bdz_config_data_t;
 8 | 
 9 | bdz_config_data_t *bdz_config_new(void);
10 | void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
11 | void bdz_config_destroy(cmph_config_t *mph);
12 | void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b);
13 | cmph_t *bdz_new(cmph_config_t *mph, double c);
14 | 
15 | void bdz_load(FILE *f, cmph_t *mphf);
16 | int bdz_dump(cmph_t *mphf, FILE *f);
17 | void bdz_destroy(cmph_t *mphf);
18 | cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
19 | 
20 | /** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf);
21 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
22 |  *  \param mphf pointer to the resulting mphf
23 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
24 |  */
25 | void bdz_pack(cmph_t *mphf, void *packed_mphf);
26 | 
27 | /** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf);
28 |  *  \brief Return the amount of space needed to pack mphf.
29 |  *  \param mphf pointer to a mphf
30 |  *  \return the size of the packed function or zero for failures
31 |  */ 
32 | cmph_uint32 bdz_packed_size(cmph_t *mphf);
33 | 
34 | /** \fn cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
35 |  *  \brief Use the packed mphf to do a search. 
36 |  *  \param  packed_mphf pointer to the packed mphf
37 |  *  \param key key to be hashed
38 |  *  \param keylen key length in bytes
39 |  *  \return The mphf value
40 |  */
41 | cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
42 | 
43 | #endif
44 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bdz_ph.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BDZ_PH_H__
 2 | #define __CMPH_BDZ_PH_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __bdz_ph_data_t bdz_ph_data_t;
 7 | typedef struct __bdz_ph_config_data_t bdz_ph_config_data_t;
 8 | 
 9 | bdz_ph_config_data_t *bdz_ph_config_new(void);
10 | void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
11 | void bdz_ph_config_destroy(cmph_config_t *mph);
12 | cmph_t *bdz_ph_new(cmph_config_t *mph, double c);
13 | 
14 | void bdz_ph_load(FILE *f, cmph_t *mphf);
15 | int bdz_ph_dump(cmph_t *mphf, FILE *f);
16 | void bdz_ph_destroy(cmph_t *mphf);
17 | cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
18 | 
19 | /** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
20 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
21 |  *  \param mphf pointer to the resulting mphf
22 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
23 |  */
24 | void bdz_ph_pack(cmph_t *mphf, void *packed_mphf);
25 | 
26 | /** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
27 |  *  \brief Return the amount of space needed to pack mphf.
28 |  *  \param mphf pointer to a mphf
29 |  *  \return the size of the packed function or zero for failures
30 |  */ 
31 | cmph_uint32 bdz_ph_packed_size(cmph_t *mphf);
32 | 
33 | /** \fn cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
34 |  *  \brief Use the packed mphf to do a search. 
35 |  *  \param  packed_mphf pointer to the packed mphf
36 |  *  \param key key to be hashed
37 |  *  \param keylen key length in bytes
38 |  *  \return The mphf value
39 |  */
40 | cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bdz_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BDZ_STRUCTS_H__
 2 | #define __CMPH_BDZ_STRUCTS_H__
 3 | 
 4 | #include "hash_state.h"
 5 | // Fields behind the bdz_data_t typedef declared in bdz.h.
 6 | struct __bdz_data_t
 7 | {
 8 | 	cmph_uint32 m; //edges (words) count
 9 | 	cmph_uint32 n; //vertex count
10 | 	cmph_uint32 r; //partition vertex count
11 | 	cmph_uint8 *g; // NOTE(review): presumably the per-vertex value array of the function - confirm in bdz.c
12 | 	hash_state_t *hl; // linear hashing
13 | 
14 | 	cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
15 | 	cmph_uint8 b; // number of bits of k
16 | 	cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$
17 | 	cmph_uint32 *ranktable; // rank table
18 | };
19 | 
20 | // Fields behind bdz_config_data_t (bdz.h): the same members as __bdz_data_t plus the selected hash function.
21 | struct __bdz_config_data_t
22 | {
23 | 	cmph_uint32 m; //edges (words) count
24 | 	cmph_uint32 n; //vertex count
25 | 	cmph_uint32 r; //partition vertex count
26 | 	cmph_uint8 *g; // NOTE(review): presumably the per-vertex value array being built - confirm in bdz.c
27 | 	hash_state_t *hl; // linear hashing
28 | 
29 | 	cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$
30 | 	cmph_uint8 b; // number of bits of k
31 | 	cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$
32 | 	cmph_uint32 *ranktable; // rank table
33 | 	CMPH_HASH hashfunc; // hash function identifier chosen for this configuration
34 | };
35 | 
36 | #endif
37 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bdz_structs_ph.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BDZ_STRUCTS_PH_H__
 2 | #define __CMPH_BDZ_STRUCTS_PH_H__
 3 | 
 4 | #include "hash_state.h"
 5 | // Fields behind the bdz_ph_data_t typedef declared in bdz_ph.h; unlike __bdz_data_t it carries no rank table.
 6 | struct __bdz_ph_data_t
 7 | {
 8 | 	cmph_uint32 m; //edges (words) count
 9 | 	cmph_uint32 n; //vertex count
10 | 	cmph_uint32 r; //partition vertex count
11 | 	cmph_uint8 *g; // NOTE(review): presumably the per-vertex value array of the function - confirm in bdz_ph.c
12 | 	hash_state_t *hl; // linear hashing
13 | };
14 | 
15 | // Fields behind bdz_ph_config_data_t (bdz_ph.h): the selected hash function followed by the same members as __bdz_ph_data_t.
16 | struct __bdz_ph_config_data_t
17 | {
18 | 	CMPH_HASH hashfunc; // hash function identifier chosen for this configuration
19 | 	cmph_uint32 m; //edges (words) count
20 | 	cmph_uint32 n; //vertex count
21 | 	cmph_uint32 r; //partition vertex count
22 | 	cmph_uint8 *g; // NOTE(review): presumably the per-vertex value array being built - confirm in bdz_ph.c
23 | 	hash_state_t *hl; // linear hashing
24 | };
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bmz.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BMZ_H__
 2 | #define __CMPH_BMZ_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __bmz_data_t bmz_data_t;
 7 | typedef struct __bmz_config_data_t bmz_config_data_t;
 8 | 
 9 | bmz_config_data_t *bmz_config_new(void);
10 | void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
11 | void bmz_config_destroy(cmph_config_t *mph);
12 | cmph_t *bmz_new(cmph_config_t *mph, double c);
13 | 
14 | void bmz_load(FILE *f, cmph_t *mphf);
15 | int bmz_dump(cmph_t *mphf, FILE *f);
16 | void bmz_destroy(cmph_t *mphf);
17 | cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
18 | 
19 | /** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf);
20 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
21 |  *  \param mphf pointer to the resulting mphf
22 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
23 |  */
24 | void bmz_pack(cmph_t *mphf, void *packed_mphf);
25 | 
26 | /** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf);
27 |  *  \brief Return the amount of space needed to pack mphf.
28 |  *  \param mphf pointer to a mphf
29 |  *  \return the size of the packed function or zero for failures
30 |  */ 
31 | cmph_uint32 bmz_packed_size(cmph_t *mphf);
32 | 
33 | /** \fn cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
34 |  *  \brief Use the packed mphf to do a search. 
35 |  *  \param  packed_mphf pointer to the packed mphf
36 |  *  \param key key to be hashed
37 |  *  \param keylen key length in bytes
38 |  *  \return The mphf value
39 |  */
40 | cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bmz8.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BMZ8_H__
 2 | #define __CMPH_BMZ8_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __bmz8_data_t bmz8_data_t;
 7 | typedef struct __bmz8_config_data_t bmz8_config_data_t;
 8 | 
 9 | bmz8_config_data_t *bmz8_config_new(void);
10 | void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
11 | void bmz8_config_destroy(cmph_config_t *mph);
12 | cmph_t *bmz8_new(cmph_config_t *mph, double c);
13 | 
14 | void bmz8_load(FILE *f, cmph_t *mphf);
15 | int bmz8_dump(cmph_t *mphf, FILE *f);
16 | void bmz8_destroy(cmph_t *mphf);
17 | cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
18 | 
19 | /** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf);
20 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
21 |  *  \param mphf pointer to the resulting mphf
22 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
23 |  */
24 | void bmz8_pack(cmph_t *mphf, void *packed_mphf);
25 | 
26 | /** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf);
27 |  *  \brief Return the amount of space needed to pack mphf.
28 |  *  \param mphf pointer to a mphf
29 |  *  \return the size of the packed function or zero for failures
30 |  */ 
31 | cmph_uint32 bmz8_packed_size(cmph_t *mphf);
32 | 
33 | /** \fn cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
34 |  *  \brief Use the packed mphf to do a search. 
35 |  *  \param  packed_mphf pointer to the packed mphf
36 |  *  \param key key to be hashed
37 |  *  \param keylen key length in bytes
38 |  *  \return The mphf value
39 |  */
40 | cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bmz8_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BMZ8_STRUCTS_H__
 2 | #define __CMPH_BMZ8_STRUCTS_H__
 3 | 
 4 | #include "hash_state.h"
 5 | // Fields behind the bmz8_data_t typedef declared in bmz8.h; m and n are stored as cmph_uint8.
 6 | struct __bmz8_data_t
 7 | {
 8 | 	cmph_uint8 m; //edges (words) count
 9 | 	cmph_uint8 n; //vertex count
10 | 	cmph_uint8 *g; // NOTE(review): presumably one value per vertex - confirm in bmz8.c
11 | 	hash_state_t **hashes; // NOTE(review): presumably the hash states used by the function - confirm in bmz8.c
12 | };
13 | 
14 | // Fields behind bmz8_config_data_t (bmz8.h). NOTE(review): graph_t is not declared by anything this header includes directly; presumably the includer pulls in graph.h first - confirm.
15 | struct __bmz8_config_data_t
16 | {
17 | 	CMPH_HASH hashfuncs[2]; // two hash function identifiers
18 | 	cmph_uint8 m; //edges (words) count
19 | 	cmph_uint8 n; //vertex count
20 | 	graph_t *graph;
21 | 	cmph_uint8 *g; // NOTE(review): presumably one value per vertex - confirm in bmz8.c
22 | 	hash_state_t **hashes; // NOTE(review): presumably one state per entry of hashfuncs - confirm in bmz8.c
23 | };
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/bmz_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BMZ_STRUCTS_H__
 2 | #define __CMPH_BMZ_STRUCTS_H__
 3 | 
 4 | #include "hash_state.h"
 5 | // Fields behind the bmz_data_t typedef declared in bmz.h.
 6 | struct __bmz_data_t
 7 | {
 8 | 	cmph_uint32 m; //edges (words) count
 9 | 	cmph_uint32 n; //vertex count
10 | 	cmph_uint32 *g; // NOTE(review): presumably one value per vertex - confirm in bmz.c
11 | 	hash_state_t **hashes; // NOTE(review): presumably the hash states used by the function - confirm in bmz.c
12 | };
13 | 
14 | // Fields behind bmz_config_data_t (bmz.h). NOTE(review): graph_t is not declared by anything this header includes directly; presumably the includer pulls in graph.h first - confirm.
15 | struct __bmz_config_data_t
16 | {
17 | 	CMPH_HASH hashfuncs[2]; // two hash function identifiers
18 | 	cmph_uint32 m; //edges (words) count
19 | 	cmph_uint32 n; //vertex count
20 | 	graph_t *graph;
21 | 	cmph_uint32 *g; // NOTE(review): presumably one value per vertex - confirm in bmz.c
22 | 	hash_state_t **hashes; // NOTE(review): presumably one state per entry of hashfuncs - confirm in bmz.c
23 | };
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/brz.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BRZ_H__
 2 | #define __CMPH_BRZ_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __brz_data_t brz_data_t;
 7 | typedef struct __brz_config_data_t brz_config_data_t;
 8 | 
 9 | brz_config_data_t *brz_config_new(void);
10 | void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
11 | void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir);
12 | void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd);
13 | void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b);
14 | void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo);
15 | void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability);
16 | void brz_config_destroy(cmph_config_t *mph);
17 | cmph_t *brz_new(cmph_config_t *mph, double c);
18 | 
19 | void brz_load(FILE *f, cmph_t *mphf);
20 | int brz_dump(cmph_t *mphf, FILE *f);
21 | void brz_destroy(cmph_t *mphf);
22 | cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
23 | 
24 | /** \fn void brz_pack(cmph_t *mphf, void *packed_mphf);
25 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
26 |  *  \param mphf pointer to the resulting mphf
27 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
28 |  */
29 | void brz_pack(cmph_t *mphf, void *packed_mphf);
30 | 
31 | /** \fn cmph_uint32 brz_packed_size(cmph_t *mphf);
32 |  *  \brief Return the amount of space needed to pack mphf.
33 |  *  \param mphf pointer to a mphf
34 |  *  \return the size of the packed function or zero for failures
35 |  */ 
36 | cmph_uint32 brz_packed_size(cmph_t *mphf);
37 | 
38 | /** \fn cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
39 |  *  \brief Use the packed mphf to do a search.
40 |  *  \param packed_mphf pointer to the packed mphf
41 |  *  \param key key to be hashed
42 |  *  \param keylen key length in bytes
43 |  *  \return The mphf value
44 |  */
45 | cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
46 | 
47 | #endif
48 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/brz_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BRZ_STRUCTS_H__
 2 | #define __CMPH_BRZ_STRUCTS_H__
 3 | 
 4 | #include "hash_state.h"
 5 | 
 6 | struct __brz_data_t // BRZ function data: one small per-bucket function (FCH or BMZ8) for each of k components
 7 | {
 8 | 	CMPH_ALGO algo;      // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8)
 9 | 	cmph_uint32 m;       // edges (words) count
10 | 	double c;      // constant c
11 | 	cmph_uint8  *size;   // size[i] stores the number of edges represented by g[i][...].
12 | 	cmph_uint32 *offset; // offset[i] stores the sum: size[0] + size[1] + ... size[i-1].
13 | 	cmph_uint8 **g;      // g function: g[i] is the 8-bit table of component i.
14 | 	cmph_uint32 k;       // number of components
15 | 	hash_state_t **h1;   // presumably one first-level hash state per component (k entries); TODO confirm in brz.c
16 | 	hash_state_t **h2;   // presumably one second-level hash state per component (k entries); TODO confirm in brz.c
17 | 	hash_state_t * h0;   // single hash state -- presumably the one that assigns a key to its component; TODO confirm
18 | };
19 | 
20 | struct __brz_config_data_t // BRZ configuration/build state; superset of __brz_data_t plus build-only settings
21 | {
22 | 	CMPH_HASH hashfuncs[3]; // three hash function kinds -- presumably for h0, h1 and h2; TODO confirm order in brz.c
23 | 	CMPH_ALGO algo;      // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8)
24 | 	double c;      // constant c
25 | 	cmph_uint32 m;       // edges (words) count
26 | 	cmph_uint8  *size;   // size[i] stores the number of edges represented by g[i][...].
27 | 	cmph_uint32 *offset; // offset[i] stores the sum: size[0] + size[1] + ... size[i-1].
28 | 	cmph_uint8 **g;      // g function: g[i] is the 8-bit table of component i.
29 | 	cmph_uint8  b;       // parameter b. NOTE(review): brz_config_set_b() takes a cmph_uint32 but this field is 8-bit
30 | 	cmph_uint32 k;       // number of components
31 | 	hash_state_t **h1;   // presumably one first-level hash state per component; TODO confirm in brz.c
32 | 	hash_state_t **h2;   // presumably one second-level hash state per component; TODO confirm in brz.c
33 | 	hash_state_t * h0;   // single hash state -- presumably the component selector; TODO confirm
34 | 	cmph_uint32 memory_availability; // value given to brz_config_set_memory_availability(); unit not visible here
35 | 	cmph_uint8 * tmp_dir; // temporary directory
36 | 	FILE * mphf_fd; // mphf file
37 | };
38 | 
39 | #endif
40 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/buffer_entry.c:
--------------------------------------------------------------------------------
  1 | #include "buffer_entry.h"
  2 | #include <stdio.h>
  3 | #include <assert.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | 
  7 | struct __buffer_entry_t // chunked reader over one key file (see buffer_entry_read_key)
  8 | {
  9 | 	FILE *fd; // input stream: NULL after buffer_entry_new(), set by buffer_entry_open(), closed by buffer_entry_destroy()
 10 | 	cmph_uint8 * buff; // current chunk: freed and re-allocated with `capacity` bytes on every buffer_entry_load()
 11 | 	cmph_uint32 capacity, // buffer entry capacity
 12 | 		    nbytes,   // buffer entry used bytes
 13 | 		    pos;      // current read position in buffer entry
 14 | 	cmph_uint8  eof;      // flag to indicate end of file (set when fread returns fewer than `capacity` bytes)
 15 | };
 16 | 
 17 | buffer_entry_t * buffer_entry_new(cmph_uint32 capacity)
 18 | {
 19 | 	buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t));
 20 |         if (!buff_entry) return NULL;
 21 | 	buff_entry->fd = NULL;
 22 | 	buff_entry->buff = NULL;
 23 | 	buff_entry->capacity = capacity;
 24 | 	buff_entry->nbytes = capacity;
 25 | 	buff_entry->pos = capacity;
 26 |         buff_entry->eof = 0;
 27 | 	return buff_entry;
 28 | }
 29 | 
 30 | void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename)
 31 | {
 32 | 	buffer_entry->fd = fopen(filename, "rb");
 33 | }
 34 | 
 35 | void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity)
 36 | {
 37 | 	buffer_entry->capacity = capacity;
 38 | }
 39 | 
 40 | 
 41 | cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry)
 42 | {
 43 | 	return buffer_entry->capacity;
 44 | }
 45 | 
 46 | static void buffer_entry_load(buffer_entry_t * buffer_entry)
 47 | {
 48 | 	free(buffer_entry->buff);
 49 | 	buffer_entry->buff = (cmph_uint8 *)calloc((size_t)buffer_entry->capacity, sizeof(cmph_uint8));
 50 | 	buffer_entry->nbytes = (cmph_uint32)fread(buffer_entry->buff, (size_t)1, (size_t)buffer_entry->capacity, buffer_entry->fd);
 51 | 	if (buffer_entry->nbytes != buffer_entry->capacity) buffer_entry->eof = 1;
 52 | 	buffer_entry->pos = 0;
 53 | }
 54 | 
 55 | cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen)
 56 | {
 57 | 	cmph_uint8 * buf = NULL;
 58 | 	cmph_uint32 lacked_bytes = sizeof(*keylen);
 59 | 	cmph_uint32 copied_bytes = 0;
 60 | 	if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end
 61 | 	{
 62 | 		free(buf);
 63 | 		return NULL;
 64 | 	}
 65 | 	if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes)
 66 | 	{
 67 | 		copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
 68 | 		lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
 69 | 		if (copied_bytes != 0) memcpy(keylen, buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes);
 70 | 		buffer_entry_load(buffer_entry);
 71 | 	}
 72 | 	memcpy(keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
 73 | 	buffer_entry->pos += lacked_bytes;
 74 | 
 75 | 	lacked_bytes = *keylen;
 76 | 	copied_bytes = 0;
 77 | 	buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen));
 78 |         memcpy(buf, keylen, sizeof(*keylen));
 79 | 	if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) {
 80 | 		copied_bytes = buffer_entry->nbytes - buffer_entry->pos;
 81 | 		lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes;
 82 | 		if (copied_bytes != 0) {
 83 | 			memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes);
 84 |                 }
 85 | 		buffer_entry_load(buffer_entry);
 86 | 	}
 87 | 	memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes);
 88 | 	buffer_entry->pos += lacked_bytes;
 89 | 	return buf;
 90 | }
 91 | 
 92 | void buffer_entry_destroy(buffer_entry_t * buffer_entry)
 93 | {
 94 |   fclose(buffer_entry->fd);
 95 |   buffer_entry->fd = NULL;
 96 |   free(buffer_entry->buff);
 97 |   buffer_entry->buff = NULL;
 98 |   buffer_entry->capacity = 0;
 99 |   buffer_entry->nbytes = 0;
100 |   buffer_entry->pos = 0;
101 |   buffer_entry->eof = 0;
102 |   free(buffer_entry);
103 | }
104 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/buffer_entry.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BUFFER_ENTRY_H__
 2 | #define __CMPH_BUFFER_ENTRY_H__
 3 | 
 4 | #include "cmph_types.h"
 5 | #include <stdio.h>
 6 | typedef struct __buffer_entry_t buffer_entry_t;
 7 | 
 8 | buffer_entry_t * buffer_entry_new(cmph_uint32 capacity);
 9 | void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity);
10 | cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry);
11 | void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename);
12 | cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen);
13 | void buffer_entry_destroy(buffer_entry_t * buffer_entry);
14 | #endif
15 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/buffer_manager.c:
--------------------------------------------------------------------------------
 1 | #include "buffer_manager.h"
 2 | #include "buffer_entry.h"
 3 | #include <stdio.h>
 4 | #include <assert.h>
 5 | #include <stdlib.h>
 6 | struct __buffer_manager_t // owns nentries buffer entries and hands the capacity of exhausted ones to later readers
 7 | {
 8 | 	cmph_uint32 memory_avail;         // memory available
 9 | 	buffer_entry_t ** buffer_entries; // buffer entries to be managed
10 | 	cmph_uint32 nentries;             // number of entries to be managed
11 | 	cmph_uint32 *memory_avail_list;   // memory available list: stack of capacities released by exhausted entries
12 | 	int pos_avail_list;               // current position in memory available list (top of stack; -1 when empty)
13 | };
14 | 
15 | buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries)
16 | {
17 | 	cmph_uint32 memory_avail_entry, i;
18 | 	buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t));
19 |         if (!buff_manager) return NULL;
20 | 	buff_manager->memory_avail = memory_avail;
21 | 	buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *));
22 | 	buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32));
23 | 	buff_manager->pos_avail_list = -1;
24 | 	buff_manager->nentries = nentries;
25 | 	memory_avail_entry = buff_manager->memory_avail/buff_manager->nentries + 1;
26 | 	for(i = 0; i < buff_manager->nentries; i++)
27 | 	{
28 | 		buff_manager->buffer_entries[i] = buffer_entry_new(memory_avail_entry);
29 | 	}
30 | 	return buff_manager;
31 | }
32 | 
33 | void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename)
34 | {
35 | 	buffer_entry_open(buffer_manager->buffer_entries[index], filename);
36 | }
37 | 
38 | cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen)
39 | {
40 | 	cmph_uint8 * key = NULL;
41 | 	if (buffer_manager->pos_avail_list >= 0 ) // recovering memory
42 | 	{
43 | 		cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]) + buffer_manager->memory_avail_list[(buffer_manager->pos_avail_list)--];
44 | 		buffer_entry_set_capacity(buffer_manager->buffer_entries[index], new_capacity);
45 | 	}
46 | 	key = buffer_entry_read_key(buffer_manager->buffer_entries[index], keylen);
47 | 	if (key == NULL) // storing memory to be recovered
48 | 	{
49 | 		buffer_manager->memory_avail_list[++(buffer_manager->pos_avail_list)] = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]);
50 | 	}
51 | 	return key;
52 | }
53 | 
54 | void buffer_manager_destroy(buffer_manager_t * buffer_manager)
55 | {
56 | 	cmph_uint32 i;
57 | 	for(i = 0; i < buffer_manager->nentries; i++)
58 | 	{
59 | 		buffer_entry_destroy(buffer_manager->buffer_entries[i]);
60 | 	}
61 | 	free(buffer_manager->memory_avail_list);
62 | 	free(buffer_manager->buffer_entries);
63 | 	free(buffer_manager);
64 | }
65 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/buffer_manager.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BUFFER_MANAGE_H__
 2 | #define __CMPH_BUFFER_MANAGE_H__
 3 | 
 4 | #include "cmph_types.h"
 5 | #include <stdio.h>
 6 | typedef struct __buffer_manager_t buffer_manager_t;
 7 | 
 8 | buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries);
 9 | void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename);
10 | cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen);
11 | void buffer_manager_destroy(buffer_manager_t * buffer_manager);
12 | #endif
13 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/chd.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CMPH_CHD_H__
 2 | #define _CMPH_CHD_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __chd_data_t chd_data_t;
 7 | typedef struct __chd_config_data_t chd_config_data_t;
 8 | 
 9 | /* Config API */
10 | chd_config_data_t *chd_config_new(cmph_config_t * mph);
11 | void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
12 | 
13 | /** \fn void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
14 |  *  \brief Allows to set the number of keys per bin.
15 |  *  \param mph pointer to the configuration structure
16 |  *  \param keys_per_bin value for the number of keys per bin 
17 |  */
18 | void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
19 | 
20 | /** \fn void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
21 |  *  \brief Allows to set the number of keys per bucket.
22 |  *  \param mph pointer to the configuration structure
23 |  *  \param keys_per_bucket value for the number of keys per bucket 
24 |  */
25 | void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
26 | void chd_config_destroy(cmph_config_t *mph);
27 | 
28 | 
29 | /* Chd algorithm API */
30 | cmph_t *chd_new(cmph_config_t *mph, double c);
31 | void chd_load(FILE *fd, cmph_t *mphf);
32 | int chd_dump(cmph_t *mphf, FILE *fd);
33 | void chd_destroy(cmph_t *mphf);
34 | cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
35 | 
36 | /** \fn void chd_pack(cmph_t *mphf, void *packed_mphf);
37 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
38 |  *  \param mphf pointer to the resulting mphf
39 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
40 |  */
41 | void chd_pack(cmph_t *mphf, void *packed_mphf);
42 | 
43 | /** \fn cmph_uint32 chd_packed_size(cmph_t *mphf);
44 |  *  \brief Return the amount of space needed to pack mphf.
45 |  *  \param mphf pointer to a mphf
46 |  *  \return the size of the packed function or zero for failures
47 |  */ 
48 | cmph_uint32 chd_packed_size(cmph_t *mphf);
49 | 
50 | /** \fn cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
51 |  *  \brief Use the packed mphf to do a search.
52 |  *  \param packed_mphf pointer to the packed mphf
53 |  *  \param key key to be hashed
54 |  *  \param keylen key length in bytes
55 |  *  \return The mphf value
56 |  */
57 | cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/chd_ph.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CMPH_CHD_PH_H__
 2 | #define _CMPH_CHD_PH_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __chd_ph_data_t chd_ph_data_t;
 7 | typedef struct __chd_ph_config_data_t chd_ph_config_data_t;
 8 | 
 9 | /* Config API */
10 | chd_ph_config_data_t *chd_ph_config_new(void);
11 | void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
12 | 
13 | /** \fn void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
14 |  *  \brief Allows to set the number of keys per bin.
15 |  *  \param mph pointer to the configuration structure
16 |  *  \param keys_per_bin value for the number of keys per bin 
17 |  */
18 | void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin);
19 | 
20 | /** \fn void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
21 |  *  \brief Allows to set the number of keys per bucket.
22 |  *  \param mph pointer to the configuration structure
23 |  *  \param keys_per_bucket value for the number of keys per bucket 
24 |  */
25 | void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket);
26 | void chd_ph_config_destroy(cmph_config_t *mph);
27 | 
28 | 
29 | /* Chd algorithm API */
30 | cmph_t *chd_ph_new(cmph_config_t *mph, double c);
31 | void chd_ph_load(FILE *fd, cmph_t *mphf);
32 | int chd_ph_dump(cmph_t *mphf, FILE *fd);
33 | void chd_ph_destroy(cmph_t *mphf);
34 | cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
35 | 
36 | /** \fn void chd_ph_pack(cmph_t *mphf, void *packed_mphf);
37 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
38 |  *  \param mphf pointer to the resulting mphf
39 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
40 |  */
41 | void chd_ph_pack(cmph_t *mphf, void *packed_mphf);
42 | 
43 | /** \fn cmph_uint32 chd_ph_packed_size(cmph_t *mphf);
44 |  *  \brief Return the amount of space needed to pack mphf.
45 |  *  \param mphf pointer to a mphf
46 |  *  \return the size of the packed function or zero for failures
47 |  */ 
48 | cmph_uint32 chd_ph_packed_size(cmph_t *mphf);
49 | 
50 | /** \fn cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
51 |  *  \brief Use the packed mphf to do a search.
52 |  *  \param packed_mphf pointer to the packed mphf
53 |  *  \param key key to be hashed
54 |  *  \param keylen key length in bytes
55 |  *  \return The mphf value
56 |  */
57 | cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
58 | 
59 | #endif
60 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/chd_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_CHD_STRUCTS_H__
 2 | #define __CMPH_CHD_STRUCTS_H__
 3 | 
 4 | #include "chd_structs_ph.h"
 5 | #include "chd_ph.h"
 6 | #include "compressed_rank.h"
 7 | 
 8 | struct __chd_data_t // CHD function data: two packed byte blobs, each stored with its size
 9 | {
10 | 	cmph_uint32 packed_cr_size; // size of packed_cr -- presumably in bytes; TODO confirm in chd.c
11 | 	cmph_uint8 * packed_cr; // packed compressed rank structure to control the number of zeros in a bit vector
12 | 	
13 | 	cmph_uint32 packed_chd_phf_size; // size of packed_chd_phf -- presumably in bytes; TODO confirm in chd.c
14 | 	cmph_uint8 * packed_chd_phf; // packed form of the underlying chd_ph function (going by the name; TODO confirm)
15 | };
16 | 
17 | struct __chd_config_data_t // CHD configuration: a thin wrapper around a chd_ph configuration
18 | {	
19 | 	cmph_config_t *chd_ph;     // chd_ph algorithm must be used here
20 | };
21 | #endif
22 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/chd_structs_ph.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_CHD_PH_STRUCTS_H__
 2 | #define __CMPH_CHD_PH_STRUCTS_H__
 3 | 
 4 | #include "hash_state.h"
 5 | #include "compressed_seq.h"
 6 | 
 7 | struct __chd_ph_data_t // CHD_PH function data (the first four fields of __chd_ph_config_data_t after hashfunc)
 8 | {
 9 | 	compressed_seq_t * cs;	// compressed displacement values
10 | 	cmph_uint32 nbuckets;	// number of buckets
11 | 	cmph_uint32 n;		// number of bins
12 | 	hash_state_t *hl;	// linear hash function
13 | };
14 | 
15 | struct __chd_ph_config_data_t // CHD_PH configuration/build state
16 | {
17 | 	CMPH_HASH hashfunc;	// linear hash function to be used
18 | 	compressed_seq_t * cs;	// compressed displacement values
19 | 	cmph_uint32 nbuckets;	// number of buckets
20 | 	cmph_uint32 n;		// number of bins
21 | 	hash_state_t *hl;	// linear hash function
22 | 	
23 | 	cmph_uint32 m;		// number of keys
24 | 	cmph_uint8 use_h;	// flag to indicate the use of a heuristic (use_h = 1)
25 | 	cmph_uint32 keys_per_bin; // maximum number of keys per bin
26 | 	cmph_uint32 keys_per_bucket; // average number of keys per bucket
27 | 	cmph_uint8 *occup_table;     // table that indicates occupied positions
28 | };
29 | #endif
30 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/chm.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_CHM_H__
 2 | #define __CMPH_CHM_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __chm_data_t chm_data_t;
 7 | typedef struct __chm_config_data_t chm_config_data_t;
 8 | 
 9 | chm_config_data_t *chm_config_new(void);
10 | void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
11 | void chm_config_destroy(cmph_config_t *mph);
12 | cmph_t *chm_new(cmph_config_t *mph, double c);
13 | 
14 | void chm_load(FILE *f, cmph_t *mphf);
15 | int chm_dump(cmph_t *mphf, FILE *f);
16 | void chm_destroy(cmph_t *mphf);
17 | cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
18 | 
19 | /** \fn void chm_pack(cmph_t *mphf, void *packed_mphf);
20 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
21 |  *  \param mphf pointer to the resulting mphf
22 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
23 |  */
24 | void chm_pack(cmph_t *mphf, void *packed_mphf);
25 | 
26 | /** \fn cmph_uint32 chm_packed_size(cmph_t *mphf);
27 |  *  \brief Return the amount of space needed to pack mphf.
28 |  *  \param mphf pointer to a mphf
29 |  *  \return the size of the packed function or zero for failures
30 |  */ 
31 | cmph_uint32 chm_packed_size(cmph_t *mphf);
32 | 
33 | /** \fn cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
34 |  *  \brief Use the packed mphf to do a search.
35 |  *  \param packed_mphf pointer to the packed mphf
36 |  *  \param key key to be hashed
37 |  *  \param keylen key length in bytes
38 |  *  \return The mphf value
39 |  */
40 | cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/chm_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_CHM_STRUCTS_H__
 2 | #define __CMPH_CHM_STRUCTS_H__
 3 | 
 4 | #include "hash_state.h"
 5 | 
 6 | struct __chm_data_t // CHM function data; field-for-field the same layout as __bmz_data_t
 7 | {
 8 | 	cmph_uint32 m; // edges (words) count
 9 | 	cmph_uint32 n; // vertex count
10 | 	cmph_uint32 *g; // g table of 32-bit values -- presumably one entry per vertex (n); TODO confirm against chm.c
11 | 	hash_state_t **hashes; // array of hash states -- presumably two, mirroring hashfuncs[2] in the config struct; TODO confirm
12 | };
13 | 
14 | struct __chm_config_data_t // CHM configuration/build state; shares m, n, g and hashes with __chm_data_t
15 | {
16 | 	CMPH_HASH hashfuncs[2]; // the two hash function kinds selected for this algorithm
17 | 	cmph_uint32 m; // edges (words) count
18 | 	cmph_uint32 n; // vertex count
19 | 	graph_t *graph; // NOTE(review): graph_t is not declared by any header included here; includers must provide it first
20 | 	cmph_uint32 *g; // g table of 32-bit values -- presumably one entry per vertex (n); TODO confirm against chm.c
21 | 	hash_state_t **hashes; // array of hash states -- presumably one per hashfuncs[] slot; TODO confirm
22 | };
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/cmph_benchmark.c:
--------------------------------------------------------------------------------
  1 | // A simple benchmark tool around getrusage
  2 | 
  3 | #include <assert.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | #include <sys/resource.h>
  8 | 
  9 | #include "cmph_benchmark.h"
 10 | 
 11 | typedef struct { // one registered benchmark plus its last measurement
 12 |   const char* name; // lookup key; a NULL name marks the end of the global_benchmarks array
 13 |   void (*func)(int); // benchmark body, called with `iters`
 14 |   int iters; // argument passed to func
 15 |   struct rusage begin; // getrusage() snapshot taken by bm_start
 16 |   struct rusage end; // getrusage() snapshot taken by bm_end
 17 | } benchmark_t;
 18 | 
 19 | static benchmark_t* global_benchmarks = NULL; // realloc-grown array terminated by a zeroed entry; NULL until the first bm_register
 20 | 
 21 | /* Subtract the `struct timeval' values X and Y,
 22 |    storing the result in RESULT.
 23 |    Return 1 if the difference is negative, otherwise 0.  */
 24 | 
 25 | int timeval_subtract ( 
 26 |     struct timeval *result, struct timeval *x, struct timeval* y) {
 27 |   /* Perform the carry for the later subtraction by updating y. */
 28 |   if (x->tv_usec < y->tv_usec) {
 29 |     int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1;
 30 |     y->tv_usec -= 1000000 * nsec;
 31 |     y->tv_sec += nsec;
 32 |   }
 33 |   if (x->tv_usec - y->tv_usec > 1000000) {
 34 |     int nsec = (x->tv_usec - y->tv_usec) / 1000000;
 35 |     y->tv_usec += 1000000 * nsec;
 36 |     y->tv_sec -= nsec;
 37 |   }
 38 | 
 39 |   /* Compute the time remaining to wait.
 40 |      tv_usec is certainly positive. */
 41 |   result->tv_sec = x->tv_sec - y->tv_sec;
 42 |   result->tv_usec = x->tv_usec - y->tv_usec;
 43 | 
 44 |   /* Return 1 if result is negative. */
 45 |   return x->tv_sec < y->tv_sec;
 46 | }
 47 | 
 48 | benchmark_t* find_benchmark(const char* name) {
 49 |   benchmark_t* benchmark = global_benchmarks;
 50 |   while (benchmark && benchmark->name != NULL) {
 51 |     if (strcmp(benchmark->name, name) == 0) break;
 52 |     ++benchmark;
 53 |   }
 54 |   if (!benchmark || !benchmark->name) return NULL;
 55 |   return benchmark;
 56 | }
 57 | 
 58 | int global_benchmarks_length() {
 59 |   benchmark_t* benchmark = global_benchmarks;
 60 |   int length = 0;
 61 |   if (benchmark == NULL) return 0;
 62 |   while (benchmark->name != NULL) ++length, ++benchmark;
 63 |   return length;
 64 | }
 65 | 
 66 | void bm_register(const char* name, void (*func)(int), int iters) {
 67 |   benchmark_t benchmark;
 68 |   int length = global_benchmarks_length();
 69 |   benchmark.name = name;
 70 |   benchmark.func = func;
 71 |   benchmark.iters = iters;
 72 |   assert(!find_benchmark(name));
 73 |   global_benchmarks = (benchmark_t *)realloc(
 74 |       global_benchmarks, (length + 2)*sizeof(benchmark_t));
 75 |   global_benchmarks[length] = benchmark;
 76 |   memset(&benchmark, 0, sizeof(benchmark_t));  // pivot
 77 |   global_benchmarks[length + 1] = benchmark;
 78 | }
 79 | 
 80 | void bm_start(const char* name) {
 81 |   benchmark_t* benchmark;
 82 |   struct rusage rs;
 83 | 
 84 |   benchmark = find_benchmark(name);
 85 |   assert(benchmark);
 86 |   int ret = getrusage(RUSAGE_SELF, &rs);  
 87 |   if (ret != 0) {
 88 |     perror("rusage failed");    
 89 |     exit(-1);
 90 |   }
 91 |   benchmark->begin = rs;
 92 |   (*benchmark->func)(benchmark->iters);
 93 | }
 94 | 
 95 | void bm_end(const char* name) { 
 96 |   benchmark_t* benchmark;
 97 |   struct rusage rs;
 98 | 
 99 |   int ret = getrusage(RUSAGE_SELF, &rs);  
100 |   if (ret != 0) {
101 |     perror("rusage failed");    
102 |     exit(-1);
103 |   }
104 | 
105 |   benchmark = find_benchmark(name);
106 |   benchmark->end = rs;
107 | 
108 |   struct timeval utime;
109 |   timeval_subtract(&utime, &benchmark->end.ru_utime, &benchmark->begin.ru_utime);
110 |   struct timeval stime;
111 |   timeval_subtract(&stime, &benchmark->end.ru_stime, &benchmark->begin.ru_stime);
112 |   
113 |   printf("Benchmark: %s\n", benchmark->name);
114 |   printf("User time used  : %ld.%06ld\n",
115 |          utime.tv_sec, (long int)utime.tv_usec);
116 |   printf("System time used: %ld.%06ld\n",
117 |          stime.tv_sec, (long int)stime.tv_usec);
118 |   printf("\n");
119 | }
120 |  
121 | void run_benchmarks(int argc, char** argv) {
122 |   benchmark_t* benchmark = global_benchmarks;
123 |   while (benchmark && benchmark->name != NULL) {
124 |     bm_start(benchmark->name);
125 |     bm_end(benchmark->name);
126 |     ++benchmark;
127 |   }
128 | }
129 | 
130 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/cmph_benchmark.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_BENCHMARK_H__
 2 | #define __CMPH_BENCHMARK_H__
 3 | 
 4 | #include <sys/time.h>
 5 | #include <sys/resource.h>
 6 | 
 7 | #ifdef __cplusplus
 8 | extern "C"
 9 | {
10 | #endif
11 |   
12 | #define BM_REGISTER(func, iters) bm_register(#func, func, iters)
13 | void bm_register(const char* name, void (*func)(int), int iters);
14 | void run_benchmarks(int argc, char** argv);
15 | 
16 | #ifdef __cplusplus
17 | }
18 | #endif
19 | 
20 | #endif  // __CMPH_BENCHMARK_H__
21 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/cmph_structs.c:
--------------------------------------------------------------------------------
 1 | #include "cmph_structs.h"
 2 | 
 3 | #include <string.h>
 4 | 
 5 | //#define DEBUG
 6 | #include "debug.h"
 7 | 
 8 | cmph_config_t *__config_new(cmph_io_adapter_t *key_source)
 9 | {
10 | 	cmph_config_t *mph = (cmph_config_t *)malloc(sizeof(cmph_config_t));
11 | 	if (mph == NULL) return NULL;
12 | 	memset(mph, 0, sizeof(cmph_config_t));
13 | 	mph->key_source = key_source;
14 | 	mph->verbosity = 0;
15 | 	mph->data = NULL;
16 | 	mph->c = 0;
17 | 	return mph;
18 | }
19 | 
20 | void __config_destroy(cmph_config_t *mph)
21 | {
22 | 	free(mph);
23 | }
24 | 
25 | void __cmph_dump(cmph_t *mphf, FILE *fd)
26 | {
27 | 	register size_t nbytes;
28 | 	nbytes = fwrite(cmph_names[mphf->algo], (size_t)(strlen(cmph_names[mphf->algo]) + 1), (size_t)1, fd);
29 | 	nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd);
30 | }
31 | cmph_t *__cmph_load(FILE *f)
32 | {
33 | 	cmph_t *mphf = NULL;
34 | 	cmph_uint32 i;
35 | 	char algo_name[BUFSIZ];
36 | 	char *ptr = algo_name;
37 | 	CMPH_ALGO algo = CMPH_COUNT;
38 | 	register size_t nbytes;
39 | 
40 | 	DEBUGP("Loading mphf\n");
41 | 	while(1)
42 | 	{
43 | 		size_t c = fread(ptr, (size_t)1, (size_t)1, f);
44 | 		if (c != 1) return NULL;
45 | 		if (*ptr == 0) break;
46 | 		++ptr;
47 | 	}
48 | 	for(i = 0; i < CMPH_COUNT; ++i)
49 | 	{
50 | 		if (strcmp(algo_name, cmph_names[i]) == 0)
51 | 		{
52 | 			algo = (CMPH_ALGO)(i);
53 | 		}
54 | 	}
55 | 	if (algo == CMPH_COUNT)
56 | 	{
57 | 		DEBUGP("Algorithm %s not found\n", algo_name);
58 | 		return NULL;
59 | 	}
60 | 	mphf = (cmph_t *)malloc(sizeof(cmph_t));
61 | 	mphf->algo = algo;
62 | 	nbytes = fread(&(mphf->size), sizeof(mphf->size), (size_t)1, f);
63 | 	mphf->data = NULL;
64 | 	DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo],  mphf->size);
65 | 
66 | 	return mphf;
67 | }
68 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/cmph_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_STRUCTS_H__
 2 | #define __CMPH_STRUCTS_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | /** Hash generation algorithm data (allocated zero-filled by __config_new)
 7 |   */
 8 | struct __config_t
 9 | {
10 |         CMPH_ALGO algo; // selected algorithm
11 |         cmph_io_adapter_t *key_source; // key source handed to __config_new; not freed by __config_destroy
12 |         cmph_uint32 verbosity; // 0 after __config_new
13 |         double c; // constant c passed to the algorithms' *_new(mph, c) functions (presumably; TODO confirm in cmph.c)
14 |         void *data; // algorithm dependent data
15 | };
16 | 
17 | /** Hash querying algorithm data
18 |   */
19 | struct __cmph_t
20 | {
21 |         CMPH_ALGO algo; // algorithm; dumped and loaded by name through cmph_names[]
22 |         cmph_uint32 size; // written verbatim by __cmph_dump and read back by __cmph_load
23 |         cmph_io_adapter_t *key_source; // not part of the dumped header
24 |         void *data; // algorithm dependent data
25 | };
26 | 
27 | cmph_config_t *__config_new(cmph_io_adapter_t *key_source);
28 | void __config_destroy(cmph_config_t*);
29 | void __cmph_dump(cmph_t *mphf, FILE *);
30 | cmph_t *__cmph_load(FILE *f);
31 | 
32 | 
33 | #endif
34 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/cmph_time.h:
--------------------------------------------------------------------------------
 1 | #ifdef ELAPSED_TIME_IN_SECONDS
 2 | #undef ELAPSED_TIME_IN_SECONDS
 3 | #endif
 4 | 
 5 | #ifdef ELAPSED_TIME_IN_uSECONDS
 6 | #undef ELAPSED_TIME_IN_uSECONDS
 7 | #endif
 8 | 
 9 | #ifdef WIN32
10 | // include headers to use gettimeofday
11 | #else
12 | 	#ifdef __GNUC__
13 | 	#include <sys/time.h>
14 | 	#include <sys/resource.h>
15 | 	#endif
16 | #endif
17 | 
18 | #ifdef __GNUC__
19 | 	#ifndef __CMPH_TIME_H__
20 | 		#define __CMPH_TIME_H__
21 | 		static inline void elapsed_time_in_seconds(double * elapsed_time)
22 | 		{
23 | 			// Writes the gettimeofday() reading as fractional seconds; on failure *elapsed_time is left as it was.
24 | 			struct timeval now;
25 | 			if (gettimeofday(&now, NULL) >= 0) {
26 | 				*elapsed_time = (double)now.tv_sec + (double)now.tv_usec / 1000000.0;
27 | 			}
28 | 		}
29 | 		static inline void dummy_elapsed_time_in_seconds() // no-op stand-in named by ELAPSED_TIME_IN_SECONDS in the non-__GNUC__ branches below. NOTE(review): it is defined only inside #ifdef __GNUC__, so those branches would not see it -- confirm
30 | 		{
31 | 		}
32 | 		static inline void elapsed_time_in_useconds(cmph_uint64 * elapsed_time)
33 | 		{
34 | 			// Writes the gettimeofday() reading in microseconds; on failure *elapsed_time is left as it was.
35 | 			struct timeval e_time;
36 | 			if (gettimeofday(&e_time, NULL) < 0) return;
37 | 			// Widen before multiplying: tv_sec*1000000 overflows where tv_sec is only 32 bits wide.
38 | 			*elapsed_time = (cmph_uint64)e_time.tv_sec * 1000000 + (cmph_uint64)e_time.tv_usec;
39 | 		}
40 | 		static inline void dummy_elapsed_time_in_useconds() // no-op stand-in named by ELAPSED_TIME_IN_uSECONDS in the non-__GNUC__ branches below. NOTE(review): it is defined only inside #ifdef __GNUC__, so those branches would not see it -- confirm
41 | 		{
42 | 		}	
43 | 	#endif
44 | #endif
45 | 
46 | #ifdef CMPH_TIMING
47 | 	  #ifdef __GNUC__
48 | 		  #define ELAPSED_TIME_IN_SECONDS elapsed_time_in_seconds
49 | 		  #define ELAPSED_TIME_IN_uSECONDS elapsed_time_in_useconds
50 | 	  #else
51 | 		  #define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
52 | 		  #define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
53 | 	  #endif
54 | #else
55 | 	  #ifdef __GNUC__
56 | 		  #define ELAPSED_TIME_IN_SECONDS
57 | 		  #define ELAPSED_TIME_IN_uSECONDS
58 | 	  #else
59 | 		  #define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds
60 | 		  #define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds
61 | 	  #endif
62 | #endif
63 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/cmph_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_TYPES_H__
 2 | #define __CMPH_TYPES_H__
 3 | 
 4 | typedef char cmph_int8;
 5 | typedef unsigned char cmph_uint8;
 6 | 
 7 | typedef short cmph_int16;
 8 | typedef unsigned short cmph_uint16;
 9 | 
10 | typedef int cmph_int32;
11 | typedef unsigned int cmph_uint32;
12 | 
13 | #if defined(__ia64) || defined(__x86_64__)
14 |   /** \typedef long cmph_int64;
15 |    *  \brief 64-bit integer for a 64-bit architecture.
16 |    */
17 |   typedef long cmph_int64;
18 | 
19 |   /** \typedef unsigned long cmph_uint64;
20 |    *  \brief Unsigned 64-bit integer for a 64-bit architecture.
21 |    */
22 |   typedef unsigned long cmph_uint64;
23 | #else
24 |   /** \typedef long long cmph_int64;
25 |    *  \brief 64-bit integer for a 32-bit architecture.
26 |    */
27 |   typedef long long cmph_int64;
28 | 
29 |   /** \typedef unsigned long long cmph_uint64;
30 |    *  \brief Unsigned 64-bit integer for a 32-bit architecture.
31 |    */
32 |   typedef unsigned long long cmph_uint64;
33 | #endif
34 | 
35 | typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH;
36 | extern const char *cmph_hash_names[];
37 | typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH,
38 |                CMPH_BDZ, CMPH_BDZ_PH,
39 |                CMPH_CHD_PH, CMPH_CHD, CMPH_COUNT } CMPH_ALGO;
40 | extern const char *cmph_names[];
41 | 
42 | #endif
43 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/compressed_rank.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_COMPRESSED_RANK_H__
 2 | #define __CMPH_COMPRESSED_RANK_H__
 3 | 
 4 | #include "select.h"
 5 | 
 6 | struct _compressed_rank_t
 7 | {
 8 | 	cmph_uint32 max_val;
 9 | 	cmph_uint32 n; // number of values stored in vals_rems
10 | 	// The length in bits of each value is decomposed into two components: the lg(n) MSBs are stored in a rank_select data structure,
11 | 	// the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
12 | 	cmph_uint32 rem_r; // width in bits of each cell of the LSB table described above
13 | 	select_t sel; // NOTE(review): presumably the rank_select structure holding the MSB part -- confirm in compressed_rank.c
14 | 	cmph_uint32 * vals_rems; // NOTE(review): presumably the table of n cells holding the LSB remainders -- confirm
15 | };
16 | 
17 | typedef struct _compressed_rank_t compressed_rank_t;
18 | 
19 | void compressed_rank_init(compressed_rank_t * cr);
20 | 
21 | void compressed_rank_destroy(compressed_rank_t * cr);
22 |  
23 | void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n);
24 | 
25 | cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx);
26 | 
27 | cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr);
28 | 
29 | void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen);
30 | 
31 | void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen);
32 | 
33 | 
34 | /** \fn void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
35 |  *  \brief Support the ability to pack a compressed_rank structure into a preallocated contiguous memory space pointed by cr_packed.
36 |  *  \param cr points to the compressed_rank structure
37 |  *  \param cr_packed pointer to the contiguous memory area used to store the compressed_rank structure. The size of cr_packed must be at least @see compressed_rank_packed_size 
38 |  */
39 | void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed);
40 | 
41 | /** \fn cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
42 |  *  \brief Return the amount of space needed to pack a compressed_rank structure.
43 |  *  \return the size of the packed compressed_rank structure or zero for failures
44 |  */ 
45 | cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr);
46 | 
47 | 
48 | /** \fn cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
49 |  *  \param cr_packed is a pointer to a contiguous memory area
50 |  *  \param idx is an index to compute the rank
51 |  *  \return an integer that represents the compressed_rank value.
52 |  */
53 | cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx);
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/compressed_seq.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_COMPRESSED_SEQ_H__
 2 | #define __CMPH_COMPRESSED_SEQ_H__
 3 | 
 4 | #include"select.h"
 5 | 
 6 | struct _compressed_seq_t
 7 | {
 8 | 	cmph_uint32 n; // number of values stored in store_table
 9 | 	// The length in bits of each value is decomposed into two components: the lg(n) MSBs are stored in a rank_select data structure,
10 | 	// the remaining LSBs are stored in a table of n cells, each one of rem_r bits.
11 | 	cmph_uint32 rem_r; // width in bits of each cell of the LSB table described above
12 | 	cmph_uint32 total_length; // total length in bits of store_table
13 | 	select_t sel; // NOTE(review): presumably the rank_select structure holding the MSB part -- confirm in compressed_seq.c
14 | 	cmph_uint32 * length_rems; // NOTE(review): presumably the table of n cells holding the LSB remainders -- confirm
15 | 	cmph_uint32 * store_table;
16 | };
17 | 
18 | typedef struct _compressed_seq_t compressed_seq_t;
19 | 
20 | /** \fn void compressed_seq_init(compressed_seq_t * cs);
21 |  *  \brief Initialize a compressed sequence structure.
22 |  *  \param cs points to the compressed sequence structure to be initialized
23 |  */
24 | void compressed_seq_init(compressed_seq_t * cs);
25 | 
26 | /** \fn void compressed_seq_destroy(compressed_seq_t * cs);
27 |  *  \brief Destroy a compressed sequence given as input.
28 |  *  \param cs points to the compressed sequence structure to be destroyed
29 |  */
30 | void compressed_seq_destroy(compressed_seq_t * cs);
31 | 
32 | /** \fn void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
33 |  *  \brief Generate a compressed sequence from an input array with n values.
34 |  *  \param cs points to the compressed sequence structure
35 |  *  \param vals_table pointer to the array given as input
36 |  *  \param n number of values in @see vals_table
37 |  */
38 | void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n);
39 | 
40 | 
41 | /** \fn cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
42 |  *  \brief Returns the value stored at index @see idx of the compressed sequence structure.
43 |  *  \param cs points to the compressed sequence structure
44 |  *  \param idx index to retrieve the value from
45 |  *  \return the value stored at index @see idx of the compressed sequence structure
46 |  */
47 | cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx);
48 | 
49 | 
50 | /** \fn cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
51 |  *  \brief Returns amount of space (in bits) to store the compressed sequence.
52 |  *  \param cs points to the compressed sequence structure
53 |  *  \return the amount of space (in bits) to store @see cs
54 |  */
55 | cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs);
56 | 
57 | void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen);
58 | 
59 | void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen);
60 | 
61 | 
62 | /** \fn void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
63 |  *  \brief Support the ability to pack a compressed sequence structure into a preallocated contiguous memory space pointed by cs_packed.
64 |  *  \param cs points to the compressed sequence structure
65 |  *  \param cs_packed pointer to the contiguous memory area used to store the compressed sequence structure. The size of cs_packed must be at least @see compressed_seq_packed_size 
66 |  */
67 | void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed);
68 | 
69 | /** \fn cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
70 |  *  \brief Return the amount of space needed to pack a compressed sequence structure.
71 |  *  \return the size of the packed compressed sequence structure or zero for failures
72 |  */ 
73 | cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs);
74 | 
75 | 
76 | /** \fn cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
77 |  *  \brief Returns the value stored at index @see idx of the packed compressed sequence structure.
78 |  *  \param cs_packed is a pointer to a contiguous memory area
79 |  *  \param idx is the index to retrieve the value from
80 |  *  \return the value stored at index @see idx of the packed compressed sequence structure
81 |  */
82 | cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx);
83 | 
84 | #endif
85 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/debug.h:
--------------------------------------------------------------------------------
 1 | #ifdef DEBUGP
 2 | #undef DEBUGP
 3 | #endif
 4 | 
 5 | #ifdef __cplusplus
 6 | #include <cstdio>
 7 | #ifdef WIN32
 8 | #include <cstring>
 9 | #endif
10 | #else
11 | #include <stdio.h>
12 | #ifdef WIN32
13 | #include <string.h>
14 | #endif
15 | #endif
16 | 
17 | #ifndef __GNUC__
18 | #ifndef __DEBUG_H__
19 | #define __DEBUG_H__
20 | #include <stdarg.h>
21 | static void debugprintf(const char *format, ...)
22 | {
23 | 	/* DEBUGP expands to this when DEBUG is defined and __GNUC__ is not:
24 | 	 * prints the caller's message to stderr exactly as given.
25 | 	 * The earlier version prepended "%s:%d " to the format although no file
26 | 	 * name or line number is ever passed in, so vfprintf consumed the
27 | 	 * caller's own arguments for those conversions (undefined behaviour).
28 | 	 * A function cannot see the caller's __FILE__/__LINE__, so the prefix is
29 | 	 * dropped; this also removes the malloc/free pair and the early return
30 | 	 * that skipped va_end. */
31 | 	va_list ap;
32 | 	va_start(ap, format);
33 | 	vfprintf(stderr, format, ap);
34 | 	va_end(ap);
35 | }
36 | static void dummyprintf(const char *format, ...) /* no-op: DEBUGP expands to this when neither DEBUG nor __GNUC__ is defined */
37 | {}
38 | #endif
39 | #endif
40 | 
41 | #ifdef DEBUG
42 | #ifndef __GNUC__
43 | #define DEBUGP debugprintf
44 | #else
45 | #define DEBUGP(args...) do { fprintf(stderr, "%s:%d ", __FILE__, __LINE__); fprintf(stderr, ## args); } while(0)
46 | #endif
47 | #else
48 | #ifndef __GNUC__
49 | #define DEBUGP dummyprintf
50 | #else
51 | #define DEBUGP(args...)
52 | #endif
53 | #endif
54 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/fch.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_FCH_H__
 2 | #define __CMPH_FCH_H__
 3 | 
 4 | #include "cmph.h"
 5 | 
 6 | typedef struct __fch_data_t fch_data_t;
 7 | typedef struct __fch_config_data_t fch_config_data_t;
 8 | 
 9 | /* Parameters calculation */
10 | cmph_uint32 fch_calc_b(double c, cmph_uint32 m);
11 | double fch_calc_p1(cmph_uint32 m);
12 | double fch_calc_p2(cmph_uint32 b);
13 | cmph_uint32 mixh10h11h12(cmph_uint32 b, double p1, double p2, cmph_uint32 initial_index);
14 | 
15 | fch_config_data_t *fch_config_new(void);
16 | void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs);
17 | void fch_config_destroy(cmph_config_t *mph);
18 | cmph_t *fch_new(cmph_config_t *mph, double c);
19 | 
20 | void fch_load(FILE *f, cmph_t *mphf);
21 | int fch_dump(cmph_t *mphf, FILE *f);
22 | void fch_destroy(cmph_t *mphf);
23 | cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen);
24 | 
25 | /** \fn void fch_pack(cmph_t *mphf, void *packed_mphf);
26 |  *  \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf.
27 |  *  \param mphf pointer to the resulting mphf
28 |  *  \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 
29 |  */
30 | void fch_pack(cmph_t *mphf, void *packed_mphf);
31 | 
32 | /** \fn cmph_uint32 fch_packed_size(cmph_t *mphf);
33 |  *  \brief Return the amount of space needed to pack mphf.
34 |  *  \param mphf pointer to a mphf
35 |  *  \return the size of the packed function or zero for failures
36 |  */ 
37 | cmph_uint32 fch_packed_size(cmph_t *mphf);
38 | 
39 | /** \fn cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
40 |  *  \brief Use the packed mphf to do a search.
41 |  *  \param  packed_mphf pointer to the packed mphf
42 |  *  \param key key to be hashed
43 |  *  \param keylen key length in bytes
44 |  *  \return The mphf value
45 |  */
46 | cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen);
47 | 
48 | #endif
49 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/fch_buckets.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_FCH_BUCKETS_H__
 2 | #define __CMPH_FCH_BUCKETS_H__
 3 | 
 4 | #include "cmph_types.h"
 5 | typedef struct __fch_buckets_t fch_buckets_t;
 6 | 
 7 | fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets);
 8 | 
 9 | cmph_uint8 fch_buckets_is_empty(fch_buckets_t * buckets, cmph_uint32 index);
10 | 
11 | void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key, cmph_uint32 length);
12 | 
13 | cmph_uint32 fch_buckets_get_size(fch_buckets_t * buckets, cmph_uint32 index);
14 | 
15 | char * fch_buckets_get_key(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key);
16 | 
17 | cmph_uint32 fch_buckets_get_keylength(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key);
18 | 
19 | // returns the size of biggest bucket.
20 | cmph_uint32 fch_buckets_get_max_size(fch_buckets_t * buckets);
21 | 
22 | // returns the number of buckets.
23 | cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets);
24 | 
25 | cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets);
26 | 
27 | void fch_buckets_print(fch_buckets_t * buckets);
28 | 
29 | void fch_buckets_destroy(fch_buckets_t * buckets);
30 | #endif
31 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/fch_structs.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_FCH_STRUCTS_H__
 2 | #define __CMPH_FCH_STRUCTS_H__
 3 | 
 4 | #include "hash_state.h"
 5 | 
 6 | struct __fch_data_t // typedef'd as fch_data_t in fch.h
 7 | {
 8 | 	cmph_uint32 m;       // words count
 9 | 	double c;      // constant c
10 | 	cmph_uint32  b;      // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Does not need to be stored.
11 | 	double p1;     // constant p1 = ceil(0.6*m). Does not need to be stored.
12 | 	double p2;     // constant p2 = ceil(0.3*b). Does not need to be stored.
13 | 	cmph_uint32 *g;      // g function.
14 | 	hash_state_t *h1;    // h10 function.
15 | 	hash_state_t *h2;    // h20 function.
16 | };
17 | 
18 | struct __fch_config_data_t // typedef'd as fch_config_data_t in fch.h; same fields as __fch_data_t plus hashfuncs
19 | {
20 | 	CMPH_HASH hashfuncs[2]; // NOTE(review): presumably the hash types chosen via fch_config_set_hashfuncs, one each for h1 and h2 -- confirm in fch.c
21 | 	cmph_uint32 m;       // words count
22 | 	double c;      // constant c
23 | 	cmph_uint32  b;      // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Does not need to be stored.
24 | 	double p1;     // constant p1 = ceil(0.6*m). Does not need to be stored.
25 | 	double p2;     // constant p2 = ceil(0.3*b). Does not need to be stored.
26 | 	cmph_uint32 *g;      // g function.
27 | 	hash_state_t *h1;    // h10 function.
28 | 	hash_state_t *h2;    // h20 function.
29 | };
30 | #endif
31 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/graph.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CMPH_GRAPH_H__
 2 | #define _CMPH_GRAPH_H__
 3 | 
 4 | #include <limits.h>
 5 | #include "cmph_types.h"
 6 | 
 7 | #define GRAPH_NO_NEIGHBOR UINT_MAX
 8 | 
 9 | typedef struct __graph_t graph_t;
10 | typedef struct __graph_iterator_t graph_iterator_t;
11 | struct __graph_iterator_t // returned by value from graph_neighbors_it and passed by pointer to graph_next_neighbor
12 | {
13 | 	cmph_uint32 vertex; // NOTE(review): presumably the vertex whose neighbors are being walked -- confirm in graph.c
14 | 	cmph_uint32 edge; // NOTE(review): presumably the current position among that vertex's edges -- confirm in graph.c
15 | };
16 | 
17 | 
18 | 
19 | graph_t *graph_new(cmph_uint32 nnodes, cmph_uint32 nedges);
20 | void graph_destroy(graph_t *graph);
21 | 
22 | void graph_add_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
23 | void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
24 | void graph_clear_edges(graph_t *g);
25 | cmph_uint32 graph_edge_id(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
26 | cmph_uint8 graph_contains_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2);
27 | 
28 | graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v);
29 | cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it);
30 | 
31 | void graph_obtain_critical_nodes(graph_t *g);            /* included -- Fabiano*/
32 | cmph_uint8 graph_node_is_critical(graph_t * g, cmph_uint32 v);     /* included -- Fabiano */
33 | cmph_uint32 graph_ncritical_nodes(graph_t *g);                /* included -- Fabiano*/
34 | cmph_uint32 graph_vertex_id(graph_t *g, cmph_uint32 e, cmph_uint32 id); /* included -- Fabiano*/
35 | 
36 | int graph_is_cyclic(graph_t *g);
37 | 
38 | void graph_print(graph_t *);
39 | 
40 | #endif
41 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/hash.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_HASH_H__
 2 | #define __CMPH_HASH_H__
 3 | 
 4 | #include "cmph_types.h"
 5 | 
 6 | typedef union __hash_state_t hash_state_t;
 7 | 
 8 | hash_state_t *hash_state_new(CMPH_HASH, cmph_uint32 hashsize);
 9 | 
10 | /** \fn cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen);
11 |  *  \param state is a pointer to a hash_state_t structure
12 |  *  \param key is a pointer to a key
13 |  *  \param keylen is the key length
14 |  *  \return an integer that represents a hash value of 32 bits.
15 |  */
16 | cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen);
17 | 
18 | /** \fn void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes);
19 |  *  \param state is a pointer to a hash_state_t structure
20 |  *  \param key is a pointer to a key
21 |  *  \param keylen is the key length
22 |  *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
23 |  */
24 | void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes);
25 | 
26 | void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen);
27 | 
28 | hash_state_t * hash_state_copy(hash_state_t *src_state);
29 | 
30 | hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen);
31 | 
32 | void hash_state_destroy(hash_state_t *state);
33 | 
34 | /** \fn void hash_state_pack(hash_state_t *state, void *hash_packed);
35 |  *  \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
36 |  *  \param state points to the hash function
37 |  *  \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size()
38 |  *  
39 |  * Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed.
40 |  * However, the hash function type must be packed outside.
41 |  */
42 | void hash_state_pack(hash_state_t *state, void *hash_packed);
43 | 
44 | /** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
45 |  *  \param hash_packed is a pointer to a contiguous memory area
46 |  *  \param hashfunc is the type of the hash function packed in hash_packed
47 |  *  \param key is a pointer to a key
48 |  *  \param keylen is the key length
49 |  *  \return an integer that represents a hash value of 32 bits.
50 |  */
51 | cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen);
52 | 
53 | /** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc)
54 |  *  \brief Return the amount of space needed to pack a hash function.
55 |  *  \param hashfunc function type
56 |  *  \return the size of the packed function or zero for failures
57 |  */ 
58 | cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc);
59 | 
60 | 
61 | /** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
62 |  *  \param hash_packed is a pointer to a contiguous memory area
63 |  *  \param key is a pointer to a key
64 |  *  \param keylen is the key length
65 |  *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
66 |  */
67 | void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
68 | 
69 | 
70 | /** \fn CMPH_HASH hash_get_type(hash_state_t *state);
71 |  *  \param state is a pointer to a hash_state_t structure
72 |  *  \return the hash function type pointed by state
73 |  */
74 | CMPH_HASH hash_get_type(hash_state_t *state);
75 | 
76 | #endif
77 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/hash_state.h:
--------------------------------------------------------------------------------
 1 | #ifndef __HASH_STATE_H__
 2 | #define __HASH_STATE_H__
 3 | 
 4 | #include "hash.h"
 5 | #include "jenkins_hash.h"
 6 | union __hash_state_t // typedef'd as hash_state_t in hash.h
 7 | {
 8 | 	CMPH_HASH hashfunc; // type tag; shares storage with jenkins.hashfunc, the first member of jenkins_state_t
 9 | 	jenkins_state_t jenkins; // state for CMPH_HASH_JENKINS, the only hash type listed in CMPH_HASH
10 | };
11 | 
12 | #endif
13 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/jenkins_hash.h:
--------------------------------------------------------------------------------
 1 | #ifndef __JEKINS_HASH_H__
 2 | #define __JEKINS_HASH_H__
 3 | 
 4 | #include "hash.h"
 5 | 
 6 | typedef struct __jenkins_state_t
 7 | {
 8 | 	CMPH_HASH hashfunc; // hash type tag; first member, so it overlays hashfunc in union __hash_state_t (hash_state.h)
 9 | 	cmph_uint32 seed; // NOTE(review): presumably the seed mixed into jenkins_hash -- confirm in jenkins_hash.c
10 | } jenkins_state_t;
11 | 	
12 | jenkins_state_t *jenkins_state_new(cmph_uint32 size); //size of hash table
13 | 
14 | /** \fn cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
15 |  *  \param state is a pointer to a jenkins_state_t structure
16 |  *  \param key is a pointer to a key
17 |  *  \param keylen is the key length
18 |  *  \return an integer that represents a hash value of 32 bits.
19 |  */
20 | cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen);
21 | 
22 | /** \fn void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
23 |  *  \param state is a pointer to a jenkins_state_t structure
24 |  *  \param key is a pointer to a key
25 |  *  \param keylen is the key length
26 |  *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
27 |  */
28 | void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
29 | 
30 | void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen);
31 | jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state);
32 | jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen);
33 | void jenkins_state_destroy(jenkins_state_t *state);
34 | 
35 | /** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
36 |  *  \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed.
37 |  *  \param state points to the jenkins function
38 |  *  \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size() 
39 |  */
40 | void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed);
41 | 
42 | /** \fn cmph_uint32 jenkins_state_packed_size();
43 |  *  \brief Return the amount of space needed to pack a jenkins function.
44 |  *  \return the size of the packed function or zero for failures
45 |  */ 
46 | cmph_uint32 jenkins_state_packed_size(void);
47 | 
48 | 
49 | /** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
50 |  *  \param jenkins_packed is a pointer to a contiguous memory area
51 |  *  \param key is a pointer to a key
52 |  *  \param keylen is the key length
53 |  *  \return an integer that represents a hash value of 32 bits.
54 |  */
55 | cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen);
56 | 
57 | /** \fn jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
58 |  *  \param jenkins_packed is a pointer to a contiguous memory area
59 |  *  \param key is a pointer to a key
60 |  *  \param keylen is the key length
61 |  *  \param hashes is a pointer to a memory large enough to fit three 32-bit integers.
62 |  */
63 | void jenkins_hash_vector_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes);
64 | 
65 | #endif
66 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/linear_string_map.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | #include <stdio.h>
 3 | #include <string.h>
 4 | 
 5 | #include "linear_string_map.h"
 6 | 
 7 | struct __linear_string_map_t {  // one list node; typedef'd as lsmap_t in linear_string_map.h
 8 |   const char *key;  // borrowed pointer (lsmap_append stores it without copying); "dummy node" in the tail sentinel
 9 |   void *value;  // caller-supplied payload; never assigned in the tail sentinel
10 |   struct __linear_string_map_t* next;  // NULL only in the tail sentinel
11 | };
12 | 
13 | lsmap_t *lsmap_new() {  /* creates an empty map: a lone dummy tail node */
14 |   lsmap_t* head = (lsmap_t*)malloc(sizeof(lsmap_t));
15 |   if (head == NULL) return NULL;  /* allocation failed */
16 |   head->next = NULL;
17 |   head->key = "dummy node";
18 |   return head;
19 | }
20 | 
21 | int lsmap_size(lsmap_t *lsmap) {  /* counts the entries; the dummy tail node is excluded */
22 |   int size = 0;
23 |   for (; lsmap->next != NULL; lsmap = lsmap->next) ++size;  /* must advance, or the loop never ends */
24 |   return size;
25 | }
26 | 
27 | void lsmap_append(lsmap_t *lsmap, const char *key, void *value) {
28 |   /* Stores key/value in the current dummy tail and links a fresh dummy node after it. */
29 |   lsmap_t *tail = lsmap_new();  /* new dummy node: key "dummy node", next NULL */
30 |   if (!tail) return;            /* out of memory: leave the map unchanged instead of dereferencing NULL */
31 |   while (lsmap->next != NULL) lsmap = lsmap->next;
32 |   lsmap->key = key;             /* the pointer is stored; the string itself is not copied */
33 |   lsmap->value = value;
34 |   lsmap->next = tail;
35 | }
36 | 
37 | void* lsmap_search(lsmap_t *lsmap, const char *key) {
38 |   /* Linear scan from the head; returns the value of the first entry whose key compares equal, else NULL. */
39 |   lsmap_t *node;
40 |   for (node = lsmap; node->next != NULL; node = node->next) {
41 |     if (strcmp(node->key, key) != 0) continue;
42 |     return node->value;
43 |   }
44 |   return NULL;
45 | }
46 | 
47 | void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) {
48 |   /* Calls f on the key of every entry, head to tail; the dummy tail node is skipped. */
49 |   for (; lsmap->next != NULL; lsmap = lsmap->next) {
50 |     f(lsmap->key);
51 |   }
52 | }
53 | 
54 | void lsmap_foreach_value(lsmap_t *lsmap, void (*f)(void*)) {
55 |   /* Calls f on the value of every entry, head to tail; the dummy tail node is skipped. */
56 |   for (; lsmap->next != NULL; lsmap = lsmap->next) {
57 |     f(lsmap->value);
58 |   }
59 | }
60 | 
61 | void lsmap_destroy(lsmap_t *lsmap) {
62 |   /* Frees every node, the dummy tail included; keys and values are not freed. */
63 |   do {
64 |     lsmap_t* following = lsmap->next;
65 |     free(lsmap);
66 |     lsmap = following;
67 |   } while (lsmap != NULL);
68 | }
69 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/linear_string_map.h:
--------------------------------------------------------------------------------
 1 | // A simple linked list based dynamic sized associative map from const char* to
 2 | // void*. Designed to maximize ease of use instead of performance. Should be
 3 | // used in benchmarks and tests only, not to be distributed with the cmph
 4 | // runtime headers.
 5 | 
 6 | typedef struct __linear_string_map_t lsmap_t;
 7 | 
 8 | lsmap_t *lsmap_new();
 9 | void lsmap_append(lsmap_t *lsmap, const char *key, void *value);
10 | void* lsmap_search(lsmap_t *lsmap, const char *key);
11 | void lsmap_foreach_key(lsmap_t* lsmap, void (*f)(const char*));
12 | void lsmap_foreach_value(lsmap_t* lsmap, void (*f)(void*));
13 | void lsmap_destroy(lsmap_t* lsmap);
14 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/miller_rabin.c:
--------------------------------------------------------------------------------
 1 | #include "miller_rabin.h"
 2 | 
 3 | static inline cmph_uint64 int_pow(cmph_uint64 a, cmph_uint64 d, cmph_uint64 n)
 4 | {
 5 | 	// a^d mod n by binary exponentiation: one squaring per bit of d, one multiply per set bit
 6 | 	cmph_uint64 result = 1;
 7 | 	cmph_uint64 square = a;
 8 | 	for(; d != 0; d >>= 1)
 9 | 	{
10 | 		if(d & 1)
11 | 			result = (result * square) % n;
12 | 		square = (square * square) % n;
13 | 	}
14 | 	return result;
15 | }
16 | 
17 | static inline cmph_uint8 check_witness(cmph_uint64 a_exp_d, cmph_uint64 n, cmph_uint64 s)
18 | {
19 | 	// 1 when a_exp_d is 1 or n-1, or turns into n-1 within s-1 squarings mod n; otherwise 0
20 | 	cmph_uint64 x = a_exp_d, round;
21 | 	if(x == 1 || x == n - 1)
22 | 		return 1;
23 | 	for(round = 1; round < s; round++)
24 | 	{
25 | 		x = (x * x) % n;
26 | 		if(x == n - 1)
27 | 			return 1;
28 | 	}
29 | 	return 0;
30 | }
31 | 
32 | cmph_uint8 check_primality(cmph_uint64 n)
33 | {
34 | 	// Miller-Rabin test with the fixed bases 2, 7 and 61; returns 1 for "prime", 0 for "composite".
35 | 	// int_pow and check_witness multiply residues in 64 bits, so results are only reliable while n*n fits in 64 bits.
36 | 	static const cmph_uint64 bases[3] = {2, 7, 61};
37 | 	cmph_uint64 d, s;
38 | 	int i;
39 | 	// 0 and 1 are not prime; n == 1 used to spin forever in the loop below because d stayed 0
40 | 	if(n < 2)
41 | 		return 0;
42 | 	// the trial divisors are prime themselves; they used to be reported as composite
43 | 	if(n == 2 || n == 3 || n == 5 || n == 7)
44 | 		return 1;
45 | 	if((n % 2) == 0 || (n % 3) == 0 || (n % 5) == 0 || (n % 7) == 0)
46 | 		return 0;
47 | 	// we decompose the number n - 1 into 2^s*d with d odd
48 | 	s = 0;
49 | 	d = n - 1;
50 | 	do
51 | 	{
52 | 		s++;
53 | 		d /= 2;
54 | 	}while((d % 2) == 0);
55 | 
56 | 	for(i = 0; i < 3; i++)
57 | 	{
58 | 		// a base divisible by n (only n == 61 gets here) yields a^d mod n == 0 and would reject the prime 61
59 | 		if((bases[i] % n) == 0)
60 | 			continue;
61 | 		if(check_witness(int_pow(bases[i], d, n), n, s) == 0)
62 | 			return 0;
63 | 	}
64 | 	return 1;
65 | }
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/miller_rabin.h:
--------------------------------------------------------------------------------
1 | #ifndef _CMPH_MILLER_RABIN_H__ 
2 | #define _CMPH_MILLER_RABIN_H__ 
3 | #include "cmph_types.h"
4 | cmph_uint8 check_primality(cmph_uint64 n);
5 | #endif
6 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/select.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_SELECT_H__
 2 | #define __CMPH_SELECT_H__
 3 | 
 4 | #include "cmph_types.h"
 5 | 
 6 | struct _select_t // typedef'd as select_t just below
 7 | {
 8 | 	cmph_uint32 n,m; // NOTE(review): presumably the n and m passed to select_generate -- confirm in select.c
 9 | 	cmph_uint32 * bits_vec;
10 | 	cmph_uint32 * select_table;
11 | };
12 | 
13 | typedef struct _select_t select_t;
14 | 
15 | void select_init(select_t * sel);
16 | 
17 | void select_destroy(select_t * sel);
18 |  
19 | void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m);
20 | 
21 | cmph_uint32 select_query(select_t * sel, cmph_uint32 one_idx);
22 | 
23 | cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx);
24 | 
25 | cmph_uint32 select_get_space_usage(select_t * sel);
26 | 
27 | void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen);
28 | 
29 | void select_load(select_t * sel, const char *buf, cmph_uint32 buflen);
30 | 
31 | 
32 | /** \fn void select_pack(select_t *sel, void *sel_packed);
33 |  *  \brief Support the ability to pack a select structure into a preallocated contiguous memory space pointed by sel_packed.
34 |  *  \param sel points to the select structure
35 |  *  \param sel_packed pointer to the contiguous memory area used to store the select structure. The size of sel_packed must be at least @see select_packed_size 
36 |  */
37 | void select_pack(select_t *sel, void *sel_packed);
38 | 
39 | /** \fn cmph_uint32 select_packed_size(select_t *sel);
40 |  *  \brief Return the amount of space needed to pack a select structure.
41 |  *  \return the size of the packed select structure or zero for failures
42 |  */ 
43 | cmph_uint32 select_packed_size(select_t *sel);
44 | 
45 | 
46 | /** \fn cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
47 |  *  \param sel_packed is a pointer to a contiguous memory area
48 |  *  \param one_idx is the rank for which we want to calculate the inverse function select
49 |  *  \return an integer that represents the select value of rank one_idx.
50 |  */
51 | cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx);
52 | 
53 | 
54 | /** \fn cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
55 |  *  \param sel_packed is a pointer to a contiguous memory area
56 |  *  \param vec_bit_idx is a value previously computed by @see select_query_packed
57 |  *  \return an integer that represents the next select value greater than @see vec_bit_idx.
58 |  */
59 | cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx);
60 | 
61 | #endif
62 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/vqueue.c:
--------------------------------------------------------------------------------
 1 | #include "vqueue.h"
 2 | #include <stdio.h>
 3 | #include <assert.h>
 4 | #include <stdlib.h>
/* Circular queue of cmph_uint32 values backed by a single heap buffer. */
struct __vqueue_t
{
  cmph_uint32 * values;  /* ring buffer allocated by vqueue_new() */
  /* beg:      slot just before the oldest element (vqueue_remove() advances it, then reads);
   * end:      slot of the newest element (vqueue_insert() advances it, then writes);
   * capacity: number of slots in values, i.e. the requested capacity plus one. */
  cmph_uint32 beg, end, capacity;
};
10 | 
11 | vqueue_t * vqueue_new(cmph_uint32 capacity)
12 | {
13 |   size_t capacity_plus_one = capacity + 1;
14 |   vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t));
15 |   if (!q) return NULL;
16 |   q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32));
17 |   q->beg = q->end = 0;
18 |   q->capacity = (cmph_uint32) capacity_plus_one;
19 |   return q;
20 | }
21 | 
22 | cmph_uint8 vqueue_is_empty(vqueue_t * q)
23 | {
24 |   return (cmph_uint8)(q->beg == q->end);
25 | }
26 | 
27 | void vqueue_insert(vqueue_t * q, cmph_uint32 val)
28 | {
29 |   assert((q->end + 1)%q->capacity != q->beg); // Is queue full?
30 |   q->end = (q->end + 1)%q->capacity;
31 |   q->values[q->end] = val;
32 | }
33 | 
34 | cmph_uint32 vqueue_remove(vqueue_t * q)
35 | {
36 |   assert(!vqueue_is_empty(q)); // Is queue empty?
37 |   q->beg = (q->beg + 1)%q->capacity;
38 |   return q->values[q->beg];
39 | }
40 | 
41 | void vqueue_print(vqueue_t * q)
42 | {
43 |   cmph_uint32 i;
44 |   for (i = q->beg; i != q->end; i = (i + 1)%q->capacity)
45 |     fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]);
46 | }
47 | 
48 | void vqueue_destroy(vqueue_t *q)
49 | {
50 |   free(q->values); q->values = NULL; free(q);
51 | }
52 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/vqueue.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_VQUEUE_H__
 2 | #define __CMPH_VQUEUE_H__
 3 | 
 4 | #include "cmph_types.h"
 5 | typedef struct __vqueue_t vqueue_t;
 6 | 
 7 | vqueue_t * vqueue_new(cmph_uint32 capacity);
 8 | 
 9 | cmph_uint8 vqueue_is_empty(vqueue_t * q);
10 | 
11 | void vqueue_insert(vqueue_t * q, cmph_uint32 val);
12 | 
13 | cmph_uint32 vqueue_remove(vqueue_t * q);
14 | 
15 | void vqueue_print(vqueue_t * q);
16 | 
17 | void vqueue_destroy(vqueue_t * q);
18 | #endif
19 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/vstack.c:
--------------------------------------------------------------------------------
 1 | #include "vstack.h"
 2 | 
 3 | #include <stdlib.h>
 4 | #include <assert.h>
 5 | 
 6 | //#define DEBUG
 7 | #include "debug.h"
 8 | 
/* Growable stack of cmph_uint32 values. */
struct __vstack_t
{
	cmph_uint32 pointer;   /* number of stored values; also the index vstack_push() writes next */
	cmph_uint32 *values;   /* heap buffer; NULL until vstack_reserve() first grows it */
	cmph_uint32 capacity;  /* number of slots currently allocated in values */
};
15 | 
16 | vstack_t *vstack_new(void)
17 | {
18 | 	vstack_t *stack = (vstack_t *)malloc(sizeof(vstack_t));
19 | 	assert(stack);
20 | 	stack->pointer = 0;
21 | 	stack->values = NULL;
22 | 	stack->capacity = 0;
23 | 	return stack;
24 | }
25 | 
26 | void vstack_destroy(vstack_t *stack)
27 | {
28 | 	assert(stack);
29 | 	free(stack->values);
30 | 	free(stack);
31 | }
32 | 
33 | void vstack_push(vstack_t *stack, cmph_uint32 val)
34 | {
35 | 	assert(stack);
36 | 	vstack_reserve(stack, stack->pointer + 1);
37 | 	stack->values[stack->pointer] = val;
38 | 	++(stack->pointer);
39 | }
40 | void vstack_pop(vstack_t *stack)
41 | {
42 | 	assert(stack);
43 | 	assert(stack->pointer > 0);
44 | 	--(stack->pointer);
45 | }
46 | 
47 | cmph_uint32 vstack_top(vstack_t *stack)
48 | {
49 | 	assert(stack);
50 | 	assert(stack->pointer > 0);
51 | 	return stack->values[(stack->pointer - 1)];
52 | }
53 | int vstack_empty(vstack_t *stack)
54 | {
55 | 	assert(stack);
56 | 	return stack->pointer == 0;
57 | }
58 | cmph_uint32 vstack_size(vstack_t *stack)
59 | {
60 | 	return stack->pointer;
61 | }
62 | void vstack_reserve(vstack_t *stack, cmph_uint32 size)
63 | {
64 | 	assert(stack);
65 | 	if (stack->capacity < size)
66 | 	{
67 | 		cmph_uint32 new_capacity = stack->capacity + 1;
68 | 		DEBUGP("Increasing current capacity %u to %u\n", stack->capacity, size);
69 | 		while (new_capacity	< size)
70 | 		{
71 | 			new_capacity *= 2;
72 | 		}
73 | 		stack->values = (cmph_uint32 *)realloc(stack->values, sizeof(cmph_uint32)*new_capacity);
74 | 		assert(stack->values);
75 | 		stack->capacity = new_capacity;
76 | 		DEBUGP("Increased\n");
77 | 	}
78 | }
79 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/vstack.h:
--------------------------------------------------------------------------------
 1 | #ifndef __CMPH_VSTACK_H__
 2 | #define __CMPH_VSTACK_H__
 3 | 
 4 | #include "cmph_types.h"
 5 | typedef struct __vstack_t vstack_t;
 6 | 
 7 | vstack_t *vstack_new();
 8 | void vstack_destroy(vstack_t *stack);
 9 | 
10 | void vstack_push(vstack_t *stack, cmph_uint32 val);
11 | cmph_uint32 vstack_top(vstack_t *stack);
12 | void vstack_pop(vstack_t *stack);
13 | int vstack_empty(vstack_t *stack);
14 | cmph_uint32 vstack_size(vstack_t *stack);
15 | 
16 | void vstack_reserve(vstack_t *stack, cmph_uint32 size);
17 | 
18 | #endif
19 | 


--------------------------------------------------------------------------------
/cmph-2.0/src/wingetopt.h:
--------------------------------------------------------------------------------
 1 | #ifdef __cplusplus
 2 | extern "C" {
 3 | #endif
 4 | 
/* Everywhere except WIN32 the system <getopt.h> is used; on WIN32 the
 * getopt interface is declared here instead. */
#ifndef WIN32
	#include <getopt.h>
#else
	#ifndef _GETOPT_
	#define _GETOPT_

	#include <stdio.h>                  /* for EOF */ 
	#include <string.h>                 /* for strchr() */ 

	/* NOTE(review): these are definitions, not extern declarations, so
	 * including this header from more than one translation unit of the same
	 * program would define each variable twice. */
	char *optarg = NULL;    /* pointer to the start of the option argument  */ 
	int   optind = 1;       /* number of the next argv[] to be evaluated    */ 
	int   opterr = 1;       /* non-zero if a question mark should be returned */

	/* Only declared here; the implementation is not part of this header. */
	int getopt(int argc, char *argv[], char *opstring); 
	#endif //_GETOPT_
#endif //WIN32
21 | 
22 | #ifdef __cplusplus
23 | }
24 | #endif
25 | 
26 | 


--------------------------------------------------------------------------------
/cmph-2.0/tests/Makefile.am:
--------------------------------------------------------------------------------
 1 | TESTS = $(check_PROGRAMS)
 2 | check_PROGRAMS = graph_tests select_tests compressed_seq_tests compressed_rank_tests cmph_benchmark_test
 3 | noinst_PROGRAMS = packed_mphf_tests mphf_tests
 4 | 
 5 | INCLUDES = -I../src/
 6 | 
 7 | graph_tests_SOURCES = graph_tests.c
 8 | graph_tests_LDADD = ../src/libcmph.la
 9 | 
10 | packed_mphf_tests_SOURCES = packed_mphf_tests.c
11 | packed_mphf_tests_LDADD = ../src/libcmph.la
12 | 
13 | mphf_tests_SOURCES = mphf_tests.c
14 | mphf_tests_LDADD = ../src/libcmph.la
15 | 
16 | select_tests_SOURCES = select_tests.c
17 | select_tests_LDADD = ../src/libcmph.la
18 | 
19 | compressed_seq_tests_SOURCES = compressed_seq_tests.c
20 | compressed_seq_tests_LDADD = ../src/libcmph.la
21 | 
22 | compressed_rank_tests_SOURCES = compressed_rank_tests.c
23 | compressed_rank_tests_LDADD = ../src/libcmph.la
24 | 
25 | cmph_benchmark_test_SOURCES = cmph_benchmark_test.c
26 | cmph_benchmark_test_LDADD = ../src/libcmph.la
27 | 


--------------------------------------------------------------------------------
/cmph-2.0/tests/cmph_benchmark_test.c:
--------------------------------------------------------------------------------
 1 | #include <unistd.h>  // for sleep
 2 | #include <limits.h>
 3 | 
 4 | #include "cmph_benchmark.h"
 5 | 
 6 | void bm_sleep(int iters) {
 7 |   sleep(1);
 8 | }
 9 | 
/* Benchmark body that runs a counting loop from 0 up to INT_MAX. */
void bm_increment(int iters) {
  /* Unsigned accumulator: the running sum passes INT_MAX long before the
   * loop ends, and overflowing a signed int is undefined behaviour.
   * Unsigned arithmetic wraps in a well-defined way instead. */
  unsigned int v = 0;
  int i;
  (void)iters;  /* the loop bound is fixed; the argument is not used */
  for (i = 0; i < INT_MAX; ++i) {
    v += (unsigned int)i;
  }
  (void)v;
}
16 | 
/* Register the two benchmark bodies defined above and run them. */
int main(int argc, char** argv) {
  /* NOTE(review): the meaning of the second BM_REGISTER argument is defined
   * in cmph_benchmark.h, which is not visible here -- confirm. */
  BM_REGISTER(bm_sleep, 1);
  BM_REGISTER(bm_increment, 1);
  run_benchmarks(argc, argv);
  return 0;
}
23 | 
24 | 


--------------------------------------------------------------------------------
/cmph-2.0/tests/compressed_rank_tests.c:
--------------------------------------------------------------------------------
 1 | #include "../src/compressed_rank.h"
 2 | 
 3 | #define DEBUG
 4 | #include "../src/debug.h"
 5 | #include <stdlib.h>
 6 | 
 7 | static inline void print_values(compressed_rank_t * cr, cmph_uint32 idx)
 8 | {
 9 | 	register cmph_uint32 index;
10 | 	
11 | 	index = compressed_rank_query(cr, idx);
12 | 	fprintf(stderr, "Index[%u]\t= %u\n", idx, index);
13 | }
14 | 
15 | 
16 | static inline void print_values_packed(char * cr_packed, cmph_uint32 idx)
17 | {
18 | 	register cmph_uint32 index;
19 | 	
20 | 	index = compressed_rank_query_packed(cr_packed, idx);
21 | 	fprintf(stderr, "Index[%u]\t= %u\n", idx, index);
22 | }
23 | 
24 | /*
25 | n = 20
26 | Indices:         0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19
27 | vector[]      = {0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1}
28 | nzeros = 12
29 | zeroIndices[] = {0, 1, 2, 5, 7, 9, 11, 12, 13, 16, 17, 18}
30 | */
31 | int main(int argc, char **argv)
32 | {
33 | 	compressed_rank_t cr;
34 | 	cmph_uint32 i = 0;
35 | 	cmph_uint32 n = 12;
36 | 	cmph_uint32 nIndices = 20;
37 | 	cmph_uint32 keys_vec[] = {0, 1, 2, 5, 7, 9, 11, 12, 13, 16, 17, 18};
38 | 	char *buf = NULL;
39 | 	cmph_uint32 buflen = 0;
40 | 	char * cr_packed = NULL;
41 | 	cmph_uint32 cr_pack_size = 0;
42 | 	
43 | 	compressed_rank_init(&cr);
44 | 	compressed_rank_generate(&cr, keys_vec, n);
45 | 	fprintf(stderr, "Space usage = %u\n", compressed_rank_get_space_usage(&cr));
46 | 	for(i = 0; i < nIndices; i++)
47 | 	{
48 | 		print_values(&cr, i);
49 | 	}
50 | 	
51 | 	fprintf(stderr, "Dumping compressed rank structure\n");
52 | 	compressed_rank_dump(&cr, &buf, &buflen);
53 | 	
54 | 	compressed_rank_destroy(&cr);
55 | 	fprintf(stderr, "Loading compressed rank structure\n");
56 | 	
57 | 	compressed_rank_load(&cr, buf, buflen);
58 | 	for(i = 0; i < nIndices; i++)
59 | 	{
60 | 		print_values(&cr, i);
61 | 	}
62 | 	free(buf);
63 | 	
64 | 	cr_pack_size = compressed_rank_packed_size(&cr);
65 | 	
66 | 	cr_packed = (char *) calloc(cr_pack_size, sizeof(char));
67 | 	compressed_rank_pack(&cr, cr_packed);
68 | 	compressed_rank_destroy(&cr);
69 | 	
70 | 	fprintf(stderr, "Querying the packed compressed rank structure\n");
71 | 	for(i = 0; i < nIndices; i++)
72 | 	{
73 | 		print_values_packed(cr_packed, i);
74 | 	}
75 | 	
76 | 	free(cr_packed);
77 | 	return 0;
78 | }
79 | 


--------------------------------------------------------------------------------
/cmph-2.0/tests/compressed_seq_tests.c:
--------------------------------------------------------------------------------
 1 | #include "../src/compressed_seq.h"
 2 | 
 3 | #define DEBUG
 4 | #include "../src/debug.h"
 5 | #include <stdlib.h>
 6 | 
 7 | static inline void print_values(compressed_seq_t * cs, cmph_uint32 idx)
 8 | {
 9 | 	register cmph_uint32 index;
10 | 	
11 | 	index = compressed_seq_query(cs, idx);
12 | 	fprintf(stderr, "Index[%u]\t= %u\n", idx, index);
13 | }
14 | 
15 | 
16 | static inline void print_values_packed(char * cs_packed, cmph_uint32 idx)
17 | {
18 | 	register cmph_uint32 index;
19 | 	
20 | 	index = compressed_seq_query_packed(cs_packed, idx);
21 | 	fprintf(stderr, "Index[%u]\t= %u\n", idx, index);
22 | }
23 | 
24 | int main(int argc, char **argv)
25 | {
26 | 	compressed_seq_t cs;
27 | 	cmph_uint32 i = 0;
28 | 	cmph_uint32 n = 20;
29 | 	cmph_uint32 keys_vec[] = {  0,   1,   1,   1,   2,   2,   2,   3,   5,   5,
30 | 	                            6,   6,   9,   9,   9,  12,  12,  13,  17,  1077};
31 | 	char *buf = NULL;
32 | 	cmph_uint32 buflen = 0;
33 | 	char * cs_packed = NULL;
34 | 	cmph_uint32 cs_pack_size = 0;
35 | 	
36 | 	compressed_seq_init(&cs);
37 | 	compressed_seq_generate(&cs, keys_vec, n);
38 | 	fprintf(stderr, "Space usage = %u\n", compressed_seq_get_space_usage(&cs));
39 | 	for(i = 0; i < n; i++)
40 | 	{
41 | 		print_values(&cs, i);
42 | 	}
43 | 	
44 | 	fprintf(stderr, "Dumping compressed seq structure\n");
45 | 	compressed_seq_dump(&cs, &buf, &buflen);
46 | 	
47 | 	compressed_seq_destroy(&cs);
48 | 	fprintf(stderr, "Loading compressed seq structure\n");
49 | 	
50 | 	compressed_seq_load(&cs, buf, buflen);
51 | 	for(i = 0; i < n; i++)
52 | 	{
53 | 		print_values(&cs, i);
54 | 	}
55 | 	free(buf);
56 | 	
57 | 	cs_pack_size = compressed_seq_packed_size(&cs);
58 | 	
59 | 	cs_packed = (char *) calloc(cs_pack_size, sizeof(char));
60 | 	compressed_seq_pack(&cs, cs_packed);
61 | 	compressed_seq_destroy(&cs);
62 | 	
63 | 	fprintf(stderr, "Querying the packed compressed seq structure\n");
64 | 	for(i = 0; i < n; i++)
65 | 	{
66 | 		print_values_packed(cs_packed, i);
67 | 	}
68 | 	
69 | 	free(cs_packed);
70 | 	return 0;
71 | }
72 | 


--------------------------------------------------------------------------------
/cmph-2.0/tests/graph_tests.c:
--------------------------------------------------------------------------------
 1 | #include "../src/graph.h"
 2 | 
 3 | #define DEBUG
 4 | #include "../src/debug.h"
 5 | 
 6 | int main(int argc, char **argv)
 7 | {
 8 | 	graph_iterator_t it;
 9 | 	cmph_uint32 i, neighbor;
10 | 	graph_t *g = graph_new(5, 10);
11 | 
12 | 	fprintf(stderr, "Building random graph\n");
13 | 	for (i = 0; i < 10; ++i)
14 | 	{
15 | 		cmph_uint32 v1 = i % 5;
16 | 		cmph_uint32 v2 = (i*2) % 5;
17 | 		if (v1 == v2) continue;
18 | 		graph_add_edge(g, v1, v2);
19 | 		DEBUGP("Added edge %u %u\n", v1, v2);
20 | 	}
21 | 	graph_print(g);
22 | 	graph_del_edge(g, 4, 3);
23 | 	graph_print(g);
24 | 	graph_clear_edges(g);
25 | 	graph_print(g);
26 | 	graph_destroy(g);
27 | 
28 | 	fprintf(stderr, "Building cyclic graph\n");
29 | 	g = graph_new(4, 5);
30 | 	graph_add_edge(g, 0, 3);
31 | 	graph_add_edge(g, 0, 1);
32 | 	graph_add_edge(g, 1, 2);
33 | 	graph_add_edge(g, 2, 0);
34 | 	if (!graph_is_cyclic(g))
35 | 	{
36 | 		return 1;
37 | 	}
38 | 	graph_destroy(g);
39 | 
40 | 	fprintf(stderr, "Building non-cyclic graph\n");
41 | 	g = graph_new(5, 4);
42 | 	graph_add_edge(g, 0, 1);
43 | 	graph_add_edge(g, 1, 2);
44 | 	graph_add_edge(g, 2, 3);
45 | 	graph_add_edge(g, 3, 4);
46 | 
47 | 	if (graph_is_cyclic(g))
48 | 	{
49 | 		return 1;
50 | 	}
51 | 
52 | 	fprintf(stderr, "Checking neighbors iterator\n");
53 | 	it = graph_neighbors_it(g, 1);
54 | 	neighbor = graph_next_neighbor(g, &it);
55 | 	DEBUGP("Neighbor is %u\n", neighbor);
56 | 	if (neighbor != 0 && neighbor != 2) return 1;
57 | 	neighbor = graph_next_neighbor(g, &it);
58 | 	DEBUGP("Neighbor is %u\n", neighbor);
59 | 	if (neighbor != 0 && neighbor != 2) return 1;
60 | 	neighbor = graph_next_neighbor(g, &it);
61 | 	DEBUGP("Neighbor is %u\n", neighbor);
62 | 	if (neighbor != GRAPH_NO_NEIGHBOR) return 1;
63 | 
64 | 
65 | 	graph_destroy(g);
66 | 	return 0;
67 | }
68 | 


--------------------------------------------------------------------------------
/cmph-2.0/tests/mphf_tests.c:
--------------------------------------------------------------------------------
  1 | #ifdef WIN32
  2 | #include "../wingetopt.h"
  3 | #else
  4 | #include <getopt.h>
  5 | #endif
  6 | #include <stdio.h>
  7 | #include <stdlib.h>
  8 | #include <errno.h>
  9 | #include <string.h>
 10 | #include <time.h>
 11 | #include <limits.h>
 12 | #include <assert.h>
 13 | #include <cmph.h>
 14 | 
 15 | #ifdef WIN32
 16 | #define VERSION "0.8"
 17 | #else
 18 | #include "config.h"
 19 | #endif
 20 | 
 21 | 
 22 | void usage(const char *prg)
 23 | {
 24 | 	fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph]  keysfile\n", prg);   
 25 | }
 26 | void usage_long(const char *prg)
 27 | {
 28 | 	fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg);   
 29 | 	fprintf(stderr, "Packed MPHFs testing tool\n\n"); 
 30 | 	fprintf(stderr, "  -h\t print this help message\n");
 31 | 	fprintf(stderr, "  -V\t print version number and exit\n");
 32 | 	fprintf(stderr, "  -v\t increase verbosity (may be used multiple times)\n");
 33 | 	fprintf(stderr, "  -k\t number of keys\n");
 34 | 	fprintf(stderr, "  -m\t minimum perfect hash function file \n");
 35 | 	fprintf(stderr, "  keysfile\t line separated file with keys\n");
 36 | }
 37 | 
 38 | int main(int argc, char **argv)
 39 | {
 40 | 	char verbosity = 0;
 41 | 	char *mphf_file = NULL;
 42 | 	const char *keys_file = NULL;
 43 | 	FILE *mphf_fd = stdout;
 44 | 	FILE *keys_fd;
 45 | 	cmph_uint32 nkeys = UINT_MAX;
 46 | 	cmph_uint32 i = 0;
 47 | 	cmph_t *mphf = NULL;
 48 | 	cmph_io_adapter_t *source;
 49 | 	while (1)
 50 | 	{
 51 | 		char ch = (char)getopt(argc, argv, "hVvk:m:");
 52 | 		if (ch == -1) break;
 53 | 		switch (ch)
 54 | 		{
 55 | 			case 'k':
 56 | 			        {
 57 | 					char *endptr;
 58 | 					nkeys = (cmph_uint32) strtoul(optarg, &endptr, 10);
 59 | 					if(*endptr != 0) {
 60 | 						fprintf(stderr, "Invalid number of keys %s\n", optarg);
 61 | 						exit(1);
 62 | 					}
 63 | 				}
 64 | 				break;
 65 | 			case 'm':
 66 | 				mphf_file = strdup(optarg);
 67 | 				break;
 68 | 			case 'v':
 69 | 				++verbosity;
 70 | 				break;
 71 | 			case 'V':
 72 | 				printf("%s\n", VERSION);
 73 | 				return 0;
 74 | 			case 'h':
 75 | 				usage_long(argv[0]);
 76 | 				return 0;
 77 | 			default:
 78 | 				usage(argv[0]);
 79 | 				return 1;
 80 | 		}
 81 | 	}
 82 | 
 83 | 	if (optind != argc - 1)
 84 | 	{
 85 | 		usage(argv[0]);
 86 | 		return 1;
 87 | 	}
 88 | 	keys_file = argv[optind];
 89 |   
 90 | 	int ret = 0;
 91 | 	if (mphf_file == NULL)
 92 | 	{
 93 | 		mphf_file = (char *)malloc(strlen(keys_file) + 5);
 94 | 		memcpy(mphf_file, keys_file, strlen(keys_file));
 95 | 		memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5);
 96 | 	}	
 97 | 
 98 | 	keys_fd = fopen(keys_file, "r");
 99 | 
100 | 	if (keys_fd == NULL)
101 | 	{
102 | 		fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno));
103 | 		return -1;
104 | 	}
105 | 
106 | 	if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd);
107 | 	else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys);
108 | 
109 | 	cmph_uint8 * hashtable = NULL;
110 | 	mphf_fd = fopen(mphf_file, "r");
111 | 	if (mphf_fd == NULL)
112 | 	{
113 | 		fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno));
114 | 		free(mphf_file);
115 | 		return -1;
116 | 	}
117 | 	mphf = cmph_load(mphf_fd);
118 | 	fclose(mphf_fd);
119 | 	if (!mphf)
120 | 	{
121 | 		fprintf(stderr, "Unable to parser input file %s\n", mphf_file);
122 | 		free(mphf_file);
123 | 		return -1;
124 | 	}
125 | 	cmph_uint32 siz = cmph_size(mphf);
126 | 	hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8));
127 | 	memset(hashtable, 0, (size_t)siz);
128 | 	//check all keys
129 | 	for (i = 0; i < source->nkeys; ++i)
130 | 	{
131 | 		cmph_uint32 h;
132 | 		char *buf;
133 | 		cmph_uint32 buflen = 0;
134 | 		source->read(source->data, &buf, &buflen);
135 | 		h = cmph_search(mphf, buf, buflen);
136 | 		if (!(h < siz))
137 | 		{
138 | 			fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf);
139 | 			ret = 1;
140 | 		} else if(hashtable[h])
141 | 		{
142 | 			fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf);
143 | 			ret = 1;
144 | 		} else hashtable[h] = 1;
145 | 
146 | 		if (verbosity)
147 | 		{
148 | 			printf("%s -> %u\n", buf, h);
149 | 		}
150 | 		source->dispose(source->data, buf, buflen);
151 | 	}
152 | 		
153 | 	cmph_destroy(mphf);
154 | 	free(hashtable);
155 | 
156 | 	fclose(keys_fd);
157 | 	free(mphf_file);
158 |     cmph_io_nlfile_adapter_destroy(source);
159 | 	return ret;
160 |   
161 | }
162 | 


--------------------------------------------------------------------------------
/cmph-2.0/tests/select_tests.c:
--------------------------------------------------------------------------------
 1 | #include "../src/select.h"
 2 | 
 3 | #define DEBUG
 4 | #include "../src/debug.h"
 5 | #include <stdlib.h>
 6 | 
 7 | static inline void print_values(select_t * sel)
 8 | {
 9 | 	register cmph_uint32 index;
10 | 	
11 | 	index = select_query(sel, 0);
12 | 	fprintf(stderr, "Index[0]\t= %u\n", index - 0);
13 | 
14 | 	index = select_next_query(sel, index);
15 | 	fprintf(stderr, "Next Index\t= %u\n", index);
16 | 
17 | 	index = select_query(sel, 1);
18 | 	fprintf(stderr, "Index[1]\t= %u\n", index - 1);
19 | 
20 | 	index = select_next_query(sel, index);
21 | 	fprintf(stderr, "Next Index\t= %u\n", index);
22 | 
23 | 	index = select_query(sel, 2);
24 | 	fprintf(stderr, "Index[2]\t= %u\n", index - 2);
25 | 
26 | 	index = select_next_query(sel, index);
27 | 	fprintf(stderr, "Next Index\t= %u\n", index);
28 | 
29 | 	index = select_query(sel, 3);
30 | 	fprintf(stderr, "Index[3]\t= %u\n", index - 3);
31 | }
32 | 
33 | 
34 | static inline void print_values_packed(char * sel_packed)
35 | {
36 | 	register cmph_uint32 index;
37 | 	
38 | 	index = select_query_packed(sel_packed, 0);
39 | 	fprintf(stderr, "Index[0]\t= %u\n", index - 0);
40 | 
41 | 	index = select_next_query_packed(sel_packed, index);
42 | 	fprintf(stderr, "Next Index\t= %u\n", index);
43 | 
44 | 	index = select_query_packed(sel_packed, 1);
45 | 	fprintf(stderr, "Index[1]\t= %u\n", index - 1);
46 | 
47 | 	index = select_next_query_packed(sel_packed, index);
48 | 	fprintf(stderr, "Next Index\t= %u\n", index);
49 | 
50 | 	index = select_query_packed(sel_packed, 2);
51 | 	fprintf(stderr, "Index[2]\t= %u\n", index - 2);
52 | 
53 | 	index = select_next_query_packed(sel_packed, index);
54 | 	fprintf(stderr, "Next Index\t= %u\n", index);
55 | 
56 | 	index = select_query_packed(sel_packed, 3);
57 | 	fprintf(stderr, "Index[3]\t= %u\n", index - 3);
58 | }
59 | 
60 | int main(int argc, char **argv)
61 | {
62 | 	select_t sel;
63 | 	cmph_uint32 n = 4;
64 | 	cmph_uint32 keys_vec[4] = {0,1,2,3}; 
65 | 	cmph_uint32 m = keys_vec[3];
66 | 	char *buf = NULL;
67 | 	cmph_uint32 buflen = 0;
68 | 	char * select_packed = NULL;
69 | 	cmph_uint32 select_pack_size = 0;
70 | 	
71 | 	select_init(&sel);
72 | 	select_generate(&sel, keys_vec, n, m);
73 | 	fprintf(stderr, "Space usage = %u\n", select_get_space_usage(&sel));
74 | 	print_values(&sel);
75 | 	
76 | 	fprintf(stderr, "Dumping select structure\n");
77 | 	select_dump(&sel, &buf, &buflen);
78 | 	
79 | 	select_destroy(&sel);
80 | 	fprintf(stderr, "Loading select structure\n");
81 | 	
82 | 	select_load(&sel, buf, buflen);
83 | 	print_values(&sel);
84 | 	free(buf);;
85 | 	
86 | 	select_pack_size = select_packed_size(&sel);
87 | 	
88 | 	select_packed = (char *) calloc(select_pack_size, sizeof(char));
89 | 	select_pack(&sel, select_packed);
90 | 	select_destroy(&sel);
91 | 	
92 | 	fprintf(stderr, "Querying the packed select structure\n");
93 | 	print_values_packed(select_packed);
94 | 	
95 | 	free(select_packed);
96 | 	return 0;
97 | }
98 | 


--------------------------------------------------------------------------------
/examples/all-bench.sh:
--------------------------------------------------------------------------------
 1 | #/bin/sh
 2 | p=${p:-perl}
 3 | 
 4 | make clean
 5 | $p Makefile.PL && make -s
 6 | #cd bob; git checkout Makefile; cd ..
 7 | g=`git describe --long --tags --dirty --always`
 8 | 
 9 | $p -Mblib examples/bench.pl -size 127              | tee log.bench-$g-127
10 | $p -Mblib examples/bench.pl -size 500              | tee log.bench-$g-500
11 | $p -Mblib examples/bench.pl -size 2000             | tee log.bench-$g-2000
12 | $p -Mblib examples/bench.pl -size 10000 -nul -1opt | tee log.bench-$g-10000
13 | $p -Mblib examples/bench.pl -size 25000 -nul -1opt | tee log.bench-$g-25000
14 | $p -Mblib examples/bench.pl -nul -pic -1opt        | tee log.bench-$g
15 | 


--------------------------------------------------------------------------------
/examples/words20:
--------------------------------------------------------------------------------
 1 | A
 2 | A's
 3 | AA's
 4 | AB's
 5 | ABM's
 6 | AC's
 7 | ACTH's
 8 | AI's
 9 | AIDS's
10 | AM's
11 | AOL
12 | AOL's
13 | ASCII's
14 | ASL's
15 | ATM's
16 | ATP's
17 | AWOL's
18 | AZ's
19 | AZT's
20 | Aachen
21 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH.pm:
--------------------------------------------------------------------------------
  1 | package Perfect::Hash::CMPH;
  2 | 
  3 | use strict;
  4 | our $VERSION = '0.01';
  5 | #use warnings;
  6 | our @ISA = qw(Perfect::Hash Perfect::Hash::C);
  7 | use B ();
  8 | use Config;
  9 | 
 10 | use XSLoader;
 11 | XSLoader::load('Perfect::Hash::CMPH');
 12 | 
 13 | =head1 DESCRIPTION
 14 | 
 15 | XS interface to the cmph library, the current state of the art library
 16 | for perfect hashes and minimal perfect hashes.
 17 | 
 18 | L<http://cmph.sourceforge.net>
 19 | 
 20 | =head1 METHODS
 21 | 
 22 | =over
 23 | 
 24 | =item new $filename, @options
 25 | 
 26 | Only a filename is passed to cmph so far; an arrayref or hashref is first written to the keyfile F<pperf_keys.tmp>.
 27 | 
 28 | Honored options are: I<-nul>
 29 | 
 30 | =cut
 31 | 
 32 | # TODO support arrayref and hashref converted to arrayrefs, as byte-packed vector
 33 | # for the cmph io_vector or io_byte_vector adapter.
 34 | sub new {
 35 |   my $class = shift or die;
 36 |   my $dict = shift; #hashref, arrayref or filename
 37 |   my $size;
 38 |   # enforce KEYFILE
 39 |   my $fn = "pperf_keys.tmp";
 40 |   if (ref $dict eq 'ARRAY') {
 41 |     open my $F, ">", $fn;
 42 |     my $i = 0;
 43 |     my %dict;
 44 |     $size = scalar @$dict;
 45 |     for (@$dict) {
 46 |       print $F "$_\n";
 47 |       $dict{$_} = $i++;
 48 |     }
 49 |     close $F;
 50 |     $dict = \%dict;
 51 |   }
 52 |   elsif (ref $dict eq 'HASH') {
 53 |     open my $F, ">", $fn;
 54 |     for (sort keys %$dict) {
 55 |       print $F $_,"\t",$dict->{$_},"\n";
 56 |     }
 57 |     #print $F "%%";
 58 |     close $F;
 59 |     $size = scalar keys %$dict;
 60 |   } elsif (!ref $dict and ! -e $dict) {
 61 |     die "wrong dict argument. arrayref, hashref or filename expected";
 62 |   } else {
 63 |     $fn = $dict;
 64 |     # against -false-positive
 65 |     my %hash;
 66 |     open my $d, "<", $dict or die; {
 67 |       local $/;
 68 |       my $i = 0;
 69 |       %hash = map {$_ => $i++ } split /\n/, <$d>;
 70 |     }
 71 |     close $d;
 72 |     $dict = \%hash;
 73 |     $size = scalar keys %hash;
 74 |   }
 75 |   my $ph = _new($class, $fn, @_);
 76 |   if (grep /^-false-positives/, @_) {
 77 |     push @$ph, $dict; # at [3]
 78 |   }
 79 |   $ph->[2]->{size} = $size;
 80 |   return $ph;
 81 | }
 82 | 
 83 | =item perfecthash $ph
 84 | 
 85 | XS method. Returns the position of the found key in the file.
 86 | 
 87 | =item false_positives
 88 | 
 89 | =item option $ph
 90 | 
 91 | Access the option hash in $ph.
 92 | 
 93 | =cut
 94 | 
 95 | sub option {
 96 |   return $_[0]->[2]->{$_[1]};
 97 | }
 98 | 
 99 | =item save_c fileprefix, options
100 | 
101 | Generates a $fileprefix.c and $fileprefix.h file.
102 | 
103 | For all CMPH variants.
104 | 
105 | =cut
106 | 
# Write the generated C lookup function: include cmph.h, open the function
# with the declaration from c_funcdecl, embed $ph->[1] as a C string literal
# and return cmph_search_packed() on it modulo the stored key count.
# All arguments are forwarded to save_h_header.
sub save_c {
  my ($ph, @args) = @_;
  my $size = $ph->[2]->{size};
  require Perfect::Hash::C;
  Perfect::Hash::C->import();

  my ($fileprefix, $base) = $ph->save_h_header(@args);
  my $FH = $ph->save_c_header($fileprefix, $base);
  # XXX need to initialize mphf from the temp FILE
  # into a memory buffer.
  print $FH "#include \"cmph.h\"\n";
  print $FH $ph->c_funcdecl($base)." {";
  # XXX check for false positives from dict at [3]
  # C expression for the key length: the variable l with -nul, else strlen(s)
  my $keylen_expr = $ph->option('-nul') ? "l" : "strlen(s)";
  print $FH
    "\n    static const char *packed_mphf = ",
    B::cstring($ph->[1]),
    ";\n    return cmph_search_packed((void*)packed_mphf, (const char*)s, $keylen_expr) % $size;\n}\n";
}
127 | 
128 | =item c_lib, c_include
129 | 
130 | TODO: to the installed Alien libpath
131 | 
132 | =cut
133 | 
134 | # quirks on temp. uninstalled -lcmph
# Compiler include flag pointing at the bundled cmph-2.0 headers.
sub c_include {
  return " -Icmph-2.0/include";
}
136 | 
# Linker flags for the bundled, not yet installed cmph-2.0 library.
sub c_lib {
  # static to enable debugging: a DEBUGGING perl on darwin links the archive
  return " cmph-2.0/lib/libcmph.a"
    if $^O eq 'darwin' and $Config{ccflags} =~ /-DDEBUGGING/;

  # quirks on temp. uninstalled -lcmph
  my @flags = (" -Lcmph-2.0/lib -lcmph");
  # rpath not with darwin, solaris, msvc. we should rather install cmph locally or via Alien
  push @flags, " -Wl,-rpath=cmph-2.0/lib"
    if $^O =~ /linux|bsd|cygwin$/ and $Config{cc} =~ /cc|clang/;
  return join '', @flags;
}
147 | 
148 | =back
149 | 
150 | =head1 LICENSE
151 | 
152 | The code of the cmph library and this perl library is dual licensed under
153 | the B<LGPL version 2> and B<MPL 1.1> licenses. Please refer to the LGPL-2
154 | and MPL-1.1 files in the F<cmph> subdirectory for the full description of
155 | each of the licenses.
156 | 
157 | For cxxmph, the files F<stringpiece.h> and F<MurmurHash2> are covered by the
158 | BSD and MIT licenses, respectively.
159 | 
160 | =cut
161 | 
162 | 1;
163 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/BDZ.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::BDZ;
 2 | 
 3 | use strict;
 4 | our $VERSION = '0.01';
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 BDZ algorithm.
12 | The MPFH minimal variant is L<Perfect::Hash::CMPH::BDZ_PH>.
13 | See http://cmph.sourceforge.net/bdz.html
14 | 
15 | It is a simple, efficient, near-optimal space and practical algorithm
16 | to generate a family of PHFs and MPHFs. It is also referred to as BPZ
17 | algorithm because of the work presented by Botelho, Pagh and Ziviani in
18 | [2]. In the Botelho's PhD. dissertation [1] it is also referred to as
19 | RAM algorithm because it is more suitable for key sets that can be
20 | handled in internal memory.
21 | 
22 | The BDZ algorithm uses r-uniform random hypergraphs given by function
23 | values of r uniform random hash functions on the input key set S for
24 | generating PHFs and MPHFs that require O(n) bits to be stored. A
25 | hypergraph is the generalization of a standard undirected graph where
26 | each edge connects r >= 2 vertices. This idea is not new, see e.g. [8], but
27 | we have proceeded differently to achieve a space usage of O(n) bits
28 | rather than O(n log n) bits. Evaluation time for all schemes
29 | considered is constant. For r=3 we obtain a space usage of
30 | approximately 2.6n bits for an MPHF. More compact, and even simpler,
31 | representations can be achieved for larger m. For example, for m=1.23n
32 | we can get a space usage of 1.95n bits.
33 | 
34 | Our best MPHF space upper bound is within a factor of 2 from the
35 | information theoretical lower bound of approximately 1.44 bits. We
36 | have shown that the BDZ algorithm is far more practical than previous
37 | methods with proven space complexity, both because of its simplicity,
38 | and because the constant factor of the space complexity is more than 6
39 | times lower than its closest competitor, for plausible problem
40 | sizes. We verify the practicality experimentally, using slightly more
41 | space than in the mentioned theoretical bounds.
42 | 
43 | =head1 METHODS
44 | 
45 | See L<Perfect::Hash::CMPH>
46 | 
47 | =head1 SEE ALSO
48 | 
49 | [1] F. C. Botelho. Near-Optimal Space Perfect Hashing
50 | Algorithms. PhD. Thesis, Department of Computer Science, Federal
51 | University of Minas Gerais, September 2008. Supervised by N. Ziviani.
52 | L<http://cmph.sourceforge.net/papers/thesis.pdf>
53 | 
54 | [2] F. C. Botelho, R. Pagh, N. Ziviani. Simple and space-efficient
55 | minimal perfect hash functions. In Proceedings of the 10th
56 | International Workshop on Algorithms and Data Structures (WADs'07),
57 | Springer-Verlag Lecture Notes in Computer Science, vol. 4619, Halifax,
58 | Canada, August 2007, 139-150.
59 | L<http://cmph.sourceforge.net/papers/wads07.pdf>
60 | 
61 | =cut
62 | 
63 | # run this file directly to self-test: p -d -Ilib lib/Perfect/Hash/CMPH/BDZ.pm examples/words20
64 | if (!caller) {
65 |   require Perfect::Hash;
66 |   Perfect::Hash::_test(@ARGV);
67 | }
68 | 
69 | 1;
70 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/BDZ_PH.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::BDZ_PH;
 2 | 
 3 | use strict;
 4 | our $VERSION = '0.01';
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 BDZ_PH algorithm, the MPHF minimal
12 | variant to L<Perfect::Hash::CMPH::BDZ>.  See
13 | L<http://cmph.sourceforge.net/bdz.html>
14 | 
15 | It is a simple, efficient, near-optimal space and practical algorithm
16 | to generate a family of PHFs and MPHFs. It is also referred to as BPZ
17 | algorithm because the work presented by Botelho, Pagh and Ziviani in
18 | [2]. In the Botelho's PhD. dissertation [1] it is also referred to as
19 | RAM algorithm because it is more suitable for key sets that can be
20 | handled in internal memory.
21 | 
22 | The BDZ algorithm uses r-uniform random hypergraphs given by function
23 | values of r uniform random hash functions on the input key set S for
24 | generating PHFs and MPHFs that require O(n) bits to be stored. A
25 | hypergraph is the generalization of a standard undirected graph where
26 | each edge connects r >= 2 vertices. This idea is not new, see e.g. [8], but
27 | we have proceeded differently to achieve a space usage of O(n) bits
28 | rather than O(n log n) bits. Evaluation time for all schemes
29 | considered is constant. For r=3 we obtain a space usage of
30 | approximately 2.6n bits for an MPHF. More compact, and even simpler,
31 | representations can be achieved for larger m. For example, for m=1.23n
32 | we can get a space usage of 1.95n bits.
33 | 
34 | Our best MPHF space upper bound is within a factor of 2 from the
35 | information theoretical lower bound of approximately 1.44 bits. We
36 | have shown that the BDZ algorithm is far more practical than previous
37 | methods with proven space complexity, both because of its simplicity,
38 | and because the constant factor of the space complexity is more than 6
39 | times lower than its closest competitor, for plausible problem
40 | sizes. We verify the practicality experimentally, using slightly more
41 | space than in the mentioned theoretical bounds.
42 | 
43 | =head1 METHODS
44 | 
45 | See L<Perfect::Hash::CMPH>
46 | 
47 | =head1 SEE ALSO
48 | 
49 | [1] F. C. Botelho. Near-Optimal Space Perfect Hashing
50 | Algorithms. PhD. Thesis, Department of Computer Science, Federal
51 | University of Minas Gerais, September 2008. Supervised by N. Ziviani.
52 | L<http://cmph.sourceforge.net/papers/thesis.pdf>
53 | 
54 | [2] F. C. Botelho, R. Pagh, N. Ziviani. Simple and space-efficient
55 | minimal perfect hash functions. In Proceedings of the 10th
56 | International Workshop on Algorithms and Data Structures (WADs'07),
57 | Springer-Verlag Lecture Notes in Computer Science, vol. 4619, Halifax,
58 | Canada, August 2007, 139-150.
59 | L<http://cmph.sourceforge.net/papers/wads07.pdf>
60 | 
61 | =cut
62 | 
63 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/BDZ_PH.pm examples/words20
64 | unless (caller) { # no caller: the file was executed directly, not loaded as a module
65 |   require Perfect::Hash;
66 |   &Perfect::Hash::_test(@ARGV) # forwards the command line arguments
67 | }
68 | 
69 | 1;
70 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/BMZ.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::BMZ;
 2 | 
 3 | use strict;
 4 | our $VERSION = '0.01';
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 BMZ algorithm.
12 | See L<http://cmph.sourceforge.net/bmz.html>
13 | 
14 | =head1 METHODS
15 | 
16 | See L<Perfect::Hash::CMPH>
17 | 
18 | =cut
19 | 
20 | # run this file directly to self-test: p -d -Mblib lib/Perfect/Hash/CMPH/BMZ.pm examples/words20
21 | if (!caller) {
22 |   require Perfect::Hash;
23 |   Perfect::Hash::_test(@ARGV);
24 | }
25 | 
26 | 1;
27 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/BMZ8.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::BMZ8;
 2 | 
 3 | use strict;
 4 | our $VERSION = '0.01';
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 BMZ8 algorithm.
12 | See http://cmph.sourceforge.net/bmz.html
13 | 
14 | =head1 METHODS
15 | 
16 | See L<Perfect::Hash::CMPH>
17 | 
18 | =cut
19 | 
20 | # run this file directly to self-test: p -d -Mblib lib/Perfect/Hash/CMPH/BMZ8.pm examples/words20
21 | if (!caller) {
22 |   require Perfect::Hash;
23 |   Perfect::Hash::_test(@ARGV);
24 | }
25 | 
26 | 1;
27 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/BRZ.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::BRZ;
 2 | 
 3 | use strict;
 4 | our $VERSION = '0.01';
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 BRZ algorithm.
12 | See L<http://cmph.sourceforge.net/brz.html>
13 | 
14 | BRZ is an external memory based algorithm esp. suited to huge
15 | dictionaries, which can easily scale to billions of entries.
16 | 
17 | The algorithm is linear on the size of keys to construct a MPHF, which
18 | is optimal. For instance, for a collection of 1 billion URLs collected
19 | from the web, each one 64 characters long on average, the time to
20 | construct a MPHF using a 2.4 gigahertz PC with 500 megabytes of
21 | available main memory is approximately 3 hours. Second, the algorithm
22 | needs a small a priori defined vector of one byte entries in main
23 | memory to construct a MPHF. For the collection of 1 billion URLs and
24 | using b=175, the algorithm needs only 5.45 megabytes of internal
25 | memory. Third, the evaluation of the MPHF for each retrieval requires
26 | three memory accesses and the computation of three universal hash
27 | functions. This is not optimal as any MPHF requires at least one
28 | memory access and the computation of two universal hash
29 | functions. Fourth, the description of a MPHF takes a constant number
30 | of bits for each key, which is optimal. For the collection of 1
31 | billion URLs, it needs 8.1 bits for each key, while the theoretical
32 | lower bound is 1/ln(2), approximately 1.4427 bits per key.
33 | 
34 | =head1 METHODS
35 | 
36 | See L<Perfect::Hash::CMPH>
37 | 
38 | =cut
39 | 
40 | # run this file directly to self-test: p -d -Mblib lib/Perfect/Hash/CMPH/BRZ.pm examples/words20
41 | if (!caller) {
42 |   require Perfect::Hash;
43 |   Perfect::Hash::_test(@ARGV);
44 | }
45 | 
46 | 1;
47 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/CHD.pm:
--------------------------------------------------------------------------------
  1 | package Perfect::Hash::CMPH::CHD;
  2 | 
  3 | use strict;
  4 | our $VERSION = '0.01';
  5 | use Perfect::Hash::CMPH;
  6 | #use warnings;
  7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C);
  8 | 
  9 | =head1 DESCRIPTION
 10 | 
 11 | XS interface to the cmph-2.0 CHD algorithm.
 12 | See http://cmph.sourceforge.net/chd.html
 13 | 
 14 | The CHD algorithm permits to obtain PHFs with representation size very
 15 | close to optimal while retaining O(n) construction time and O(1)
 16 | evaluation time. For example, in the case m=2n we obtain a PHF that
 17 | uses space 0.67 bits per key, and for m=1.23n we obtain space 1.4 bits
 18 | per key, which was not achievable with previously known methods. The
 19 | CHD algorithm is inspired by several known algorithms; the main new
 20 | feature is that it combines a modification of Pagh's
 21 | ``hash-and-displace'' approach with data compression on a sequence of
 22 | hash function indices. That combination makes it possible to
 23 | significantly reduce space usage while retaining linear construction
 24 | time and constant query time. The CHD algorithm can also be used for
 25 | k-perfect hashing, where at most k keys may be mapped to the same
 26 | value. For the analysis we assume that fully random hash functions are
 27 | given for free; such assumptions can be justified and were made in
 28 | previous papers.
 29 | 
 30 | The compact PHFs generated by the CHD algorithm can be used in many
 31 | applications in which we want to assign a unique identifier to each
 32 | key without storing any information on the key. One of the most
 33 | obvious applications of those functions (or k-perfect hash functions)
 34 | is when we have a small fast memory in which we can store the perfect
 35 | hash function while the keys and associated satellite data are stored
 36 | in slower but larger memory. The size of a block or a transfer unit
 37 | may be chosen so that k data items can be retrieved in one read
 38 | access. In this case we can ensure that data associated with a key can
 39 | be retrieved in a single probe to slower memory. This has been used
 40 | for example in hardware routers [4].
 41 | 
 42 | The CHD algorithm generates the most compact PHFs and MPHFs we know of
 43 | in O(n) time. The time required to evaluate the generated functions is
 44 | constant (in practice less than 1.4 microseconds). The storage space
 45 | of the resulting PHFs and MPHFs is distant from the information
 46 | theoretic lower bound by a factor of 1.43. The closest competitor is
 47 | the algorithm by Dietzfelbinger and Pagh [3] but their algorithm does
 48 | not work in linear time. Furthermore, the CHD algorithm can be tuned to run
 49 | faster than the BPZ algorithm [1] (the fastest algorithm available in
 50 | the literature so far) and to obtain more compact functions. The most
 51 | impressive characteristic is that it has the ability, in principle, to
 52 | approximate the information theoretic lower bound while being
 53 | practical. A detailed description of the CHD algorithm can be found in
 54 | [2].
 55 | 
 56 | =head1 METHODS
 57 | 
 58 | See L<Perfect::Hash::CMPH>
 59 | 
 60 | =head1 SEE ALSO
 61 | 
 62 | =over
 63 | 
 64 | =item [1]
 65 | 
 66 | F. C. Botelho, R. Pagh, N. Ziviani. Simple and space-efficient minimal
 67 | perfect hash functions. In Proceedings of the 10th International
 68 | Workshop on Algorithms and Data Structures (WADs'07), Springer-Verlag
 69 | Lecture Notes in Computer Science, vol. 4619, Halifax, Canada, August
 70 | 2007, 139-150.  L<http://cmph.sourceforge.net/papers/wads07.pdf>
 71 | 
 72 | =item [2]
 73 | 
 74 | F. C. Botelho, D. Belazzougui and M. Dietzfelbinger. Compress, hash
 75 | and displace. In Proceedings of the 17th European Symposium on
 76 | Algorithms (ESA'09). Springer LNCS, 2009.
 77 | L<http://cmph.sourceforge.net/papers/esa09.pdf>
 78 | 
 79 | =item [3]
 80 | 
 81 | M. Dietzfelbinger and R. Pagh. Succinct data structures for retrieval
 82 | and approximate membership. In Proceedings of the 35th international
 83 | colloquium on Automata, Languages and Programming (ICALP'08), pages
 84 | 385-396, Berlin, Heidelberg, 2008. Springer-Verlag.
 85 | 
 86 | =item [4]
 87 | 
 88 | B. Prabhakar and F. Bonomi. Perfect hashing for network
 89 | applications. In Proceedings of the IEEE International Symposium on
 90 | Information Theory. IEEE Press, 2006.
 91 | 
 92 | =back
 93 | 
 94 | =cut
 95 | 
 96 | # run this file directly to self-test: p -d -Ilib lib/Perfect/Hash/CMPH/CHD.pm examples/words20
 97 | if (!caller) {
 98 |   require Perfect::Hash;
 99 |   Perfect::Hash::_test(@ARGV);
100 | }
101 | 
102 | 1;
103 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/CHD_PH.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::CHD_PH;
 2 | 
 3 | use strict;
 4 | our $VERSION = '0.01';
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 CHD_PH algorithm.
12 | See http://cmph.sourceforge.net/chd.html
13 | 
14 | =head1 METHODS
15 | 
16 | See L<Perfect::Hash::CMPH>
17 | 
18 | =cut
19 | 
20 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/CHD_PH.pm examples/words20
21 | unless (caller) { # no caller: the file was executed directly, not loaded as a module
22 |   require Perfect::Hash;
23 |   &Perfect::Hash::_test(@ARGV) # forwards the command line arguments
24 | }
25 | 
26 | 1;
27 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/CHM.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::CHM;
 2 | 
 3 | our $VERSION = '0.01';
 4 | use strict;
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 CHM algorithm.
12 | See http://cmph.sourceforge.net/chm.html
13 | 
14 | =head1 METHODS
15 | 
16 | =head2 new $filename, @options
17 | 
18 | Computes a minimal perfect hash table using the given dictionary,
19 | given as hashref or arrayref or filename.
20 | 
21 | Honored options are: I<none yet>
22 | 
23 | Planned: I<-minimal>
24 | 
25 | =head2 perfecthash $ph, $key
26 | 
27 | Look up a $key in the minimal perfect hash table and return the
28 | associated index into the initially given $dict.
29 | 
30 | Checks if the index is correct, otherwise it will return undef.
31 | 
32 | =head2 false_positives
33 | 
34 | Returns undef, as cmph hashes always store the keys.
35 | 
36 | =head2 save_c NYI
37 | 
38 | =cut
39 | 
40 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/CHM.pm examples/words20
41 | unless (caller) { # no caller: the file was executed directly, not loaded as a module
42 |   require Perfect::Hash;
43 |   &Perfect::Hash::_test(@ARGV) # forwards the command line arguments
44 | }
45 | 
46 | 1;
47 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/CMPH/FCH.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::CMPH::FCH;
 2 | 
 3 | use strict;
 4 | our $VERSION = '0.01';
 5 | use Perfect::Hash::CMPH;
 6 | #use warnings;
 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C);
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | XS interface to the cmph-2.0 FCH algorithm.
12 | See http://cmph.sourceforge.net/fch.html
13 | 
14 | The total memory consumption of FCH algorithm for generating a minimal
15 | perfect hash function (MPHF) is: O(n) + 9n + 8cn/(log(n) + 1)
16 | bytes. The value of parameter c must be greater than or equal to 2.6.
17 | 
18 | Memory consumption to store the resulting function: We only need to
19 | store the g function and a constant number of bytes for the seed of
20 | the hash functions used in the resulting MPHF. Thus, we need
21 | cn/(log(n) + 1) + O(1) bytes.
22 | 
23 | E.A. Fox, Q.F. Chen, and L.S. Heath. A faster algorithm for
24 | constructing minimal perfect hash functions. In Proc. 15th Annual
25 | International ACM SIGIR Conference on Research and Development in
26 | Information Retrieval, pages 266-273, 1992.
27 | L<http://cmph.sourceforge.net/papers/fch92.pdf>
28 | 
29 | =head1 METHODS
30 | 
31 | See L<Perfect::Hash::CMPH>
32 | 
33 | =cut
34 | 
35 | # run this file directly to self-test: p -d -Mblib lib/Perfect/Hash/CMPH/FCH.pm examples/words20
36 | if (!caller) {
37 |   require Perfect::Hash;
38 |   Perfect::Hash::_test(@ARGV);
39 | }
40 | 
41 | 1;
42 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/Cuckoo.pm:
--------------------------------------------------------------------------------
  1 | package Perfect::Hash::Cuckoo;
  2 | 
  3 | our $VERSION = '0.01';
  4 | use strict;
  5 | #use warnings;
  6 | use Perfect::Hash;
  7 | use Perfect::Hash::C;
  8 | our @ISA = qw(Perfect::Hash Perfect::Hash::C);
  9 | 
 10 | =head1 DESCRIPTION
 11 | 
 12 | Generate non-perfect but fast Cuckoo hashes, with two universal hash
 13 | functions f and g into two tables of load factor 50%, guaranteeing constant
 14 | lookup and insertion time.
 15 | 
 16 | Only for benchmarks yet. Still just a dummy placeholder.
 17 | 
 18 | A study by Zukowski et al. has shown that cuckoo hashing is much
 19 | faster than chained hashing for small, cache-resident hash tables on
 20 | modern processors:
 21 | 
 22 | Zukowski, Marcin; Heman, Sandor; Boncz, Peter (June 2006).
 23 | "Architecture-Conscious Hashing". Proceedings of the
 24 | International Workshop on Data Management on New Hardware (DaMoN).
 25 | L<https://www.cs.cmu.edu/~damon2006/pdf/zukowski06archconscioushashing.pdf>
 26 | 
 27 | =head1 METHODS
 28 | 
 29 | =over
 30 | 
 31 | =item new $filename|hashref|arrayref @options
 32 | 
 33 | Can only handle arrayref or single column keyfiles yet. No values.
 34 | 
 35 | Still a dummy placeholder.
 36 | 
 37 | Honored options are:
 38 | 
 39 | C<-pic>, C<-nul>
 40 | 
 41 | =cut
 42 | 
 43 | sub new {
 44 |   my $class = shift or die;
 45 |   my ($dict, @opts) = @_; # $dict is a hashref, arrayref or filename
 46 |   my $options = Perfect::Hash::_handle_opts(@opts);
 47 |   my ($keys, $values) = _dict_init($dict);
 48 |   # XXX optimize the 2 uhash functions here
 49 |   my @self = ([], $options, $keys, $values); # slot 0: (still empty) uhash params
 50 |   return bless \@self, $class;
 51 | }
 52 | 
 53 | =item save_c prefix, options
 54 | 
 55 | Generates F<$prefix_hash.c> and F<.h> files with no external dependencies.
 56 | 
 57 | =cut
 58 | 
 59 | sub save_c { # writes the C lookup source; remaining args are forwarded to save_h_header
 60 |   my $ph = shift;
 61 |   my ($options, $keys) = ($ph->[1], $ph->[2]); # NOTE(review): $options is never used below
 62 |   my ($fileprefix, $base) = $ph->save_h_header(@_);
 63 |   my $FH = $ph->save_c_header($fileprefix, $base); # handle all following prints go to
 64 |   # print $FH "#include <string.h>\n" if @$C or !$ph->option('-nul');
 65 |   print $FH $ph->c_hash_impl($base);
 66 |   print $FH $ph->c_funcdecl($base)." {\n";
 67 |   print $FH "  int l = strlen(s);" unless $ph->option('-nul'); # without -nul the length is computed in C
 68 |   my $size = scalar @$keys;
 69 |   my $type = u_csize($size); # NOTE(review): $type (and so $size) is not used further
 70 |   if (!$ph->false_positives) { # keys are emitted unless -false-positives was given
 71 |     if ($ph->option('-pic')) { # -pic: keys are handed to c_stringpool instead of a pointer array
 72 |       c_stringpool($FH, $keys);
 73 |     } else {
 74 |       print $FH "
 75 |   /* keys */
 76 |   static const char* keys[] = {\n";
 77 |       _save_c_array(4, $FH, $keys, "\"%s\"");
 78 |       print $FH "  };";
 79 |     }
 80 |   }
 81 |   # ... XXX no lookup code is generated yet: the emitted function always returns -1
 82 |   print $FH "
 83 |   return -1;\n";
 84 |   print $FH "}\n";
 85 |   close $FH;
 86 | }
 87 | 
 88 | =item c_hash_impl $ph, $base
 89 | 
 90 | String for C code for the 2 hash functions. Honors C<-nul>.
 91 | 
 92 | =cut
 93 | 
 94 | # XXX placeholder: should emit 2 hash funcs from the randomly generated uhash params
 95 | sub c_hash_impl { return ""; }
 96 | 
 97 | =item perfecthash key
 98 | 
 99 | dummy, for testing only. Use the generated C function instead.
100 | 
101 | =cut
102 | 
103 | sub perfecthash {
104 |   my ($ph, $key) = @_;
105 |   my ($keys, $values) = ($ph->[2], $ph->[3]);
106 |   my $dict = $ph->[4];
107 |   if (!$dict) {
108 |     # Build the key => value map once and cache it in slot 4; without
109 |     # the store the whole map was rebuilt on every single lookup.
110 |     $dict = $ph->[4] = {};
111 |     $dict->{$keys->[$_]} = $values->[$_] for 0 .. scalar(@$keys)-1;
112 |   }
113 |   return exists $dict->{$key} ? $dict->{$key} : undef;
114 | }
115 | 
116 | =item false_positives
117 | 
118 | Returns 1 if the hash might return false positives, i.e. will return
119 | the index of an existing key when you searched for a non-existing key.
120 | 
121 | The default is undef, unless you created the hash with the option
122 | C<-false-positives>.
123 | 
124 | =cut
125 | 
126 | sub false_positives {
127 |   my ($ph) = @_; return exists $ph->[1]{'-false-positives'}; # slot 1 holds the options hash
128 | }
129 | 
130 | =item option $ph
131 | 
132 | Access the option hash in $ph.
133 | 
134 | =cut
135 | 
136 | sub option {
137 |   my ($ph, $name) = @_; return $ph->[1]{$name}; # undef when the option was not given
138 | }
139 | 
140 | #sub c_include { }
141 | #sub c_lib { }
142 | 
143 | =back
144 | 
145 | =cut
146 | 
147 | 1;
148 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/MoreHashes.pm:
--------------------------------------------------------------------------------
  1 | package Perfect::Hash::MoreHashes;
  2 | use strict;
  3 | use Perfect::Hash::C;
  4 | our @ISA = qw(Perfect::Hash Perfect::Hash::C);
  5 | our $VERSION = '0.01';
  6 | 
  7 | =head1 NAME
  8 | 
  9 | Perfect::Hash::MoreHashes - perl and c implementations of alternative hash funcs
 10 | 
 11 | =head1 METHODS
 12 | 
 13 | =over
 14 | 
 15 | =item hash_murmur3 string, [seed]
 16 | 
 17 | pure-perl murmur3 int32 finalizer
 18 | 
 19 | =cut
 20 | 
 21 | sub hash_murmur3 {
 22 |   use bytes;
 23 |   my ($ph, $str, $h) = @_;
 24 |   $h = ($h || 0) & 0xffffffff; # optional seed, default 0
 25 |   # Mask every product to 32 bits: unmasked it overflows into a float and the hash degenerates.
 26 |   for my $c (split "", $str) {
 27 |     $h ^= ord($c); # XXX better slice strings into 4 bytes
 28 |     $h ^= $h >> 16;
 29 |     $h = ($h * 0x85ebca6b) & 0xffffffff; # exact with 64-bit integers
 30 |     $h ^= $h >> 13;
 31 |     $h = ($h * 0xc2b2ae35) & 0xffffffff;
 32 |     $h ^= $h >> 16;
 33 |   }
 34 |   return $h
 35 | }
 36 | 
 37 | =item c_hash_impl_fnv1_mantis $ph, $base
 38 | 
 39 | C version of a faster FNV1 variant, incompatible with our pure-perl fnv1
 40 | 
 41 | =cut
 42 | 
 43 | sub c_hash_impl_fnv1_mantis {
 44 |   my ($ph, $base) = @_; # $base prefixes the name of the generated C function
 45 |   return "
 46 | #ifdef _MSC_VER
 47 | #define INLINE __inline
 48 | #else
 49 | #define INLINE inline
 50 | #endif
 51 | 
 52 | #ifdef _MSC_VER
 53 | # define rotl(a,b) _rotl(a,b)
 54 | #else
 55 | static INLINE unsigned int rotl(unsigned int x, unsigned char r) {
 56 |   return (x << r) | (x >> (32 - r)); /* portable rotate, needs 0 < r < 32 */
 57 | }
 58 | #endif
 59 | 
 60 | /* optimized Mantis FNV from http://www.sanmayce.com/Fastest_Hash/
 61 |    but without 64 bit and xmm 128 bit extensions.
 62 | */
 63 | static INLINE
 64 | unsigned $base\_hash_mantis(unsigned d, const unsigned char *str, const int len) {
 65 |   const unsigned int PRIME = 709607;        /* ad3e7 */
 66 |   unsigned int hash32 = d ? d : 2166136261U; /* 811c9dc5 */
 67 |   const unsigned char *p = str;
 68 |   int rest; /* bytes still unread once the len % 16 head is consumed */
 69 |   /* Cases: 0,1,2,3,4,5,6,7,...,15 */
 70 |   if (len & 2*sizeof(int)) {
 71 |     hash32 = (hash32 ^ *(unsigned int*)p) * PRIME;
 72 |     p += sizeof(int);
 73 |     hash32 = (hash32 ^ *(unsigned int*)p) * PRIME;
 74 |     p += sizeof(int);
 75 |   }
 76 |   /* Cases: 0,1,2,3,4,5,6,7 */
 77 |   if (len & sizeof(int)) {
 78 |     hash32 = (hash32 ^ *(unsigned int*)p) * PRIME;
 79 |     p += sizeof(int);
 80 |   }
 81 |   if (len & sizeof(short)) {
 82 |     hash32 = (hash32 ^ *(unsigned short*)p) * PRIME;
 83 |     p += sizeof(short);
 84 |   }
 85 |   if (len & 1) {
 86 |     hash32 = (hash32 ^ *p) * PRIME;
 87 |     p += 1;
 88 |   }
 89 |   rest = len - (int)(p - str);
 90 |   for (; rest > (int)(2*sizeof(int)); rest -= 2*sizeof(int), p += 2*sizeof(int)) {
 91 | 	hash32 = (hash32 ^ (rotl(*(int *)p,5) ^ *(int *)(p+4))) * PRIME;
 92 |   }
 93 |   if (rest > 0) { /* final 8 bytes; skipped when the head consumed the whole key */
 94 |     hash32 = (hash32 ^ *(short*)(p+0*sizeof(short))) * PRIME;
 95 |     hash32 = (hash32 ^ *(short*)(p+1*sizeof(short))) * PRIME;
 96 |     hash32 = (hash32 ^ *(short*)(p+2*sizeof(short))) * PRIME;
 97 |     hash32 = (hash32 ^ *(short*)(p+3*sizeof(short))) * PRIME;
 98 |   }
 99 |   return hash32 ^ (hash32 >> 16);
100 | }
101 | 
102 | ";
103 | }
104 | 
105 | =back
106 | 
107 | =cut
108 | 
109 | 1;
110 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/Pearson16.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::Pearson16;
 2 | our $VERSION = '0.01';
 3 | #use coretypes;
 4 | use strict;
 5 | #use warnings;
 6 | use Perfect::Hash;
 7 | use Perfect::Hash::PearsonNP;
 8 | use Perfect::Hash::XS;
 9 | our @ISA = qw(Perfect::Hash::Pearson);
10 | use integer;
11 | use bytes;
12 | 
13 | =head1 DESCRIPTION
14 | 
15 | Generate non-perfect pearson hash with an optimized 16bit hash
16 | function, a big 16bit table (size: 65536 shorts), and static binary
17 | tree collision resolution.
18 | 
19 | =head1 METHODS
20 | 
21 | =head2 new $dict, @options
22 | 
23 | Computes a non-perfect, but fast pearson hash table using the given
24 | dictionary, given as hashref or arrayref, with fast lookup.
25 | 
26 | Honored options are:
27 | 
28 | I<-false-positives>
29 | 
30 | I<-max-time seconds> stops generating a pperf at seconds and uses a
31 | non-perfect, but still fast hash then. Default: 60s.
32 | 
33 | It returns an object with @H containing the randomized
34 | pearson lookup table of size 65536.
35 | 
36 | =cut
37 | 
38 | sub new { # constructor: takes the same arguments as Perfect::Hash::PearsonNP::new
39 |   return Perfect::Hash::PearsonNP::new(@_); # plain function call, so the class name stays in $_[0]
40 | }
41 | 
42 | =head2 hash obj, $key
43 | 
44 | =cut
45 | 
46 | #sub hash_pp {
47 | #  my ($ph, $key ) = @_;
48 | #  my $size = $ph->[0];
49 | #  my $H = $ph->[1];
50 | #  my $d = 0;
51 | #  # process in 16bit chunks
52 | #  for my $c (unpack("S*", $key) {
53 | #    $d = $H->[$d ^ $c];
54 | #  }
55 | #  return $d % $size;
56 | #}
57 | 
58 | =head2 perfecthash $obj, $key
59 | 
60 | Look up a $key in the pearson hash table
61 | and return the associated index into the initially 
62 | given $dict.
63 | 
64 | Note that the hash is probably not perfect.
65 | 
66 | Without C<-false-positives> it checks if the index is correct,
67 | otherwise it will return undef.
68 | With C<-false-positives>, the key must have existed in
69 | the given dictionary. If not, a wrong index will be returned.
70 | 
71 | =head2 false_positives
72 | 
73 | Returns 1 if the hash might return false positives,
74 | i.e. will return the index of an existing key when
75 | you searched for a non-existing key.
76 | 
77 | The default is undef, unless you created the hash with the option
78 | C<-false-positives>.
79 | 
80 | =cut
81 | 
82 | # run this file directly to self-test: pb -d lib/Perfect/Hash/Pearson16.pm examples/words500
83 | # or: pb -d -MPerfect::Hash -e'Perfect::Hash->new([split/\n/,`cat "examples/words20"`], "-pearson16")'
84 | if (!caller) {
85 |   Perfect::Hash::_test(shift @ARGV, "-pearson16", @ARGV);
86 | }
87 | 
88 | 1;
89 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/Pearson32.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::Pearson32;
 2 | our $VERSION = '0.01';
 3 | #use coretypes;
 4 | use strict;
 5 | #use warnings;
 6 | use Perfect::Hash;
 7 | use Perfect::Hash::PearsonNP;
 8 | use Perfect::Hash::XS;
 9 | our @ISA = qw(Perfect::Hash::Pearson);
10 | use integer;
11 | use bytes;
12 | 
13 | =head1 DESCRIPTION
14 | 
15 | Generate non-perfect pearson hash with an optimized 32bit hash function,
16 | a pearson table of size 256 and static binary tree collision resolution.
17 | 
18 | =head1 METHODS
19 | 
20 | =head2 new $dict, @options
21 | 
22 | Computes a non-perfect, but fast pearson hash table using the given
23 | dictionary, given as hashref or arrayref, with fast lookup.
24 | 
25 | Honored options are:
26 | 
27 | I<-false-positives>
28 | 
29 | I<-max-time seconds> stops generating a pperf at seconds and uses a
30 | non-perfect, but still fast hash then. Default: 60s.
31 | 
32 | It returns an object with @H containing the randomized
33 | pearson lookup table of size 256.
34 | 
35 | =cut
36 | 
37 | sub new { # constructor: takes the same arguments as Perfect::Hash::PearsonNP::new
38 |   goto &Perfect::Hash::PearsonNP::new; # tail call: PearsonNP::new runs with the current @_
39 | }
40 | 
41 | =head2 hash obj, $key
42 | 
43 | =cut
44 | 
45 | #sub hash_pp {
46 | #  my ($ph, $key ) = @_;
47 | #  my $size = $ph->[0];
48 | #  my $H = $ph->[1];
49 | #  my $d = 0;
50 | #  # process in 32bit chunks
51 | #  for my $c (split "", $key) {
52 | #    $d = $H->[$d ^ ord($c)];
53 | #  }
54 | #  return $d % $size;
55 | #}
56 | 
57 | =head2 perfecthash $obj, $key
58 | 
59 | Look up a $key in the pearson hash table
60 | and return the associated index into the initially 
61 | given $dict.
62 | 
63 | Note that the hash is probably not perfect.
64 | 
65 | Without C<-false-positives> it checks if the index is correct,
66 | otherwise it will return undef.
67 | With C<-false-positives>, the key must have existed in
68 | the given dictionary. If not, a wrong index will be returned.
69 | 
70 | =head2 false_positives
71 | 
72 | Returns 1 if the hash might return false positives,
73 | i.e. will return the index of an existing key when
74 | you searched for a non-existing key.
75 | 
76 | The default is undef, unless you created the hash with the option
77 | C<-false-positives>.
78 | 
79 | =cut
80 | 
81 | # local testing: pb -d lib/Perfect/Hash/Pearson32.pm examples/words20
82 | # or just: pb -d -MPerfect::Hash -e'new Perfect::Hash([split/\n/,`cat "examples/words20"`], "-pearson32")'
83 | unless (caller) { # no caller: the file was executed directly, not loaded as a module
84 |   &Perfect::Hash::_test(shift @ARGV, "-pearson32", @ARGV)
85 | }
86 | 
87 | 1;
88 | 


--------------------------------------------------------------------------------
/lib/Perfect/Hash/XS.pm:
--------------------------------------------------------------------------------
 1 | package Perfect::Hash::XS;
 2 | use strict;
 3 | our $VERSION = '0.01';
 4 | use Perfect::Hash::C;
 5 | our @ISA = qw(Perfect::Hash::C Perfect::Hash);
 6 | 
 7 | use XSLoader;
 8 | XSLoader::load('Perfect::Hash', $VERSION);
 9 | 
10 | =head1 NAME
11 | 
12 | Perfect::Hash::XS - Perfect Hash output formatter for XS - compiled perl extensions
13 | 
14 | =head1 SYNOPSIS
15 | 
16 |     pperf keyfile --for-xs --prefix=ph
17 | 
18 |     use Perfect::Hash;
19 |     $hash->{chr($_)} = int rand(2) for 48..90;
20 |     my $ph = Perfect::Hash->new($hash);
21 |     $ph->save_xs("ph.inc");
22 | 
23 |     my @dict = split/\n/,`cat /usr/share/dict/words`;
24 |     my $ph2 = Perfect::Hash->new(\@dict, '-minimal', '-for-xs');
25 |     $ph2->save_xs("ph1.inc");
26 | 
27 | =head1 DESCRIPTION
28 | 
29 | Optimized for sharedlib and PIC, and it can hold more and mixed value
30 | types, not just strings and integers. With the help of Data::Compile
31 | (planned) even any perl values, like code refs, magic, ...
32 | 
33 | This is a replacement for cdb databases or write-once or only daily
34 | Storable containers.
35 | 
36 | =head1 METHODS
37 | 
38 | =over
39 | 
40 | =item save_xs filename, options
41 | 
42 | Generate XS code, with the perl values saved as perl types.
43 | 
44 | =back
45 | 
46 | =cut
47 | 
48 | sub save_h_header { return; } # deliberately does nothing and returns an empty list
49 | 
50 | # Create $filename, write the C #include prelude, return the open handle; dies on open failure.
51 | sub save_c_header {
52 |   my ($ph, $filename) = @_;
53 |   open my $FH, ">", $filename or die "$filename: $!"; # $! carries the OS error; the former "@!" did not
54 |   print $FH "#include <string.h>\n"; # for memcmp/strlen
55 |   return $FH;
56 | }
57 | 
58 | # Build the C declaration of the generated lookup function for prefix $base.
59 | # The returned string starts with a newline and carries no trailing ";".
60 | # With the -nul option the key length is declared as a second parameter.
61 | sub c_funcdecl {
62 |   my ($ph, $base) = @_;
63 |   my $params = "const char* s";
64 |   $params .= ", int l" if $ph->option('-nul');
65 |   my $decl = "long ${base}_lookup($params)";
66 |   return "\n" . $decl;
67 | }
68 | 
69 | # Generate XS code for the hash into $file. Not implemented yet:
70 | # every call dies with 'save_xs nyi'.
71 | sub save_xs {
72 |   my ($ph, $file, @options) = @_; # accepted for the documented interface, unused so far
73 |   die 'save_xs nyi';
74 | }
75 | 1; # explicit true value, so loading does not depend on what XSLoader::load returned
76 | 


--------------------------------------------------------------------------------
/script/pperf.PL:
--------------------------------------------------------------------------------
  1 | #! perl
  2 | use Config;
  3 | use File::Basename qw(&basename &dirname);
  4 | use File::Spec;
  5 | use Cwd;
  6 | 
  7 | # List explicitly here the variables you want Configure to
  8 | # generate.  Metaconfig only looks for shell variables, so you
  9 | # have to mention them as if they were shell variables, not
 10 | # %Config entries.  Thus you write
 11 | #  $startperl
 12 | # to ensure Configure will look for $Config{startperl}.
 13 | # Wanted:  $archlibexp
 14 | 
 15 | # This forces PL files to create target in same directory as PL file.
 16 | # This is so that make depend always knows where to find PL derivatives.
 17 | $origdir = cwd; # remembered so the script can chdir back once extraction is done
 18 | chdir dirname($0);
 19 | $file = basename($0, '.PL');
 20 | $file .= '.com' if $^O eq 'VMS';
 21 | 
 22 | open OUT, ">", $file or die "Can't create $file: $!"; # three-arg open: $file is never parsed for a mode
 23 | 
 24 | print "Extracting $file (with variable substitutions)\n";
 25 | 
 26 | # In this section, perl variables will be expanded during extraction.
 27 | # You can use $Config{...} to use Configure variables.
 28 | 
 29 | print OUT <<"!GROK!THIS!";
 30 | $Config{startperl}
 31 |     eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
 32 |     if \$running_under_some_shell;
 33 | --\$running_under_some_shell;
 34 | !GROK!THIS!
 35 | 
 36 | # In the following, perl variables are not expanded during extraction.
 37 | 
 38 | print OUT <<'!NO!SUBS!';
 39 | use strict;
 40 | use Perfect::Hash;
 41 | use Getopt::Long;
 42 | use Pod::Usage;
 43 | my $options = {}; # filled in by the GetOptions call further down
 44 | 
 45 | sub opt(*) { # false for an option that was never set, otherwise its value (0 when that value is false)
 46 |   my ($name) = @_;
 47 |   return exists $options->{$name} ? ($options->{$name} || 0) : !1;
 48 | }
 49 | 
 50 | sub version { # returns "pperf <version>\n", the version being $Perfect::Hash::VERSION
 51 |   no warnings 'once';
 52 |   return sprintf("pperf %s\n", $Perfect::Hash::VERSION);
 53 | }
 54 | 
 55 | sub _usage_and_die { # prints the usage text below to STDOUT, then exits
 56 |   print <<'EOF';
 57 | Usage:
 58 | pperf [method] [options...] keyfile
 59 | 
 60 | Methods:
 61 | --hanovpp
 62 | --hanov
 63 | --urban
 64 | --pearson --pearsonnp --pearson8
 65 | --cmph-bdz_ph --cmph-bdz --cmph-bmz --cmph-chm --cmph-fch --cmph-chd_ph --cmph-chd
 66 | ...
 67 | 
 68 | Options:
 69 | --false-positives
 70 | --nul
 71 | --for-c       (default)
 72 | --prefix=name (default: pperf)
 73 | 
 74 | Planned:
 75 | --minimal
 76 | --optimal-size
 77 | --optimal-speed
 78 | --for-xs
 79 | --hash=name
 80 | --pic
 81 | --inline
 82 | --null-strings
 83 | --7bit
 84 | --ignore-case
 85 | --unicode-ignore-case
 86 | 
 87 | EOF
 88 |   exit; # plain exit: despite the sub's name the exit status is 0 and nothing dies
 89 | }
 90 | 
 91 | # Print the version, let pod2usage print any POD usage, then print the
 92 | # option summary and exit. pod2usage() exits by default, which left the
 93 | # _usage_and_die() call unreachable; -exitval => 'NOEXIT' makes it return.
 94 | sub helpme {
 95 |   print version(),"\n";
 96 |   my $verbosity = opt('v') || 0; # no 'v' option is registered with GetOptions, so normally 0
 97 |   pod2usage( -verbose => $verbosity, -exitval => 'NOEXIT' );
 98 |   _usage_and_die();
 99 | }
100 | 
101 | GetOptions
102 |   ($options,
103 |    'help|h',
104 |    'hanovpp',
105 |    'hanov',
106 |    'urban',
107 |    'pearson',
108 |    'pearson8',
109 |    'pearsonnp',
110 |    'cmph-bdz_ph', 'cmph-bdz', 'cmph-bmz', 'cmph-chm', 'cmph-fch', 'cmph-chd_ph', 'cmph-chd',
111 |    #'gperf', 'bob', 'switch', 'cuckoo',
112 |    # and many more ... get them from %algo_methods?
113 |    'false-positives!',
114 |    'nul',
115 |    '7bit',
116 |    'prefix=s',
117 |    'for-c!',
118 |    'for-xs',
119 |    # get them dynamically?
120 |    'for-java',
121 |    'for-ruby',
122 |    'for-python',
123 |    'for-php',
124 |    'for-pecl',
125 |    'ignore-case',
126 |    'unicode-ignore-case',
127 |    'optimal-speed',
128 |    'optimal-size',
129 |    'minimal',
130 |   );
131 | helpme() if opt('help'); # And exit
132 | my $keyfile = shift or _usage_and_die();
133 | my @options = map { ("-".$_, $options->{$_} == 1 ? () : $options->{$_}) } keys %$options; # "--name" becomes "-name"; a value other than 1 follows it
134 | # new() may return a false value; stop with a clear message rather than calling a method on undef.
135 | my $ph = Perfect::Hash->new($keyfile, @options) or die "pperf: no perfect hash created for $keyfile\n";
136 | my @fmt = grep /^-for/, @options;
137 | if (@fmt) {
138 |   for (@fmt) {
139 |     s/^-for-//; # "-for-xs" becomes "xs": formatter class suffix (upper-cased) and save_* method suffix
140 |     my $class = "Perfect::Hash::" . uc($_);
141 |     eval "require $class; 1" or die "pperf: cannot load $class for --for-$_: $@"; # report a missing formatter instead of ignoring it
142 |     my $save = "save_".$_;
143 |     $ph->$save($options->{prefix});
144 |   }
145 | } else {
146 |   $ph->save_c($options->{prefix}); # no --for-* option given: C output
147 | }
148 | 
149 | # Local Variables:
150 | #   mode: cperl
151 | #   cperl-indent-level: 2
152 | #   fill-column: 80
153 | # End:
154 | # vim: expandtab shiftwidth=2:
155 | !NO!SUBS!
156 | 
157 | close OUT or die "Can't close $file: $!"; # buffered write errors surface at close
158 | chmod 0755, $file or die "Can't reset permissions for $file: $!\n"; # make the extracted script executable
159 | exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':'; # a successful exec replaces this process, so the chdir below is not reached then
160 | chdir $origdir;
161 | 


--------------------------------------------------------------------------------
/t/00basic.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use Test::More;
 3 | use Perfect::Hash;
 4 | 
 5 | use lib 't';
 6 | require "test.pl";
 7 | 
 8 | my ($default, $methods, $opts) = opt_parse_args();
 9 | $methods = [ grep(!/^-cmph/, @$methods) ]; # the -cmph* methods are left out of this test
10 | 
11 | plan tests => scalar(@$methods); # exactly one result is reported per method
12 | 
13 | my %dict = map {chr $_ => $_-48} (48..64); # chr(48)..chr(64), i.e. '0'..'@', mapped to 0..16
14 | delete $dict{'\\'}; # a backslash key is never created above, so this removes nothing
15 | for my $m (@$methods) {
16 |   my $ph = new Perfect::Hash \%dict, $m, @$opts;
17 |   unless ($ph) { # a false constructor result is counted as a pass marked SKIP
18 |     ok(1, "SKIP empty ph $m");
19 |     next;
20 |   }
21 |  TODO: {
22 |    local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
23 |    my $ok = 1;
24 |    for my $w (sort keys %dict) {
25 |      my $o = ord $w;
26 |      my $v = $ph->perfecthash($w);
27 |      $ok = 0 if !defined($v) or $v != $o - 48; # the lookup has to return the value stored for the key
28 |      unless ($ok) { # report only the first mismatch, then stop checking this method
29 |        is(defined($v)?$v:"", $o - 48, "method '$m' for '$w' => ".(defined($v)?$v:""));
30 |        last;
31 |      }
32 |    }
33 |    $ok ? ok($ok, "method '$m'") : 0; # a pass is reported only if no mismatch was reported above
34 |   }
35 | }
36 | 


--------------------------------------------------------------------------------
/t/01words.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use Test::More;
 3 | use Perfect::Hash;
 4 | 
 5 | use lib 't';
 6 | require "test.pl";
 7 | 
 8 | my ($default, $methods, $opts) = opt_parse_args();
 9 | 
10 | plan tests => 3 * scalar(@$methods); # three passes below (arrayref, hashref, keyfile), one result per method in each
11 | 
12 | my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words20");
13 | my @dict = @$dictarr;
14 | 
15 | for my $m (@$methods) { # pass 1: keys handed over as an array reference
16 |   my $ph = new Perfect::Hash \@dict, $m, @$opts;
17 |   unless ($ph) {
18 |     ok(1, "SKIP empty pperf $m");
19 |     next;
20 |   }
21 |  TODO: {
22 |    local $TODO = "$m pure-perl" if exists $Perfect::Hash::algo_todo{$m};
23 |    my $ok = 1;
24 |    my $i = 0;
25 |    for my $w (@dict) {
26 |      my $v = $ph->perfecthash($w);
27 |      $ok = 0 if !defined($v) or $v ne $i; # expected result is the key's position in @dict
28 |      unless ($ok) {
29 |        is(defined($v)?$v:"", $i, "method $m with arrayref for '$w' => ".(defined($v)?$v:""));
30 |        last;
31 |      }
32 |      $i++;
33 |    }
34 |    $ok ? ok($ok, "method $m with arrayref") : 0; # on failure the is() above already produced this method's result
35 |   }
36 | }
37 | 
38 | my $line = 0;
39 | my %dict = map { $_ => $line++ } @dict; # key => its position in @dict
40 | for my $m (@$methods) { # pass 2: keys and values handed over as a hash reference
41 |   my $ph = new Perfect::Hash \%dict, $m, @$opts;
42 |   unless ($ph) {
43 |     ok(1, "SKIP empty pperf $m");
44 |     next;
45 |   }
46 |  TODO: {
47 |    local $TODO = "$m pure-perl" if exists $Perfect::Hash::algo_todo{$m};
48 |    my $ok = 1;
49 |    for my $w (sort keys %dict) {
50 |      my $v = $ph->perfecthash($w);
51 |      $ok = 0 if !defined($v) or $v ne $dict{$w}; # expected result is the value stored in %dict
52 |      unless ($ok) {
53 |        is(defined($v)?$v:"", $dict{$w}, "method $m with hashref for '$w' => ".(defined($v)?$v:""));
54 |        last;
55 |      }
56 |    }
57 |    $ok ? ok($ok, "method $m with hashref") : 0;
58 |   }
59 | }
60 | 
61 | for my $m (@$methods) { # pass 3: $dict as returned by opt_dict_size is handed over directly
62 |   my $ph = new Perfect::Hash $dict, $m, @$opts;
63 |   unless ($ph) {
64 |     ok(1, "SKIP empty pperf $m");
65 |     next;
66 |   }
67 |  TODO: {
68 |    local $TODO = "$m pure-perl" if exists $Perfect::Hash::algo_todo{$m};
69 |    my $ok = 1;
70 |    my $i = 0;
71 |    for my $w (@dict) {
72 |      my $v = $ph->perfecthash($w);
73 |      $ok = 0 if !defined($v) or $v ne $i; # expected result is again the key's position in @dict
74 |      unless ($ok) {
75 |        is(defined($v)?$v:"", $i, "method $m with keyfile for '$w' => ".(defined($v)?$v:""));
76 |        last;
77 |      }
78 |      $i++;
79 |    }
80 |    $ok ? ok($ok, "method $m with keyfile") : 0;
81 |   }
82 | }
83 | 


--------------------------------------------------------------------------------
/t/02sysdict.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | # pure perl only
 3 | use Test::More;
 4 | use Perfect::Hash;
 5 | use Time::HiRes qw(gettimeofday tv_interval); # known at compile time, so the bare gettimeofday below parses as a call
 6 | use lib 't';
 7 | require "test.pl";
 8 | 
 9 | my ($dict, $dictarr, $size, $custom_size);
10 | for (qw(examples/words /usr/share/dict/words /usr/dict/words /opt/local/share/dict/words)) {
11 |   if (-e $_) { $dict = $_; last } # first existing candidate wins
12 | }
13 | plan skip_all => "no system dict found" unless defined $dict; # $dict stays undef when no candidate exists
14 | 
15 | my ($default, $methods, $opts) = opt_parse_args('-max-time',10);
16 | $methods = [''] if $default;
17 | plan tests => scalar(@$methods); # one result per method
18 | ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, $dict);
19 | 
20 | for my $m (@$methods) {
21 |   diag "generating $m ph for $size entries in $dict..." if $ENV{TEST_VERBOSE};
22 |   my $t0 = [gettimeofday];
23 |   my $ph = new Perfect::Hash $dict, $m, @$opts;
24 |   diag "done in ",tv_interval($t0),"s\n" if $ENV{TEST_VERBOSE};
25 |   unless ($ph) {
26 |     ok(1, "SKIP empty pperf $m");
27 |     next;
28 |   }
29 |   TODO: {
30 |     local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
31 |     my $ok = 1;
32 |     my $i = 0;
33 |     for my $w (@$dictarr) {
34 |       my $v = $ph->perfecthash($w);
35 |       $ok = 0 if !defined($v) or $v ne $i; # expected result is the word's position in the dictionary
36 |       unless ($ok) {
37 |         is($v, $i, "method $m for $i-th '$w' => ".(defined($v)?$v:"")); # $v may be undef here
38 |         last;
39 |       }
40 |       $i++;
41 |     }
42 |     $ok ? ok($ok, "checked all $size words with method $m") : 0; # on failure the is() above is this method's result
43 |   }
44 | }
45 | 


--------------------------------------------------------------------------------
/t/03false-positives.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use Test::More;
 3 | use Perfect::Hash;
 4 | 
 5 | use lib 't';
 6 | require "test.pl";
 7 | 
 8 | my ($default, $methods, $opts) = opt_parse_args();
 9 | $methods = [ grep {$_ ne '-cuckoo'} @$methods ]; # -cuckoo is left out of this test
10 | plan tests => 2*scalar(@$methods); # two results per method: without and with -false-positives
11 | my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words20");
12 | my $small_dict = $size > 255 ? "examples/words20" : $dict; # used for -pearson8 only
13 | 
14 | for my $m (@$methods) {
15 |   my $ph = new Perfect::Hash($m eq '-pearson8' ? $small_dict : $dict, $m, @$opts);
16 |   unless ($ph) { # keep the count at two results per method
17 |     ok(1, "SKIP empty pperf $m");
18 |     ok(1, "SKIP");
19 |     next;
20 |   }
21 |   my $w = 'good'; # probe key; NOTE(review): presumably absent from the dictionaries used - confirm
22 |   my $v = $ph->perfecthash($w);
23 |   TODO: {
24 |     local $TODO = "$m" if $m =~ /^-cmph/;
25 |     my $vs = defined $v ? "$v" : 'undef';
26 |     if ($ph->false_positives) {
27 |       # this really should not happen!
28 |       ok(defined($v) && $v >= 0, "method $m without false-positives '$w' => $vs");
29 |     } else {
30 |       is($v, undef, "method $m without false-positives '$w' => $vs"); # the probe key must not be found
31 |     }
32 |   }
33 | 
34 |   my $ph1 = new Perfect::Hash($m eq '-pearson8' ? $small_dict : $dict, $m, @$opts, '-false-positives'); # same hash, built with -false-positives; the result is not checked for being false
35 |   $v = $ph1->perfecthash($w);
36 |   TODO: {
37 |     local $TODO = "$m" if $m =~ /^-(cmph-|pearson)/;
38 |     my $vs = defined $v ? "$v" : 'undef';
39 |     if ($ph1->false_positives) {
40 |       ok(defined($v) && $v >= 0, "method $m with false_positives '$w' => $vs"); # some index has to come back
41 |     } else {
42 |       is($v, undef, "method $m without false_positives '$w' => $vs");
43 |     }
44 |   }
45 | }
46 | 


--------------------------------------------------------------------------------
/t/04save_c.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use Test::More;
 3 | use Perfect::Hash;
 4 | 
 5 | #use Config;
 6 | use lib 't';
 7 | require "test.pl";
 8 | 
 9 | my ($default, $methods, $opts) = opt_parse_args();
10 | # Every path through the loop below reports exactly 5 results per method.
11 | plan tests => 5 * scalar(@$methods);
12 | 
13 | my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words500");
14 | my $small_dict = $size > 255 ? "examples/words20" : $dict;
15 | 
16 | # CHM passes pure-perl, but not compiled yet
17 | $Perfect::Hash::algo_todo{'-cmph-chm'} = 1;
18 | $Perfect::Hash::algo_todo{'-bob'} = 1;
19 | $Perfect::Hash::algo_todo{'-pearson16'} = 1;
20 | 
21 | my $i = 0;
22 | for my $m (@$methods) {
23 |   my $used_dict = $m eq '-pearson8'
24 |     ? $small_dict
25 |     : ($m eq '-gperf' or $custom_size)
26 |       ? $dictarr
27 |       : $dict;
28 |   my $ph = new Perfect::Hash($used_dict, $m, @$opts);
29 |   unless ($ph) {
30 |     ok(1, "SKIP empty pperf $m");
31 |     ok(1) for 1..4;
32 |     $i++;
33 |     next;
34 |   }
35 |   if ($m =~ /^-cmph/) {
36 |     ok(1, "SKIP nyi save_c for $m");
37 |     ok(1) for 1..4;
38 |     $i++;
39 |     next;
40 |   }
41 |   my ($nul) = grep {$_ eq '-nul'} @$opts;
42 |   my ($shared) = grep {$_ eq '-shared'} @$opts;
43 |   my $suffix = $m eq "-bob" ? "_hash" : "";
44 |   my $base = "pperf$suffix";
45 |   my $out = "$base.c";
46 |   test_wmain($m, 1, 'AOL', $ph->perfecthash('AOL'), $suffix, $nul);
47 |   $i++;
48 |   $ph->save_c($base);
49 |   if (ok(-f "$base.c" && -f "$base.h", "$m generated $base.c/.h")) { # name the files actually checked ($base differs for -bob)
50 |     my ($cmd, $cmd1);
51 |     if ($shared) {
52 |       $cmd = compile_shared($ph, $suffix);
53 |       $cmd1 = link_shared($ph, $suffix);
54 |     } else {
55 |       $cmd = compile_static($ph, $suffix);
56 |     }
57 |     diag($cmd) if $ENV{TEST_VERBOSE};
58 |     my $retval = system($cmd);
59 |     if (!($retval>>8) and $cmd1) { # link only after a successful compile
60 |       print "$cmd1\n" if $ENV{TEST_VERBOSE};
61 |       $retval = system($cmd1);
62 |     }
63 |     if (ok(!($retval>>8), "could compile $m")) {
64 |       my $callprefix = $^O eq 'MSWin32' ? ""
65 |         : $^O eq 'darwin' ? "DYLD_LIBRARY_PATH=. ./"
66 |         : "LD_LIBRARY_PATH=. ./";
67 |       my $retstr = `${callprefix}$base`;
68 |       $retval = $?;
69 |       TODO: {
70 |         local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m} and $m !~ /^-cmph/;
71 |         like($retstr, qr/^ok - c lookup exists/m, "$m c lookup exists");
72 |       }
73 |       TODO: {
74 |         local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
75 |         like($retstr, qr/^ok - c lookup notexists/m, "$m c lookup notexists");
76 |       }
77 |     } else {
78 |       ok(1, "SKIP") for 0..1;
79 |     }
80 |     TODO: {
81 |       local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
82 |       ok(!($retval>>8), "could run $m");
83 |     }
84 |   } else {
85 |     ok(1, "SKIP") for 0..3;
86 |   }
87 |   unlink($base,"$base.c","$base.h","main.c","main$suffix.c") if $default; # also main$suffix.c, as the sibling save_c tests remove; NOTE(review): confirm the name test_wmain writes
88 | }
89 | 


--------------------------------------------------------------------------------
/t/05save_c_nul.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use Test::More;
 3 | use Perfect::Hash;
 4 | 
 5 | use lib 't';
 6 | require "test.pl";
 7 | 
 8 | my ($default, $methods, $opts) = test_parse_args("-nul"); # NOTE(review): the sibling tests call opt_parse_args - confirm test_parse_args exists in t/test.pl
 9 | # Every path through the loop below has to report exactly 5 results per method.
10 | plan tests => 5 * scalar(@$methods);
11 | my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words500");
12 | my $small_dict = $size > 255 ? "examples/words20" : $dict;
13 | 
14 | # CHM passes pure-perl, but not compiled yet
15 | $Perfect::Hash::algo_todo{'-cmph-chm'} = 1;
16 | $Perfect::Hash::algo_todo{'-bob'} = 1;
17 | $Perfect::Hash::algo_todo{'-pearson16'} = 1;
18 | 
19 | my $i = 0;
20 | my $key = "AOL";
21 | 
22 | for my $m (@$methods) {
23 |   my $used_dict = $m eq '-pearson8'
24 |     ? $small_dict
25 |     : ($m eq '-gperf' or $custom_size)
26 |       ? $dictarr
27 |       : $dict;
28 |   my $ph = new Perfect::Hash($used_dict, $m, @$opts, "-nul");
29 |   unless ($ph) {
30 |     ok(1, "SKIP empty pperf $m");
31 |     ok(1) for 1..4;
32 |     $i++;
33 |     next;
34 |   }
35 |   if ($m =~ /^-cmph/) {
36 |     ok(1, "SKIP nyi save_c for $m");
37 |     ok(1) for 1..4;
38 |     $i++;
39 |     next;
40 |   }
41 |   my $suffix = $m eq "-bob" ? "_hash" : "_nul";
42 |   my $base = "pperf$suffix";
43 |   my $out = "$base.c";
44 |   test_wmain($m, 1, $key, $ph->perfecthash($key), $suffix, 1);
45 |   $i++;
46 |   $ph->save_c($base);
47 |   if (ok(-f "$base.c" && -f "$base.h", "$m generated $base.c/.h")) {
48 |     my $cmd = compile_static($ph, $suffix);
49 |     diag($cmd) if $ENV{TEST_VERBOSE};
50 |     my $retval = system($cmd);
51 |     if (ok(!($retval>>8), "could compile $m")) {
52 |       my $retstr = $^O eq 'MSWin32' ? `$base` : `./$base`;
53 |       $retval = $?;
54 |       TODO: {
55 |         local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m} and $m !~ /^-cmph/;
56 |         like($retstr, qr/^ok - c lookup exists/m, "$m c lookup exists");
57 |       }
58 |       TODO: {
59 |         local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
60 |         like($retstr, qr/^ok - c lookup notexists/m, "$m c lookup notexists");
61 |       }
62 |     } else {
63 |       ok(1, "SKIP") for 1..2;
64 |     }
65 |     TODO: {
66 |       local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
67 |       ok(!($retval>>8), "could run $m");
68 |     }
69 |   } else {
70 |     ok(1, "SKIP") for 1..4; # 1 failed "generated" result + 4 skips = the planned 5 (it was 1..3, one short)
71 |   }
72 |   unlink("$base","$base.c","$base.h","main$suffix.c") if $default;
73 | }
74 | 


--------------------------------------------------------------------------------
/t/06save_c_utf8.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use Test::More;
 3 | use Perfect::Hash;
 4 | 
 5 | use bytes;
 6 | use lib 't';
 7 | require "test.pl";
 8 | 
 9 | my ($default, $methods, $opts) = opt_parse_args('-max-time', 10);
10 | 
11 | plan tests => 4 * scalar(@$methods); # every path through the loop below reports 4 results per method
12 | 
13 | my ($dict, $dictarr, $size) = opt_dict_size($opts, "examples/utf8");
14 | my @dict = @$dictarr;
15 | 
16 | # CMPH worked fine for some time
17 | #delete $Perfect::Hash::algo_todo{'-cmph-bdz_ph'};
18 | #delete $Perfect::Hash::algo_todo{'-cmph-bdz'};
19 | #delete $Perfect::Hash::algo_todo{'-cmph-bmz'};
20 | delete $Perfect::Hash::algo_todo{'-cmph-chm'};
21 | #delete $Perfect::Hash::algo_todo{'-cmph-fch'};
22 | #delete $Perfect::Hash::algo_todo{'-cmph-chd_ph'};
23 | #delete $Perfect::Hash::algo_todo{'-cmph-chd'};
24 | $Perfect::Hash::algo_todo{'-bob'} = 1;
25 | $Perfect::Hash::algo_todo{'-pearson16'} = 1;
26 | 
27 | my @small_dict = @dict[0..200]; # first 201 slots of @dict, used for -pearson8 only
28 | my $i = 0;
29 | #my $suffix = "_utf8";
30 | 
31 | for my $m (@$methods) {
32 |   my $used_dict = $m eq '-pearson8'
33 |     ? \@small_dict
34 |     : $m eq '-gperf'
35 |       ? $dictarr
36 |       : $dict;
37 |   my $ph = new Perfect::Hash($used_dict, $m, @$opts);
38 |   unless ($ph) {
39 |     ok(1, "SKIP empty pperf $m") for 1..4;
40 |     $i++;
41 |     next;
42 |   }
43 |   my $suffix = $m eq "-bob" ? "_hash" : "_utf8";
44 |   my $base = "pperf$suffix";
45 |   test_wmain_all($m, \@dict, $opts, $suffix);
46 |   $i++;
47 |   $ph->save_c($base);
48 |   # utf8 seqs being split on word boundaries with -switch in comments caused
49 |   # emacs display a randomly wrong encoding - mojibake.
50 |   open my $FH, ">>", "$base.c"; # append mode; the result of open is not checked
51 |   print $FH "/*\nLocal variables:\n  mode: C\n  coding: utf-8-unix\nEnd:\n*/";
52 |   close $FH;
53 |   if (ok(-f "$base.c" && -f "$base.h", "$m generated $base.c/.h")) {
54 |     my $cmd = compile_static($ph, $suffix);
55 |     diag($cmd) if $ENV{TEST_VERBOSE};
56 |     my $retval = system($cmd);
57 |     if (ok(!($retval>>8), "could compile $m")) {
58 |       my $retstr = $^O eq 'MSWin32' ? `$base` : `./$base`;
59 |       $retval = $?; # from here on $retval is the status of the compiled program, not of the compiler
60 |       TODO: {
61 |         local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m};
62 |         is($retval>>8, 0, "no c lookup errors $m");
63 |         diag($retstr) if $retval>>8 and $ENV{TEST_VERBOSE};
64 |       }
65 |     } else {
66 |       ok(1, "SKIP !compile");
67 |     }
68 |   TODO: {
69 |     local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
70 |     ok(!($retval>>8), "could run $m");
71 |     }
72 |   } else {
73 |     ok(1, "SKIP !save_c") for 1..3; # 1 failed "generated" result + 3 skips = the planned 4
74 |   }
75 |   unlink("$base","$base.c","$base.h","main$suffix.c") if $default;
76 | }
77 | 


--------------------------------------------------------------------------------
/t/07save_c_pic.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl -w
 2 | use Test::More;
 3 | use Perfect::Hash;
 4 | 
 5 | use bytes;
 6 | use lib 't';
 7 | require "test.pl";
 8 | 
 9 | my ($default, $methods, $opts) = opt_parse_args('-max-time', 10);
10 | 
11 | plan tests => 4 * scalar(@$methods); # every path through the loop below reports 4 results per method
12 | 
13 | my ($dict, $dictarr, $size) = opt_dict_size($opts, "examples/words500");
14 | my @dict = @$dictarr;
15 | 
16 | # CMPH worked fine for some time
17 | #delete $Perfect::Hash::algo_todo{'-cmph-bdz_ph'};
18 | #delete $Perfect::Hash::algo_todo{'-cmph-bdz'};
19 | #delete $Perfect::Hash::algo_todo{'-cmph-bmz'};
20 | delete $Perfect::Hash::algo_todo{'-cmph-chm'};
21 | #delete $Perfect::Hash::algo_todo{'-cmph-fch'};
22 | #delete $Perfect::Hash::algo_todo{'-cmph-chd_ph'};
23 | #delete $Perfect::Hash::algo_todo{'-cmph-chd'};
24 | $Perfect::Hash::algo_todo{'-bob'} = 1;
25 | $Perfect::Hash::algo_todo{'-pearson16'} = 1;
26 | 
27 | my @small_dict = @dict[0..200]; # first 201 slots of @dict, used for -pearson8 only
28 | my $i = 0;
29 | 
30 | for my $m (@$methods) {
31 |   my $used_dict = $m eq '-pearson8'
32 |     ? \@small_dict
33 |     : $m eq '-gperf'
34 |       ? $dictarr
35 |       : $dict;
36 |   my $ph = new Perfect::Hash($used_dict, $m, "-pic", @$opts); # "-pic" is always added for this test
37 |   unless ($ph) {
38 |     ok(1, "SKIP empty pperf $m") for 1..4;
39 |     $i++;
40 |     next;
41 |   }
42 |   my $suffix = $m eq "-bob" ? "_hash" : "_pic";
43 |   my $base = "pperf$suffix";
44 |   test_wmain_all($m, \@dict, $opts, $suffix);
45 |   $i++;
46 |   $ph->save_c($base);
47 |   # utf8 seqs being split on word boundaries with -switch in comments caused
48 |   # emacs display a randomly wrong encoding - mojibake.
49 |   open my $FH, ">>", "$base.c"; # append mode; the result of open is not checked
50 |   print $FH "/*\nLocal variables:\n  mode: C\n  coding: utf-8-unix\nEnd:\n*/";
51 |   close $FH;
52 |   if (ok(-f "$base.c" && -f "$base.h", "$m generated $base.c/.h")) {
53 |     my $cmd = compile_shared($ph, $suffix);
54 |     diag($cmd) if $ENV{TEST_VERBOSE};
55 |     my $cmd1 = link_shared($ph, $suffix);
56 |     my $retval = system($cmd);
57 |     if (!($retval>>8)) { # link only after a successful compile
58 |       print "$cmd1\n" if $ENV{TEST_VERBOSE};
59 |       $retval = system($cmd1);
60 |     }
61 |     if (ok(!($retval>>8), "could compile $m")) {
62 |       my $callprefix = $^O eq 'MSWin32' ? ""
63 |         : $^O eq 'darwin' ? "DYLD_LIBRARY_PATH=. ./"
64 |         : "LD_LIBRARY_PATH=. ./";
65 |       my $retstr = `${callprefix}$base`;
66 |       $retval = $?; # from here on $retval is the status of the compiled program, not of the build
67 |       TODO: {
68 |         local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m};
69 |         is($retval>>8, 0, "no c lookup errors $m");
70 |         diag($retstr) if $retval>>8 and $ENV{TEST_VERBOSE};
71 |       }
72 |     } else {
73 |       ok(1, "SKIP !compile");
74 |     }
75 |   TODO: {
76 |     local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
77 |     ok(!($retval>>8), "could run $m");
78 |     }
79 |   } else {
80 |     ok(1, "SKIP !save_c") for 1..3; # 1 failed "generated" result + 3 skips = the planned 4
81 |   }
82 |   unlink("$base","$base.c","$base.h","main$suffix.c") if $default;
83 | }
84 | 


--------------------------------------------------------------------------------
/t/z_kwalitee.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | use Test::More;
 5 | 
 6 | plan skip_all => 'This test is only run for the module author'
 7 |     unless -d '.git' || $ENV{IS_MAINTAINER};
 8 | plan skip_all => 'This test requires RELEASE_TESTING or AUTHOR_TESTING'
 9 |     if !$ENV{AUTHOR_TESTING} and !$ENV{RELEASE_TESTING};
10 | 
11 | use File::Copy 'cp';
12 | cp('MYMETA.yml','META.yml') if -e 'MYMETA.yml' and !-e 'META.yml'; # provide a META.yml from MYMETA.yml when only the latter exists
13 | 
14 | eval { require Test::Kwalitee; Test::Kwalitee->import(tests => ['-no_symlinks']) }; # optional module; a load failure is caught and turned into a skip below
15 | plan skip_all => "Test::Kwalitee needed for testing kwalitee"
16 |     if $@;
17 | 


--------------------------------------------------------------------------------
/t/z_meta.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | # Test that our META.yml file matches the current specification.
 4 | 
 5 | use strict;
 6 | BEGIN {
 7 |   $|  = 1;
 8 |   $^W = 1;
 9 | }
10 | 
11 | my $MODULE = 'Test::CPAN::Meta 0.12'; # module name plus minimum version, as used in the string eval below
12 | 
13 | # Don't run tests for installs
14 | use Test::More;
15 | plan skip_all => 'This test is only run for the module author'
16 |     unless -d '.git' || $ENV{IS_MAINTAINER};
17 | plan skip_all => 'META is autogenerated'
18 |     unless -e 'META.yml';
19 | 
20 | # Load the testing module
21 | eval "use $MODULE;";
22 | if ( $@ ) {
23 |   # plan(skip_all) ends the script here, so the die that used to follow
24 |   # it was unreachable and is dropped; a missing module stays a skip.
25 |   plan( skip_all => "$MODULE not available for testing" );
26 | }
27 | meta_yaml_ok();
28 | 


--------------------------------------------------------------------------------
/t/z_pod-coverage.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | 
 4 | use Test::More;
 5 | 
 6 | plan skip_all => 'This test is only run for the module author'
 7 |     unless -d '.git' || $ENV{IS_MAINTAINER};
 8 | plan skip_all => 'This test requires RELEASE_TESTING or AUTHOR_TESTING'
 9 |     if !$ENV{AUTHOR_TESTING} and !$ENV{RELEASE_TESTING};
10 | 
11 | eval "use Test::Pod::Coverage 1.04"; # optional module; a load failure becomes a skip below
12 | plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage"
13 |     if $@;
14 | 
15 | all_pod_coverage_ok( { trustme => [ qr/constant/ ] } ); # names matching /constant/ are accepted without documentation
16 | 


--------------------------------------------------------------------------------
/t/z_pod.t:
--------------------------------------------------------------------------------
1 | # -*- perl -*-
2 | use Test::More;
3 | eval "use Test::Pod 1.00"; # optional module; a load failure becomes a skip on the next line
4 | plan skip_all => "Test::Pod 1.00 required for testing POD" if $@;
5 | all_pod_files_ok(); # called without arguments, so Test::Pod chooses the files to check
6 | 


--------------------------------------------------------------------------------