├── .gdbinit ├── .gitignore ├── .gitmodules ├── .travis.yml ├── .whitesource ├── Changes ├── Hash.xs ├── MANIFEST ├── Makefile.PL ├── README.md ├── cmph-2.0.tar.gz ├── cmph-2.0 ├── AUTHORS ├── CMPH.xs ├── COPYING ├── ChangeLog ├── INSTALL ├── LGPL-2 ├── MPL-1.1 ├── Makefile.PL ├── Makefile.am ├── Makefile.in ├── NEWS ├── README ├── acinclude.m4 ├── aclocal.m4 ├── cmph.pc.in ├── cmph.spec ├── compile ├── config.guess ├── config.h.in ├── config.sub ├── configure ├── configure.ac ├── cxxmph.pc.in ├── cxxmph │ ├── Makefile.am │ ├── Makefile.in │ ├── MurmurHash3.cpp │ ├── MurmurHash3.h │ ├── benchmark.cc │ ├── benchmark.h │ ├── bm_common.cc │ ├── bm_common.h │ ├── bm_map.cc │ ├── cxxmph.cc │ ├── dense_hash_map_test.cc │ ├── hollow_iterator.h │ ├── hollow_iterator_test.cc │ ├── map_tester.cc │ ├── map_tester.h │ ├── map_tester_test.cc │ ├── mph_bits.cc │ ├── mph_bits.h │ ├── mph_bits_test.cc │ ├── mph_index.cc │ ├── mph_index.h │ ├── mph_index_test.cc │ ├── mph_map.h │ ├── mph_map_test.cc │ ├── seeded_hash.h │ ├── seeded_hash_test.cc │ ├── string_util.cc │ ├── string_util.h │ ├── string_util_test.cc │ ├── stringpiece.h │ ├── test.cc │ ├── test.h │ ├── test_test.cc │ ├── trigraph.cc │ ├── trigraph.h │ └── trigraph_test.cc ├── depcomp ├── examples │ ├── Makefile.am │ ├── Makefile.in │ ├── file_adapter_ex2.c │ ├── struct_vector_adapter_ex3.c │ └── vector_adapter_ex1.c ├── install-sh ├── ltmain.sh ├── m4 │ ├── libtool.m4 │ ├── ltoptions.m4 │ ├── ltsugar.m4 │ ├── ltversion.m4 │ └── lt~obsolete.m4 ├── man │ ├── Makefile.am │ ├── Makefile.in │ └── cmph.1 ├── missing ├── src │ ├── Makefile.am │ ├── Makefile.in │ ├── bdz.c │ ├── bdz.h │ ├── bdz_ph.c │ ├── bdz_ph.h │ ├── bdz_structs.h │ ├── bdz_structs_ph.h │ ├── bitbool.h │ ├── bm_numbers.c │ ├── bmz.c │ ├── bmz.h │ ├── bmz8.c │ ├── bmz8.h │ ├── bmz8_structs.h │ ├── bmz_structs.h │ ├── brz.c │ ├── brz.h │ ├── brz_structs.h │ ├── buffer_entry.c │ ├── buffer_entry.h │ ├── buffer_manager.c │ ├── buffer_manager.h │ ├── chd.c │ ├── 
chd.h │ ├── chd_ph.c │ ├── chd_ph.h │ ├── chd_structs.h │ ├── chd_structs_ph.h │ ├── chm.c │ ├── chm.h │ ├── chm_structs.h │ ├── cmph.c │ ├── cmph.h │ ├── cmph_benchmark.c │ ├── cmph_benchmark.h │ ├── cmph_structs.c │ ├── cmph_structs.h │ ├── cmph_time.h │ ├── cmph_types.h │ ├── compressed_rank.c │ ├── compressed_rank.h │ ├── compressed_seq.c │ ├── compressed_seq.h │ ├── debug.h │ ├── fch.c │ ├── fch.h │ ├── fch_buckets.c │ ├── fch_buckets.h │ ├── fch_structs.h │ ├── graph.c │ ├── graph.h │ ├── hash.c │ ├── hash.h │ ├── hash_state.h │ ├── jenkins_hash.c │ ├── jenkins_hash.h │ ├── linear_string_map.c │ ├── linear_string_map.h │ ├── main.c │ ├── miller_rabin.c │ ├── miller_rabin.h │ ├── select.c │ ├── select.h │ ├── select_lookup_tables.h │ ├── vqueue.c │ ├── vqueue.h │ ├── vstack.c │ ├── vstack.h │ ├── wingetopt.c │ └── wingetopt.h ├── test-driver └── tests │ ├── Makefile.am │ ├── Makefile.in │ ├── cmph_benchmark_test.c │ ├── compressed_rank_tests.c │ ├── compressed_seq_tests.c │ ├── graph_tests.c │ ├── mphf_tests.c │ ├── packed_mphf_tests.c │ └── select_tests.c ├── examples ├── all-bench.sh ├── bench.pl ├── epmh.py ├── utf8 ├── words20 └── words500 ├── inc └── Devel │ └── CheckLib.pm ├── lib └── Perfect │ ├── Hash.pm │ ├── Hash │ ├── Bob.pm │ ├── C.pm │ ├── CMPH.pm │ ├── CMPH │ │ ├── BDZ.pm │ │ ├── BDZ_PH.pm │ │ ├── BMZ.pm │ │ ├── BMZ8.pm │ │ ├── BRZ.pm │ │ ├── CHD.pm │ │ ├── CHD_PH.pm │ │ ├── CHM.pm │ │ └── FCH.pm │ ├── Cuckoo.pm │ ├── Gperf.pm │ ├── Hanov.pm │ ├── HanovPP.pm │ ├── MoreHashes.pm │ ├── Pearson.pm │ ├── Pearson16.pm │ ├── Pearson32.pm │ ├── Pearson8.pm │ ├── PearsonNP.pm │ ├── Switch.pm │ ├── Urban.pm │ └── XS.pm │ └── PerfectHashInt.pm ├── ppport.h ├── script └── pperf.PL └── t ├── 00basic.t ├── 01words.t ├── 02sysdict.t ├── 03false-positives.t ├── 04save_c.t ├── 05save_c_nul.t ├── 06save_c_utf8.t ├── 07save_c_pic.t ├── test.pl ├── z_kwalitee.t ├── z_meta.t ├── z_pod-coverage.t └── z_pod.t /.gdbinit: 
-------------------------------------------------------------------------------- 1 | add-auto-load-safe-path /lib/x86_64-linux-gnu/libthread_db-1.0.so 2 | 3 | define sdump 4 | p/x *$arg0 5 | call Perl_sv_dump($arg0) 6 | end 7 | document sdump 8 | sdump sv => p/x *sv; Perl_sv_dump(sv) 9 | see `help tsdump` 10 | end 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.bs 3 | *.o 4 | .coveralls.yml 5 | /Debian_CPANTS.txt 6 | /Perfect-Hash-*.tar.gz 7 | /cmph-*.tar.gz 8 | cmph-2.0/CMPH.c 9 | cmph-2.0/Makefile.orig 10 | cmph-2.0/TAGS 11 | cmph-2.0/autom4te.cache/ 12 | examples/words 13 | script/pperf 14 | META.json 15 | META.yml 16 | MYMETA.json 17 | MYMETA.yml 18 | Makefile 19 | Makefile.old 20 | Hash.c 21 | Hash.i 22 | Hash.o 23 | Hash.obj 24 | Hash.c.gcov 25 | Hash.xs.gcov 26 | CMPH.c.gcov 27 | CMPH.xs.gcov 28 | Hash.gcda 29 | Hash.gcno 30 | pm_to_blib 31 | /blib 32 | /cover_db 33 | /perf.data 34 | /log.test-* 35 | /log.bench-* 36 | cmph-2.0/CMPH.gcda 37 | cmph-2.0/CMPH.gcno 38 | cmph-2.0/bin/ 39 | cmph-2.0/blib/ 40 | cmph-2.0/cmph.pc 41 | cmph-2.0/config.h 42 | cmph-2.0/config.log 43 | cmph-2.0/config.status 44 | cmph-2.0/cxxmph.pc 45 | cmph-2.0/cxxmph/.deps/ 46 | cmph-2.0/examples/.deps/ 47 | cmph-2.0/examples/.libs/ 48 | cmph-2.0/examples/file_adapter_ex2 49 | cmph-2.0/examples/struct_vector_adapter_ex3 50 | cmph-2.0/examples/vector_adapter_ex1 51 | cmph-2.0/include/ 52 | cmph-2.0/lib/ 53 | cmph-2.0/libtool 54 | cmph-2.0/share/ 55 | cmph-2.0/src/.deps/ 56 | cmph-2.0/src/.libs/ 57 | cmph-2.0/src/bdz.lo 58 | cmph-2.0/src/bdz_ph.lo 59 | cmph-2.0/src/bm_numbers 60 | cmph-2.0/src/bmz.lo 61 | cmph-2.0/src/bmz8.lo 62 | cmph-2.0/src/brz.lo 63 | cmph-2.0/src/buffer_entry.lo 64 | cmph-2.0/src/buffer_manager.lo 65 | cmph-2.0/src/chd.lo 66 | cmph-2.0/src/chd_ph.lo 67 | cmph-2.0/src/chm.lo 68 | cmph-2.0/src/cmph 69 | cmph-2.0/src/cmph.lo 70 
| cmph-2.0/src/cmph_benchmark.lo 71 | cmph-2.0/src/cmph_structs.lo 72 | cmph-2.0/src/compressed_rank.lo 73 | cmph-2.0/src/compressed_seq.lo 74 | cmph-2.0/src/fch.lo 75 | cmph-2.0/src/fch_buckets.lo 76 | cmph-2.0/src/graph.lo 77 | cmph-2.0/src/hash.lo 78 | cmph-2.0/src/jenkins_hash.lo 79 | cmph-2.0/src/libcmph.la 80 | cmph-2.0/src/linear_string_map.lo 81 | cmph-2.0/src/miller_rabin.lo 82 | cmph-2.0/src/select.lo 83 | cmph-2.0/src/vqueue.lo 84 | cmph-2.0/src/vstack.lo 85 | cmph-2.0/stamp-h1 86 | cmph-2.0/tests/.deps/ 87 | cmph-2.0/tests/.libs/ 88 | cmph-2.0/tests/cmph_benchmark_test 89 | cmph-2.0/tests/compressed_rank_tests 90 | cmph-2.0/tests/compressed_seq_tests 91 | cmph-2.0/tests/graph_tests 92 | cmph-2.0/tests/mphf_tests 93 | cmph-2.0/tests/packed_mphf_tests 94 | cmph-2.0/tests/select_tests 95 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "jenkins-minimal-perfect-hash"] 2 | path = bob 3 | url = git://github.com/rurban/jenkins-minimal-perfect-hash.git 4 | [submodule "nbperf"] 5 | path = nbperf 6 | url = https://github.com/rurban/nbperf 7 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: "perl" 2 | sudo: false 3 | perl: 4 | # - "5.6.2" 5 | - "5.8" 6 | - "5.10" 7 | - "5.12" 8 | - "5.14" 9 | - "5.16" 10 | - "5.18" 11 | - "5.20" 12 | - "5.22" 13 | - "5.22-thr" 14 | - "5.22-dbg" 15 | - "5.22-thr-dbg" 16 | - "5.22-mb" 17 | - "dev" 18 | - "blead" 19 | 20 | addons: 21 | apt: 22 | packages: 23 | - gperf 24 | 25 | # blead and 5.6 stumble over YAML and more missing dependencies 26 | # for Devel::Cover::Report::Coveralls 27 | # cpanm does not do 5.6 28 | before_install: 29 | - mkdir /home/travis/bin || true 30 | - ln -s `which true` /home/travis/bin/cpansign 31 | - eval $(curl 
https://travis-perl.github.io/init) --auto 32 | install: 33 | - export AUTOMATED_TESTING=1 HARNESS_TIMER=1 AUTHOR_TESTING=0 RELEASE_TESTING=0 34 | - cpan-install --deps # installs prereqs, including recommends 35 | - cpan-install --coverage # installs converage prereqs, if enabled 36 | 37 | before_script: 38 | - gperf --version 39 | - coverage-setup 40 | 41 | notifications: 42 | email: 43 | on_success: change 44 | on_failure: always 45 | 46 | matrix: 47 | fast_finish: true 48 | allow_failures: 49 | - perl: "dev" 50 | - perl: "blead" 51 | include: 52 | - perl: 5.18 53 | env: COVERAGE=1 # enables coverage+coveralls reporting 54 | 55 | # Hack to not run on tag pushes: 56 | branches: 57 | except: 58 | - /^v?[0-9]+\.[0-9]+/ 59 | -------------------------------------------------------------------------------- /.whitesource: -------------------------------------------------------------------------------- 1 | { 2 | "generalSettings": { 3 | "shouldScanRepo": true 4 | }, 5 | "checkRunSettings": { 6 | "vulnerableCheckRunConclusionLevel": "failure" 7 | } 8 | } -------------------------------------------------------------------------------- /Changes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rurban/Perfect-Hash/074531b94ef68883014b15bdbe7c80bd85c45e6f/Changes -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | bob/Makefile.PL 2 | bob/Bob.xs 3 | Changes 4 | cmph-2.0.tar.gz 5 | cmph-2.0/Makefile.PL 6 | cmph-2.0/CMPH.xs 7 | examples/all-bench.sh 8 | examples/bench.pl 9 | examples/epmh.py 10 | examples/utf8 11 | examples/words20 12 | examples/words500 13 | inc/Devel/CheckLib.pm 14 | lib/Perfect/Hash.pm 15 | lib/Perfect/Hash/Bob.pm 16 | lib/Perfect/Hash/C.pm 17 | lib/Perfect/Hash/CMPH.pm 18 | lib/Perfect/Hash/CMPH/CHM.pm 19 | lib/Perfect/Hash/CMPH/BDZ_PH.pm 20 | 
lib/Perfect/Hash/CMPH/BDZ.pm 21 | lib/Perfect/Hash/CMPH/BMZ.pm 22 | lib/Perfect/Hash/CMPH/BMZ8.pm 23 | lib/Perfect/Hash/CMPH/BRZ.pm 24 | lib/Perfect/Hash/CMPH/CHD.pm 25 | lib/Perfect/Hash/CMPH/CHD_PH.pm 26 | lib/Perfect/Hash/CMPH/FCH.pm 27 | lib/Perfect/Hash/Cuckoo.pm 28 | lib/Perfect/Hash/Gperf.pm 29 | lib/Perfect/Hash/Hanov.pm 30 | lib/Perfect/Hash/HanovPP.pm 31 | lib/Perfect/Hash/MoreHashes.pm 32 | lib/Perfect/Hash/Pearson.pm 33 | lib/Perfect/Hash/PearsonNP.pm 34 | lib/Perfect/Hash/Pearson16.pm 35 | lib/Perfect/Hash/Pearson32.pm 36 | lib/Perfect/Hash/Pearson8.pm 37 | lib/Perfect/Hash/Switch.pm 38 | lib/Perfect/Hash/Urban.pm 39 | lib/Perfect/Hash/XS.pm 40 | Makefile.PL 41 | MANIFEST 42 | META.json 43 | META.yml 44 | ppport.h 45 | README.md 46 | script/pperf.PL 47 | t/00basic.t 48 | t/01words.t 49 | t/02sysdict.t 50 | t/03false-positives.t 51 | t/04save_c.t 52 | t/05save_c_nul.t 53 | t/06save_c_utf8.t 54 | t/07save_c_pic.t 55 | t/z_kwalitee.t 56 | t/z_meta.t 57 | t/z_pod-coverage.t 58 | t/z_pod.t 59 | -------------------------------------------------------------------------------- /cmph-2.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rurban/Perfect-Hash/074531b94ef68883014b15bdbe7c80bd85c45e6f/cmph-2.0.tar.gz -------------------------------------------------------------------------------- /cmph-2.0/AUTHORS: -------------------------------------------------------------------------------- 1 | Davi de Castro Reis davi@users.sourceforge.net 2 | Djamel Belazzougui db8192@users.sourceforge.net 3 | Fabiano Cupertino Botelho fc_botelho@users.sourceforge.net 4 | Nivio Ziviani nivio@dcc.ufmg.br 5 | -------------------------------------------------------------------------------- /cmph-2.0/CMPH.xs: -------------------------------------------------------------------------------- 1 | /* -*- mode:C tab-width:4 -*- */ 2 | #define PERL_NO_GET_CONTEXT 3 | #include "EXTERN.h" 4 | #include "perl.h" 5 | 
#include "XSUB.h" 6 | 7 | #include "cmph.h" 8 | 9 | #if PERL_VERSION < 10 10 | # define USE_PPPORT_H 11 | #endif 12 | 13 | #ifdef USE_PPPORT_H 14 | # include "../ppport.h" 15 | #endif 16 | 17 | MODULE = Perfect::Hash::CMPH PACKAGE = Perfect::Hash::CMPH 18 | 19 | SV* 20 | _new(class, keyfile, ...) 21 | SV* class 22 | SV* keyfile 23 | CODE: 24 | { 25 | int i; 26 | UV size; 27 | AV *result; 28 | HV *options; 29 | FILE * keys_fd = NULL; 30 | cmph_io_adapter_t *key_source; 31 | cmph_config_t *mph; 32 | cmph_t *mphf; 33 | unsigned char *packed; 34 | CMPH_ALGO algo = CMPH_CHM; 35 | const char *classname = SvPVX(class); 36 | 37 | if (SvPOK(keyfile)) { 38 | keys_fd = fopen(SvPVX(keyfile), "r"); 39 | key_source = cmph_io_nlfile_adapter(keys_fd); 40 | } else { 41 | if (SvTYPE(keyfile) == SVt_PVAV) { 42 | } else if (SvTYPE(keyfile) == SVt_PVHV) { 43 | } 44 | /* XXX support arrayrefs at least, probably created via nvecset 45 | and use the io_vector or io_byte_vector adapter */ 46 | warn("CMPH only accepts filenames yet\n"); 47 | /*keys_fd = fopen("examples/words500", "r"); 48 | key_source = cmph_io_nlfile_adapter(keys_fd);*/ 49 | XSRETURN_UNDEF; 50 | } 51 | if (!strcmp(classname, "Perfect::Hash::CMPH::CHM")) algo = CMPH_CHM; 52 | else if (!strcmp(classname, "Perfect::Hash::CMPH::BMZ")) algo = CMPH_BMZ; 53 | else if (!strcmp(classname, "Perfect::Hash::CMPH::BMZ8")) algo = CMPH_BMZ8; 54 | else if (!strcmp(classname, "Perfect::Hash::CMPH::BRZ")) algo = CMPH_BRZ; 55 | else if (!strcmp(classname, "Perfect::Hash::CMPH::FCH")) algo = CMPH_FCH; 56 | else if (!strcmp(classname, "Perfect::Hash::CMPH::BDZ")) algo = CMPH_BDZ; 57 | else if (!strcmp(classname, "Perfect::Hash::CMPH::BDZ_PH")) algo = CMPH_BDZ_PH; 58 | else if (!strcmp(classname, "Perfect::Hash::CMPH::CHD")) algo = CMPH_CHD; 59 | else if (!strcmp(classname, "Perfect::Hash::CMPH::CHD_PH")) algo = CMPH_CHD_PH; 60 | mph = cmph_config_new(key_source); 61 | if (algo != CMPH_CHM) 62 | cmph_config_set_algo(mph, algo); 63 | mphf = 
cmph_new(mph); 64 | if (!mphf) { 65 | fprintf(stderr, "Failed to create mphf for algorithm %s", classname); 66 | XSRETURN_UNDEF; 67 | } 68 | result = newAV(); 69 | av_push(result, newSViv(PTR2IV(mphf))); /* mphf in [0] */ 70 | size = cmph_packed_size(mphf); 71 | if (!size) { 72 | fprintf(stderr, "Failed to calculate cmph_packed_size for algorithm %s", classname); 73 | XSRETURN_UNDEF; 74 | } 75 | packed = (unsigned char *)malloc(size); 76 | cmph_pack(mphf, packed); 77 | av_push(result, newSVpvn((char *)packed, size)); /* packed in [1] */ 78 | options = newHV(); 79 | for (i=2; i 'TRUE') 7 | if (($PERL_VERSION >= 5.005) and ($OSNAME eq 'MSWin32') 8 | and ($Config{archname} =~ /-object\b/i)); 9 | 10 | push(@extras, 11 | ABSTRACT => "XS interface to cmph", 12 | AUTHOR => 'Reini Urban ') 13 | if $EUMM_VER gt '5.4301'; 14 | push(@extras, 15 | META_MERGE => { 16 | license => [ 'mozilla_1_1', 'lgpl_2_1' ] 17 | }) 18 | if $EUMM_VER gt '6.46'; 19 | 20 | #TODO: integrate into proper build and depend rules 21 | if (! 
-e "bin/cmph".$Config{exe_ext}) { 22 | print "building cmph-2.0\n"; 23 | system("autoreconf -i"); 24 | system("./configure --prefix=`pwd`"); 25 | system($Config{'make'}, "-s"); 26 | system($Config{'make'}, "-s", "install"); 27 | rename "Makefile", "Makefile.orig"; 28 | } 29 | 30 | WriteMakefile( 31 | NAME => 'Perfect::Hash::CMPH', 32 | XS => { 'CMPH.xs' => 'CMPH.c' }, 33 | XSPROTOARG => '-noprototypes', 34 | INC => '-Iinclude', 35 | LIBS => '-Llib -lcmph', 36 | VERSION_FROM => '../lib/Perfect/Hash/CMPH.pm', 37 | clean => { FILES => "*.gcov *.gcda *.gcno" }, 38 | @extras 39 | ); 40 | 41 | -------------------------------------------------------------------------------- /cmph-2.0/Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = src tests examples man $(CXXMPH) 2 | EXTRA_DIST = cmph.spec configure.ac cmph.pc.in cxxmph.pc.in LGPL-2 MPL-1.1 3 | pkgconfig_DATA = cmph.pc 4 | if USE_CXXMPH 5 | pkgconfig_DATA += cxxmph.pc 6 | endif 7 | 8 | pkgconfigdir = $(libdir)/pkgconfig 9 | -------------------------------------------------------------------------------- /cmph-2.0/NEWS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rurban/Perfect-Hash/074531b94ef68883014b15bdbe7c80bd85c45e6f/cmph-2.0/NEWS -------------------------------------------------------------------------------- /cmph-2.0/cmph.pc.in: -------------------------------------------------------------------------------- 1 | url=http://cmph.sourceforge.net/ 2 | prefix=@prefix@ 3 | exec_prefix=@exec_prefix@ 4 | libdir=@libdir@ 5 | includedir=@includedir@ 6 | 7 | Name: cmph 8 | Description: minimal perfect hashing library 9 | Version: @VERSION@ 10 | Libs: -L${libdir} -lcmph 11 | Cflags: -I${includedir} 12 | URL: ${url} 13 | -------------------------------------------------------------------------------- /cmph-2.0/cmph.spec: 
-------------------------------------------------------------------------------- 1 | %define name cmph 2 | %define version 0.4 3 | %define release 3 4 | 5 | Name: %{name} 6 | Version: %{version} 7 | Release: %{release} 8 | Summary: C Minimal perfect hash library 9 | Source: %{name}-%{version}.tar.gz 10 | License: Proprietary 11 | URL: http://www.akwan.com.br 12 | BuildArch: i386 13 | Group: Sitesearch 14 | BuildRoot: %{_tmppath}/%{name}-root 15 | 16 | %description 17 | C Minimal perfect hash library 18 | 19 | %prep 20 | rm -Rf $RPM_BUILD_ROOT 21 | rm -rf $RPM_BUILD_ROOT 22 | %setup 23 | mkdir $RPM_BUILD_ROOT 24 | mkdir $RPM_BUILD_ROOT/usr 25 | CXXFLAGS="-O2" ./configure --prefix=/usr/ 26 | 27 | %build 28 | make 29 | 30 | %install 31 | DESTDIR=$RPM_BUILD_ROOT make install 32 | 33 | %files 34 | %defattr(755,root,root) 35 | / 36 | 37 | %changelog 38 | * Tue Jun 1 2004 Davi de Castro Reis 39 | + Initial build 40 | -------------------------------------------------------------------------------- /cmph-2.0/config.h.in: -------------------------------------------------------------------------------- 1 | /* config.h.in. Generated from configure.ac by autoheader. */ 2 | 3 | /* Define to 1 if you have the header file. */ 4 | #undef HAVE_DLFCN_H 5 | 6 | /* Define to 1 if you have the header file. */ 7 | #undef HAVE_GETOPT_H 8 | 9 | /* Define to 1 if you have the header file. */ 10 | #undef HAVE_INTTYPES_H 11 | 12 | /* Define to 1 if you have the `check' library (-lcheck). */ 13 | #undef HAVE_LIBCHECK 14 | 15 | /* Define to 1 if you have the header file. */ 16 | #undef HAVE_MATH_H 17 | 18 | /* Define to 1 if you have the header file. */ 19 | #undef HAVE_MEMORY_H 20 | 21 | /* Define if g++ supports C++0x features. */ 22 | #undef HAVE_STDCXX_0X 23 | 24 | /* Define to 1 if you have the header file. */ 25 | #undef HAVE_STDINT_H 26 | 27 | /* Define to 1 if you have the header file. */ 28 | #undef HAVE_STDLIB_H 29 | 30 | /* Define to 1 if you have the header file. 
*/ 31 | #undef HAVE_STRINGS_H 32 | 33 | /* Define to 1 if you have the header file. */ 34 | #undef HAVE_STRING_H 35 | 36 | /* Define to 1 if you have the header file. */ 37 | #undef HAVE_SYS_STAT_H 38 | 39 | /* Define to 1 if you have the header file. */ 40 | #undef HAVE_SYS_TYPES_H 41 | 42 | /* Define to 1 if you have the header file. */ 43 | #undef HAVE_UNISTD_H 44 | 45 | /* Define to the sub-directory where libtool stores uninstalled libraries. */ 46 | #undef LT_OBJDIR 47 | 48 | /* Name of package */ 49 | #undef PACKAGE 50 | 51 | /* Define to the address where bug reports for this package should be sent. */ 52 | #undef PACKAGE_BUGREPORT 53 | 54 | /* Define to the full name of this package. */ 55 | #undef PACKAGE_NAME 56 | 57 | /* Define to the full name and version of this package. */ 58 | #undef PACKAGE_STRING 59 | 60 | /* Define to the one symbol short name of this package. */ 61 | #undef PACKAGE_TARNAME 62 | 63 | /* Define to the home page for this package. */ 64 | #undef PACKAGE_URL 65 | 66 | /* Define to the version of this package. */ 67 | #undef PACKAGE_VERSION 68 | 69 | /* Define to 1 if you have the ANSI C header files. */ 70 | #undef STDC_HEADERS 71 | 72 | /* Version number of package */ 73 | #undef VERSION 74 | 75 | /* Number of bits in a file offset, on hosts where this is settable. */ 76 | #undef _FILE_OFFSET_BITS 77 | 78 | /* Define to make fseeko etc. visible, on some hosts. */ 79 | #undef _LARGEFILE_SOURCE 80 | 81 | /* Define for large files, on AIX-style hosts. */ 82 | #undef _LARGE_FILES 83 | -------------------------------------------------------------------------------- /cmph-2.0/configure.ac: -------------------------------------------------------------------------------- 1 | dnl Process this file with autoconf to produce a configure script. 2 | AC_INIT 3 | AC_CONFIG_SRCDIR([Makefile.am]) 4 | AM_INIT_AUTOMAKE(cmph, 2.0) 5 | AC_CONFIG_HEADERS([config.h]) 6 | AC_CONFIG_MACRO_DIR([m4]) 7 | 8 | dnl Checks for programs. 
9 | AC_PROG_AWK 10 | AC_PROG_CC 11 | AC_PROG_INSTALL 12 | AC_PROG_LN_S 13 | LT_INIT 14 | AC_SYS_EXTRA_LARGEFILE 15 | if test "x$ac_cv_sys_largefile_CFLAGS" = "xno" ; then 16 | ac_cv_sys_largefile_CFLAGS="" 17 | fi 18 | if test "x$ac_cv_sys_largefile_LDFLAGS" = "xno" ; then 19 | ac_cv_sys_largefile_LDFLAGS="" 20 | fi 21 | if test "x$ac_cv_sys_largefile_LIBS" = "xno" ; then 22 | ac_cv_sys_largefile_LIBS="" 23 | fi 24 | CFLAGS="$ac_cv_sys_largefile_CFLAGS $CFLAGS" 25 | LDFLAGS="$ac_cv_sys_largefile_LDFLAGS $LDFLAGS" 26 | LIBS="$LIBS $ac_cv_sys_largefile_LIBS" 27 | 28 | dnl Checks for headers 29 | AC_CHECK_HEADERS([getopt.h math.h]) 30 | 31 | dnl Checks for libraries. 32 | LT_LIB_M 33 | LDFLAGS="$LIBM $LDFLAGS" 34 | CFLAGS="-Wall" 35 | 36 | AC_PROG_CXX 37 | CXXFLAGS="-Wall -Wno-unused-function -DNDEBUG -O3 -fomit-frame-pointer $CXXFLAGS" 38 | AC_ENABLE_CXXMPH 39 | if test x$cxxmph = xtrue; then 40 | AC_COMPILE_STDCXX_0X 41 | if test x$ac_cv_cxx_compile_cxx0x_native = "xno"; then 42 | if test x$ac_cv_cxx_compile_cxx0x_cxx = "xyes"; then 43 | CXXFLAGS="$CXXFLAGS -std=c++0x" 44 | elif test x$ac_cv_cxx_compile_cxx0x_gxx = "xyes"; then 45 | CXXFLAGS="$CXXFLAGS -std=gnu++0x" 46 | else 47 | AC_MSG_ERROR("cxxmph demands a working c++0x compiler.") 48 | fi 49 | fi 50 | AC_SUBST([CXXMPH], "cxxmph") 51 | fi 52 | AM_CONDITIONAL([USE_CXXMPH], [test "$cxxmph" = true]) 53 | 54 | # Unit tests based on the check library. Disabled by default. 55 | # We do not use pkg-config because it is inconvenient for all developers to 56 | # have check library installed. 
57 | AC_ARG_ENABLE(check, AS_HELP_STRING( 58 | [--enable-check], 59 | [Build unit tests depending on check library (default: disabled)])) 60 | AS_IF([test "x$enable_check" = "xyes"], 61 | [ AC_CHECK_LIB([check], [tcase_create]) 62 | AS_IF([test "$ac_cv_lib_check_tcase_create" = yes], [CHECK_LIBS="-lcheck"], 63 | [AC_MSG_ERROR("Failed to find check library (http://check.sf.net).")]) 64 | AC_CHECK_HEADER(check.h,[], 65 | [AC_MSG_ERROR("Failed to find check library header (http://check.sf.net).")]) 66 | ]) 67 | AM_CONDITIONAL([USE_LIBCHECK], [test "$ac_cv_lib_check_tcase_create" = yes]) 68 | AC_SUBST(CHECK_LIBS) 69 | AC_SUBST(CHECK_CFLAGS) 70 | 71 | AC_CHECK_SPOON 72 | AC_CONFIG_FILES([Makefile src/Makefile cxxmph/Makefile tests/Makefile examples/Makefile man/Makefile cmph.pc cxxmph.pc]) 73 | AC_OUTPUT 74 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph.pc.in: -------------------------------------------------------------------------------- 1 | url=http://cmph.sourceforge.net/ 2 | prefix=@prefix@ 3 | exec_prefix=@exec_prefix@ 4 | libdir=@libdir@ 5 | includedir=@includedir@ 6 | 7 | Name: cxxmph 8 | Description: minimal perfect hashing c++11 library 9 | Version: @VERSION@ 10 | Libs: -L${libdir} -lcxxmph 11 | Cflags: -std=c++0x -I${includedir} 12 | URL: ${url} 13 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/Makefile.am: -------------------------------------------------------------------------------- 1 | TESTS = $(check_PROGRAMS) 2 | check_PROGRAMS = seeded_hash_test mph_bits_test hollow_iterator_test mph_index_test trigraph_test 3 | if USE_LIBCHECK 4 | check_PROGRAMS += test_test map_tester_test mph_map_test dense_hash_map_test string_util_test 5 | check_LTLIBRARIES = libcxxmph_test.la 6 | endif 7 | 8 | noinst_PROGRAMS = bm_map # bm_index - disabled because of cmph dependency 9 | bin_PROGRAMS = cxxmph 10 | 11 | cxxmph_includedir = $(includedir)/cxxmph/ 12 | 
cxxmph_include_HEADERS = mph_bits.h mph_map.h mph_index.h MurmurHash3.h trigraph.h seeded_hash.h stringpiece.h hollow_iterator.h string_util.h 13 | 14 | noinst_LTLIBRARIES = libcxxmph_bm.la 15 | lib_LTLIBRARIES = libcxxmph.la 16 | libcxxmph_la_SOURCES = MurmurHash3.cpp trigraph.cc mph_bits.cc mph_index.cc benchmark.h benchmark.cc string_util.cc 17 | libcxxmph_la_LDFLAGS = -version-info 0:0:0 18 | libcxxmph_test_la_SOURCES = test.h test.cc 19 | libcxxmph_test_la_LIBADD = libcxxmph.la 20 | libcxxmph_bm_la_SOURCES = benchmark.h benchmark.cc bm_common.h bm_common.cc 21 | libcxxmph_bm_la_LIBADD = libcxxmph.la 22 | 23 | test_test_SOURCES = test_test.cc 24 | test_test_LDADD = libcxxmph_test.la $(CHECK_LIBS) 25 | 26 | mph_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS) 27 | mph_map_test_SOURCES = mph_map_test.cc 28 | dense_hash_map_test_LDADD = libcxxmph_test.la $(CHECK_LIBS) 29 | dense_hash_map_test_SOURCES = dense_hash_map_test.cc 30 | 31 | mph_index_test_LDADD = libcxxmph.la 32 | mph_index_test_SOURCES = mph_index_test.cc 33 | 34 | trigraph_test_LDADD = libcxxmph.la 35 | trigraph_test_SOURCES = trigraph_test.cc 36 | 37 | # Bad dependency, do not compile by default. 
38 | # bm_index_LDADD = libcxxmph_bm.la -lcmph 39 | # bm_index_SOURCES = bm_index.cc 40 | 41 | bm_map_LDADD = libcxxmph_bm.la 42 | bm_map_SOURCES = bm_map.cc 43 | 44 | cxxmph_LDADD = libcxxmph.la 45 | cxxmph_SOURCES = cxxmph.cc 46 | 47 | hollow_iterator_test_SOURCES = hollow_iterator_test.cc 48 | 49 | seeded_hash_test_SOURCES = seeded_hash_test.cc 50 | seeded_hash_test_LDADD = libcxxmph.la 51 | 52 | mph_bits_test_SOURCES = mph_bits_test.cc 53 | mph_bits_test_LDADD = libcxxmph.la 54 | 55 | string_util_test_SOURCES = string_util_test.cc 56 | string_util_test_LDADD = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS) 57 | 58 | map_tester_test_SOURCES = map_tester.h map_tester.cc map_tester_test.cc 59 | map_tester_test_LDADD = libcxxmph.la libcxxmph_test.la $(CHECK_LIBS) 60 | 61 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/MurmurHash3.h: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 
4 | 5 | #ifndef _MURMURHASH3_H_ 6 | #define _MURMURHASH3_H_ 7 | 8 | //----------------------------------------------------------------------------- 9 | // Platform-specific functions and macros 10 | 11 | // Microsoft Visual Studio 12 | 13 | #if defined(_MSC_VER) 14 | 15 | typedef unsigned char uint8_t; 16 | typedef unsigned long uint32_t; 17 | typedef unsigned __int64 uint64_t; 18 | 19 | // Other compilers 20 | 21 | #else // defined(_MSC_VER) 22 | 23 | #include 24 | 25 | #endif // !defined(_MSC_VER) 26 | 27 | //----------------------------------------------------------------------------- 28 | 29 | void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out ); 30 | 31 | void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); 32 | 33 | void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); 34 | 35 | //----------------------------------------------------------------------------- 36 | 37 | #endif // _MURMURHASH3_H_ 38 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/benchmark.cc: -------------------------------------------------------------------------------- 1 | #include "benchmark.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | using std::cerr; 16 | using std::cout; 17 | using std::endl; 18 | using std::setfill; 19 | using std::setw; 20 | using std::string; 21 | using std::ostringstream; 22 | using std::vector; 23 | 24 | namespace { 25 | 26 | /* Subtract the `struct timeval' values X and Y, 27 | storing the result in RESULT. 28 | Return 1 if the difference is negative, otherwise 0. */ 29 | int timeval_subtract ( 30 | struct timeval *result, struct timeval *x, struct timeval* y) { 31 | /* Perform the carry for the later subtraction by updating y. 
*/ 32 | if (x->tv_usec < y->tv_usec) { 33 | int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1; 34 | y->tv_usec -= 1000000 * nsec; 35 | y->tv_sec += nsec; 36 | } 37 | if (x->tv_usec - y->tv_usec > 1000000) { 38 | int nsec = (x->tv_usec - y->tv_usec) / 1000000; 39 | y->tv_usec += 1000000 * nsec; 40 | y->tv_sec -= nsec; 41 | } 42 | 43 | /* Compute the time remaining to wait. 44 | tv_usec is certainly positive. */ 45 | result->tv_sec = x->tv_sec - y->tv_sec; 46 | result->tv_usec = x->tv_usec - y->tv_usec; 47 | 48 | /* Return 1 if result is negative. */ 49 | return x->tv_sec < y->tv_sec; 50 | } 51 | 52 | // C++ iostream is terrible for formatting. 53 | string timeval_to_string(timeval tv) { 54 | ostringstream out; 55 | out << setfill(' ') << setw(3) << tv.tv_sec << '.'; 56 | out << setfill('0') << setw(6) << tv.tv_usec; 57 | return out.str(); 58 | } 59 | 60 | struct rusage getrusage_or_die() { 61 | struct rusage rs; 62 | int ret = getrusage(RUSAGE_SELF, &rs); 63 | if (ret != 0) { 64 | cerr << "rusage failed: " << strerror(errno) << endl; 65 | exit(-1); 66 | } 67 | return rs; 68 | } 69 | 70 | struct timeval gettimeofday_or_die() { 71 | struct timeval tv; 72 | int ret = gettimeofday(&tv, NULL); 73 | if (ret != 0) { 74 | cerr << "gettimeofday failed: " << strerror(errno) << endl; 75 | exit(-1); 76 | } 77 | return tv; 78 | } 79 | 80 | #ifdef HAVE_CXA_DEMANGLE 81 | string demangle(const string& name) { 82 | char buf[1024]; 83 | unsigned int size = 1024; 84 | int status; 85 | char* res = abi::__cxa_demangle( 86 | name.c_str(), buf, &size, &status); 87 | return res; 88 | } 89 | #else 90 | string demangle(const string& name) { return name; } 91 | #endif 92 | 93 | 94 | static vector g_benchmarks; 95 | 96 | } // anonymous namespace 97 | 98 | namespace cxxmph { 99 | 100 | /* static */ void Benchmark::Register(Benchmark* bm) { 101 | if (bm->name().empty()) { 102 | string name = demangle(typeid(*bm).name()); 103 | bm->set_name(name); 104 | } 105 | g_benchmarks.push_back(bm); 106 | 
} 107 | 108 | /* static */ void Benchmark::RunAll() { 109 | for (uint32_t i = 0; i < g_benchmarks.size(); ++i) { 110 | std::auto_ptr bm(g_benchmarks[i]); 111 | if (!bm->SetUp()) { 112 | cerr << "Set up phase for benchmark " 113 | << bm->name() << " failed." << endl; 114 | continue; 115 | } 116 | bm->MeasureRun(); 117 | bm->TearDown(); 118 | } 119 | } 120 | 121 | void Benchmark::MeasureRun() { 122 | struct timeval walltime_begin = gettimeofday_or_die(); 123 | struct rusage begin = getrusage_or_die(); 124 | Run(); 125 | struct rusage end = getrusage_or_die(); 126 | struct timeval walltime_end = gettimeofday_or_die(); 127 | 128 | struct timeval utime; 129 | timeval_subtract(&utime, &end.ru_utime, &begin.ru_utime); 130 | struct timeval stime; 131 | timeval_subtract(&stime, &end.ru_stime, &begin.ru_stime); 132 | struct timeval wtime; 133 | timeval_subtract(&wtime, &walltime_end, &walltime_begin); 134 | 135 | cout << "Benchmark: " << name_ << endl; 136 | cout << "CPU User time : " << timeval_to_string(utime) << endl; 137 | cout << "CPU System time: " << timeval_to_string(stime) << endl; 138 | cout << "Wall clock time: " << timeval_to_string(wtime) << endl; 139 | cout << endl; 140 | } 141 | 142 | } // namespace cxxmph 143 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/benchmark.h: -------------------------------------------------------------------------------- 1 | #ifndef __CXXMPH_BENCHMARK_H__ 2 | #define __CXXMPH_BENCHMARK_H__ 3 | 4 | #include 5 | #include 6 | 7 | namespace cxxmph { 8 | 9 | class Benchmark { 10 | public: 11 | Benchmark() {} 12 | virtual ~Benchmark() {} 13 | 14 | const std::string& name() { return name_; } 15 | void set_name(const std::string& name) { name_ = name; } 16 | 17 | static void Register(Benchmark* bm); 18 | static void RunAll(); 19 | 20 | protected: 21 | virtual bool SetUp() { return true; }; 22 | virtual void Run() = 0; 23 | virtual bool TearDown() { return true; }; 24 | 25 | private: 26 | 
std::string name_; 27 | void MeasureRun(); 28 | }; 29 | 30 | } // namespace cxxmph 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/bm_common.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "bm_common.h" 8 | 9 | using std::cerr; 10 | using std::endl; 11 | using std::set; 12 | using std::string; 13 | using std::vector; 14 | 15 | namespace cxxmph { 16 | 17 | UrlsBenchmark::~UrlsBenchmark() {} 18 | bool UrlsBenchmark::SetUp() { 19 | vector urls; 20 | std::ifstream f(urls_file_.c_str()); 21 | if (!f.is_open()) { 22 | cerr << "Failed to open urls file " << urls_file_ << endl; 23 | return false; 24 | } 25 | string buffer; 26 | while(std::getline(f, buffer)) urls.push_back(buffer); 27 | set unique(urls.begin(), urls.end()); 28 | if (unique.size() != urls.size()) { 29 | cerr << "Input file has repeated keys." 
<< endl; 30 | return false; 31 | } 32 | urls.swap(urls_); 33 | return true; 34 | } 35 | 36 | SearchUrlsBenchmark::~SearchUrlsBenchmark() {} 37 | bool SearchUrlsBenchmark::SetUp() { 38 | if (!UrlsBenchmark::SetUp()) return false; 39 | int32_t miss_ratio_int32 = std::numeric_limits::max() * miss_ratio_; 40 | forced_miss_urls_.resize(nsearches_); 41 | random_.resize(nsearches_); 42 | for (uint32_t i = 0; i < nsearches_; ++i) { 43 | random_[i] = urls_[random() % urls_.size()]; 44 | if (random() < miss_ratio_int32) { 45 | forced_miss_urls_[i] = random_[i].as_string() + ".force_miss"; 46 | random_[i] = forced_miss_urls_[i]; 47 | } 48 | } 49 | return true; 50 | } 51 | 52 | Uint64Benchmark::~Uint64Benchmark() {} 53 | bool Uint64Benchmark::SetUp() { 54 | set unique; 55 | for (uint32_t i = 0; i < count_; ++i) { 56 | uint64_t v; 57 | do { v = random(); } while (unique.find(v) != unique.end()); 58 | values_.push_back(v); 59 | unique.insert(v); 60 | } 61 | return true; 62 | } 63 | 64 | SearchUint64Benchmark::~SearchUint64Benchmark() {} 65 | bool SearchUint64Benchmark::SetUp() { 66 | if (!Uint64Benchmark::SetUp()) return false; 67 | random_.resize(nsearches_); 68 | for (uint32_t i = 0; i < nsearches_; ++i) { 69 | uint32_t pos = random() % values_.size(); 70 | random_[i] = values_[pos]; 71 | } 72 | return true; 73 | } 74 | 75 | } // namespace cxxmph 76 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/bm_common.h: -------------------------------------------------------------------------------- 1 | #ifndef __CXXMPH_BM_COMMON_H__ 2 | #define __CXXMPH_BM_COMMON_H__ 3 | 4 | #include "stringpiece.h" 5 | 6 | #include 7 | #include 8 | #include // std::hash 9 | #include "MurmurHash3.h" 10 | 11 | #include "benchmark.h" 12 | 13 | namespace std { 14 | template <> struct hash { 15 | uint32_t operator()(const cxxmph::StringPiece& k) const { 16 | uint32_t out; 17 | MurmurHash3_x86_32(k.data(), k.length(), 1, &out); 18 | return out; 19 | } 20 | 
}; 21 | } // namespace std 22 | 23 | namespace cxxmph { 24 | 25 | class UrlsBenchmark : public Benchmark { 26 | public: 27 | UrlsBenchmark(const std::string& urls_file) : urls_file_(urls_file) { } 28 | virtual ~UrlsBenchmark(); 29 | protected: 30 | virtual bool SetUp(); 31 | const std::string urls_file_; 32 | std::vector urls_; 33 | }; 34 | 35 | class SearchUrlsBenchmark : public UrlsBenchmark { 36 | public: 37 | SearchUrlsBenchmark(const std::string& urls_file, uint32_t nsearches, float miss_ratio) 38 | : UrlsBenchmark(urls_file), nsearches_(nsearches), miss_ratio_(miss_ratio) {} 39 | virtual ~SearchUrlsBenchmark(); 40 | protected: 41 | virtual bool SetUp(); 42 | const uint32_t nsearches_; 43 | float miss_ratio_; 44 | std::vector forced_miss_urls_; 45 | std::vector random_; 46 | }; 47 | 48 | class Uint64Benchmark : public Benchmark { 49 | public: 50 | Uint64Benchmark(uint32_t count) : count_(count) { } 51 | virtual ~Uint64Benchmark(); 52 | virtual void Run() {} 53 | protected: 54 | virtual bool SetUp(); 55 | const uint32_t count_; 56 | std::vector values_; 57 | }; 58 | 59 | class SearchUint64Benchmark : public Uint64Benchmark { 60 | public: 61 | SearchUint64Benchmark(uint32_t count, uint32_t nsearches) 62 | : Uint64Benchmark(count), nsearches_(nsearches) { } 63 | virtual ~SearchUint64Benchmark(); 64 | virtual void Run() {}; 65 | protected: 66 | virtual bool SetUp(); 67 | const uint32_t nsearches_; 68 | std::vector random_; 69 | }; 70 | 71 | } // namespace cxxmph 72 | 73 | #endif // __CXXMPH_BM_COMMON_H__ 74 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/cxxmph.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2010 Google Inc. All Rights Reserved. 
2 | // Author: davi@google.com (Davi Reis) 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "mph_map.h" 12 | #include "config.h" 13 | 14 | using std::cerr; 15 | using std::cout; 16 | using std::endl; 17 | using std::getline; 18 | using std::ifstream; 19 | using std::string; 20 | using std::vector; 21 | 22 | using cxxmph::mph_map; 23 | 24 | void usage(const char* prg) { 25 | cerr << "usage: " << prg << " [-v] [-h] [-V] " << endl; 26 | } 27 | void usage_long(const char* prg) { 28 | usage(prg); 29 | cerr << " -h\t print this help message" << endl; 30 | cerr << " -V\t print version number and exit" << endl; 31 | cerr << " -v\t increase verbosity (may be used multiple times)" << endl; 32 | } 33 | 34 | int main(int argc, char** argv) { 35 | 36 | int verbosity = 0; 37 | while (1) { 38 | char ch = (char)getopt(argc, argv, "hvV"); 39 | if (ch == -1) break; 40 | switch (ch) { 41 | case 'h': 42 | usage_long(argv[0]); 43 | return 0; 44 | case 'V': 45 | std::cout << VERSION << std::endl; 46 | return 0; 47 | case 'v': 48 | ++verbosity; 49 | break; 50 | } 51 | } 52 | if (optind != argc - 1) { 53 | usage(argv[0]); 54 | return 1; 55 | } 56 | vector keys; 57 | ifstream f(argv[optind]); 58 | if (!f.is_open()) { 59 | std::cerr << "Failed to open " << argv[optind] << std::endl; 60 | exit(-1); 61 | } 62 | string buffer; 63 | while (!getline(f, buffer).eof()) keys.push_back(buffer); 64 | for (uint32_t i = 0; i < keys.size(); ++i) string s = keys[i]; 65 | mph_map table; 66 | 67 | for (uint32_t i = 0; i < keys.size(); ++i) table[keys[i]] = keys[i]; 68 | mph_map::const_iterator it = table.begin(); 69 | mph_map::const_iterator end = table.end(); 70 | for (int i = 0; it != end; ++it, ++i) { 71 | cout << i << ": " << it->first 72 | <<" -> " << it->second << endl; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/dense_hash_map_test.cc: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "mph_map.h" 7 | #include "map_tester.h" 8 | #include "test.h" 9 | 10 | using namespace cxxmph; 11 | 12 | typedef MapTester Tester; 13 | 14 | CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert); 15 | CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert); 16 | CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search); 17 | CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search); 18 | CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search); 19 | CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search); 20 | CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero); 21 | CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size); 22 | CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value); 23 | CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator); 24 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/hollow_iterator.h: -------------------------------------------------------------------------------- 1 | #ifndef __CXXMPH_HOLLOW_ITERATOR_H__ 2 | #define __CXXMPH_HOLLOW_ITERATOR_H__ 3 | 4 | #include 5 | 6 | namespace cxxmph { 7 | 8 | using std::vector; 9 | 10 | template 11 | struct is_empty { 12 | public: 13 | is_empty() : c_(NULL), p_(NULL) {}; 14 | is_empty(const container_type* c, const vector* p) : c_(c), p_(p) {}; 15 | bool operator()(typename container_type::const_iterator it) const { 16 | if (it == c_->end()) return false; 17 | return !(*p_)[it - c_->begin()]; 18 | } 19 | private: 20 | const container_type* c_; 21 | const vector* p_; 22 | }; 23 | 24 | template 25 | struct hollow_iterator_base 26 | : public std::iterator { 28 | public: 29 | typedef hollow_iterator_base self_type; 30 | typedef self_type& self_reference; 31 | typedef typename iterator::reference reference; 32 | typedef typename iterator::pointer pointer; 33 | inline hollow_iterator_base() : 
it_(), empty_() { } 34 | inline hollow_iterator_base(iterator it, is_empty empty, bool solid) : it_(it), empty_(empty) { 35 | if (!solid) advance(); 36 | } 37 | // Same as above, assumes solid==true. 38 | inline hollow_iterator_base(iterator it, is_empty empty) : it_(it), empty_(empty) {} 39 | inline hollow_iterator_base(const self_type& rhs) { it_ = rhs.it_; empty_ = rhs.empty_; } 40 | template 41 | hollow_iterator_base(const hollow_iterator_base& rhs) { it_ = rhs.it_; empty_ = rhs.empty_; } 42 | 43 | reference operator*() { return *it_; } 44 | pointer operator->() { return &(*it_); } 45 | self_reference operator++() { ++it_; advance(); return *this; } 46 | // self_type operator++() { auto tmp(*this); ++tmp; return tmp; } 47 | 48 | template 49 | bool operator==(const hollow_iterator_base& rhs) { return rhs.it_ == it_; } 50 | template 51 | bool operator!=(const hollow_iterator_base& rhs) { return rhs.it_ != it_; } 52 | 53 | // should be friend 54 | iterator it_; 55 | is_empty empty_; 56 | 57 | private: 58 | void advance() { 59 | while (empty_(it_)) ++it_; 60 | } 61 | }; 62 | 63 | template 64 | inline auto make_solid( 65 | container_type* v, const vector* p, iterator it) -> 66 | hollow_iterator_base> { 67 | return hollow_iterator_base>( 68 | it, is_empty(v, p)); 69 | } 70 | 71 | template 72 | inline auto make_hollow( 73 | container_type* v, const vector* p, iterator it) -> 74 | hollow_iterator_base> { 75 | return hollow_iterator_base>( 76 | it, is_empty(v, p), false); 77 | } 78 | 79 | } // namespace cxxmph 80 | 81 | #endif // __CXXMPH_HOLLOW_ITERATOR_H__ 82 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/hollow_iterator_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | using std::cerr; 8 | using std::endl; 9 | using std::vector; 10 | #include "hollow_iterator.h" 11 | using cxxmph::hollow_iterator_base; 12 | using 
cxxmph::make_hollow; 13 | using cxxmph::is_empty; 14 | 15 | int main(int argc, char** argv) { 16 | vector v; 17 | vector p; 18 | for (int i = 0; i < 100; ++i) { 19 | v.push_back(i); 20 | p.push_back(i % 2 == 0); 21 | } 22 | auto begin = make_hollow(&v, &p, v.begin()); 23 | auto end = make_hollow(&v, &p, v.end()); 24 | for (auto it = begin; it != end; ++it) { 25 | if (((*it) % 2) != 0) exit(-1); 26 | } 27 | const vector* cv(&v); 28 | auto cbegin(make_hollow(cv, &p, cv->begin())); 29 | auto cend(make_hollow(cv, &p, cv->begin())); 30 | for (auto it = cbegin; it != cend; ++it) { 31 | if (((*it) % 2) != 0) exit(-1); 32 | } 33 | const vector* cp(&p); 34 | cbegin = make_hollow(cv, cp, v.begin()); 35 | cend = make_hollow(cv, cp, cv->end()); 36 | 37 | vector::iterator vit1 = v.begin(); 38 | vector::const_iterator vit2 = v.begin(); 39 | if (vit1 != vit2) exit(-1); 40 | auto it1 = make_hollow(&v, &p, vit1); 41 | auto it2 = make_hollow(&v, &p, vit2); 42 | if (it1 != it2) exit(-1); 43 | 44 | typedef is_empty> iev; 45 | hollow_iterator_base::iterator, iev> default_constructed; 46 | default_constructed = make_hollow(&v, &p, v.begin()); 47 | return 0; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/map_tester.cc: -------------------------------------------------------------------------------- 1 | #include "map_tester.h" 2 | 3 | namespace cxxxmph { 4 | } 5 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/map_tester_test.cc: -------------------------------------------------------------------------------- 1 | #include "map_tester.h" 2 | #include "test.h" 3 | 4 | using namespace cxxmph; 5 | 6 | typedef MapTester Tester; 7 | 8 | CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert); 9 | CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert); 10 | CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search); 11 | CXXMPH_CXX_TEST_CASE(default_search, 
Tester::default_search); 12 | CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search); 13 | CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search); 14 | CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero); 15 | CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size); 16 | CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value); 17 | CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator); 18 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/mph_bits.cc: -------------------------------------------------------------------------------- 1 | #include "mph_bits.h" 2 | 3 | namespace cxxmph { 4 | 5 | const uint8_t dynamic_2bitset::vmask[] = { 0xfc, 0xf3, 0xcf, 0x3f}; 6 | dynamic_2bitset::dynamic_2bitset() : size_(0), fill_(false) {} 7 | dynamic_2bitset::dynamic_2bitset(uint32_t size, bool fill) 8 | : size_(size), fill_(fill), data_(ceil(size / 4.0), ones()*fill) {} 9 | dynamic_2bitset::~dynamic_2bitset() {} 10 | 11 | } 12 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/mph_bits.h: -------------------------------------------------------------------------------- 1 | #ifndef __CXXMPH_MPH_BITS_H__ 2 | #define __CXXMPH_MPH_BITS_H__ 3 | 4 | #include // for uint32_t and friends 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | namespace cxxmph { 17 | 18 | class dynamic_2bitset { 19 | public: 20 | dynamic_2bitset(); 21 | ~dynamic_2bitset(); 22 | dynamic_2bitset(uint32_t size, bool fill = false); 23 | 24 | const uint8_t operator[](uint32_t i) const { return get(i); } 25 | const uint8_t get(uint32_t i) const { 26 | assert(i < size()); 27 | assert((i >> 2) < data_.size()); 28 | return (data_[(i >> 2)] >> (((i & 3) << 1)) & 3); 29 | } 30 | void set(uint32_t i, uint8_t v) { 31 | assert((i >> 2) < data_.size()); 32 | data_[(i >> 2)] |= ones() ^ dynamic_2bitset::vmask[i & 3]; 33 | data_[(i >> 
2)] &= ((v << ((i & 3) << 1)) | dynamic_2bitset::vmask[i & 3]); 34 | assert(v <= 3); 35 | assert(get(i) == v); 36 | } 37 | void resize(uint32_t size) { 38 | size_ = size; 39 | data_.resize((size >> 2) + ((size & 3) != 0), fill_*ones()); // four 2-bit cells per byte, rounded up like the constructor's ceil(size / 4.0) 40 | } 41 | void swap(dynamic_2bitset& other) { 42 | std::swap(other.size_, size_); 43 | std::swap(other.fill_, fill_); 44 | other.data_.swap(data_); 45 | } 46 | void clear() { data_.clear(); size_ = 0; } 47 | 48 | uint32_t size() const { return size_; } 49 | static const uint8_t vmask[]; 50 | const std::vector& data() const { return data_; } 51 | private: 52 | uint32_t size_; 53 | bool fill_; 54 | std::vector data_; 55 | const uint8_t ones() { return std::numeric_limits::max(); } 56 | }; 57 | 58 | static uint32_t nextpoweroftwo(uint32_t k) { 59 | if (k == 0) return 1; 60 | k--; 61 | for (uint32_t i=1; i> i; 62 | return k+1; 63 | } 64 | // Interesting bit tricks that might end up here: 65 | // http://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord 66 | // Fast a % (k*2^t) 67 | // http://www.azillionmonkeys.com/qed/adiv.html 68 | // rank and select: 69 | // http://vigna.dsi.unimi.it/ftp/papers/Broadword.pdf 70 | 71 | } // namespace cxxmph 72 | 73 | #endif 74 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/mph_bits_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "mph_bits.h" 5 | 6 | using cxxmph::dynamic_2bitset; 7 | using cxxmph::nextpoweroftwo; 8 | 9 | int main(int argc, char** argv) { 10 | dynamic_2bitset small(256, true); 11 | for (uint32_t i = 0; i < small.size(); ++i) small.set(i, i % 4); 12 | for (uint32_t i = 0; i < small.size(); ++i) { 13 | if (small[i] != i % 4) { 14 | fprintf(stderr, "wrong bits %d at %d expected %d\n", small[i], i, i % 4); 15 | exit(-1); 16 | } 17 | } 18 | 19 | uint32_t size = 256; 20 | dynamic_2bitset bits(size, true /* fill with ones */); 21 | for (uint32_t i = 0; i < size; ++i) { 22 
| if (bits[i] != 3) { 23 | fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 3); 24 | exit(-1); 25 | } 26 | } 27 | for (uint32_t i = 0; i < size; ++i) bits.set(i, 0); 28 | for (uint32_t i = 0; i < size; ++i) { 29 | if (bits[i] != 0) { 30 | fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, 0); 31 | exit(-1); 32 | } 33 | } 34 | for (uint32_t i = 0; i < size; ++i) bits.set(i, i % 4); 35 | for (uint32_t i = 0; i < size; ++i) { 36 | if (bits[i] != i % 4) { 37 | fprintf(stderr, "wrong bits %d at %d expected %d\n", bits[i], i, i % 4); 38 | exit(-1); 39 | } 40 | } 41 | dynamic_2bitset size_corner1(1); 42 | if (size_corner1.size() != 1) exit(-1); 43 | dynamic_2bitset size_corner2(2); 44 | if (size_corner2.size() != 2) exit(-1); 45 | (dynamic_2bitset(4, true)).swap(size_corner2); 46 | if (size_corner2.size() != 4) exit(-1); 47 | for (uint32_t i = 0; i < size_corner2.size(); ++i) { 48 | if (size_corner2[i] != 3) exit(-1); 49 | } 50 | size_corner2.clear(); 51 | if (size_corner2.size() != 0) exit(-1); 52 | 53 | dynamic_2bitset empty; 54 | empty.clear(); 55 | dynamic_2bitset large(1000, true); 56 | empty.swap(large); 57 | 58 | if (nextpoweroftwo(3) != 4) exit(-1); 59 | } 60 | 61 | 62 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/mph_index_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "mph_index.h" 7 | 8 | using std::string; 9 | using std::vector; 10 | using namespace cxxmph; 11 | 12 | int main(int argc, char** argv) { 13 | 14 | srand(1); 15 | vector keys; 16 | keys.push_back("davi"); 17 | keys.push_back("paulo"); 18 | keys.push_back("joao"); 19 | keys.push_back("maria"); 20 | keys.push_back("bruno"); 21 | keys.push_back("paula"); 22 | keys.push_back("diego"); 23 | keys.push_back("diogo"); 24 | keys.push_back("algume"); 25 | 26 | SimpleMPHIndex mph_index; 27 | if 
(!mph_index.Reset(keys.begin(), keys.end(), keys.size())) { exit(-1); } 28 | vector ids; 29 | for (vector::size_type i = 0; i < keys.size(); ++i) { 30 | ids.push_back(mph_index.index(keys[i])); 31 | cerr << " " << *(ids.end() - 1); 32 | } 33 | cerr << endl; 34 | sort(ids.begin(), ids.end()); 35 | for (vector::size_type i = 0; i < ids.size(); ++i) assert(ids[i] == static_cast::value_type>(i)); 36 | 37 | FlexibleMPHIndex>::hash_function> square_empty; 38 | auto id = square_empty.index(1); 39 | FlexibleMPHIndex>::hash_function> unordered_empty; 40 | id ^= unordered_empty.index(1); 41 | FlexibleMPHIndex>::hash_function> minimal_empty; 42 | id ^= minimal_empty.index(1); 43 | } 44 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/mph_map_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "mph_map.h" 7 | #include "map_tester.h" 8 | #include "test.h" 9 | 10 | using namespace cxxmph; 11 | 12 | typedef MapTester Tester; 13 | 14 | /* 15 | CXXMPH_CXX_TEST_CASE(small_insert, Tester::small_insert); 16 | CXXMPH_CXX_TEST_CASE(large_insert, Tester::large_insert); 17 | CXXMPH_CXX_TEST_CASE(small_search, Tester::small_search); 18 | CXXMPH_CXX_TEST_CASE(default_search, Tester::default_search); 19 | CXXMPH_CXX_TEST_CASE(large_search, Tester::large_search); 20 | CXXMPH_CXX_TEST_CASE(string_search, Tester::string_search); 21 | CXXMPH_CXX_TEST_CASE(rehash_zero, Tester::rehash_zero); 22 | CXXMPH_CXX_TEST_CASE(rehash_size, Tester::rehash_size); 23 | CXXMPH_CXX_TEST_CASE(erase_value, Tester::erase_value); 24 | */ 25 | CXXMPH_CXX_TEST_CASE(erase_iterator, Tester::erase_iterator); 26 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/seeded_hash_test.cc: -------------------------------------------------------------------------------- 1 | #include "seeded_hash.h" 2 | 3 | #include 4 | 
#include 5 | #include 6 | 7 | using std::cerr; 8 | using std::endl; 9 | using std::string; 10 | using std::unordered_map; 11 | using namespace cxxmph; 12 | 13 | int main(int argc, char** argv) { 14 | auto hasher = seeded_hash_function(); 15 | string key1("0"); 16 | string key2("1"); 17 | auto h1 = hasher.hash128(key1, 1); 18 | auto h2 = hasher.hash128(key2, 1); 19 | if (h1 == h2) { 20 | fprintf(stderr, "unexpected murmur collision\n"); 21 | exit(-1); 22 | } 23 | 24 | unordered_map g; 25 | for (int i = 0; i < 1000; ++i) g[i] = i; 26 | for (int i = 0; i < 1000; ++i) if (g[i] != i) exit(-1); 27 | 28 | auto inthasher = seeded_hash_function>(); 29 | unordered_map g2; 30 | for (uint64_t i = 0; i < 1000; ++i) { 31 | auto h = inthasher.hash128(i, 0); 32 | if (g2.find(h) != g2.end()) { 33 | std::cerr << "Incorrectly found " << i << std::endl; 34 | exit(-1); 35 | } 36 | if (h128::hash32()(h) != h[3]) { 37 | cerr << "Buggy hash method." << endl; 38 | exit(-1); 39 | } 40 | auto h2 = inthasher.hash128(i, 0); 41 | if (!(h == h2)) { 42 | cerr << "h 64(0) " << h.get64(0) << " h 64(1) " << h.get64(1) << endl; 43 | cerr << " h2 64(0) " << h2.get64(0) << " h2 64(1) " << h2.get64(1) << endl; 44 | cerr << "Broken equality for h128" << endl; 45 | exit(-1); 46 | } 47 | if (h128::hash32()(h) != h128::hash32()(h2)) { 48 | cerr << "Inconsistent hash method." 
<< endl; 49 | exit(-1); 50 | } 51 | g2[h] = i; 52 | if (g2.find(h) == g2.end()) { 53 | std::cerr << "Incorrectly missed " << i << std::endl; 54 | exit(-1); 55 | } 56 | } 57 | 58 | for (uint64_t i = 0; i < 1000; ++i) if (g2[inthasher.hash128(i, 0)] != i) exit(-1); 59 | } 60 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/string_util.cc: -------------------------------------------------------------------------------- 1 | #include "string_util.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | namespace cxxmph { 11 | 12 | bool stream_printf( 13 | const std::string& format_string, uint32_t offset, std::ostream* out) { 14 | if (offset == format_string.length()) return true; 15 | assert(offset < format_string.length()); 16 | cerr << "length:" << format_string.length() << endl; 17 | cerr << "offset:" << offset << endl; 18 | auto txt = format_string.substr(offset, format_string.length() - offset); 19 | *out << txt; 20 | return true; 21 | } 22 | 23 | } // namespace cxxmph 24 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/string_util_test.cc: -------------------------------------------------------------------------------- 1 | #include "string_util.h" 2 | #include "test.h" 3 | 4 | using namespace cxxmph; 5 | 6 | bool test_format() { 7 | string expected = " %% 4 foo 0x0A bar "; 8 | string foo = "foo"; 9 | string fmt = format(" %%%% %v %v 0x%.2X bar ", 4, foo, 10); 10 | fail_unless(fmt == expected, "expected\n-%s-\n got \n-%s-", expected.c_str(), fmt.c_str()); 11 | return true; 12 | } 13 | 14 | bool test_infoln() { 15 | infoln(string("%s:%d: MY INFO LINE"), __FILE__, __LINE__); 16 | return true; 17 | } 18 | 19 | 20 | bool test_macro() { 21 | CXXMPH_DEBUGLN("here i am")(); 22 | return true; 23 | } 24 | 25 | CXXMPH_TEST_CASE(test_format) 26 | CXXMPH_TEST_CASE(test_infoln) 27 | CXXMPH_TEST_CASE(test_macro) 28 | 
-------------------------------------------------------------------------------- /cmph-2.0/cxxmph/test.cc: -------------------------------------------------------------------------------- 1 | #include // For EXIT_SUCCESS, EXIT_FAILURE 2 | 3 | #include "test.h" 4 | 5 | Suite* global_suite = suite_create("cxxmph_test_suite"); 6 | TCase* global_tc_core = tcase_create("Core"); 7 | 8 | int main (void) { 9 | suite_add_tcase(global_suite, global_tc_core); 10 | int number_failed; 11 | SRunner *sr = srunner_create (global_suite); 12 | srunner_run_all (sr, CK_NORMAL); 13 | number_failed = srunner_ntests_failed (sr); 14 | srunner_free (sr); 15 | return (number_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; 16 | } 17 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/test.h: -------------------------------------------------------------------------------- 1 | #ifndef __CXXMPH_TEST_H__ 2 | #define __CXXMPH_TEST_H__ 3 | 4 | // Thin wrapper on top of check.h to get rid of boilerplate in tests. Assumes a 5 | // single test suite and test case per file, with each fixture represented by a 6 | // parameter-less boolean function. 7 | // 8 | // The check.h header macro-clashes with c++ libraries so this file needs to be 9 | // included last. 10 | 11 | #include 12 | 13 | extern Suite* global_suite; 14 | extern TCase* global_tc_core; 15 | 16 | // Creates a new test case calling boolean_function. Name must be a valid, 17 | // unique c identifier when prefixed with tc_. 
18 | #define CXXMPH_CXX_TEST_CASE(name, boolean_function) \ 19 | START_TEST(tc_ ## name) \ 20 | { fail_unless(boolean_function()); } END_TEST \ 21 | static TestCase global_cxxmph_tc_ ## name(tc_ ## name); 22 | 23 | #define CXXMPH_TEST_CASE(name) CXXMPH_CXX_TEST_CASE(name, name) 24 | 25 | struct TestCase { 26 | TestCase(void (*f)(int)) { 27 | tcase_add_test(global_tc_core, f); 28 | } 29 | }; 30 | 31 | #endif // __CXXMPH_TEST_H__ 32 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/test_test.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | bool tautology() { return true; } 4 | CXXMPH_TEST_CASE(tautology) 5 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/trigraph.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "trigraph.h" 6 | 7 | using std::cerr; 8 | using std::endl; 9 | using std::vector; 10 | 11 | namespace { 12 | static const uint32_t kInvalidEdge = std::numeric_limits::max(); 13 | } 14 | 15 | namespace cxxmph { 16 | 17 | TriGraph::TriGraph(uint32_t nvertices, uint32_t nedges) 18 | : nedges_(0), 19 | edges_(nedges), 20 | next_edge_(nedges), 21 | first_edge_(nvertices, kInvalidEdge), 22 | vertex_degree_(nvertices, 0) { } 23 | TriGraph::~TriGraph() {} 24 | 25 | void TriGraph::ExtractEdgesAndClear(vector* edges) { 26 | vector().swap(next_edge_); 27 | vector().swap(first_edge_); 28 | vector().swap(vertex_degree_); 29 | nedges_ = 0; 30 | edges->swap(edges_); 31 | } 32 | void TriGraph::AddEdge(const Edge& edge) { 33 | edges_[nedges_] = edge; 34 | assert(first_edge_.size() > edge[0]); 35 | assert(first_edge_.size() > edge[1]); 36 | assert(first_edge_.size() > edge[0]); 37 | assert(first_edge_.size() > edge[1]); 38 | assert(first_edge_.size() > edge[2]); 39 | assert(next_edge_.size() > nedges_); 40 | 
next_edge_[nedges_] = Edge( 41 | first_edge_[edge[0]], first_edge_[edge[1]], first_edge_[edge[2]]); 42 | first_edge_[edge[0]] = first_edge_[edge[1]] = first_edge_[edge[2]] = nedges_; 43 | ++vertex_degree_[edge[0]]; 44 | ++vertex_degree_[edge[1]]; 45 | ++vertex_degree_[edge[2]]; 46 | ++nedges_; 47 | } 48 | 49 | void TriGraph::RemoveEdge(uint32_t current_edge) { 50 | // cerr << "Removing edge " << current_edge << " from " << nedges_ << " existing edges " << endl; 51 | for (int i = 0; i < 3; ++i) { 52 | uint32_t vertex = edges_[current_edge][i]; 53 | uint32_t edge1 = first_edge_[vertex]; 54 | uint32_t edge2 = kInvalidEdge; 55 | uint32_t j = 0; 56 | while (edge1 != current_edge && edge1 != kInvalidEdge) { // walk this vertex's edge list; edge2 trails one step behind and j is the vertex's slot in edge2 57 | edge2 = edge1; 58 | if (edges_[edge1][0] == vertex) j = 0; 59 | else if (edges_[edge1][1] == vertex) j = 1; 60 | else j = 2; 61 | edge1 = next_edge_[edge1][j]; 62 | } 63 | assert(edge1 != kInvalidEdge); 64 | if (edge2 != kInvalidEdge) next_edge_[edge2][j] = next_edge_[edge1][i]; // unlink: the predecessor now points past current_edge 65 | else first_edge_[vertex] = next_edge_[edge1][i]; // current_edge was the list head, so the head moves on 66 | --vertex_degree_[vertex]; 67 | } 68 | } 69 | 70 | void TriGraph::DebugGraph() const { 71 | uint32_t i; 72 | for(i = 0; i < edges_.size(); i++){ 73 | cerr << i << " " << edges_[i][0] << " " << edges_[i][1] << " " << edges_[i][2] 74 | << " nexts " << next_edge_[i][0] << " " << next_edge_[i][1] << " " << next_edge_[i][2] << endl; 75 | } 76 | for(i = 0; i < first_edge_.size();i++){ 77 | cerr << "first for vertice " < // for uint32_t and friends 10 | 11 | #include 12 | 13 | namespace cxxmph { 14 | 15 | class TriGraph { 16 | public: 17 | struct Edge { 18 | Edge() { } 19 | Edge(uint32_t v0, uint32_t v1, uint32_t v2) { 20 | vertices[0] = v0; 21 | vertices[1] = v1; 22 | vertices[2] = v2; 23 | } 24 | uint32_t& operator[](uint8_t v) { return vertices[v]; } 25 | const uint32_t& operator[](uint8_t v) const { return vertices[v]; } 26 | uint32_t vertices[3]; 27 | }; 28 | TriGraph(uint32_t nvertices, uint32_t nedges); // same parameter order as the definition in trigraph.cc 29 | ~TriGraph(); 30 | void 
AddEdge(const Edge& edge); 31 | void RemoveEdge(uint32_t edge_id); 32 | void ExtractEdgesAndClear(std::vector* edges); 33 | void DebugGraph() const; 34 | 35 | const std::vector& edges() const { return edges_; } 36 | const std::vector& vertex_degree() const { return vertex_degree_; } 37 | const std::vector& first_edge() const { return first_edge_; } 38 | 39 | private: 40 | uint32_t nedges_; // total number of edges 41 | std::vector edges_; 42 | std::vector next_edge_; // for implementing removal 43 | std::vector first_edge_; // the first edge for this vertex 44 | std::vector vertex_degree_; // number of edges for this vertex 45 | }; 46 | 47 | } // namespace cxxmph 48 | 49 | #endif // __CXXMPH_TRIGRAPH_H__ 50 | -------------------------------------------------------------------------------- /cmph-2.0/cxxmph/trigraph_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "trigraph.h" 4 | 5 | using cxxmph::TriGraph; 6 | 7 | int main(int argc, char** argv) { 8 | TriGraph g(4, 2); 9 | g.AddEdge(TriGraph::Edge(0, 1, 2)); 10 | g.AddEdge(TriGraph::Edge(1, 3, 2)); 11 | assert(g.vertex_degree()[0] == 1); 12 | assert(g.vertex_degree()[1] == 2); 13 | assert(g.vertex_degree()[2] == 2); 14 | assert(g.vertex_degree()[3] == 1); 15 | g.RemoveEdge(0); 16 | assert(g.vertex_degree()[0] == 0); 17 | assert(g.vertex_degree()[1] == 1); 18 | assert(g.vertex_degree()[2] == 1); 19 | assert(g.vertex_degree()[3] == 1); 20 | std::vector edges; 21 | g.ExtractEdgesAndClear(&edges); 22 | } 23 | -------------------------------------------------------------------------------- /cmph-2.0/examples/Makefile.am: -------------------------------------------------------------------------------- 1 | noinst_PROGRAMS = vector_adapter_ex1 file_adapter_ex2 struct_vector_adapter_ex3 2 | 3 | INCLUDES = -I../src/ 4 | 5 | vector_adapter_ex1_LDADD = ../src/libcmph.la 6 | vector_adapter_ex1_SOURCES = vector_adapter_ex1.c 7 | 8 | file_adapter_ex2_LDADD = 
../src/libcmph.la 9 | file_adapter_ex2_SOURCES = file_adapter_ex2.c 10 | 11 | struct_vector_adapter_ex3_LDADD = ../src/libcmph.la 12 | struct_vector_adapter_ex3_SOURCES = struct_vector_adapter_ex3.c 13 | -------------------------------------------------------------------------------- /cmph-2.0/examples/file_adapter_ex2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | // Create minimal perfect hash function from in-disk keys using BDZ algorithm 5 | int main(int argc, char **argv) 6 | { 7 | //Open file with newline separated list of keys 8 | FILE * keys_fd = fopen("keys.txt", "r"); 9 | cmph_t *hash = NULL; 10 | if (keys_fd == NULL) 11 | { 12 | fprintf(stderr, "File \"keys.txt\" not found\n"); 13 | exit(1); 14 | } 15 | // Source of keys 16 | cmph_io_adapter_t *source = cmph_io_nlfile_adapter(keys_fd); 17 | 18 | cmph_config_t *config = cmph_config_new(source); 19 | cmph_config_set_algo(config, CMPH_BDZ); 20 | hash = cmph_new(config); 21 | cmph_config_destroy(config); 22 | 23 | //Find key 24 | const char *key = "jjjjjjjjjj"; 25 | unsigned int id = cmph_search(hash, key, (cmph_uint32)strlen(key)); 26 | fprintf(stderr, "Id:%u\n", id); 27 | //Destroy hash 28 | cmph_destroy(hash); 29 | cmph_io_nlfile_adapter_destroy(source); 30 | fclose(keys_fd); 31 | return 0; 32 | } 33 | -------------------------------------------------------------------------------- /cmph-2.0/examples/struct_vector_adapter_ex3.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Create minimal perfect hash function from in-memory vector 4 | 5 | #pragma pack(1) 6 | typedef struct { 7 | cmph_uint32 id; 8 | char key[11]; 9 | cmph_uint32 year; 10 | } rec_t; 11 | #pragma pack() // restore the default packing; 0 is not a valid pack value on every compiler 12 | 13 | int main(int argc, char **argv) 14 | { 15 | // Creating a filled vector 16 | unsigned int i = 0; 17 | rec_t vector[10] = {{1, "aaaaaaaaaa", 1999}, {2, "bbbbbbbbbb", 2000}, {3, 
"cccccccccc", 2001}, 18 | {4, "dddddddddd", 2002}, {5, "eeeeeeeeee", 2003}, {6, "ffffffffff", 2004}, 19 | {7, "gggggggggg", 2005}, {8, "hhhhhhhhhh", 2006}, {9, "iiiiiiiiii", 2007}, 20 | {10,"jjjjjjjjjj", 2008}}; 21 | unsigned int nkeys = 10; 22 | FILE* mphf_fd = fopen("temp_struct_vector.mph", "w"); 23 | // Source of keys 24 | cmph_io_adapter_t *source = cmph_io_struct_vector_adapter(vector, (cmph_uint32)sizeof(rec_t), (cmph_uint32)sizeof(cmph_uint32), 11, nkeys); 25 | 26 | //Create minimal perfect hash function using the BDZ algorithm. 27 | cmph_config_t *config = cmph_config_new(source); 28 | cmph_config_set_algo(config, CMPH_BDZ); 29 | cmph_config_set_mphf_fd(config, mphf_fd); 30 | cmph_t *hash = cmph_new(config); 31 | cmph_config_destroy(config); 32 | cmph_dump(hash, mphf_fd); 33 | cmph_destroy(hash); 34 | fclose(mphf_fd); 35 | 36 | //Find key 37 | mphf_fd = fopen("temp_struct_vector.mph", "r"); 38 | hash = cmph_load(mphf_fd); 39 | while (i < nkeys) { 40 | const char *key = vector[i].key; 41 | unsigned int id = cmph_search(hash, key, 11); 42 | fprintf(stderr, "key:%s -- hash:%u\n", key, id); 43 | i++; 44 | } 45 | 46 | //Destroy hash 47 | cmph_destroy(hash); 48 | cmph_io_struct_vector_adapter_destroy(source); // matches the cmph_io_struct_vector_adapter() call that created source 49 | fclose(mphf_fd); 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /cmph-2.0/examples/vector_adapter_ex1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | // Create minimal perfect hash function from in-memory vector 4 | int main(int argc, char **argv) 5 | { 6 | 7 | // Creating a filled vector 8 | unsigned int i = 0; 9 | const char *vector[] = {"aaaaaaaaaa", "bbbbbbbbbb", "cccccccccc", "dddddddddd", "eeeeeeeeee", 10 | "ffffffffff", "gggggggggg", "hhhhhhhhhh", "iiiiiiiiii", "jjjjjjjjjj"}; 11 | unsigned int nkeys = 10; 12 | FILE* mphf_fd = fopen("temp.mph", "w"); 13 | // Source of keys 14 | cmph_io_adapter_t *source = 
cmph_io_vector_adapter((char **)vector, nkeys); 15 | 16 | //Create minimal perfect hash function using the brz algorithm. 17 | cmph_config_t *config = cmph_config_new(source); 18 | cmph_config_set_algo(config, CMPH_BRZ); 19 | cmph_config_set_mphf_fd(config, mphf_fd); 20 | cmph_t *hash = cmph_new(config); 21 | cmph_config_destroy(config); 22 | cmph_dump(hash, mphf_fd); 23 | cmph_destroy(hash); 24 | fclose(mphf_fd); 25 | 26 | //Find key 27 | mphf_fd = fopen("temp.mph", "r"); 28 | hash = cmph_load(mphf_fd); 29 | while (i < nkeys) { 30 | const char *key = vector[i]; 31 | unsigned int id = cmph_search(hash, key, (cmph_uint32)strlen(key)); 32 | fprintf(stderr, "key:%s -- hash:%u\n", key, id); 33 | i++; 34 | } 35 | 36 | //Destroy hash 37 | cmph_destroy(hash); 38 | cmph_io_vector_adapter_destroy(source); 39 | fclose(mphf_fd); 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /cmph-2.0/m4/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc. 4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 
9 | 10 | # @configure_input@ 11 | 12 | # serial 4179 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.4.6]) 16 | m4_define([LT_PACKAGE_REVISION], [2.4.6]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.4.6' 20 | macro_revision='2.4.6' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /cmph-2.0/man/Makefile.am: -------------------------------------------------------------------------------- 1 | man_MANS = cmph.1 2 | EXTRA_DIST=cmph.1 3 | -------------------------------------------------------------------------------- /cmph-2.0/man/cmph.1: -------------------------------------------------------------------------------- 1 | .TH CMPH "1" "October 2007" "cmph 0.6" "User Commands" 2 | .SH NAME 3 | cmph \- minimal perfect hashing tool 4 | .SH SYNOPSIS 5 | .B cmph 6 | [-v] [-h] [-V] [-k nkeys] [-f hash_function] [-g [-c value][-s seed] ] [-a algorithm] [-M memory_in_MB] [-b BRZ_parameter] [-d tmp_dir] [-m file.mph] keysfile 7 | .SH DESCRIPTION 8 | .PP 9 | Command line tool to generate and query minimal perfect hash functions. 10 | .PP 11 | Please refer to http://cmph.sf.net for full documentation. 12 | .TP 13 | \fB\-h\fR 14 | Print a help message 15 | .TP 16 | \fB\-c\fR 17 | This value determines: the number of vertices in the graph for the algorithms BMZ and CHM; the number of bits per key required in the FCH algorithm 18 | .TP 19 | \fB\-a\fR 20 | Algorithm. Valid values are: bmz, bmz8, chm, brz, fch 21 | .TP 22 | \fB\-f\fR 23 | Hash function (may be used multiple times). 
Valid values are: djb2, fnv, jenkins, sdbm 24 | .TP 25 | \fB\-V\fR 26 | Print version number and exit 27 | .TP 28 | \fB\-v\fR 29 | Increase verbosity (may be used multiple times) 30 | .TP 31 | \fB\-k\fR 32 | Number of keys 33 | .TP 34 | \fB\-g\fR 35 | Generation mode 36 | .TP 37 | \fB\-s\fR 38 | Random seed 39 | .TP 40 | \fB\-m\fR 41 | Minimal perfect hash function file 42 | .TP 43 | \fB\-M\fR 44 | Main memory availability (in MB) 45 | .TP 46 | \fB\-d\fR 47 | Temporary directory used by the BRZ algorithm 48 | .TP 49 | \fB\-b\fR 50 | Parameter of BRZ algorithm to make the maximal number of keys in a bucket lower than 256 51 | .TP 52 | \fBkeysfile\fR 53 | File containing one key per line 54 | .SH EXAMPLE 55 | $ # Using the default algorithm (chm) for constructing an MPHF 56 | .br 57 | $ # for keys in file keys_file. Lines in keys_file _must_ be unique. 58 | .br 59 | $ ./cmph -v -g keys_file 60 | .br 61 | $ # Query id of keys in the file keys_query 62 | .br 63 | $ ./cmph -v -m keys_file.mph keys_query 64 | .SH AUTHOR 65 | This manual page was written by Enrico Tassi , 66 | for the Debian project (but may be used by others). 
67 | 68 | -------------------------------------------------------------------------------- /cmph-2.0/src/Makefile.am: -------------------------------------------------------------------------------- 1 | bin_PROGRAMS = cmph 2 | noinst_PROGRAMS = bm_numbers 3 | lib_LTLIBRARIES = libcmph.la 4 | include_HEADERS = cmph.h cmph_types.h cmph_time.h chd_ph.h 5 | libcmph_la_SOURCES = hash.h hash.c \ 6 | jenkins_hash.h jenkins_hash.c \ 7 | hash_state.h debug.h \ 8 | vstack.h vstack.c vqueue.h vqueue.c\ 9 | graph.h graph.c bitbool.h \ 10 | cmph.h cmph.c cmph_structs.h cmph_structs.c\ 11 | chm.h chm.c chm_structs.h \ 12 | bmz.h bmz.c bmz_structs.h \ 13 | bmz8.h bmz8.c bmz8_structs.h \ 14 | bdz.h bdz.c bdz_structs.h \ 15 | bdz_ph.h bdz_ph.c bdz_structs_ph.h \ 16 | brz.h brz.c brz_structs.h \ 17 | fch.h fch.c fch_structs.h \ 18 | fch_buckets.h fch_buckets.c \ 19 | chd.h chd.c chd_structs.h \ 20 | chd_ph.h chd_ph.c chd_structs_ph.h \ 21 | miller_rabin.h miller_rabin.c \ 22 | buffer_manager.h buffer_manager.c \ 23 | buffer_entry.h buffer_entry.c\ 24 | select.h select.c select_lookup_tables.h \ 25 | compressed_seq.h compressed_seq.c \ 26 | compressed_rank.h compressed_rank.c \ 27 | linear_string_map.h linear_string_map.c \ 28 | cmph_benchmark.h cmph_benchmark.c \ 29 | cmph_time.h 30 | 31 | libcmph_la_LDFLAGS = -version-info 0:0:0 32 | 33 | cmph_SOURCES = main.c wingetopt.h wingetopt.c 34 | cmph_LDADD = libcmph.la 35 | 36 | bm_numbers_SOURCES = bm_numbers.c 37 | bm_numbers_LDADD = libcmph.la 38 | -------------------------------------------------------------------------------- /cmph-2.0/src/bdz.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BDZ_H__ 2 | #define __CMPH_BDZ_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __bdz_data_t bdz_data_t; 7 | typedef struct __bdz_config_data_t bdz_config_data_t; 8 | 9 | bdz_config_data_t *bdz_config_new(void); 10 | void bdz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 
11 | void bdz_config_destroy(cmph_config_t *mph); 12 | void bdz_config_set_b(cmph_config_t *mph, cmph_uint32 b); 13 | cmph_t *bdz_new(cmph_config_t *mph, double c); 14 | 15 | void bdz_load(FILE *f, cmph_t *mphf); 16 | int bdz_dump(cmph_t *mphf, FILE *f); 17 | void bdz_destroy(cmph_t *mphf); 18 | cmph_uint32 bdz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 19 | 20 | /** \fn void bdz_pack(cmph_t *mphf, void *packed_mphf); 21 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 22 | * \param mphf pointer to the resulting mphf 23 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 24 | */ 25 | void bdz_pack(cmph_t *mphf, void *packed_mphf); 26 | 27 | /** \fn cmph_uint32 bdz_packed_size(cmph_t *mphf); 28 | * \brief Return the amount of space needed to pack mphf. 29 | * \param mphf pointer to a mphf 30 | * \return the size of the packed function or zero for failures 31 | */ 32 | cmph_uint32 bdz_packed_size(cmph_t *mphf); 33 | 34 | /** cmph_uint32 bdz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 35 | * \brief Use the packed mphf to do a search. 
36 | * \param packed_mphf pointer to the packed mphf 37 | * \param key key to be hashed 38 | * \param keylen key legth in bytes 39 | * \return The mphf value 40 | */ 41 | cmph_uint32 bdz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /cmph-2.0/src/bdz_ph.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BDZ_PH_H__ 2 | #define __CMPH_BDZ_PH_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __bdz_ph_data_t bdz_ph_data_t; 7 | typedef struct __bdz_ph_config_data_t bdz_ph_config_data_t; 8 | 9 | bdz_ph_config_data_t *bdz_ph_config_new(void); 10 | void bdz_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 11 | void bdz_ph_config_destroy(cmph_config_t *mph); 12 | cmph_t *bdz_ph_new(cmph_config_t *mph, double c); 13 | 14 | void bdz_ph_load(FILE *f, cmph_t *mphf); 15 | int bdz_ph_dump(cmph_t *mphf, FILE *f); 16 | void bdz_ph_destroy(cmph_t *mphf); 17 | cmph_uint32 bdz_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 18 | 19 | /** \fn void bdz_ph_pack(cmph_t *mphf, void *packed_mphf); 20 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 21 | * \param mphf pointer to the resulting mphf 22 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 23 | */ 24 | void bdz_ph_pack(cmph_t *mphf, void *packed_mphf); 25 | 26 | /** \fn cmph_uint32 bdz_ph_packed_size(cmph_t *mphf); 27 | * \brief Return the amount of space needed to pack mphf. 
28 | * \param mphf pointer to a mphf 29 | * \return the size of the packed function or zero for failures 30 | */ 31 | cmph_uint32 bdz_ph_packed_size(cmph_t *mphf); 32 | 33 | /** cmph_uint32 bdz_ph_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 34 | * \brief Use the packed mphf to do a search. 35 | * \param packed_mphf pointer to the packed mphf 36 | * \param key key to be hashed 37 | * \param keylen key legth in bytes 38 | * \return The mphf value 39 | */ 40 | cmph_uint32 bdz_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /cmph-2.0/src/bdz_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BDZ_STRUCTS_H__ 2 | #define __CMPH_BDZ_STRUCTS_H__ 3 | 4 | #include "hash_state.h" 5 | 6 | struct __bdz_data_t 7 | { 8 | cmph_uint32 m; //edges (words) count 9 | cmph_uint32 n; //vertex count 10 | cmph_uint32 r; //partition vertex count 11 | cmph_uint8 *g; 12 | hash_state_t *hl; // linear hashing 13 | 14 | cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$ 15 | cmph_uint8 b; // number of bits of k 16 | cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$ 17 | cmph_uint32 *ranktable; // rank table 18 | }; 19 | 20 | 21 | struct __bdz_config_data_t 22 | { 23 | cmph_uint32 m; //edges (words) count 24 | cmph_uint32 n; //vertex count 25 | cmph_uint32 r; //partition vertex count 26 | cmph_uint8 *g; 27 | hash_state_t *hl; // linear hashing 28 | 29 | cmph_uint32 k; //kth index in ranktable, $k = log_2(n=3r)/\varepsilon$ 30 | cmph_uint8 b; // number of bits of k 31 | cmph_uint32 ranktablesize; //number of entries in ranktable, $n/k +1$ 32 | cmph_uint32 *ranktable; // rank table 33 | CMPH_HASH hashfunc; 34 | }; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /cmph-2.0/src/bdz_structs_ph.h: 
-------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BDZ_STRUCTS_PH_H__ 2 | #define __CMPH_BDZ_STRUCTS_PH_H__ 3 | 4 | #include "hash_state.h" 5 | 6 | struct __bdz_ph_data_t 7 | { 8 | cmph_uint32 m; //edges (words) count 9 | cmph_uint32 n; //vertex count 10 | cmph_uint32 r; //partition vertex count 11 | cmph_uint8 *g; 12 | hash_state_t *hl; // linear hashing 13 | }; 14 | 15 | 16 | struct __bdz_ph_config_data_t 17 | { 18 | CMPH_HASH hashfunc; 19 | cmph_uint32 m; //edges (words) count 20 | cmph_uint32 n; //vertex count 21 | cmph_uint32 r; //partition vertex count 22 | cmph_uint8 *g; 23 | hash_state_t *hl; // linear hashing 24 | }; 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /cmph-2.0/src/bmz.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BMZ_H__ 2 | #define __CMPH_BMZ_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __bmz_data_t bmz_data_t; 7 | typedef struct __bmz_config_data_t bmz_config_data_t; 8 | 9 | bmz_config_data_t *bmz_config_new(void); 10 | void bmz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 11 | void bmz_config_destroy(cmph_config_t *mph); 12 | cmph_t *bmz_new(cmph_config_t *mph, double c); 13 | 14 | void bmz_load(FILE *f, cmph_t *mphf); 15 | int bmz_dump(cmph_t *mphf, FILE *f); 16 | void bmz_destroy(cmph_t *mphf); 17 | cmph_uint32 bmz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 18 | 19 | /** \fn void bmz_pack(cmph_t *mphf, void *packed_mphf); 20 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 21 | * \param mphf pointer to the resulting mphf 22 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. 
The size of packed_mphf must be at least cmph_packed_size() 23 | */ 24 | void bmz_pack(cmph_t *mphf, void *packed_mphf); 25 | 26 | /** \fn cmph_uint32 bmz_packed_size(cmph_t *mphf); 27 | * \brief Return the amount of space needed to pack mphf. 28 | * \param mphf pointer to a mphf 29 | * \return the size of the packed function or zero for failures 30 | */ 31 | cmph_uint32 bmz_packed_size(cmph_t *mphf); 32 | 33 | /** cmph_uint32 bmz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 34 | * \brief Use the packed mphf to do a search. 35 | * \param packed_mphf pointer to the packed mphf 36 | * \param key key to be hashed 37 | * \param keylen key legth in bytes 38 | * \return The mphf value 39 | */ 40 | cmph_uint32 bmz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /cmph-2.0/src/bmz8.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BMZ8_H__ 2 | #define __CMPH_BMZ8_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __bmz8_data_t bmz8_data_t; 7 | typedef struct __bmz8_config_data_t bmz8_config_data_t; 8 | 9 | bmz8_config_data_t *bmz8_config_new(void); 10 | void bmz8_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 11 | void bmz8_config_destroy(cmph_config_t *mph); 12 | cmph_t *bmz8_new(cmph_config_t *mph, double c); 13 | 14 | void bmz8_load(FILE *f, cmph_t *mphf); 15 | int bmz8_dump(cmph_t *mphf, FILE *f); 16 | void bmz8_destroy(cmph_t *mphf); 17 | cmph_uint8 bmz8_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 18 | 19 | /** \fn void bmz8_pack(cmph_t *mphf, void *packed_mphf); 20 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 21 | * \param mphf pointer to the resulting mphf 22 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. 
The size of packed_mphf must be at least cmph_packed_size() 23 | */ 24 | void bmz8_pack(cmph_t *mphf, void *packed_mphf); 25 | 26 | /** \fn cmph_uint32 bmz8_packed_size(cmph_t *mphf); 27 | * \brief Return the amount of space needed to pack mphf. 28 | * \param mphf pointer to a mphf 29 | * \return the size of the packed function or zero for failures 30 | */ 31 | cmph_uint32 bmz8_packed_size(cmph_t *mphf); 32 | 33 | /** cmph_uint8 bmz8_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 34 | * \brief Use the packed mphf to do a search. 35 | * \param packed_mphf pointer to the packed mphf 36 | * \param key key to be hashed 37 | * \param keylen key legth in bytes 38 | * \return The mphf value 39 | */ 40 | cmph_uint8 bmz8_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /cmph-2.0/src/bmz8_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BMZ8_STRUCTS_H__ 2 | #define __CMPH_BMZ8_STRUCTS_H__ 3 | 4 | #include "hash_state.h" 5 | 6 | struct __bmz8_data_t 7 | { 8 | cmph_uint8 m; //edges (words) count 9 | cmph_uint8 n; //vertex count 10 | cmph_uint8 *g; 11 | hash_state_t **hashes; 12 | }; 13 | 14 | 15 | struct __bmz8_config_data_t 16 | { 17 | CMPH_HASH hashfuncs[2]; 18 | cmph_uint8 m; //edges (words) count 19 | cmph_uint8 n; //vertex count 20 | graph_t *graph; 21 | cmph_uint8 *g; 22 | hash_state_t **hashes; 23 | }; 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /cmph-2.0/src/bmz_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BMZ_STRUCTS_H__ 2 | #define __CMPH_BMZ_STRUCTS_H__ 3 | 4 | #include "hash_state.h" 5 | 6 | struct __bmz_data_t 7 | { 8 | cmph_uint32 m; //edges (words) count 9 | cmph_uint32 n; //vertex count 10 | cmph_uint32 *g; 11 | hash_state_t **hashes; 12 
| }; 13 | 14 | 15 | struct __bmz_config_data_t 16 | { 17 | CMPH_HASH hashfuncs[2]; 18 | cmph_uint32 m; //edges (words) count 19 | cmph_uint32 n; //vertex count 20 | graph_t *graph; 21 | cmph_uint32 *g; 22 | hash_state_t **hashes; 23 | }; 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /cmph-2.0/src/brz.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BRZ_H__ 2 | #define __CMPH_BRZ_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __brz_data_t brz_data_t; 7 | typedef struct __brz_config_data_t brz_config_data_t; 8 | 9 | brz_config_data_t *brz_config_new(void); 10 | void brz_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 11 | void brz_config_set_tmp_dir(cmph_config_t *mph, cmph_uint8 *tmp_dir); 12 | void brz_config_set_mphf_fd(cmph_config_t *mph, FILE *mphf_fd); 13 | void brz_config_set_b(cmph_config_t *mph, cmph_uint32 b); 14 | void brz_config_set_algo(cmph_config_t *mph, CMPH_ALGO algo); 15 | void brz_config_set_memory_availability(cmph_config_t *mph, cmph_uint32 memory_availability); 16 | void brz_config_destroy(cmph_config_t *mph); 17 | cmph_t *brz_new(cmph_config_t *mph, double c); 18 | 19 | void brz_load(FILE *f, cmph_t *mphf); 20 | int brz_dump(cmph_t *mphf, FILE *f); 21 | void brz_destroy(cmph_t *mphf); 22 | cmph_uint32 brz_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 23 | 24 | /** \fn void brz_pack(cmph_t *mphf, void *packed_mphf); 25 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 26 | * \param mphf pointer to the resulting mphf 27 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. 
The size of packed_mphf must be at least cmph_packed_size() 28 | */ 29 | void brz_pack(cmph_t *mphf, void *packed_mphf); 30 | 31 | /** \fn cmph_uint32 brz_packed_size(cmph_t *mphf); 32 | * \brief Return the amount of space needed to pack mphf. 33 | * \param mphf pointer to a mphf 34 | * \return the size of the packed function or zero for failures 35 | */ 36 | cmph_uint32 brz_packed_size(cmph_t *mphf); 37 | 38 | /** cmph_uint32 brz_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 39 | * \brief Use the packed mphf to do a search. 40 | * \param packed_mphf pointer to the packed mphf 41 | * \param key key to be hashed 42 | * \param keylen key legth in bytes 43 | * \return The mphf value 44 | */ 45 | cmph_uint32 brz_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /cmph-2.0/src/brz_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BRZ_STRUCTS_H__ 2 | #define __CMPH_BRZ_STRUCTS_H__ 3 | 4 | #include "hash_state.h" 5 | 6 | struct __brz_data_t 7 | { 8 | CMPH_ALGO algo; // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8) 9 | cmph_uint32 m; // edges (words) count 10 | double c; // constant c 11 | cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...]. 12 | cmph_uint32 *offset; // offset[i] stores the sum: size[0] + size[1] + ... size[i-1]. 13 | cmph_uint8 **g; // g function. 
14 | cmph_uint32 k; // number of components 15 | hash_state_t **h1; 16 | hash_state_t **h2; 17 | hash_state_t * h0; 18 | }; 19 | 20 | struct __brz_config_data_t 21 | { 22 | CMPH_HASH hashfuncs[3]; 23 | CMPH_ALGO algo; // CMPH algo for generating the MPHFs for the buckets (Just CMPH_FCH and CMPH_BMZ8) 24 | double c; // constant c 25 | cmph_uint32 m; // edges (words) count 26 | cmph_uint8 *size; // size[i] stores the number of edges represented by g[i][...]. 27 | cmph_uint32 *offset; // offset[i] stores the sum: size[0] + size[1] + ... size[i-1]. 28 | cmph_uint8 **g; // g function. 29 | cmph_uint8 b; // parameter b. 30 | cmph_uint32 k; // number of components 31 | hash_state_t **h1; 32 | hash_state_t **h2; 33 | hash_state_t * h0; 34 | cmph_uint32 memory_availability; 35 | cmph_uint8 * tmp_dir; // temporary directory 36 | FILE * mphf_fd; // mphf file 37 | }; 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /cmph-2.0/src/buffer_entry.c: -------------------------------------------------------------------------------- 1 | #include "buffer_entry.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | struct __buffer_entry_t 8 | { 9 | FILE *fd; 10 | cmph_uint8 * buff; 11 | cmph_uint32 capacity, // buffer entry capacity 12 | nbytes, // buffer entry used bytes 13 | pos; // current read position in buffer entry 14 | cmph_uint8 eof; // flag to indicate end of file 15 | }; 16 | 17 | buffer_entry_t * buffer_entry_new(cmph_uint32 capacity) 18 | { 19 | buffer_entry_t *buff_entry = (buffer_entry_t *)malloc(sizeof(buffer_entry_t)); 20 | if (!buff_entry) return NULL; 21 | buff_entry->fd = NULL; 22 | buff_entry->buff = NULL; 23 | buff_entry->capacity = capacity; 24 | buff_entry->nbytes = capacity; 25 | buff_entry->pos = capacity; 26 | buff_entry->eof = 0; 27 | return buff_entry; 28 | } 29 | 30 | void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename) 31 | { 32 | buffer_entry->fd = fopen(filename, "rb"); 33 | } 
34 | 35 | void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity) 36 | { 37 | buffer_entry->capacity = capacity; 38 | } 39 | 40 | 41 | cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry) 42 | { 43 | return buffer_entry->capacity; 44 | } 45 | 46 | static void buffer_entry_load(buffer_entry_t * buffer_entry) 47 | { 48 | free(buffer_entry->buff); 49 | buffer_entry->buff = (cmph_uint8 *)calloc((size_t)buffer_entry->capacity, sizeof(cmph_uint8)); 50 | buffer_entry->nbytes = (cmph_uint32)fread(buffer_entry->buff, (size_t)1, (size_t)buffer_entry->capacity, buffer_entry->fd); 51 | if (buffer_entry->nbytes != buffer_entry->capacity) buffer_entry->eof = 1; 52 | buffer_entry->pos = 0; 53 | } 54 | 55 | cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen) 56 | { 57 | cmph_uint8 * buf = NULL; 58 | cmph_uint32 lacked_bytes = sizeof(*keylen); 59 | cmph_uint32 copied_bytes = 0; 60 | if(buffer_entry->eof && (buffer_entry->pos == buffer_entry->nbytes)) // end 61 | { 62 | free(buf); 63 | return NULL; 64 | } 65 | if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) 66 | { 67 | copied_bytes = buffer_entry->nbytes - buffer_entry->pos; 68 | lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes; 69 | if (copied_bytes != 0) memcpy(keylen, buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes); 70 | buffer_entry_load(buffer_entry); 71 | } 72 | memcpy((cmph_uint8 *)keylen + copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes); // copied_bytes is a byte offset into *keylen: index as bytes, not in cmph_uint32 strides 73 | buffer_entry->pos += lacked_bytes; 74 | 75 | lacked_bytes = *keylen; 76 | copied_bytes = 0; 77 | buf = (cmph_uint8 *)malloc(*keylen + sizeof(*keylen)); 78 | memcpy(buf, keylen, sizeof(*keylen)); 79 | if((buffer_entry->pos + lacked_bytes) > buffer_entry->nbytes) { 80 | copied_bytes = buffer_entry->nbytes - buffer_entry->pos; 81 | lacked_bytes = (buffer_entry->pos + lacked_bytes) - buffer_entry->nbytes; 82 | if (copied_bytes != 0) { 83 | 
memcpy(buf + sizeof(*keylen), buffer_entry->buff + buffer_entry->pos, (size_t)copied_bytes); 84 | } 85 | buffer_entry_load(buffer_entry); 86 | } 87 | memcpy(buf+sizeof(*keylen)+copied_bytes, buffer_entry->buff + buffer_entry->pos, (size_t)lacked_bytes); 88 | buffer_entry->pos += lacked_bytes; 89 | return buf; 90 | } 91 | 92 | void buffer_entry_destroy(buffer_entry_t * buffer_entry) 93 | { 94 | if (buffer_entry->fd) fclose(buffer_entry->fd); // fd is still NULL when the entry was never opened or fopen() failed 95 | buffer_entry->fd = NULL; 96 | free(buffer_entry->buff); 97 | buffer_entry->buff = NULL; 98 | buffer_entry->capacity = 0; 99 | buffer_entry->nbytes = 0; 100 | buffer_entry->pos = 0; 101 | buffer_entry->eof = 0; 102 | free(buffer_entry); 103 | } 104 | -------------------------------------------------------------------------------- /cmph-2.0/src/buffer_entry.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BUFFER_ENTRY_H__ 2 | #define __CMPH_BUFFER_ENTRY_H__ 3 | 4 | #include "cmph_types.h" 5 | #include 6 | typedef struct __buffer_entry_t buffer_entry_t; 7 | 8 | buffer_entry_t * buffer_entry_new(cmph_uint32 capacity); 9 | void buffer_entry_set_capacity(buffer_entry_t * buffer_entry, cmph_uint32 capacity); 10 | cmph_uint32 buffer_entry_get_capacity(buffer_entry_t * buffer_entry); 11 | void buffer_entry_open(buffer_entry_t * buffer_entry, char * filename); 12 | cmph_uint8 * buffer_entry_read_key(buffer_entry_t * buffer_entry, cmph_uint32 * keylen); 13 | void buffer_entry_destroy(buffer_entry_t * buffer_entry); 14 | #endif 15 | -------------------------------------------------------------------------------- /cmph-2.0/src/buffer_manager.c: -------------------------------------------------------------------------------- 1 | #include "buffer_manager.h" 2 | #include "buffer_entry.h" 3 | #include 4 | #include 5 | #include 6 | struct __buffer_manager_t 7 | { 8 | cmph_uint32 memory_avail; // memory available 9 | buffer_entry_t ** buffer_entries; // buffer entries to be managed 10 | cmph_uint32 nentries; // 
number of entries to be managed 11 | cmph_uint32 *memory_avail_list; // memory available list 12 | int pos_avail_list; // current position in memory available list 13 | }; 14 | 15 | buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries) 16 | { 17 | cmph_uint32 memory_avail_entry, i; 18 | buffer_manager_t *buff_manager = (buffer_manager_t *)malloc(sizeof(buffer_manager_t)); 19 | if (!buff_manager) return NULL; 20 | buff_manager->memory_avail = memory_avail; 21 | buff_manager->buffer_entries = (buffer_entry_t **)calloc((size_t)nentries, sizeof(buffer_entry_t *)); 22 | buff_manager->memory_avail_list = (cmph_uint32 *)calloc((size_t)nentries, sizeof(cmph_uint32)); 23 | buff_manager->pos_avail_list = -1; 24 | buff_manager->nentries = nentries; 25 | memory_avail_entry = buff_manager->memory_avail/buff_manager->nentries + 1; 26 | for(i = 0; i < buff_manager->nentries; i++) 27 | { 28 | buff_manager->buffer_entries[i] = buffer_entry_new(memory_avail_entry); 29 | } 30 | return buff_manager; 31 | } 32 | 33 | void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename) 34 | { 35 | buffer_entry_open(buffer_manager->buffer_entries[index], filename); 36 | } 37 | 38 | cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen) 39 | { 40 | cmph_uint8 * key = NULL; 41 | if (buffer_manager->pos_avail_list >= 0 ) // recovering memory 42 | { 43 | cmph_uint32 new_capacity = buffer_entry_get_capacity(buffer_manager->buffer_entries[index]) + buffer_manager->memory_avail_list[(buffer_manager->pos_avail_list)--]; 44 | buffer_entry_set_capacity(buffer_manager->buffer_entries[index], new_capacity); 45 | } 46 | key = buffer_entry_read_key(buffer_manager->buffer_entries[index], keylen); 47 | if (key == NULL) // storing memory to be recovered 48 | { 49 | buffer_manager->memory_avail_list[++(buffer_manager->pos_avail_list)] = 
buffer_entry_get_capacity(buffer_manager->buffer_entries[index]); 50 | } 51 | return key; 52 | } 53 | 54 | void buffer_manager_destroy(buffer_manager_t * buffer_manager) 55 | { 56 | cmph_uint32 i; 57 | for(i = 0; i < buffer_manager->nentries; i++) 58 | { 59 | buffer_entry_destroy(buffer_manager->buffer_entries[i]); 60 | } 61 | free(buffer_manager->memory_avail_list); 62 | free(buffer_manager->buffer_entries); 63 | free(buffer_manager); 64 | } 65 | -------------------------------------------------------------------------------- /cmph-2.0/src/buffer_manager.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BUFFER_MANAGE_H__ 2 | #define __CMPH_BUFFER_MANAGE_H__ 3 | 4 | #include "cmph_types.h" 5 | #include 6 | typedef struct __buffer_manager_t buffer_manager_t; 7 | 8 | buffer_manager_t * buffer_manager_new(cmph_uint32 memory_avail, cmph_uint32 nentries); 9 | void buffer_manager_open(buffer_manager_t * buffer_manager, cmph_uint32 index, char * filename); 10 | cmph_uint8 * buffer_manager_read_key(buffer_manager_t * buffer_manager, cmph_uint32 index, cmph_uint32 * keylen); 11 | void buffer_manager_destroy(buffer_manager_t * buffer_manager); 12 | #endif 13 | -------------------------------------------------------------------------------- /cmph-2.0/src/chd.h: -------------------------------------------------------------------------------- 1 | #ifndef _CMPH_CHD_H__ 2 | #define _CMPH_CHD_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __chd_data_t chd_data_t; 7 | typedef struct __chd_config_data_t chd_config_data_t; 8 | 9 | /* Config API */ 10 | chd_config_data_t *chd_config_new(cmph_config_t * mph); 11 | void chd_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 12 | 13 | /** \fn void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin); 14 | * \brief Allows to set the number of keys per bin. 
15 | * \param mph pointer to the configuration structure 16 | * \param keys_per_bin value for the number of keys per bin 17 | */ 18 | void chd_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin); 19 | 20 | /** \fn void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket); 21 | * \brief Allows to set the number of keys per bucket. 22 | * \param mph pointer to the configuration structure 23 | * \param keys_per_bucket value for the number of keys per bucket 24 | */ 25 | void chd_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket); 26 | void chd_config_destroy(cmph_config_t *mph); 27 | 28 | 29 | /* Chd algorithm API */ 30 | cmph_t *chd_new(cmph_config_t *mph, double c); 31 | void chd_load(FILE *fd, cmph_t *mphf); 32 | int chd_dump(cmph_t *mphf, FILE *fd); 33 | void chd_destroy(cmph_t *mphf); 34 | cmph_uint32 chd_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 35 | 36 | /** \fn void chd_pack(cmph_t *mphf, void *packed_mphf); 37 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 38 | * \param mphf pointer to the resulting mphf 39 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 40 | */ 41 | void chd_pack(cmph_t *mphf, void *packed_mphf); 42 | 43 | /** \fn cmph_uint32 chd_packed_size(cmph_t *mphf); 44 | * \brief Return the amount of space needed to pack mphf. 45 | * \param mphf pointer to a mphf 46 | * \return the size of the packed function or zero for failures 47 | */ 48 | cmph_uint32 chd_packed_size(cmph_t *mphf); 49 | 50 | /** cmph_uint32 chd_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 51 | * \brief Use the packed mphf to do a search. 
52 | * \param packed_mphf pointer to the packed mphf 53 | * \param key key to be hashed 54 | * \param keylen key legth in bytes 55 | * \return The mphf value 56 | */ 57 | cmph_uint32 chd_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /cmph-2.0/src/chd_ph.h: -------------------------------------------------------------------------------- 1 | #ifndef _CMPH_CHD_PH_H__ 2 | #define _CMPH_CHD_PH_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __chd_ph_data_t chd_ph_data_t; 7 | typedef struct __chd_ph_config_data_t chd_ph_config_data_t; 8 | 9 | /* Config API */ 10 | chd_ph_config_data_t *chd_ph_config_new(void); 11 | void chd_ph_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 12 | 13 | /** \fn void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin); 14 | * \brief Allows to set the number of keys per bin. 15 | * \param mph pointer to the configuration structure 16 | * \param keys_per_bin value for the number of keys per bin 17 | */ 18 | void chd_ph_config_set_keys_per_bin(cmph_config_t *mph, cmph_uint32 keys_per_bin); 19 | 20 | /** \fn void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket); 21 | * \brief Allows to set the number of keys per bucket. 
22 | * \param mph pointer to the configuration structure 23 | * \param keys_per_bucket value for the number of keys per bucket 24 | */ 25 | void chd_ph_config_set_b(cmph_config_t *mph, cmph_uint32 keys_per_bucket); 26 | void chd_ph_config_destroy(cmph_config_t *mph); 27 | 28 | 29 | /* Chd algorithm API */ 30 | cmph_t *chd_ph_new(cmph_config_t *mph, double c); 31 | void chd_ph_load(FILE *fd, cmph_t *mphf); 32 | int chd_ph_dump(cmph_t *mphf, FILE *fd); 33 | void chd_ph_destroy(cmph_t *mphf); 34 | cmph_uint32 chd_ph_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 35 | 36 | /** \fn void chd_ph_pack(cmph_t *mphf, void *packed_mphf); 37 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 38 | * \param mphf pointer to the resulting mphf 39 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 40 | */ 41 | void chd_ph_pack(cmph_t *mphf, void *packed_mphf); 42 | 43 | /** \fn cmph_uint32 chd_ph_packed_size(cmph_t *mphf); 44 | * \brief Return the amount of space needed to pack mphf. 45 | * \param mphf pointer to a mphf 46 | * \return the size of the packed function or zero for failures 47 | */ 48 | cmph_uint32 chd_ph_packed_size(cmph_t *mphf); 49 | 50 | /** cmph_uint32 chd_ph_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 51 | * \brief Use the packed mphf to do a search. 
52 | * \param packed_mphf pointer to the packed mphf 53 | * \param key key to be hashed 54 | * \param keylen key legth in bytes 55 | * \return The mphf value 56 | */ 57 | cmph_uint32 chd_ph_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 58 | 59 | #endif 60 | -------------------------------------------------------------------------------- /cmph-2.0/src/chd_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_CHD_STRUCTS_H__ 2 | #define __CMPH_CHD_STRUCTS_H__ 3 | 4 | #include "chd_structs_ph.h" 5 | #include "chd_ph.h" 6 | #include "compressed_rank.h" 7 | 8 | struct __chd_data_t 9 | { 10 | cmph_uint32 packed_cr_size; 11 | cmph_uint8 * packed_cr; // packed compressed rank structure to control the number of zeros in a bit vector 12 | 13 | cmph_uint32 packed_chd_phf_size; 14 | cmph_uint8 * packed_chd_phf; 15 | }; 16 | 17 | struct __chd_config_data_t 18 | { 19 | cmph_config_t *chd_ph; // chd_ph algorithm must be used here 20 | }; 21 | #endif 22 | -------------------------------------------------------------------------------- /cmph-2.0/src/chd_structs_ph.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_CHD_PH_STRUCTS_H__ 2 | #define __CMPH_CHD_PH_STRUCTS_H__ 3 | 4 | #include "hash_state.h" 5 | #include "compressed_seq.h" 6 | 7 | struct __chd_ph_data_t 8 | { 9 | compressed_seq_t * cs; // compressed displacement values 10 | cmph_uint32 nbuckets; // number of buckets 11 | cmph_uint32 n; // number of bins 12 | hash_state_t *hl; // linear hash function 13 | }; 14 | 15 | struct __chd_ph_config_data_t 16 | { 17 | CMPH_HASH hashfunc; // linear hash function to be used 18 | compressed_seq_t * cs; // compressed displacement values 19 | cmph_uint32 nbuckets; // number of buckets 20 | cmph_uint32 n; // number of bins 21 | hash_state_t *hl; // linear hash function 22 | 23 | cmph_uint32 m; // number of keys 24 | cmph_uint8 use_h; // flag to indicate 
the of use of a heuristic (use_h = 1) 25 | cmph_uint32 keys_per_bin;//maximum number of keys per bin 26 | cmph_uint32 keys_per_bucket; // average number of keys per bucket 27 | cmph_uint8 *occup_table; // table that indicates occupied positions 28 | }; 29 | #endif 30 | -------------------------------------------------------------------------------- /cmph-2.0/src/chm.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_CHM_H__ 2 | #define __CMPH_CHM_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __chm_data_t chm_data_t; 7 | typedef struct __chm_config_data_t chm_config_data_t; 8 | 9 | chm_config_data_t *chm_config_new(void); 10 | void chm_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 11 | void chm_config_destroy(cmph_config_t *mph); 12 | cmph_t *chm_new(cmph_config_t *mph, double c); 13 | 14 | void chm_load(FILE *f, cmph_t *mphf); 15 | int chm_dump(cmph_t *mphf, FILE *f); 16 | void chm_destroy(cmph_t *mphf); 17 | cmph_uint32 chm_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 18 | 19 | /** \fn void chm_pack(cmph_t *mphf, void *packed_mphf); 20 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 21 | * \param mphf pointer to the resulting mphf 22 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 23 | */ 24 | void chm_pack(cmph_t *mphf, void *packed_mphf); 25 | 26 | /** \fn cmph_uint32 chm_packed_size(cmph_t *mphf); 27 | * \brief Return the amount of space needed to pack mphf. 28 | * \param mphf pointer to a mphf 29 | * \return the size of the packed function or zero for failures 30 | */ 31 | cmph_uint32 chm_packed_size(cmph_t *mphf); 32 | 33 | /** cmph_uint32 chm_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 34 | * \brief Use the packed mphf to do a search. 
35 | * \param packed_mphf pointer to the packed mphf 36 | * \param key key to be hashed 37 | * \param keylen key legth in bytes 38 | * \return The mphf value 39 | */ 40 | cmph_uint32 chm_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /cmph-2.0/src/chm_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_CHM_STRUCTS_H__ 2 | #define __CMPH_CHM_STRUCTS_H__ 3 | 4 | #include "hash_state.h" 5 | 6 | struct __chm_data_t 7 | { 8 | cmph_uint32 m; //edges (words) count 9 | cmph_uint32 n; //vertex count 10 | cmph_uint32 *g; 11 | hash_state_t **hashes; 12 | }; 13 | 14 | struct __chm_config_data_t 15 | { 16 | CMPH_HASH hashfuncs[2]; 17 | cmph_uint32 m; //edges (words) count 18 | cmph_uint32 n; //vertex count 19 | graph_t *graph; 20 | cmph_uint32 *g; 21 | hash_state_t **hashes; 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /cmph-2.0/src/cmph_benchmark.c: -------------------------------------------------------------------------------- 1 | // A simple benchmark tool around getrusage 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "cmph_benchmark.h" 10 | 11 | typedef struct { 12 | const char* name; 13 | void (*func)(int); 14 | int iters; 15 | struct rusage begin; 16 | struct rusage end; 17 | } benchmark_t; 18 | 19 | static benchmark_t* global_benchmarks = NULL; 20 | 21 | /* Subtract the `struct timeval' values X and Y, 22 | storing the result in RESULT. 23 | Return 1 if the difference is negative, otherwise 0. */ 24 | 25 | int timeval_subtract ( 26 | struct timeval *result, struct timeval *x, struct timeval* y) { 27 | /* Perform the carry for the later subtraction by updating y. 
*/ 28 | if (x->tv_usec < y->tv_usec) { 29 | int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1; 30 | y->tv_usec -= 1000000 * nsec; 31 | y->tv_sec += nsec; 32 | } 33 | if (x->tv_usec - y->tv_usec > 1000000) { 34 | int nsec = (x->tv_usec - y->tv_usec) / 1000000; 35 | y->tv_usec += 1000000 * nsec; 36 | y->tv_sec -= nsec; 37 | } 38 | 39 | /* Compute the time remaining to wait. 40 | tv_usec is certainly positive. */ 41 | result->tv_sec = x->tv_sec - y->tv_sec; 42 | result->tv_usec = x->tv_usec - y->tv_usec; 43 | 44 | /* Return 1 if result is negative. */ 45 | return x->tv_sec < y->tv_sec; 46 | } 47 | 48 | benchmark_t* find_benchmark(const char* name) { 49 | benchmark_t* benchmark = global_benchmarks; 50 | while (benchmark && benchmark->name != NULL) { 51 | if (strcmp(benchmark->name, name) == 0) break; 52 | ++benchmark; 53 | } 54 | if (!benchmark || !benchmark->name) return NULL; 55 | return benchmark; 56 | } 57 | 58 | int global_benchmarks_length() { 59 | benchmark_t* benchmark = global_benchmarks; 60 | int length = 0; 61 | if (benchmark == NULL) return 0; 62 | while (benchmark->name != NULL) ++length, ++benchmark; 63 | return length; 64 | } 65 | 66 | void bm_register(const char* name, void (*func)(int), int iters) { 67 | benchmark_t benchmark; 68 | int length = global_benchmarks_length(); 69 | benchmark.name = name; 70 | benchmark.func = func; 71 | benchmark.iters = iters; 72 | assert(!find_benchmark(name)); 73 | global_benchmarks = (benchmark_t *)realloc( 74 | global_benchmarks, (length + 2)*sizeof(benchmark_t)); 75 | global_benchmarks[length] = benchmark; 76 | memset(&benchmark, 0, sizeof(benchmark_t)); // pivot 77 | global_benchmarks[length + 1] = benchmark; 78 | } 79 | 80 | void bm_start(const char* name) { 81 | benchmark_t* benchmark; 82 | struct rusage rs; 83 | 84 | benchmark = find_benchmark(name); 85 | assert(benchmark); 86 | int ret = getrusage(RUSAGE_SELF, &rs); 87 | if (ret != 0) { 88 | perror("rusage failed"); 89 | exit(-1); 90 | } 91 | benchmark->begin = 
rs; 92 | (*benchmark->func)(benchmark->iters); 93 | } 94 | 95 | void bm_end(const char* name) { 96 | benchmark_t* benchmark; 97 | struct rusage rs; 98 | 99 | int ret = getrusage(RUSAGE_SELF, &rs); 100 | if (ret != 0) { 101 | perror("rusage failed"); 102 | exit(-1); 103 | } 104 | 105 | benchmark = find_benchmark(name); 106 | benchmark->end = rs; 107 | 108 | struct timeval utime; 109 | timeval_subtract(&utime, &benchmark->end.ru_utime, &benchmark->begin.ru_utime); 110 | struct timeval stime; 111 | timeval_subtract(&stime, &benchmark->end.ru_stime, &benchmark->begin.ru_stime); 112 | 113 | printf("Benchmark: %s\n", benchmark->name); 114 | printf("User time used : %ld.%06ld\n", 115 | utime.tv_sec, (long int)utime.tv_usec); 116 | printf("System time used: %ld.%06ld\n", 117 | stime.tv_sec, (long int)stime.tv_usec); 118 | printf("\n"); 119 | } 120 | 121 | void run_benchmarks(int argc, char** argv) { 122 | benchmark_t* benchmark = global_benchmarks; 123 | while (benchmark && benchmark->name != NULL) { 124 | bm_start(benchmark->name); 125 | bm_end(benchmark->name); 126 | ++benchmark; 127 | } 128 | } 129 | 130 | -------------------------------------------------------------------------------- /cmph-2.0/src/cmph_benchmark.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_BENCHMARK_H__ 2 | #define __CMPH_BENCHMARK_H__ 3 | 4 | #include 5 | #include 6 | 7 | #ifdef __cplusplus 8 | extern "C" 9 | { 10 | #endif 11 | 12 | #define BM_REGISTER(func, iters) bm_register(#func, func, iters) 13 | void bm_register(const char* name, void (*func)(int), int iters); 14 | void run_benchmarks(int argc, char** argv); 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #endif // __CMPH_BENCHMARK_H__ 21 | -------------------------------------------------------------------------------- /cmph-2.0/src/cmph_structs.c: -------------------------------------------------------------------------------- 1 | #include "cmph_structs.h" 2 | 3 | #include 4 
| 5 | //#define DEBUG 6 | #include "debug.h" 7 | 8 | cmph_config_t *__config_new(cmph_io_adapter_t *key_source) 9 | { 10 | cmph_config_t *mph = (cmph_config_t *)malloc(sizeof(cmph_config_t)); 11 | if (mph == NULL) return NULL; 12 | memset(mph, 0, sizeof(cmph_config_t)); 13 | mph->key_source = key_source; 14 | mph->verbosity = 0; 15 | mph->data = NULL; 16 | mph->c = 0; 17 | return mph; 18 | } 19 | 20 | void __config_destroy(cmph_config_t *mph) 21 | { 22 | free(mph); 23 | } 24 | 25 | void __cmph_dump(cmph_t *mphf, FILE *fd) 26 | { 27 | register size_t nbytes; 28 | nbytes = fwrite(cmph_names[mphf->algo], (size_t)(strlen(cmph_names[mphf->algo]) + 1), (size_t)1, fd); 29 | nbytes = fwrite(&(mphf->size), sizeof(mphf->size), (size_t)1, fd); 30 | } 31 | cmph_t *__cmph_load(FILE *f) 32 | { 33 | cmph_t *mphf = NULL; 34 | cmph_uint32 i; 35 | char algo_name[BUFSIZ]; 36 | char *ptr = algo_name; 37 | CMPH_ALGO algo = CMPH_COUNT; 38 | register size_t nbytes; 39 | 40 | DEBUGP("Loading mphf\n"); 41 | while(1) 42 | { 43 | size_t c = fread(ptr, (size_t)1, (size_t)1, f); 44 | if (c != 1) return NULL; 45 | if (*ptr == 0) break; 46 | ++ptr; 47 | } 48 | for(i = 0; i < CMPH_COUNT; ++i) 49 | { 50 | if (strcmp(algo_name, cmph_names[i]) == 0) 51 | { 52 | algo = (CMPH_ALGO)(i); 53 | } 54 | } 55 | if (algo == CMPH_COUNT) 56 | { 57 | DEBUGP("Algorithm %s not found\n", algo_name); 58 | return NULL; 59 | } 60 | mphf = (cmph_t *)malloc(sizeof(cmph_t)); 61 | mphf->algo = algo; 62 | nbytes = fread(&(mphf->size), sizeof(mphf->size), (size_t)1, f); 63 | mphf->data = NULL; 64 | DEBUGP("Algorithm is %s and mphf is sized %u\n", cmph_names[algo], mphf->size); 65 | 66 | return mphf; 67 | } 68 | -------------------------------------------------------------------------------- /cmph-2.0/src/cmph_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_STRUCTS_H__ 2 | #define __CMPH_STRUCTS_H__ 3 | 4 | #include "cmph.h" 5 | 6 | /** Hash generation algorithm 
data 7 | */ 8 | struct __config_t 9 | { 10 | CMPH_ALGO algo; 11 | cmph_io_adapter_t *key_source; 12 | cmph_uint32 verbosity; 13 | double c; 14 | void *data; // algorithm dependent data 15 | }; 16 | 17 | /** Hash querying algorithm data 18 | */ 19 | struct __cmph_t 20 | { 21 | CMPH_ALGO algo; 22 | cmph_uint32 size; 23 | cmph_io_adapter_t *key_source; 24 | void *data; // algorithm dependent data 25 | }; 26 | 27 | cmph_config_t *__config_new(cmph_io_adapter_t *key_source); 28 | void __config_destroy(cmph_config_t*); 29 | void __cmph_dump(cmph_t *mphf, FILE *); 30 | cmph_t *__cmph_load(FILE *f); 31 | 32 | 33 | #endif 34 | -------------------------------------------------------------------------------- /cmph-2.0/src/cmph_time.h: -------------------------------------------------------------------------------- 1 | #ifdef ELAPSED_TIME_IN_SECONDS 2 | #undef ELAPSED_TIME_IN_SECONDS 3 | #endif 4 | 5 | #ifdef ELAPSED_TIME_IN_uSECONDS 6 | #undef ELAPSED_TIME_IN_uSECONDS 7 | #endif 8 | 9 | #ifdef WIN32 10 | // include headers to use gettimeofday 11 | #else 12 | #ifdef __GNUC__ 13 | #include 14 | #include 15 | #endif 16 | #endif 17 | 18 | #ifdef __GNUC__ 19 | #ifndef __CMPH_TIME_H__ 20 | #define __CMPH_TIME_H__ 21 | static inline void elapsed_time_in_seconds(double * elapsed_time) 22 | { 23 | struct timeval e_time; 24 | if (gettimeofday(&e_time, NULL) < 0) { 25 | return; 26 | } 27 | *elapsed_time = (double)e_time.tv_sec + ((double)e_time.tv_usec/1000000.0); 28 | } 29 | static inline void dummy_elapsed_time_in_seconds() 30 | { 31 | } 32 | static inline void elapsed_time_in_useconds(cmph_uint64 * elapsed_time) 33 | { 34 | struct timeval e_time; 35 | if (gettimeofday(&e_time, NULL) < 0) { 36 | return; 37 | } 38 | *elapsed_time = (cmph_uint64)(e_time.tv_sec*1000000 + e_time.tv_usec); 39 | } 40 | static inline void dummy_elapsed_time_in_useconds() 41 | { 42 | } 43 | #endif 44 | #endif 45 | 46 | #ifdef CMPH_TIMING 47 | #ifdef __GNUC__ 48 | #define ELAPSED_TIME_IN_SECONDS 
elapsed_time_in_seconds 49 | #define ELAPSED_TIME_IN_uSECONDS elapsed_time_in_useconds 50 | #else 51 | #define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds 52 | #define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds 53 | #endif 54 | #else 55 | #ifdef __GNUC__ 56 | #define ELAPSED_TIME_IN_SECONDS 57 | #define ELAPSED_TIME_IN_uSECONDS 58 | #else 59 | #define ELAPSED_TIME_IN_SECONDS dummy_elapsed_time_in_seconds 60 | #define ELAPSED_TIME_IN_uSECONDS dummy_elapsed_time_in_useconds 61 | #endif 62 | #endif 63 | -------------------------------------------------------------------------------- /cmph-2.0/src/cmph_types.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_TYPES_H__ 2 | #define __CMPH_TYPES_H__ 3 | 4 | typedef char cmph_int8; 5 | typedef unsigned char cmph_uint8; 6 | 7 | typedef short cmph_int16; 8 | typedef unsigned short cmph_uint16; 9 | 10 | typedef int cmph_int32; 11 | typedef unsigned int cmph_uint32; 12 | 13 | #if defined(__ia64) || defined(__x86_64__) 14 | /** \typedef long cmph_int64; 15 | * \brief 64-bit integer for a 64-bit achitecture. 16 | */ 17 | typedef long cmph_int64; 18 | 19 | /** \typedef unsigned long cmph_uint64; 20 | * \brief Unsigned 64-bit integer for a 64-bit achitecture. 21 | */ 22 | typedef unsigned long cmph_uint64; 23 | #else 24 | /** \typedef long long cmph_int64; 25 | * \brief 64-bit integer for a 32-bit achitecture. 26 | */ 27 | typedef long long cmph_int64; 28 | 29 | /** \typedef unsigned long long cmph_uint64; 30 | * \brief Unsigned 64-bit integer for a 32-bit achitecture. 
31 | */ 32 | typedef unsigned long long cmph_uint64; 33 | #endif 34 | 35 | typedef enum { CMPH_HASH_JENKINS, CMPH_HASH_COUNT } CMPH_HASH; 36 | extern const char *cmph_hash_names[]; 37 | typedef enum { CMPH_BMZ, CMPH_BMZ8, CMPH_CHM, CMPH_BRZ, CMPH_FCH, 38 | CMPH_BDZ, CMPH_BDZ_PH, 39 | CMPH_CHD_PH, CMPH_CHD, CMPH_COUNT } CMPH_ALGO; 40 | extern const char *cmph_names[]; 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /cmph-2.0/src/compressed_rank.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_COMPRESSED_RANK_H__ 2 | #define __CMPH_COMPRESSED_RANK_H__ 3 | 4 | #include "select.h" 5 | 6 | struct _compressed_rank_t 7 | { 8 | cmph_uint32 max_val; 9 | cmph_uint32 n; // number of values stored in vals_rems 10 | // The length in bits of each value is decomposed into two compnents: the lg(n) MSBs are stored in rank_select data structure 11 | // the remaining LSBs are stored in a table of n cells, each one of rem_r bits. 12 | cmph_uint32 rem_r; 13 | select_t sel; 14 | cmph_uint32 * vals_rems; 15 | }; 16 | 17 | typedef struct _compressed_rank_t compressed_rank_t; 18 | 19 | void compressed_rank_init(compressed_rank_t * cr); 20 | 21 | void compressed_rank_destroy(compressed_rank_t * cr); 22 | 23 | void compressed_rank_generate(compressed_rank_t * cr, cmph_uint32 * vals_table, cmph_uint32 n); 24 | 25 | cmph_uint32 compressed_rank_query(compressed_rank_t * cr, cmph_uint32 idx); 26 | 27 | cmph_uint32 compressed_rank_get_space_usage(compressed_rank_t * cr); 28 | 29 | void compressed_rank_dump(compressed_rank_t * cr, char **buf, cmph_uint32 *buflen); 30 | 31 | void compressed_rank_load(compressed_rank_t * cr, const char *buf, cmph_uint32 buflen); 32 | 33 | 34 | /** \fn void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed); 35 | * \brief Support the ability to pack a compressed_rank structure into a preallocated contiguous memory space pointed by cr_packed. 
36 | * \param cr points to the compressed_rank structure 37 | * \param cr_packed pointer to the contiguous memory area used to store the compressed_rank structure. The size of cr_packed must be at least @see compressed_rank_packed_size 38 | */ 39 | void compressed_rank_pack(compressed_rank_t *cr, void *cr_packed); 40 | 41 | /** \fn cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr); 42 | * \brief Return the amount of space needed to pack a compressed_rank structure. 43 | * \return the size of the packed compressed_rank structure or zero for failures 44 | */ 45 | cmph_uint32 compressed_rank_packed_size(compressed_rank_t *cr); 46 | 47 | 48 | /** \fn cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx); 49 | * \param cr_packed is a pointer to a contiguous memory area 50 | * \param idx is an index to compute the rank 51 | * \return an integer that represents the compressed_rank value. 52 | */ 53 | cmph_uint32 compressed_rank_query_packed(void * cr_packed, cmph_uint32 idx); 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /cmph-2.0/src/compressed_seq.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_COMPRESSED_SEQ_H__ 2 | #define __CMPH_COMPRESSED_SEQ_H__ 3 | 4 | #include"select.h" 5 | 6 | struct _compressed_seq_t 7 | { 8 | cmph_uint32 n; // number of values stored in store_table 9 | // The length in bits of each value is decomposed into two compnents: the lg(n) MSBs are stored in rank_select data structure 10 | // the remaining LSBs are stored in a table of n cells, each one of rem_r bits. 
11 | cmph_uint32 rem_r; 12 | cmph_uint32 total_length; // total length in bits of stored_table 13 | select_t sel; 14 | cmph_uint32 * length_rems; 15 | cmph_uint32 * store_table; 16 | }; 17 | 18 | typedef struct _compressed_seq_t compressed_seq_t; 19 | 20 | /** \fn void compressed_seq_init(compressed_seq_t * cs); 21 | * \brief Initialize a compressed sequence structure. 22 | * \param cs points to the compressed sequence structure to be initialized 23 | */ 24 | void compressed_seq_init(compressed_seq_t * cs); 25 | 26 | /** \fn void compressed_seq_destroy(compressed_seq_t * cs); 27 | * \brief Destroy a compressed sequence given as input. 28 | * \param cs points to the compressed sequence structure to be destroyed 29 | */ 30 | void compressed_seq_destroy(compressed_seq_t * cs); 31 | 32 | /** \fn void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n); 33 | * \brief Generate a compressed sequence from an input array with n values. 34 | * \param cs points to the compressed sequence structure 35 | * \param vals_table poiter to the array given as input 36 | * \param n number of values in @see vals_table 37 | */ 38 | void compressed_seq_generate(compressed_seq_t * cs, cmph_uint32 * vals_table, cmph_uint32 n); 39 | 40 | 41 | /** \fn cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx); 42 | * \brief Returns the value stored at index @see idx of the compressed sequence structure. 43 | * \param cs points to the compressed sequence structure 44 | * \param idx index to retrieve the value from 45 | * \return the value stored at index @see idx of the compressed sequence structure 46 | */ 47 | cmph_uint32 compressed_seq_query(compressed_seq_t * cs, cmph_uint32 idx); 48 | 49 | 50 | /** \fn cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs); 51 | * \brief Returns amount of space (in bits) to store the compressed sequence. 
52 | * \param cs points to the compressed sequence structure 53 | * \return the amount of space (in bits) to store @see cs 54 | */ 55 | cmph_uint32 compressed_seq_get_space_usage(compressed_seq_t * cs); 56 | 57 | void compressed_seq_dump(compressed_seq_t * cs, char ** buf, cmph_uint32 * buflen); 58 | 59 | void compressed_seq_load(compressed_seq_t * cs, const char * buf, cmph_uint32 buflen); 60 | 61 | 62 | /** \fn void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed); 63 | * \brief Support the ability to pack a compressed sequence structure into a preallocated contiguous memory space pointed by cs_packed. 64 | * \param cs points to the compressed sequence structure 65 | * \param cs_packed pointer to the contiguous memory area used to store the compressed sequence structure. The size of cs_packed must be at least @see compressed_seq_packed_size 66 | */ 67 | void compressed_seq_pack(compressed_seq_t *cs, void *cs_packed); 68 | 69 | /** \fn cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs); 70 | * \brief Return the amount of space needed to pack a compressed sequence structure. 71 | * \return the size of the packed compressed sequence structure or zero for failures 72 | */ 73 | cmph_uint32 compressed_seq_packed_size(compressed_seq_t *cs); 74 | 75 | 76 | /** \fn cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx); 77 | * \brief Returns the value stored at index @see idx of the packed compressed sequence structure. 
78 | * \param cs_packed is a pointer to a contiguous memory area 79 | * \param idx is the index to retrieve the value from 80 | * \return the value stored at index @see idx of the packed compressed sequence structure 81 | */ 82 | cmph_uint32 compressed_seq_query_packed(void * cs_packed, cmph_uint32 idx); 83 | 84 | #endif 85 | -------------------------------------------------------------------------------- /cmph-2.0/src/debug.h: -------------------------------------------------------------------------------- 1 | #ifdef DEBUGP 2 | #undef DEBUGP 3 | #endif 4 | 5 | #ifdef __cplusplus 6 | #include 7 | #ifdef WIN32 8 | #include 9 | #endif 10 | #else 11 | #include 12 | #ifdef WIN32 13 | #include 14 | #endif 15 | #endif 16 | 17 | #ifndef __GNUC__ 18 | #ifndef __DEBUG_H__ 19 | #define __DEBUG_H__ 20 | #include 21 | static void debugprintf(const char *format, ...) 22 | { 23 | va_list ap; 24 | char *f = NULL; 25 | const char *p="%s:%d "; 26 | size_t plen = strlen(p); 27 | va_start(ap, format); 28 | f = (char *)malloc(plen + strlen(format) + 1); 29 | if (!f) return; 30 | memcpy(f, p, plen); 31 | memcpy(f + plen, format, strlen(format) + 1); 32 | vfprintf(stderr, f, ap); 33 | va_end(ap); 34 | free(f); 35 | } 36 | static void dummyprintf(const char *format, ...) 37 | {} 38 | #endif 39 | #endif 40 | 41 | #ifdef DEBUG 42 | #ifndef __GNUC__ 43 | #define DEBUGP debugprintf 44 | #else 45 | #define DEBUGP(args...) do { fprintf(stderr, "%s:%d ", __FILE__, __LINE__); fprintf(stderr, ## args); } while(0) 46 | #endif 47 | #else 48 | #ifndef __GNUC__ 49 | #define DEBUGP dummyprintf 50 | #else 51 | #define DEBUGP(args...) 
52 | #endif 53 | #endif 54 | -------------------------------------------------------------------------------- /cmph-2.0/src/fch.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_FCH_H__ 2 | #define __CMPH_FCH_H__ 3 | 4 | #include "cmph.h" 5 | 6 | typedef struct __fch_data_t fch_data_t; 7 | typedef struct __fch_config_data_t fch_config_data_t; 8 | 9 | /* Parameters calculation */ 10 | cmph_uint32 fch_calc_b(double c, cmph_uint32 m); 11 | double fch_calc_p1(cmph_uint32 m); 12 | double fch_calc_p2(cmph_uint32 b); 13 | cmph_uint32 mixh10h11h12(cmph_uint32 b, double p1, double p2, cmph_uint32 initial_index); 14 | 15 | fch_config_data_t *fch_config_new(void); 16 | void fch_config_set_hashfuncs(cmph_config_t *mph, CMPH_HASH *hashfuncs); 17 | void fch_config_destroy(cmph_config_t *mph); 18 | cmph_t *fch_new(cmph_config_t *mph, double c); 19 | 20 | void fch_load(FILE *f, cmph_t *mphf); 21 | int fch_dump(cmph_t *mphf, FILE *f); 22 | void fch_destroy(cmph_t *mphf); 23 | cmph_uint32 fch_search(cmph_t *mphf, const char *key, cmph_uint32 keylen); 24 | 25 | /** \fn void fch_pack(cmph_t *mphf, void *packed_mphf); 26 | * \brief Support the ability to pack a perfect hash function into a preallocated contiguous memory space pointed by packed_mphf. 27 | * \param mphf pointer to the resulting mphf 28 | * \param packed_mphf pointer to the contiguous memory area used to store the resulting mphf. The size of packed_mphf must be at least cmph_packed_size() 29 | */ 30 | void fch_pack(cmph_t *mphf, void *packed_mphf); 31 | 32 | /** \fn cmph_uint32 fch_packed_size(cmph_t *mphf); 33 | * \brief Return the amount of space needed to pack mphf. 34 | * \param mphf pointer to a mphf 35 | * \return the size of the packed function or zero for failures 36 | */ 37 | cmph_uint32 fch_packed_size(cmph_t *mphf); 38 | 39 | /** cmph_uint32 fch_search(void *packed_mphf, const char *key, cmph_uint32 keylen); 40 | * \brief Use the packed mphf to do a search. 
41 | * \param packed_mphf pointer to the packed mphf 42 | * \param key key to be hashed 43 | * \param keylen key legth in bytes 44 | * \return The mphf value 45 | */ 46 | cmph_uint32 fch_search_packed(void *packed_mphf, const char *key, cmph_uint32 keylen); 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /cmph-2.0/src/fch_buckets.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_FCH_BUCKETS_H__ 2 | #define __CMPH_FCH_BUCKETS_H__ 3 | 4 | #include "cmph_types.h" 5 | typedef struct __fch_buckets_t fch_buckets_t; 6 | 7 | fch_buckets_t * fch_buckets_new(cmph_uint32 nbuckets); 8 | 9 | cmph_uint8 fch_buckets_is_empty(fch_buckets_t * buckets, cmph_uint32 index); 10 | 11 | void fch_buckets_insert(fch_buckets_t * buckets, cmph_uint32 index, char * key, cmph_uint32 length); 12 | 13 | cmph_uint32 fch_buckets_get_size(fch_buckets_t * buckets, cmph_uint32 index); 14 | 15 | char * fch_buckets_get_key(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key); 16 | 17 | cmph_uint32 fch_buckets_get_keylength(fch_buckets_t * buckets, cmph_uint32 index, cmph_uint32 index_key); 18 | 19 | // returns the size of biggest bucket. 20 | cmph_uint32 fch_buckets_get_max_size(fch_buckets_t * buckets); 21 | 22 | // returns the number of buckets. 
23 | cmph_uint32 fch_buckets_get_nbuckets(fch_buckets_t * buckets); 24 | 25 | cmph_uint32 * fch_buckets_get_indexes_sorted_by_size(fch_buckets_t * buckets); 26 | 27 | void fch_buckets_print(fch_buckets_t * buckets); 28 | 29 | void fch_buckets_destroy(fch_buckets_t * buckets); 30 | #endif 31 | -------------------------------------------------------------------------------- /cmph-2.0/src/fch_structs.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_FCH_STRUCTS_H__ 2 | #define __CMPH_FCH_STRUCTS_H__ 3 | 4 | #include "hash_state.h" 5 | 6 | struct __fch_data_t 7 | { 8 | cmph_uint32 m; // words count 9 | double c; // constant c 10 | cmph_uint32 b; // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Don't need to be stored 11 | double p1; // constant p1 = ceil(0.6*m). Don't need to be stored 12 | double p2; // constant p2 = ceil(0.3*b). Don't need to be stored 13 | cmph_uint32 *g; // g function. 14 | hash_state_t *h1; // h10 function. 15 | hash_state_t *h2; // h20 function. 16 | }; 17 | 18 | struct __fch_config_data_t 19 | { 20 | CMPH_HASH hashfuncs[2]; 21 | cmph_uint32 m; // words count 22 | double c; // constant c 23 | cmph_uint32 b; // parameter b = ceil(c*m/(log(m)/log(2) + 1)). Don't need to be stored 24 | double p1; // constant p1 = ceil(0.6*m). Don't need to be stored 25 | double p2; // constant p2 = ceil(0.3*b). Don't need to be stored 26 | cmph_uint32 *g; // g function. 27 | hash_state_t *h1; // h10 function. 28 | hash_state_t *h2; // h20 function. 
29 | }; 30 | #endif 31 | -------------------------------------------------------------------------------- /cmph-2.0/src/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef _CMPH_GRAPH_H__ 2 | #define _CMPH_GRAPH_H__ 3 | 4 | #include 5 | #include "cmph_types.h" 6 | 7 | #define GRAPH_NO_NEIGHBOR UINT_MAX 8 | 9 | typedef struct __graph_t graph_t; 10 | typedef struct __graph_iterator_t graph_iterator_t; 11 | struct __graph_iterator_t 12 | { 13 | cmph_uint32 vertex; 14 | cmph_uint32 edge; 15 | }; 16 | 17 | 18 | 19 | graph_t *graph_new(cmph_uint32 nnodes, cmph_uint32 nedges); 20 | void graph_destroy(graph_t *graph); 21 | 22 | void graph_add_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2); 23 | void graph_del_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2); 24 | void graph_clear_edges(graph_t *g); 25 | cmph_uint32 graph_edge_id(graph_t *g, cmph_uint32 v1, cmph_uint32 v2); 26 | cmph_uint8 graph_contains_edge(graph_t *g, cmph_uint32 v1, cmph_uint32 v2); 27 | 28 | graph_iterator_t graph_neighbors_it(graph_t *g, cmph_uint32 v); 29 | cmph_uint32 graph_next_neighbor(graph_t *g, graph_iterator_t* it); 30 | 31 | void graph_obtain_critical_nodes(graph_t *g); /* included -- Fabiano*/ 32 | cmph_uint8 graph_node_is_critical(graph_t * g, cmph_uint32 v); /* included -- Fabiano */ 33 | cmph_uint32 graph_ncritical_nodes(graph_t *g); /* included -- Fabiano*/ 34 | cmph_uint32 graph_vertex_id(graph_t *g, cmph_uint32 e, cmph_uint32 id); /* included -- Fabiano*/ 35 | 36 | int graph_is_cyclic(graph_t *g); 37 | 38 | void graph_print(graph_t *); 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /cmph-2.0/src/hash.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_HASH_H__ 2 | #define __CMPH_HASH_H__ 3 | 4 | #include "cmph_types.h" 5 | 6 | typedef union __hash_state_t hash_state_t; 7 | 8 | hash_state_t *hash_state_new(CMPH_HASH, cmph_uint32 
hashsize); 9 | 10 | /** \fn cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen); 11 | * \param state is a pointer to a hash_state_t structure 12 | * \param key is a pointer to a key 13 | * \param keylen is the key length 14 | * \return an integer that represents a hash value of 32 bits. 15 | */ 16 | cmph_uint32 hash(hash_state_t *state, const char *key, cmph_uint32 keylen); 17 | 18 | /** \fn void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes); 19 | * \param state is a pointer to a hash_state_t structure 20 | * \param key is a pointer to a key 21 | * \param keylen is the key length 22 | * \param hashes is a pointer to a memory large enough to fit three 32-bit integers. 23 | */ 24 | void hash_vector(hash_state_t *state, const char *key, cmph_uint32 keylen, cmph_uint32 * hashes); 25 | 26 | void hash_state_dump(hash_state_t *state, char **buf, cmph_uint32 *buflen); 27 | 28 | hash_state_t * hash_state_copy(hash_state_t *src_state); 29 | 30 | hash_state_t *hash_state_load(const char *buf, cmph_uint32 buflen); 31 | 32 | void hash_state_destroy(hash_state_t *state); 33 | 34 | /** \fn void hash_state_pack(hash_state_t *state, void *hash_packed); 35 | * \brief Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. 36 | * \param state points to the hash function 37 | * \param hash_packed pointer to the contiguous memory area used to store the hash function. The size of hash_packed must be at least hash_state_packed_size() 38 | * 39 | * Support the ability to pack a hash function into a preallocated contiguous memory space pointed by hash_packed. 40 | * However, the hash function type must be packed outside. 
41 | */ 42 | void hash_state_pack(hash_state_t *state, void *hash_packed); 43 | 44 | /** \fn cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen); 45 | * \param hash_packed is a pointer to a contiguous memory area 46 | * \param hashfunc is the type of the hash function packed in hash_packed 47 | * \param key is a pointer to a key 48 | * \param keylen is the key length 49 | * \return an integer that represents a hash value of 32 bits. 50 | */ 51 | cmph_uint32 hash_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen); 52 | 53 | /** \fn cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc) 54 | * \brief Return the amount of space needed to pack a hash function. 55 | * \param hashfunc function type 56 | * \return the size of the packed function or zero for failures 57 | */ 58 | cmph_uint32 hash_state_packed_size(CMPH_HASH hashfunc); 59 | 60 | 61 | /** \fn hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); 62 | * \param hash_packed is a pointer to a contiguous memory area 63 | * \param key is a pointer to a key 64 | * \param keylen is the key length 65 | * \param hashes is a pointer to a memory large enough to fit three 32-bit integers. 
66 | */ 67 | void hash_vector_packed(void *hash_packed, CMPH_HASH hashfunc, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); 68 | 69 | 70 | /** \fn CMPH_HASH hash_get_type(hash_state_t *state); 71 | * \param state is a pointer to a hash_state_t structure 72 | * \return the hash function type pointed by state 73 | */ 74 | CMPH_HASH hash_get_type(hash_state_t *state); 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /cmph-2.0/src/hash_state.h: -------------------------------------------------------------------------------- 1 | #ifndef __HASH_STATE_H__ 2 | #define __HASH_STATE_H__ 3 | 4 | #include "hash.h" 5 | #include "jenkins_hash.h" 6 | union __hash_state_t 7 | { 8 | CMPH_HASH hashfunc; 9 | jenkins_state_t jenkins; 10 | }; 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /cmph-2.0/src/jenkins_hash.h: -------------------------------------------------------------------------------- 1 | #ifndef __JEKINS_HASH_H__ 2 | #define __JEKINS_HASH_H__ 3 | 4 | #include "hash.h" 5 | 6 | typedef struct __jenkins_state_t 7 | { 8 | CMPH_HASH hashfunc; 9 | cmph_uint32 seed; 10 | } jenkins_state_t; 11 | 12 | jenkins_state_t *jenkins_state_new(cmph_uint32 size); //size of hash table 13 | 14 | /** \fn cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen); 15 | * \param state is a pointer to a jenkins_state_t structure 16 | * \param key is a pointer to a key 17 | * \param keylen is the key length 18 | * \return an integer that represents a hash value of 32 bits. 
19 | */ 20 | cmph_uint32 jenkins_hash(jenkins_state_t *state, const char *k, cmph_uint32 keylen); 21 | 22 | /** \fn void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); 23 | * \param state is a pointer to a jenkins_state_t structure 24 | * \param key is a pointer to a key 25 | * \param keylen is the key length 26 | * \param hashes is a pointer to a memory large enough to fit three 32-bit integers. 27 | */ 28 | void jenkins_hash_vector_(jenkins_state_t *state, const char *k, cmph_uint32 keylen, cmph_uint32 * hashes); 29 | 30 | void jenkins_state_dump(jenkins_state_t *state, char **buf, cmph_uint32 *buflen); 31 | jenkins_state_t *jenkins_state_copy(jenkins_state_t *src_state); 32 | jenkins_state_t *jenkins_state_load(const char *buf, cmph_uint32 buflen); 33 | void jenkins_state_destroy(jenkins_state_t *state); 34 | 35 | /** \fn void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed); 36 | * \brief Support the ability to pack a jenkins function into a preallocated contiguous memory space pointed by jenkins_packed. 37 | * \param state points to the jenkins function 38 | * \param jenkins_packed pointer to the contiguous memory area used to store the jenkins function. The size of jenkins_packed must be at least jenkins_state_packed_size() 39 | */ 40 | void jenkins_state_pack(jenkins_state_t *state, void *jenkins_packed); 41 | 42 | /** \fn cmph_uint32 jenkins_state_packed_size(); 43 | * \brief Return the amount of space needed to pack a jenkins function. 44 | * \return the size of the packed function or zero for failures 45 | */ 46 | cmph_uint32 jenkins_state_packed_size(void); 47 | 48 | 49 | /** \fn cmph_uint32 jenkins_hash_packed(void *jenkins_packed, const char *k, cmph_uint32 keylen); 50 | * \param jenkins_packed is a pointer to a contiguous memory area 51 | * \param key is a pointer to a key 52 | * \param keylen is the key length 53 | * \return an integer that represents a hash value of 32 bits. 
/*
 * linear_string_map
 *
 * A simple linked-list based, dynamically sized associative map from
 * const char* to void*. Designed to maximize ease of use instead of
 * performance. Should be used in benchmarks and tests only, not to be
 * distributed with the cmph runtime headers.
 *
 * Representation: a singly linked list that always ends in a sentinel
 * ("dummy") node whose `next` is NULL. Every node with a non-NULL `next`
 * holds one entry. Keys and values are stored by pointer and are never
 * copied or freed by the map.
 */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

/* ---- public interface (linear_string_map.h) ---- */

typedef struct __linear_string_map_t lsmap_t;

lsmap_t *lsmap_new(void);
int lsmap_size(lsmap_t *lsmap);
void lsmap_append(lsmap_t *lsmap, const char *key, void *value);
void* lsmap_search(lsmap_t *lsmap, const char *key);
void lsmap_foreach_key(lsmap_t* lsmap, void (*f)(const char*));
void lsmap_foreach_value(lsmap_t* lsmap, void (*f)(void*));
void lsmap_destroy(lsmap_t* lsmap);

/* ---- implementation (linear_string_map.c) ---- */

struct __linear_string_map_t {
  const char *key;
  void *value;
  struct __linear_string_map_t* next;
};

/* Allocates a sentinel node. Returns NULL when memory is exhausted. */
static lsmap_t *lsmap_new_sentinel(void) {
  lsmap_t* node = (lsmap_t*)malloc(sizeof(lsmap_t));
  if (!node) return NULL;
  node->key = "dummy node";
  node->value = NULL;  /* never read, but keep the node fully initialised */
  node->next = NULL;
  return node;
}

/**
 * Creates an empty map.
 * \return the new map, or NULL if allocation fails.
 */
lsmap_t *lsmap_new(void) {
  return lsmap_new_sentinel();
}

/**
 * Counts the entries stored in the map.
 * \param lsmap map returned by lsmap_new()
 * \return number of appended entries (the sentinel is not counted).
 */
int lsmap_size(lsmap_t *lsmap) {
  int size = 0;
  /* The cursor must advance on every step; without that the loop never
   * terminates on a non-empty map. */
  while (lsmap->next != NULL) {
    ++size;
    lsmap = lsmap->next;
  }
  return size;
}

/**
 * Appends a key/value pair at the end of the map.
 * Duplicate keys are not detected; lsmap_search() returns the first match.
 * If a new node cannot be allocated the map is left unchanged.
 * \param lsmap map returned by lsmap_new()
 * \param key   NUL-terminated key; the pointer is stored, not the contents
 * \param value value pointer associated with key
 */
void lsmap_append(lsmap_t *lsmap, const char *key, void *value) {
  /* Allocate first so a failure cannot leave a half-linked list behind. */
  lsmap_t *sentinel = lsmap_new_sentinel();
  if (!sentinel) return;
  while (lsmap->next != NULL) lsmap = lsmap->next;
  /* The old sentinel becomes the entry; the fresh node is the new tail. */
  lsmap->key = key;
  lsmap->value = value;
  lsmap->next = sentinel;
}

/**
 * Looks a key up by string comparison.
 * \return the value of the first entry whose key equals `key`, or NULL when
 *         there is none (indistinguishable from a stored NULL value).
 */
void* lsmap_search(lsmap_t *lsmap, const char *key) {
  while (lsmap->next != NULL) {
    if (strcmp(lsmap->key, key) == 0) {
      return lsmap->value;
    }
    lsmap = lsmap->next;
  }
  return NULL;
}

/** Calls f once per entry, in insertion order, passing the entry's key. */
void lsmap_foreach_key(lsmap_t *lsmap, void (*f)(const char*)) {
  while (lsmap->next != NULL) {
    f(lsmap->key);
    lsmap = lsmap->next;
  }
}

/** Calls f once per entry, in insertion order, passing the entry's value. */
void lsmap_foreach_value(lsmap_t *lsmap, void (*f)(void*)) {
  while (lsmap->next != NULL) {
    f(lsmap->value);
    lsmap = lsmap->next;
  }
}

/**
 * Releases every node of the map, including the sentinel.
 * Keys and values are owned by the caller and are not freed.
 */
void lsmap_destroy(lsmap_t *lsmap) {
  while (lsmap->next != NULL) {
    lsmap_t* freeme = lsmap;
    lsmap = lsmap->next;
    free(freeme);
  }
  free(lsmap);
}
21 | if(a_exp == 1 || a_exp == (n - 1)) 22 | return 1; 23 | for(i = 1; i < s; i++) 24 | { 25 | a_exp = (((cmph_uint64)a_exp) * a_exp) % n; 26 | if(a_exp == (n - 1)) 27 | return 1; 28 | }; 29 | return 0; 30 | }; 31 | 32 | cmph_uint8 check_primality(cmph_uint64 n) 33 | { 34 | cmph_uint64 a, d, s, a_exp_d; 35 | if((n % 2) == 0) 36 | return 0; 37 | if((n % 3) == 0) 38 | return 0; 39 | if((n % 5) == 0) 40 | return 0; 41 | if((n % 7 ) == 0) 42 | return 0; 43 | //we decompoe the number n - 1 into 2^s*d 44 | s = 0; 45 | d = n - 1; 46 | do 47 | { 48 | s++; 49 | d /= 2; 50 | }while((d % 2) == 0); 51 | 52 | a = 2; 53 | a_exp_d = int_pow(a, d, n); 54 | if(check_witness(a_exp_d, n, s) == 0) 55 | return 0; 56 | a = 7; 57 | a_exp_d = int_pow(a, d, n); 58 | if(check_witness(a_exp_d, n, s) == 0) 59 | return 0; 60 | a = 61; 61 | a_exp_d = int_pow(a, d, n); 62 | if(check_witness(a_exp_d, n, s) == 0) 63 | return 0; 64 | return 1; 65 | }; 66 | 67 | 68 | -------------------------------------------------------------------------------- /cmph-2.0/src/miller_rabin.h: -------------------------------------------------------------------------------- 1 | #ifndef _CMPH_MILLER_RABIN_H__ 2 | #define _CMPH_MILLER_RABIN_H__ 3 | #include "cmph_types.h" 4 | cmph_uint8 check_primality(cmph_uint64 n); 5 | #endif 6 | -------------------------------------------------------------------------------- /cmph-2.0/src/select.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_SELECT_H__ 2 | #define __CMPH_SELECT_H__ 3 | 4 | #include "cmph_types.h" 5 | 6 | struct _select_t 7 | { 8 | cmph_uint32 n,m; 9 | cmph_uint32 * bits_vec; 10 | cmph_uint32 * select_table; 11 | }; 12 | 13 | typedef struct _select_t select_t; 14 | 15 | void select_init(select_t * sel); 16 | 17 | void select_destroy(select_t * sel); 18 | 19 | void select_generate(select_t * sel, cmph_uint32 * keys_vec, cmph_uint32 n, cmph_uint32 m); 20 | 21 | cmph_uint32 select_query(select_t * sel, cmph_uint32 
one_idx); 22 | 23 | cmph_uint32 select_next_query(select_t * sel, cmph_uint32 vec_bit_idx); 24 | 25 | cmph_uint32 select_get_space_usage(select_t * sel); 26 | 27 | void select_dump(select_t *sel, char **buf, cmph_uint32 *buflen); 28 | 29 | void select_load(select_t * sel, const char *buf, cmph_uint32 buflen); 30 | 31 | 32 | /** \fn void select_pack(select_t *sel, void *sel_packed); 33 | * \brief Support the ability to pack a select structure into a preallocated contiguous memory space pointed by sel_packed. 34 | * \param sel points to the select structure 35 | * \param sel_packed pointer to the contiguous memory area used to store the select structure. The size of sel_packed must be at least @see select_packed_size 36 | */ 37 | void select_pack(select_t *sel, void *sel_packed); 38 | 39 | /** \fn cmph_uint32 select_packed_size(select_t *sel); 40 | * \brief Return the amount of space needed to pack a select structure. 41 | * \return the size of the packed select structure or zero for failures 42 | */ 43 | cmph_uint32 select_packed_size(select_t *sel); 44 | 45 | 46 | /** \fn cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx); 47 | * \param sel_packed is a pointer to a contiguous memory area 48 | * \param one_idx is the rank for which we want to calculate the inverse function select 49 | * \return an integer that represents the select value of rank idx. 50 | */ 51 | cmph_uint32 select_query_packed(void * sel_packed, cmph_uint32 one_idx); 52 | 53 | 54 | /** \fn cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx); 55 | * \param sel_packed is a pointer to a contiguous memory area 56 | * \param vec_bit_idx is a value prior computed by @see select_query_packed 57 | * \return an integer that represents the next select value greater than @see vec_bit_idx. 
58 | */ 59 | cmph_uint32 select_next_query_packed(void * sel_packed, cmph_uint32 vec_bit_idx); 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /cmph-2.0/src/vqueue.c: -------------------------------------------------------------------------------- 1 | #include "vqueue.h" 2 | #include 3 | #include 4 | #include 5 | struct __vqueue_t 6 | { 7 | cmph_uint32 * values; 8 | cmph_uint32 beg, end, capacity; 9 | }; 10 | 11 | vqueue_t * vqueue_new(cmph_uint32 capacity) 12 | { 13 | size_t capacity_plus_one = capacity + 1; 14 | vqueue_t *q = (vqueue_t *)malloc(sizeof(vqueue_t)); 15 | if (!q) return NULL; 16 | q->values = (cmph_uint32 *)calloc(capacity_plus_one, sizeof(cmph_uint32)); 17 | q->beg = q->end = 0; 18 | q->capacity = (cmph_uint32) capacity_plus_one; 19 | return q; 20 | } 21 | 22 | cmph_uint8 vqueue_is_empty(vqueue_t * q) 23 | { 24 | return (cmph_uint8)(q->beg == q->end); 25 | } 26 | 27 | void vqueue_insert(vqueue_t * q, cmph_uint32 val) 28 | { 29 | assert((q->end + 1)%q->capacity != q->beg); // Is queue full? 30 | q->end = (q->end + 1)%q->capacity; 31 | q->values[q->end] = val; 32 | } 33 | 34 | cmph_uint32 vqueue_remove(vqueue_t * q) 35 | { 36 | assert(!vqueue_is_empty(q)); // Is queue empty? 
37 | q->beg = (q->beg + 1)%q->capacity; 38 | return q->values[q->beg]; 39 | } 40 | 41 | void vqueue_print(vqueue_t * q) 42 | { 43 | cmph_uint32 i; 44 | for (i = q->beg; i != q->end; i = (i + 1)%q->capacity) 45 | fprintf(stderr, "%u\n", q->values[(i + 1)%q->capacity]); 46 | } 47 | 48 | void vqueue_destroy(vqueue_t *q) 49 | { 50 | free(q->values); q->values = NULL; free(q); 51 | } 52 | -------------------------------------------------------------------------------- /cmph-2.0/src/vqueue.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_VQUEUE_H__ 2 | #define __CMPH_VQUEUE_H__ 3 | 4 | #include "cmph_types.h" 5 | typedef struct __vqueue_t vqueue_t; 6 | 7 | vqueue_t * vqueue_new(cmph_uint32 capacity); 8 | 9 | cmph_uint8 vqueue_is_empty(vqueue_t * q); 10 | 11 | void vqueue_insert(vqueue_t * q, cmph_uint32 val); 12 | 13 | cmph_uint32 vqueue_remove(vqueue_t * q); 14 | 15 | void vqueue_print(vqueue_t * q); 16 | 17 | void vqueue_destroy(vqueue_t * q); 18 | #endif 19 | -------------------------------------------------------------------------------- /cmph-2.0/src/vstack.c: -------------------------------------------------------------------------------- 1 | #include "vstack.h" 2 | 3 | #include 4 | #include 5 | 6 | //#define DEBUG 7 | #include "debug.h" 8 | 9 | struct __vstack_t 10 | { 11 | cmph_uint32 pointer; 12 | cmph_uint32 *values; 13 | cmph_uint32 capacity; 14 | }; 15 | 16 | vstack_t *vstack_new(void) 17 | { 18 | vstack_t *stack = (vstack_t *)malloc(sizeof(vstack_t)); 19 | assert(stack); 20 | stack->pointer = 0; 21 | stack->values = NULL; 22 | stack->capacity = 0; 23 | return stack; 24 | } 25 | 26 | void vstack_destroy(vstack_t *stack) 27 | { 28 | assert(stack); 29 | free(stack->values); 30 | free(stack); 31 | } 32 | 33 | void vstack_push(vstack_t *stack, cmph_uint32 val) 34 | { 35 | assert(stack); 36 | vstack_reserve(stack, stack->pointer + 1); 37 | stack->values[stack->pointer] = val; 38 | ++(stack->pointer); 39 | } 40 
| void vstack_pop(vstack_t *stack) 41 | { 42 | assert(stack); 43 | assert(stack->pointer > 0); 44 | --(stack->pointer); 45 | } 46 | 47 | cmph_uint32 vstack_top(vstack_t *stack) 48 | { 49 | assert(stack); 50 | assert(stack->pointer > 0); 51 | return stack->values[(stack->pointer - 1)]; 52 | } 53 | int vstack_empty(vstack_t *stack) 54 | { 55 | assert(stack); 56 | return stack->pointer == 0; 57 | } 58 | cmph_uint32 vstack_size(vstack_t *stack) 59 | { 60 | return stack->pointer; 61 | } 62 | void vstack_reserve(vstack_t *stack, cmph_uint32 size) 63 | { 64 | assert(stack); 65 | if (stack->capacity < size) 66 | { 67 | cmph_uint32 new_capacity = stack->capacity + 1; 68 | DEBUGP("Increasing current capacity %u to %u\n", stack->capacity, size); 69 | while (new_capacity < size) 70 | { 71 | new_capacity *= 2; 72 | } 73 | stack->values = (cmph_uint32 *)realloc(stack->values, sizeof(cmph_uint32)*new_capacity); 74 | assert(stack->values); 75 | stack->capacity = new_capacity; 76 | DEBUGP("Increased\n"); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /cmph-2.0/src/vstack.h: -------------------------------------------------------------------------------- 1 | #ifndef __CMPH_VSTACK_H__ 2 | #define __CMPH_VSTACK_H__ 3 | 4 | #include "cmph_types.h" 5 | typedef struct __vstack_t vstack_t; 6 | 7 | vstack_t *vstack_new(); 8 | void vstack_destroy(vstack_t *stack); 9 | 10 | void vstack_push(vstack_t *stack, cmph_uint32 val); 11 | cmph_uint32 vstack_top(vstack_t *stack); 12 | void vstack_pop(vstack_t *stack); 13 | int vstack_empty(vstack_t *stack); 14 | cmph_uint32 vstack_size(vstack_t *stack); 15 | 16 | void vstack_reserve(vstack_t *stack, cmph_uint32 size); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /cmph-2.0/src/wingetopt.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #ifndef WIN32 6 
| #include 7 | #else 8 | #ifndef _GETOPT_ 9 | #define _GETOPT_ 10 | 11 | #include /* for EOF */ 12 | #include /* for strchr() */ 13 | 14 | char *optarg = NULL; /* pointer to the start of the option argument */ 15 | int optind = 1; /* number of the next argv[] to be evaluated */ 16 | int opterr = 1; /* non-zero if a question mark should be returned */ 17 | 18 | int getopt(int argc, char *argv[], char *opstring); 19 | #endif //_GETOPT_ 20 | #endif //WIN32 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /cmph-2.0/tests/Makefile.am: -------------------------------------------------------------------------------- 1 | TESTS = $(check_PROGRAMS) 2 | check_PROGRAMS = graph_tests select_tests compressed_seq_tests compressed_rank_tests cmph_benchmark_test 3 | noinst_PROGRAMS = packed_mphf_tests mphf_tests 4 | 5 | INCLUDES = -I../src/ 6 | 7 | graph_tests_SOURCES = graph_tests.c 8 | graph_tests_LDADD = ../src/libcmph.la 9 | 10 | packed_mphf_tests_SOURCES = packed_mphf_tests.c 11 | packed_mphf_tests_LDADD = ../src/libcmph.la 12 | 13 | mphf_tests_SOURCES = mphf_tests.c 14 | mphf_tests_LDADD = ../src/libcmph.la 15 | 16 | select_tests_SOURCES = select_tests.c 17 | select_tests_LDADD = ../src/libcmph.la 18 | 19 | compressed_seq_tests_SOURCES = compressed_seq_tests.c 20 | compressed_seq_tests_LDADD = ../src/libcmph.la 21 | 22 | compressed_rank_tests_SOURCES = compressed_rank_tests.c 23 | compressed_rank_tests_LDADD = ../src/libcmph.la 24 | 25 | cmph_benchmark_test_SOURCES = cmph_benchmark_test.c 26 | cmph_benchmark_test_LDADD = ../src/libcmph.la 27 | -------------------------------------------------------------------------------- /cmph-2.0/tests/cmph_benchmark_test.c: -------------------------------------------------------------------------------- 1 | #include // for sleep 2 | #include 3 | 4 | #include "cmph_benchmark.h" 5 | 6 | void bm_sleep(int iters) { 7 | sleep(1); 8 | } 9 | 10 | void 
bm_increment(int iters) { 11 | int i, v = 0; 12 | for (i = 0; i < INT_MAX; ++i) { 13 | v += i; 14 | } 15 | } 16 | 17 | int main(int argc, char** argv) { 18 | BM_REGISTER(bm_sleep, 1); 19 | BM_REGISTER(bm_increment, 1); 20 | run_benchmarks(argc, argv); 21 | return 0; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /cmph-2.0/tests/compressed_rank_tests.c: -------------------------------------------------------------------------------- 1 | #include "../src/compressed_rank.h" 2 | 3 | #define DEBUG 4 | #include "../src/debug.h" 5 | #include 6 | 7 | static inline void print_values(compressed_rank_t * cr, cmph_uint32 idx) 8 | { 9 | register cmph_uint32 index; 10 | 11 | index = compressed_rank_query(cr, idx); 12 | fprintf(stderr, "Index[%u]\t= %u\n", idx, index); 13 | } 14 | 15 | 16 | static inline void print_values_packed(char * cr_packed, cmph_uint32 idx) 17 | { 18 | register cmph_uint32 index; 19 | 20 | index = compressed_rank_query_packed(cr_packed, idx); 21 | fprintf(stderr, "Index[%u]\t= %u\n", idx, index); 22 | } 23 | 24 | /* 25 | n = 20 26 | Indices: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 27 | vector[] = {0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1} 28 | nzeros = 12 29 | zeroIndices[] = {0, 1, 2, 5, 7, 9, 11, 12, 13, 16, 17, 18} 30 | */ 31 | int main(int argc, char **argv) 32 | { 33 | compressed_rank_t cr; 34 | cmph_uint32 i = 0; 35 | cmph_uint32 n = 12; 36 | cmph_uint32 nIndices = 20; 37 | cmph_uint32 keys_vec[] = {0, 1, 2, 5, 7, 9, 11, 12, 13, 16, 17, 18}; 38 | char *buf = NULL; 39 | cmph_uint32 buflen = 0; 40 | char * cr_packed = NULL; 41 | cmph_uint32 cr_pack_size = 0; 42 | 43 | compressed_rank_init(&cr); 44 | compressed_rank_generate(&cr, keys_vec, n); 45 | fprintf(stderr, "Space usage = %u\n", compressed_rank_get_space_usage(&cr)); 46 | for(i = 0; i < nIndices; i++) 47 | { 48 | print_values(&cr, i); 49 | } 50 | 51 | fprintf(stderr, "Dumping compressed rank structure\n"); 52 | 
compressed_rank_dump(&cr, &buf, &buflen); 53 | 54 | compressed_rank_destroy(&cr); 55 | fprintf(stderr, "Loading compressed rank structure\n"); 56 | 57 | compressed_rank_load(&cr, buf, buflen); 58 | for(i = 0; i < nIndices; i++) 59 | { 60 | print_values(&cr, i); 61 | } 62 | free(buf); 63 | 64 | cr_pack_size = compressed_rank_packed_size(&cr); 65 | 66 | cr_packed = (char *) calloc(cr_pack_size, sizeof(char)); 67 | compressed_rank_pack(&cr, cr_packed); 68 | compressed_rank_destroy(&cr); 69 | 70 | fprintf(stderr, "Querying the packed compressed rank structure\n"); 71 | for(i = 0; i < nIndices; i++) 72 | { 73 | print_values_packed(cr_packed, i); 74 | } 75 | 76 | free(cr_packed); 77 | return 0; 78 | } 79 | -------------------------------------------------------------------------------- /cmph-2.0/tests/compressed_seq_tests.c: -------------------------------------------------------------------------------- 1 | #include "../src/compressed_seq.h" 2 | 3 | #define DEBUG 4 | #include "../src/debug.h" 5 | #include 6 | 7 | static inline void print_values(compressed_seq_t * cs, cmph_uint32 idx) 8 | { 9 | register cmph_uint32 index; 10 | 11 | index = compressed_seq_query(cs, idx); 12 | fprintf(stderr, "Index[%u]\t= %u\n", idx, index); 13 | } 14 | 15 | 16 | static inline void print_values_packed(char * cs_packed, cmph_uint32 idx) 17 | { 18 | register cmph_uint32 index; 19 | 20 | index = compressed_seq_query_packed(cs_packed, idx); 21 | fprintf(stderr, "Index[%u]\t= %u\n", idx, index); 22 | } 23 | 24 | int main(int argc, char **argv) 25 | { 26 | compressed_seq_t cs; 27 | cmph_uint32 i = 0; 28 | cmph_uint32 n = 20; 29 | cmph_uint32 keys_vec[] = { 0, 1, 1, 1, 2, 2, 2, 3, 5, 5, 30 | 6, 6, 9, 9, 9, 12, 12, 13, 17, 1077}; 31 | char *buf = NULL; 32 | cmph_uint32 buflen = 0; 33 | char * cs_packed = NULL; 34 | cmph_uint32 cs_pack_size = 0; 35 | 36 | compressed_seq_init(&cs); 37 | compressed_seq_generate(&cs, keys_vec, n); 38 | fprintf(stderr, "Space usage = %u\n", 
compressed_seq_get_space_usage(&cs)); 39 | for(i = 0; i < n; i++) 40 | { 41 | print_values(&cs, i); 42 | } 43 | 44 | fprintf(stderr, "Dumping compressed seq structure\n"); 45 | compressed_seq_dump(&cs, &buf, &buflen); 46 | 47 | compressed_seq_destroy(&cs); 48 | fprintf(stderr, "Loading compressed seq structure\n"); 49 | 50 | compressed_seq_load(&cs, buf, buflen); 51 | for(i = 0; i < n; i++) 52 | { 53 | print_values(&cs, i); 54 | } 55 | free(buf); 56 | 57 | cs_pack_size = compressed_seq_packed_size(&cs); 58 | 59 | cs_packed = (char *) calloc(cs_pack_size, sizeof(char)); 60 | compressed_seq_pack(&cs, cs_packed); 61 | compressed_seq_destroy(&cs); 62 | 63 | fprintf(stderr, "Querying the packed compressed seq structure\n"); 64 | for(i = 0; i < n; i++) 65 | { 66 | print_values_packed(cs_packed, i); 67 | } 68 | 69 | free(cs_packed); 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /cmph-2.0/tests/graph_tests.c: -------------------------------------------------------------------------------- 1 | #include "../src/graph.h" 2 | 3 | #define DEBUG 4 | #include "../src/debug.h" 5 | 6 | int main(int argc, char **argv) 7 | { 8 | graph_iterator_t it; 9 | cmph_uint32 i, neighbor; 10 | graph_t *g = graph_new(5, 10); 11 | 12 | fprintf(stderr, "Building random graph\n"); 13 | for (i = 0; i < 10; ++i) 14 | { 15 | cmph_uint32 v1 = i % 5; 16 | cmph_uint32 v2 = (i*2) % 5; 17 | if (v1 == v2) continue; 18 | graph_add_edge(g, v1, v2); 19 | DEBUGP("Added edge %u %u\n", v1, v2); 20 | } 21 | graph_print(g); 22 | graph_del_edge(g, 4, 3); 23 | graph_print(g); 24 | graph_clear_edges(g); 25 | graph_print(g); 26 | graph_destroy(g); 27 | 28 | fprintf(stderr, "Building cyclic graph\n"); 29 | g = graph_new(4, 5); 30 | graph_add_edge(g, 0, 3); 31 | graph_add_edge(g, 0, 1); 32 | graph_add_edge(g, 1, 2); 33 | graph_add_edge(g, 2, 0); 34 | if (!graph_is_cyclic(g)) 35 | { 36 | return 1; 37 | } 38 | graph_destroy(g); 39 | 40 | fprintf(stderr, 
"Building non-cyclic graph\n"); 41 | g = graph_new(5, 4); 42 | graph_add_edge(g, 0, 1); 43 | graph_add_edge(g, 1, 2); 44 | graph_add_edge(g, 2, 3); 45 | graph_add_edge(g, 3, 4); 46 | 47 | if (graph_is_cyclic(g)) 48 | { 49 | return 1; 50 | } 51 | 52 | fprintf(stderr, "Checking neighbors iterator\n"); 53 | it = graph_neighbors_it(g, 1); 54 | neighbor = graph_next_neighbor(g, &it); 55 | DEBUGP("Neighbor is %u\n", neighbor); 56 | if (neighbor != 0 && neighbor != 2) return 1; 57 | neighbor = graph_next_neighbor(g, &it); 58 | DEBUGP("Neighbor is %u\n", neighbor); 59 | if (neighbor != 0 && neighbor != 2) return 1; 60 | neighbor = graph_next_neighbor(g, &it); 61 | DEBUGP("Neighbor is %u\n", neighbor); 62 | if (neighbor != GRAPH_NO_NEIGHBOR) return 1; 63 | 64 | 65 | graph_destroy(g); 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /cmph-2.0/tests/mphf_tests.c: -------------------------------------------------------------------------------- 1 | #ifdef WIN32 2 | #include "../wingetopt.h" 3 | #else 4 | #include 5 | #endif 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #ifdef WIN32 16 | #define VERSION "0.8" 17 | #else 18 | #include "config.h" 19 | #endif 20 | 21 | 22 | void usage(const char *prg) 23 | { 24 | fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg); 25 | } 26 | void usage_long(const char *prg) 27 | { 28 | fprintf(stderr, "usage: %s [-v] [-h] [-V] [-k nkeys] [-m file.mph] keysfile\n", prg); 29 | fprintf(stderr, "Packed MPHFs testing tool\n\n"); 30 | fprintf(stderr, " -h\t print this help message\n"); 31 | fprintf(stderr, " -V\t print version number and exit\n"); 32 | fprintf(stderr, " -v\t increase verbosity (may be used multiple times)\n"); 33 | fprintf(stderr, " -k\t number of keys\n"); 34 | fprintf(stderr, " -m\t minimum perfect hash function file \n"); 35 | fprintf(stderr, " keysfile\t line separated 
file with keys\n"); 36 | } 37 | 38 | int main(int argc, char **argv) 39 | { 40 | char verbosity = 0; 41 | char *mphf_file = NULL; 42 | const char *keys_file = NULL; 43 | FILE *mphf_fd = stdout; 44 | FILE *keys_fd; 45 | cmph_uint32 nkeys = UINT_MAX; 46 | cmph_uint32 i = 0; 47 | cmph_t *mphf = NULL; 48 | cmph_io_adapter_t *source; 49 | while (1) 50 | { 51 | char ch = (char)getopt(argc, argv, "hVvk:m:"); 52 | if (ch == -1) break; 53 | switch (ch) 54 | { 55 | case 'k': 56 | { 57 | char *endptr; 58 | nkeys = (cmph_uint32) strtoul(optarg, &endptr, 10); 59 | if(*endptr != 0) { 60 | fprintf(stderr, "Invalid number of keys %s\n", optarg); 61 | exit(1); 62 | } 63 | } 64 | break; 65 | case 'm': 66 | mphf_file = strdup(optarg); 67 | break; 68 | case 'v': 69 | ++verbosity; 70 | break; 71 | case 'V': 72 | printf("%s\n", VERSION); 73 | return 0; 74 | case 'h': 75 | usage_long(argv[0]); 76 | return 0; 77 | default: 78 | usage(argv[0]); 79 | return 1; 80 | } 81 | } 82 | 83 | if (optind != argc - 1) 84 | { 85 | usage(argv[0]); 86 | return 1; 87 | } 88 | keys_file = argv[optind]; 89 | 90 | int ret = 0; 91 | if (mphf_file == NULL) 92 | { 93 | mphf_file = (char *)malloc(strlen(keys_file) + 5); 94 | memcpy(mphf_file, keys_file, strlen(keys_file)); 95 | memcpy(mphf_file + strlen(keys_file), ".mph\0", (size_t)5); 96 | } 97 | 98 | keys_fd = fopen(keys_file, "r"); 99 | 100 | if (keys_fd == NULL) 101 | { 102 | fprintf(stderr, "Unable to open file %s: %s\n", keys_file, strerror(errno)); 103 | return -1; 104 | } 105 | 106 | if(nkeys == UINT_MAX) source = cmph_io_nlfile_adapter(keys_fd); 107 | else source = cmph_io_nlnkfile_adapter(keys_fd, nkeys); 108 | 109 | cmph_uint8 * hashtable = NULL; 110 | mphf_fd = fopen(mphf_file, "r"); 111 | if (mphf_fd == NULL) 112 | { 113 | fprintf(stderr, "Unable to open input file %s: %s\n", mphf_file, strerror(errno)); 114 | free(mphf_file); 115 | return -1; 116 | } 117 | mphf = cmph_load(mphf_fd); 118 | fclose(mphf_fd); 119 | if (!mphf) 120 | { 121 | 
fprintf(stderr, "Unable to parser input file %s\n", mphf_file); 122 | free(mphf_file); 123 | return -1; 124 | } 125 | cmph_uint32 siz = cmph_size(mphf); 126 | hashtable = (cmph_uint8*)malloc(siz*sizeof(cmph_uint8)); 127 | memset(hashtable, 0, (size_t)siz); 128 | //check all keys 129 | for (i = 0; i < source->nkeys; ++i) 130 | { 131 | cmph_uint32 h; 132 | char *buf; 133 | cmph_uint32 buflen = 0; 134 | source->read(source->data, &buf, &buflen); 135 | h = cmph_search(mphf, buf, buflen); 136 | if (!(h < siz)) 137 | { 138 | fprintf(stderr, "Unknown key %*s in the input.\n", buflen, buf); 139 | ret = 1; 140 | } else if(hashtable[h]) 141 | { 142 | fprintf(stderr, "Duplicated or unknown key %*s in the input\n", buflen, buf); 143 | ret = 1; 144 | } else hashtable[h] = 1; 145 | 146 | if (verbosity) 147 | { 148 | printf("%s -> %u\n", buf, h); 149 | } 150 | source->dispose(source->data, buf, buflen); 151 | } 152 | 153 | cmph_destroy(mphf); 154 | free(hashtable); 155 | 156 | fclose(keys_fd); 157 | free(mphf_file); 158 | cmph_io_nlfile_adapter_destroy(source); 159 | return ret; 160 | 161 | } 162 | -------------------------------------------------------------------------------- /cmph-2.0/tests/select_tests.c: -------------------------------------------------------------------------------- 1 | #include "../src/select.h" 2 | 3 | #define DEBUG 4 | #include "../src/debug.h" 5 | #include 6 | 7 | static inline void print_values(select_t * sel) 8 | { 9 | register cmph_uint32 index; 10 | 11 | index = select_query(sel, 0); 12 | fprintf(stderr, "Index[0]\t= %u\n", index - 0); 13 | 14 | index = select_next_query(sel, index); 15 | fprintf(stderr, "Next Index\t= %u\n", index); 16 | 17 | index = select_query(sel, 1); 18 | fprintf(stderr, "Index[1]\t= %u\n", index - 1); 19 | 20 | index = select_next_query(sel, index); 21 | fprintf(stderr, "Next Index\t= %u\n", index); 22 | 23 | index = select_query(sel, 2); 24 | fprintf(stderr, "Index[2]\t= %u\n", index - 2); 25 | 26 | index = 
select_next_query(sel, index); 27 | fprintf(stderr, "Next Index\t= %u\n", index); 28 | 29 | index = select_query(sel, 3); 30 | fprintf(stderr, "Index[3]\t= %u\n", index - 3); 31 | } 32 | 33 | 34 | static inline void print_values_packed(char * sel_packed) 35 | { 36 | register cmph_uint32 index; 37 | 38 | index = select_query_packed(sel_packed, 0); 39 | fprintf(stderr, "Index[0]\t= %u\n", index - 0); 40 | 41 | index = select_next_query_packed(sel_packed, index); 42 | fprintf(stderr, "Next Index\t= %u\n", index); 43 | 44 | index = select_query_packed(sel_packed, 1); 45 | fprintf(stderr, "Index[1]\t= %u\n", index - 1); 46 | 47 | index = select_next_query_packed(sel_packed, index); 48 | fprintf(stderr, "Next Index\t= %u\n", index); 49 | 50 | index = select_query_packed(sel_packed, 2); 51 | fprintf(stderr, "Index[2]\t= %u\n", index - 2); 52 | 53 | index = select_next_query_packed(sel_packed, index); 54 | fprintf(stderr, "Next Index\t= %u\n", index); 55 | 56 | index = select_query_packed(sel_packed, 3); 57 | fprintf(stderr, "Index[3]\t= %u\n", index - 3); 58 | } 59 | 60 | int main(int argc, char **argv) 61 | { 62 | select_t sel; 63 | cmph_uint32 n = 4; 64 | cmph_uint32 keys_vec[4] = {0,1,2,3}; 65 | cmph_uint32 m = keys_vec[3]; 66 | char *buf = NULL; 67 | cmph_uint32 buflen = 0; 68 | char * select_packed = NULL; 69 | cmph_uint32 select_pack_size = 0; 70 | 71 | select_init(&sel); 72 | select_generate(&sel, keys_vec, n, m); 73 | fprintf(stderr, "Space usage = %u\n", select_get_space_usage(&sel)); 74 | print_values(&sel); 75 | 76 | fprintf(stderr, "Dumping select structure\n"); 77 | select_dump(&sel, &buf, &buflen); 78 | 79 | select_destroy(&sel); 80 | fprintf(stderr, "Loading select structure\n"); 81 | 82 | select_load(&sel, buf, buflen); 83 | print_values(&sel); 84 | free(buf);; 85 | 86 | select_pack_size = select_packed_size(&sel); 87 | 88 | select_packed = (char *) calloc(select_pack_size, sizeof(char)); 89 | select_pack(&sel, select_packed); 90 | select_destroy(&sel); 91 
#!/bin/sh
# Rebuild the module from scratch and run examples/bench.pl for a range of
# dictionary sizes. The output of every run is shown and also saved to
# log.bench-<git describe>[-<size>] in the current directory.
#
# Environment:
#   p   perl interpreter (optionally with switches) to use; default "perl".
#       $p is expanded unquoted on purpose so that p="perl -w" keeps working.
p=${p:-perl}

# A failing "make clean" (e.g. no Makefile yet) is harmless and is ignored.
make clean
# Benchmarking a stale or missing blib is meaningless, so stop on a failed build.
{ $p Makefile.PL && make -s; } || exit 1
#cd bob; git checkout Makefile; cd ..
g=$(git describe --long --tags --dirty --always)

$p -Mblib examples/bench.pl -size 127                | tee "log.bench-$g-127"
$p -Mblib examples/bench.pl -size 500                | tee "log.bench-$g-500"
$p -Mblib examples/bench.pl -size 2000               | tee "log.bench-$g-2000"
$p -Mblib examples/bench.pl -size 10000 -nul -1opt   | tee "log.bench-$g-10000"
$p -Mblib examples/bench.pl -size 25000 -nul -1opt   | tee "log.bench-$g-25000"
$p -Mblib examples/bench.pl -nul -pic -1opt          | tee "log.bench-$g"
=head1 METHODS

=over

=item new $dict, @options

Creates the perfect hash for C<$dict>, which may be an arrayref of keys, a
hashref, or the name of an existing key file with one key per line.

Arrayrefs and hashrefs are first written to the key file
F<pperf_keys.tmp> in the current directory, which is then handed to the
XS constructor. Dies if that file cannot be written, if a given key file
cannot be read, or if C<$dict> is neither a reference nor an existing
file.

Honored options are: I<-nul>

With I<-false-positives> the key-to-index mapping is kept in the object
at index 3.

=cut

# TODO support arrayref and hashref converted to arrayrefs, as byte-packed vector
# for the cmph io_vector or io_byte_vector adapter.
sub new {
  my $class = shift or die;
  my $dict = shift; # hashref, arrayref or filename
  my $size;
  # enforce KEYFILE: in-memory dictionaries are spilled to this file first
  my $fn = "pperf_keys.tmp";
  if (ref $dict eq 'ARRAY') {
    open my $F, ">", $fn or die "Cannot write key file $fn: $!";
    my $i = 0;
    my %dict;
    $size = scalar @$dict;
    for (@$dict) {
      print $F "$_\n";
      $dict{$_} = $i++;
    }
    # buffered write errors only show up at close
    close $F or die "Cannot close key file $fn: $!";
    $dict = \%dict;
  }
  elsif (ref $dict eq 'HASH') {
    open my $F, ">", $fn or die "Cannot write key file $fn: $!";
    for (sort keys %$dict) {
      print $F $_,"\t",$dict->{$_},"\n";
    }
    #print $F "%%";
    close $F or die "Cannot close key file $fn: $!";
    $size = scalar keys %$dict;
  } elsif (!ref $dict and ! -e $dict) {
    die "wrong dict argument. arrayref, hashref or filename expected";
  } else {
    $fn = $dict;
    # against -false-positive: remember the line index of every key
    my %hash;
    open my $d, "<", $dict or die "Cannot read key file $dict: $!";
    {
      local $/;
      my $i = 0;
      %hash = map {$_ => $i++ } split /\n/, <$d>;
    }
    close $d;
    $dict = \%hash;
    $size = scalar keys %hash;
  }
  my $ph = _new($class, $fn, @_);
  if (grep /^-false-positives/, @_) {
    push @$ph, $dict; # at [3]
  }
  $ph->[2]->{size} = $size;
  return $ph;
}

=item perfecthash $ph

XS method. Returns the position of the found key in the file.

=item false_positives

=item option $ph, $name

Access the option hash in $ph: returns the value stored for C<$name>, or
undef when the option is not set.

=cut

sub option {
  return $_[0]->[2]->{$_[1]};
}
=item save_c fileprefix, options

Generates a $fileprefix.c and $fileprefix.h file.

The emitted lookup function embeds the packed hash function stored at
index 1 of the object as a C string and calls C<cmph_search_packed> on
it. With I<-nul> the key length comes from the C<l> argument, otherwise
from C<strlen(s)>. Dies if the generated C file cannot be closed.

For all CMPH variants.

=cut

sub save_c {
  my $ph = shift;
  my $size = $ph->[2]->{size};
  require Perfect::Hash::C;
  Perfect::Hash::C->import();

  my ($fileprefix, $base) = $ph->save_h_header(@_);
  my $FH = $ph->save_c_header($fileprefix, $base);
  # XXX need to initialize mphf from the temp FILE
  # into a memory buffer.
  print $FH "#include \"cmph.h\"\n";
  print $FH $ph->c_funcdecl($base)." {";
  # XXX check for false positives from dict at [3]
  my $keylen = $ph->option('-nul') ? "l" : "strlen(s)";
  print $FH "
    static const char *packed_mphf = ",B::cstring($ph->[1]),";
    return cmph_search_packed((void*)packed_mphf, (const char*)s, $keylen) % $size;
}
";
  # Close explicitly, as the Cuckoo save_c does, so that write errors
  # which only surface when the buffer is flushed are reported.
  close $FH or die "Cannot close generated C file for $fileprefix: $!";
  return 1;
}

=item c_lib, c_include

Compiler and linker flags for the bundled, not yet installed cmph-2.0.

TODO: to the installed Alien libpath

=cut

# quirks on temp. uninstalled -lcmph
sub c_include {
  return " -Icmph-2.0/include";
}

sub c_lib {
  # quirks on temp. uninstalled -lcmph
  my $libs = " -Lcmph-2.0/lib -lcmph";
  # rpath not with darwin, solaris, msvc. we should rather install cmph locally or via Alien
  $libs .= " -Wl,-rpath=cmph-2.0/lib" if $^O =~ /linux|bsd|cygwin$/ and $Config{cc} =~ /cc|clang/;
  if ($^O eq 'darwin' and $Config{ccflags} =~ /-DDEBUGGING/) {
    $libs = " cmph-2.0/lib/libcmph.a"; # static to enable debugging
  }
  return $libs;
}
159 | 160 | =cut 161 | 162 | 1; 163 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/BDZ.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::BDZ; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 BDZ algorithm. 12 | The MPFH minimal variant is L. 13 | See http://cmph.sourceforge.net/bdz.html 14 | 15 | It is a simple, efficient, near-optimal space and practical algorithm 16 | to generate a family of PHFs and MPHFs. It is also referred to as BPZ 17 | algorithm because the work presented by Botelho, Pagh and Ziviani in 18 | [2]. In the Botelho's PhD. dissertation [1] it is also referred to as 19 | RAM algorithm because it is more suitable for key sets that can be 20 | handled in internal memory. 21 | 22 | The BDZ algorithm uses r-uniform random hypergraphs given by function 23 | values of r uniform random hash functions on the input key set S for 24 | generating PHFs and MPHFs that require O(n) bits to be stored. A 25 | hypergraph is the generalization of a standard undirected graph where 26 | each edge connects vertices. This idea is not new, see e.g. [8], but 27 | we have proceeded differently to achieve a space usage of O(n) bits 28 | rather than O(n log n) bits. Evaluation time for all schemes 29 | considered is constant. For r=3 we obtain a space usage of 30 | approximately 2.6n bits for an MPHF. More compact, and even simpler, 31 | representations can be achieved for larger m. For example, for m=1.23n 32 | we can get a space usage of 1.95n bits. 33 | 34 | Our best MPHF space upper bound is within a factor of 2 from the 35 | information theoretical lower bound of approximately 1.44 bits. 
We 36 | have shown that the BDZ algorithm is far more practical than previous 37 | methods with proven space complexity, both because of its simplicity, 38 | and because the constant factor of the space complexity is more than 6 39 | times lower than its closest competitor, for plausible problem 40 | sizes. We verify the practicality experimentally, using slightly more 41 | space than in the mentioned theoretical bounds. 42 | 43 | =head1 METHODS 44 | 45 | See L 46 | 47 | =head1 SEE ALSO 48 | 49 | [1] F. C. Botelho. Near-Optimal Space Perfect Hashing 50 | Algorithms. PhD. Thesis, Department of Computer Science, Federal 51 | University of Minas Gerais, September 2008. Supervised by N. Ziviani. 52 | Lhttp://cmph.sourceforge.net/papers/thesis.pdf<> 53 | 54 | [2] F. C. Botelho, R. Pagh, N. Ziviani. Simple and space-efficient 55 | minimal perfect hash functions. In Proceedings of the 10th 56 | International Workshop on Algorithms and Data Structures (WADs'07), 57 | Springer-Verlag Lecture Notes in Computer Science, vol. 4619, Halifax, 58 | Canada, August 2007, 139-150. 59 | L 60 | 61 | =cut 62 | 63 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/BDZ.pm examples/words20 64 | unless (caller) { 65 | require Perfect::Hash; 66 | &Perfect::Hash::_test(@ARGV) 67 | } 68 | 69 | 1; 70 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/BDZ_PH.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::BDZ_PH; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 BDZ_PH algorithm, the MPFH minimal 12 | variant to L. See 13 | L 14 | 15 | It is a simple, efficient, near-optimal space and practical algorithm 16 | to generate a family of PHFs and MPHFs. 
It is also referred to as BPZ 17 | algorithm because the work presented by Botelho, Pagh and Ziviani in 18 | [2]. In the Botelho's PhD. dissertation [1] it is also referred to as 19 | RAM algorithm because it is more suitable for key sets that can be 20 | handled in internal memory. 21 | 22 | The BDZ algorithm uses r-uniform random hypergraphs given by function 23 | values of r uniform random hash functions on the input key set S for 24 | generating PHFs and MPHFs that require O(n) bits to be stored. A 25 | hypergraph is the generalization of a standard undirected graph where 26 | each edge connects vertices. This idea is not new, see e.g. [8], but 27 | we have proceeded differently to achieve a space usage of O(n) bits 28 | rather than O(n log n) bits. Evaluation time for all schemes 29 | considered is constant. For r=3 we obtain a space usage of 30 | approximately 2.6n bits for an MPHF. More compact, and even simpler, 31 | representations can be achieved for larger m. For example, for m=1.23n 32 | we can get a space usage of 1.95n bits. 33 | 34 | Our best MPHF space upper bound is within a factor of 2 from the 35 | information theoretical lower bound of approximately 1.44 bits. We 36 | have shown that the BDZ algorithm is far more practical than previous 37 | methods with proven space complexity, both because of its simplicity, 38 | and because the constant factor of the space complexity is more than 6 39 | times lower than its closest competitor, for plausible problem 40 | sizes. We verify the practicality experimentally, using slightly more 41 | space than in the mentioned theoretical bounds. 42 | 43 | =head1 METHODS 44 | 45 | See L 46 | 47 | =head1 SEE ALSO 48 | 49 | [1] F. C. Botelho. Near-Optimal Space Perfect Hashing 50 | Algorithms. PhD. Thesis, Department of Computer Science, Federal 51 | University of Minas Gerais, September 2008. Supervised by N. Ziviani. 52 | Lhttp://cmph.sourceforge.net/papers/thesis.pdf<> 53 | 54 | [2] F. C. Botelho, R. Pagh, N. 
Ziviani. Simple and space-efficient 55 | minimal perfect hash functions. In Proceedings of the 10th 56 | International Workshop on Algorithms and Data Structures (WADs'07), 57 | Springer-Verlag Lecture Notes in Computer Science, vol. 4619, Halifax, 58 | Canada, August 2007, 139-150. 59 | L 60 | 61 | =cut 62 | 63 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/CHD.pm examples/words20 64 | unless (caller) { 65 | require Perfect::Hash; 66 | &Perfect::Hash::_test(@ARGV) 67 | } 68 | 69 | 1; 70 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/BMZ.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::BMZ; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 BMZ algorithm. 12 | See http://cmph.sourceforge.net/chd.html 13 | 14 | =head1 METHODS 15 | 16 | See L 17 | 18 | =cut 19 | 20 | # local testing: p -d -Mblib lib/Perfect/Hash/CMPH/BMZ.pm examples/words20 21 | unless (caller) { 22 | require Perfect::Hash; 23 | &Perfect::Hash::_test(@ARGV) 24 | } 25 | 26 | 1; 27 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/BMZ8.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::BMZ8; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 BMZ8 algorithm. 
12 | See http://cmph.sourceforge.net/bmz.html 13 | 14 | =head1 METHODS 15 | 16 | See L 17 | 18 | =cut 19 | 20 | # local testing: p -d -Mblib lib/Perfect/Hash/CMPH/BMZ8.pm examples/words20 21 | unless (caller) { 22 | require Perfect::Hash; 23 | &Perfect::Hash::_test(@ARGV) 24 | } 25 | 26 | 1; 27 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/BRZ.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::BRZ; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 BRZ algorithm. 12 | See L 13 | 14 | BRZ is an external memory based algorithm esp. suited to huge 15 | dictionaries, which can easily scale to billions of entries. 16 | 17 | The algorithm is linear on the size of keys to construct a MPHF, which 18 | is optimal. For instance, for a collection of 1 billion URLs collected 19 | from the web, each one 64 characters long on average, the time to 20 | construct a MPHF using a 2.4 gigahertz PC with 500 megabytes of 21 | available main memory is approximately 3 hours. Second, the algorithm 22 | needs a small a priori defined vector of one byte entries in main 23 | memory to construct a MPHF. For the collection of 1 billion URLs and 24 | using , the algorithm needs only 5.45 megabytes of internal 25 | memory. Third, the evaluation of the MPHF for each retrieval requires 26 | three memory accesses and the computation of three universal hash 27 | functions. This is not optimal as any MPHF requires at least one 28 | memory access and the computation of two universal hash 29 | functions. Fourth, the description of a MPHF takes a constant number 30 | of bits for each key, which is optimal. 
For the collection of 1 31 | billion URLs, it needs 8.1 bits for each key, while the theoretical 32 | lower bound is bits per key. 33 | 34 | =head1 METHODS 35 | 36 | See L 37 | 38 | =cut 39 | 40 | # local testing: p -d -Mblib lib/Perfect/Hash/CMPH/BRZ.pm examples/words20 41 | unless (caller) { 42 | require Perfect::Hash; 43 | &Perfect::Hash::_test(@ARGV) 44 | } 45 | 46 | 1; 47 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/CHD.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::CHD; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 CHD algorithm. 12 | See http://cmph.sourceforge.net/chd.html 13 | 14 | The CHD algorithm permits to obtain PHFs with representation size very 15 | close to optimal while retaining O(n) construction time and O(1) 16 | evaluation time. For example, in the case m=2n we obtain a PHF that 17 | uses space 0.67 bits per key, and for m=1.23n we obtain space 1.4 bits 18 | per key, which was not achievable with previously known methods. The 19 | CHD algorithm is inspired by several known algorithms; the main new 20 | feature is that it combines a modification of Pagh's 21 | ``hash-and-displace'' approach with data compression on a sequence of 22 | hash function indices. That combination makes it possible to 23 | significantly reduce space usage while retaining linear construction 24 | time and constant query time. The CHD algorithm can also be used for 25 | k-perfect hashing, where at most k keys may be mapped to the same 26 | value. For the analysis we assume that fully random hash functions are 27 | given for free; such assumptions can be justified and were made in 28 | previous papers. 
29 | 30 | The compact PHFs generated by the CHD algorithm can be used in many 31 | applications in which we want to assign a unique identifier to each 32 | key without storing any information on the key. One of the most 33 | obvious applications of those functions (or k-perfect hash functions) 34 | is when we have a small fast memory in which we can store the perfect 35 | hash function while the keys and associated satellite data are stored 36 | in slower but larger memory. The size of a block or a transfer unit 37 | may be chosen so that k data items can be retrieved in one read 38 | access. In this case we can ensure that data associated with a key can 39 | be retrieved in a single probe to slower memory. This has been used 40 | for example in hardware routers [4]. 41 | 42 | The CHD algorithm generates the most compact PHFs and MPHFs we know of 43 | in O(n) time. The time required to evaluate the generated functions is 44 | constant (in practice less than 1.4 microseconds). The storage space 45 | of the resulting PHFs and MPHFs are distant from the information 46 | theoretic lower bound by a factor of 1.43. The closest competitor is 47 | the algorithm by Martin and Pagh [3] but their algorithm do not work 48 | in linear time. Furthermore, the CHD algorithm can be tuned to run 49 | faster than the BPZ algorithm [1] (the fastest algorithm available in 50 | the literature so far) and to obtain more compact functions. The most 51 | impressive characteristic is that it has the ability, in principle, to 52 | approximate the information theoretic lower bound while being 53 | practical. A detailed description of the CHD algorithm can be found in 54 | [2]. 55 | 56 | =head1 METHODS 57 | 58 | See L 59 | 60 | =head1 SEE ALSO 61 | 62 | =over 63 | 64 | =item [1] 65 | 66 | F. C. Botelho, R. Pagh, N. Ziviani. Simple and space-efficient minimal 67 | perfect hash functions. 
In Proceedings of the 10th International 68 | Workshop on Algorithms and Data Structures (WADs'07), Springer-Verlag 69 | Lecture Notes in Computer Science, vol. 4619, Halifax, Canada, August 70 | 2007, 139-150. L 71 | 72 | =item [2] 73 | 74 | F. C. Botelho, D. Belazzougui and M. Dietzfelbinger. Compress, hash 75 | and displace. In Proceedings of the 17th European Symposium on 76 | Algorithms (ESA'09). Springer LNCS, 2009. 77 | L 78 | 79 | =item [3] 80 | 81 | M. Dietzfelbinger and R. Pagh. Succinct data structures for retrieval 82 | and approximate membership. In Proceedings of the 35th international 83 | colloquium on Automata, Languages and Programming (ICALP'08), pages 84 | 385-396, Berlin, Heidelberg, 2008. Springer-Verlag. 85 | 86 | =item [4] 87 | 88 | B. Prabhakar and F. Bonomi. Perfect hashing for network 89 | applications. In Proceedings of the IEEE International Symposium on 90 | Information Theory. IEEE Press, 2006. 91 | 92 | =back 93 | 94 | =cut 95 | 96 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/CHD.pm examples/words20 97 | unless (caller) { 98 | require Perfect::Hash; 99 | &Perfect::Hash::_test(@ARGV) 100 | } 101 | 102 | 1; 103 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/CHD_PH.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::CHD_PH; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 CHD_PH algorithm. 
12 | See http://cmph.sourceforge.net/chd.html 13 | 14 | =head1 METHODS 15 | 16 | See L 17 | 18 | =cut 19 | 20 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/CHD.pm examples/words20 21 | unless (caller) { 22 | require Perfect::Hash; 23 | &Perfect::Hash::_test(@ARGV) 24 | } 25 | 26 | 1; 27 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/CHM.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::CHM; 2 | 3 | our $VERSION = '0.01'; 4 | use strict; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 CHM algorithm. 12 | See http://cmph.sourceforge.net/chm.html 13 | 14 | =head1 METHODS 15 | 16 | =head2 new $filename, @options 17 | 18 | Computes a minimal perfect hash table using the given dictionary, 19 | given as hashref or arrayref or filename. 20 | 21 | Honored options are: I 22 | 23 | Planned: I<-minimal> 24 | 25 | =head2 perfecthash $ph, $key 26 | 27 | Look up a $key in the minimal perfect hash table and return the 28 | associated index into the initially given $dict. 29 | 30 | Checks if the index is correct, otherwise it will return undef. 31 | 32 | =head2 false_positives 33 | 34 | Returns undef, as cmph hashes always store the keys. 
35 | 36 | =head2 save_c NYI 37 | 38 | =cut 39 | 40 | # local testing: p -d -Ilib lib/Perfect/Hash/CMPH/CHD.pm examples/words20 41 | unless (caller) { 42 | require Perfect::Hash; 43 | &Perfect::Hash::_test(@ARGV) 44 | } 45 | 46 | 1; 47 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/CMPH/FCH.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::CMPH::FCH; 2 | 3 | use strict; 4 | our $VERSION = '0.01'; 5 | use Perfect::Hash::CMPH; 6 | #use warnings; 7 | our @ISA = qw(Perfect::Hash::CMPH Perfect::Hash Perfect::Hash::C); 8 | 9 | =head1 DESCRIPTION 10 | 11 | XS interface to the cmph-2.0 FCH algorithm. 12 | See http://cmph.sourceforge.net/fch.html 13 | 14 | The total memory consumption of FCH algorithm for generating a minimal 15 | perfect hash function (MPHF) is: O(n) + 9n + 8cn/(log(n) + 1) 16 | bytes. The value of parameter c must be greater than or equal to 2.6. 17 | 18 | Memory consumption to store the resulting function: We only need to 19 | store the g function and a constant number of bytes for the seed of 20 | the hash functions used in the resulting MPHF. Thus, we need 21 | cn/(log(n) + 1) + O(1) bytes. 22 | 23 | E.A. Fox, Q.F. Chen, and L.S. Heath. A faster algorithm for 24 | constructing minimal perfect hash functions. In Proc. 15th Annual 25 | International ACM SIGIR Conference on Research and Development in 26 | Information Retrieval, pages 266-273, 1992. 
27 | L 28 | 29 | =head1 METHODS 30 | 31 | See L 32 | 33 | =cut 34 | 35 | # local testing: p -d -Mblib lib/Perfect/Hash/CMPH/FCH.pm examples/words20 36 | unless (caller) { 37 | require Perfect::Hash; 38 | &Perfect::Hash::_test(@ARGV) 39 | } 40 | 41 | 1; 42 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/Cuckoo.pm: -------------------------------------------------------------------------------- 1 | package Perfect::Hash::Cuckoo; 2 | 3 | our $VERSION = '0.01'; 4 | use strict; 5 | #use warnings; 6 | use Perfect::Hash; 7 | use Perfect::Hash::C; 8 | our @ISA = qw(Perfect::Hash Perfect::Hash::C); 9 | 10 | =head1 DESCRIPTION 11 | 12 | Generate non-perfect but fast Cuckoo hashes, with two universal hash 13 | functions f and g into two tables of load factor 50%, guaranteeing constant 14 | lookup and insertion time. 15 | 16 | Only for benchmarks yet. Still just a dummy placeholder. 17 | 18 | A study by Zukowski et al. has shown that cuckoo hashing is much 19 | faster than chained hashing for small, cache-resident hash tables on 20 | modern processors: 21 | 22 | Zukowski, Marcin; Heman, Sandor; Boncz, Peter (June 2006). 23 | "Architecture-Conscious Hashing". Proceedings of the 24 | International Workshop on Data Management on New Hardware (DaMoN). 25 | L 26 | 27 | =head1 METHODS 28 | 29 | =over 30 | 31 | =item new $filename|hashref|arrayref @options 32 | 33 | Can only handle arrayref or single column keyfiles yet. No values. 34 | 35 | Still a dummy placeholder. 
=item new $filename|hashref|arrayref @options

Can only handle arrayref or single column keyfiles yet. No values.

Still a dummy placeholder.

The object is an arrayref of the form
C<[$uhash, $options, $keys, $values]>; index 4 is filled lazily by
L</perfecthash key> with a key lookup hash.

Honored options are:

C<-pic>, C<-nul>

=cut

sub new {
  my $class = shift or die;
  my $dict = shift; #hashref, arrayref or filename
  my $options = Perfect::Hash::_handle_opts(@_);
  my ($keys, $values) = _dict_init($dict);
  # XXX optimize the 2 uhash functions here
  my $uhash = [];
  return bless [$uhash, $options, $keys, $values], $class;
}

=item save_c prefix, options

Generates F<$prefix_hash.c> and F<.h> files with no external dependencies.

The generated lookup function is still a stub that always returns -1.
Unless the hash was created with C<-false-positives> the keys are emitted
too, as a string pool with C<-pic> and as a C<keys[]> array otherwise.

=cut

sub save_c {
  my $ph = shift;
  my ($options, $keys) = ($ph->[1], $ph->[2]);
  my ($fileprefix, $base) = $ph->save_h_header(@_);
  my $FH = $ph->save_c_header($fileprefix, $base);
  # print $FH "#include \n" if @$C or !$ph->option('-nul');
  print $FH $ph->c_hash_impl($base);
  print $FH $ph->c_funcdecl($base)." {\n";
  print $FH "    int l = strlen(s);" unless $ph->option('-nul');
  my $size = scalar @$keys;
  my $type = u_csize($size); # not used yet by the stub below
  if (!$ph->false_positives) { # store keys
    if ($ph->option('-pic')) {
      c_stringpool($FH, $keys);
    } else {
      print $FH "
    /* keys */
    static const char* keys[] = {\n";
      _save_c_array(4, $FH, $keys, "\"%s\"");
      print $FH "    };";
    }
  }
  # ...
  print $FH "
    return -1;\n";
  print $FH "}\n";
  close $FH;
}

=item c_hash_impl $ph, $base

String for C code for the 2 hash functions. Honors C<-nul>.

Currently returns the empty string.

=cut

# XXX use the two randomly generated uhash params to generate 2 hash funcs
sub c_hash_impl {""}

=item perfecthash key

dummy, for testing only. Use the generated C function instead.

Returns the value stored for C<key>, or undef for an unknown key. The
lookup hash is built from the keys and values on the first call and
cached in the object at index 4, so later calls do not rebuild it.

=cut

sub perfecthash {
  my $ph = shift;
  my ($keys, $values) = ($ph->[2], $ph->[3]);
  my $key = shift;
  my $dict = $ph->[4];
  if (!$dict) {
    $dict = {};
    for my $i (0 .. scalar(@$keys)-1) {
      $dict->{$keys->[$i]} = $values->[$i];
    }
    # Keep the hash for the next call. Without this assignment every
    # lookup walked all keys again.
    $ph->[4] = $dict;
  }
  return exists $dict->{$key} ? $dict->{$key} : undef;
}

=item false_positives

Returns 1 if the hash might return false positives, i.e. will return
the index of an existing key when you searched for a non-existing key.

The default is false, unless you created the hash with the option
C<-false-positives>.

=cut

sub false_positives {
  return exists $_[0]->[1]->{'-false-positives'};
}

=item option $ph, $name

Access the option hash in $ph: returns the value stored for C<$name>.

=cut

sub option {
  return $_[0]->[1]->{$_[1]};
}

#sub c_include { }
#sub c_lib { }

=back

=cut

1;
return " 46 | #ifdef _MSC_VER 47 | #define INLINE __inline 48 | #else 49 | #define INLINE inline 50 | #endif 51 | 52 | #ifdef _MSC_VER 53 | # define rotl(a,b) _rotl(a,b) 54 | #else 55 | static inline rotl(unsigned int x, unsigned char r) { 56 | asm(\"roll %1,%0\" : \"+r\" (x) : \"c\" (r)); 57 | return x; 58 | } 59 | #endif 60 | 61 | /* optimized Mantis FNV from http://www.sanmayce.com/Fastest_Hash/ 62 | but without 64 bit and xmm 128 bit extensions. 63 | */ 64 | static INLINE 65 | unsigned $base\_hash_mantis(unsigned d, const unsigned char *str, const int len) { 66 | const unsigned int PRIME = 709607; /* ad3e7 */ 67 | unsigned int hash32 = d ? d : 2166136261; /* 811c9dc5 */ 68 | const char *p = str; 69 | 70 | /* Cases: 0,1,2,3,4,5,6,7,...,15 */ 71 | if (len & 2*sizeof(int)) { 72 | hash32 = (hash32 ^ *(unsigned int*)p) * PRIME; 73 | p = sizeof(int); 74 | hash32 = (hash32 ^ *(unsigned int*)p) * PRIME; 75 | p += sizeof(int); 76 | } 77 | /* Cases: 0,1,2,3,4,5,6,7 */ 78 | if (len & sizeof(int)) { 79 | hash32 = (hash32 ^ *(unsigned short*)p) * PRIME; 80 | p = sizeof(int); 81 | } 82 | if (len & sizeof(short)) { 83 | hash32 = (hash32 ^ *(unsigned short*)p) * PRIME; 84 | p += sizeof(short); 85 | } 86 | if (len & 1) 87 | hash32 = (hash32 ^ *p) * PRIME; 88 | p += 1; 89 | } 90 | len -= p-str; 91 | 92 | for(; len > 2*sizeof(int); len -= 2*sizeof(int), p += 2*sizeof(int)) { 93 | hash32 = (hash32 ^ (rotl(*(int *)p,5) ^ *(int *)(p+4))) * PRIME; 94 | } 95 | hash32 = (hash32 ^ *(short*)(p+0*sizeof(short))) * PRIME; 96 | hash32 = (hash32 ^ *(short*)(p+1*sizeof(short))) * PRIME; 97 | hash32 = (hash32 ^ *(short*)(p+2*sizeof(short))) * PRIME; 98 | hash32 = (hash32 ^ *(short*)(p+3*sizeof(short))) * PRIME; 99 | return hash32 ^ (hash32 >> 16); 100 | } 101 | 102 | "; 103 | } 104 | 105 | =back 106 | 107 | =cut 108 | 109 | 1; 110 | -------------------------------------------------------------------------------- /lib/Perfect/Hash/Pearson16.pm: 
--------------------------------------------------------------------------------
package Perfect::Hash::Pearson16;
our $VERSION = '0.01';
#use coretypes;
use strict;
#use warnings;
use Perfect::Hash;
use Perfect::Hash::PearsonNP;
use Perfect::Hash::XS;
# NOTE(review): Perfect::Hash::Pearson is not loaded by this file itself;
# presumably Perfect::Hash::PearsonNP pulls it in - confirm.
our @ISA = qw(Perfect::Hash::Pearson);
use integer;
use bytes;

=head1 DESCRIPTION

Generate non-perfect pearson hash with an optimized 16bit hash
function, a big 16bit table (size: 65536 shorts), and static binary
tree collision resolution.

=head1 METHODS

=head2 new $dict, @options

Computes a non-perfect, but fast pearson hash table using the given
dictionary, given as hashref or arrayref, with fast lookup.

Honored options are:

I<-false-positives>

I<-max-time seconds> stops generating a pperf at seconds and uses a
non-perfect, but still fast hash then. Default: 60s.

It returns an object with @H containing the randomized
pearson lookup table of size 65536.

=cut

# Thin wrapper: passes all arguments (class first) on to
# Perfect::Hash::PearsonNP::new and returns its result.
sub new {
  return Perfect::Hash::PearsonNP::new(@_);
}

=head2 hash obj, $key

=cut

#sub hash_pp {
#  my ($ph, $key ) = @_;
#  my $size = $ph->[0];
#  my $H = $ph->[1];
#  my $d = 0;
#  # process in 16bit chunks
#  for my $c (unpack("S*", $key)) {
#    $d = $H->[$d ^ $c];
#  }
#  return $d % $size;
#}

=head2 perfecthash $obj, $key

Look up a $key in the pearson hash table
and return the associated index into the initially
given $dict.

Note that the hash is probably not perfect.

Without C<-false-positives> it checks if the index is correct,
otherwise it will return undef.
With C<-false-positives>, the key must have existed in
the given dictionary. If not, a wrong index will be returned.

=head2 false_positives

Returns 1 if the hash might return false positives,
i.e. will return the index of an existing key when
you searched for a non-existing key.

The default is undef, unless you created the hash with the option
C<-false-positives>.

=cut

# local testing: pb -d lib/Perfect/Hash/Pearson16.pm examples/words500
# or just: pb -d -MPerfect::Hash -e'new Perfect::Hash([split/\n/,`cat "examples/words20"`], "-pearson16")'
# Runs the self test only when the file is executed directly, not when loaded.
unless (caller) {
  &Perfect::Hash::_test(shift @ARGV, "-pearson16", @ARGV)
}

1;
--------------------------------------------------------------------------------
/lib/Perfect/Hash/Pearson32.pm:
--------------------------------------------------------------------------------
package Perfect::Hash::Pearson32;
our $VERSION = '0.01';
#use coretypes;
use strict;
#use warnings;
use Perfect::Hash;
use Perfect::Hash::PearsonNP;
use Perfect::Hash::XS;
# NOTE(review): Perfect::Hash::Pearson is not loaded by this file itself;
# presumably Perfect::Hash::PearsonNP pulls it in - confirm.
our @ISA = qw(Perfect::Hash::Pearson);
use integer;
use bytes;

=head1 DESCRIPTION

Generate non-perfect pearson hash with an optimized 32bit hash function,
a pearson table of size 256 and static binary tree collision resolution.

=head1 METHODS

=head2 new $dict, @options

Computes a non-perfect, but fast pearson hash table using the given
dictionary, given as hashref or arrayref, with fast lookup.

Honored options are:

I<-false-positives>

I<-max-time seconds> stops generating a pperf at seconds and uses a
non-perfect, but still fast hash then. Default: 60s.

It returns an object with @H containing the randomized
pearson lookup table of size 256.

=cut

# Thin wrapper: jumps to Perfect::Hash::PearsonNP::new with the current @_,
# so the callee sees the original caller.
sub new {
  goto &Perfect::Hash::PearsonNP::new;
}

=head2 hash obj, $key

=cut

#sub hash_pp {
#  my ($ph, $key ) = @_;
#  my $size = $ph->[0];
#  my $H = $ph->[1];
#  my $d = 0;
#  # process in 32bit chunks
#  for my $c (split "", $key) {
#    $d = $H->[$d ^ ord($c)];
#  }
#  return $d % $size;
#}

=head2 perfecthash $obj, $key

Look up a $key in the pearson hash table
and return the associated index into the initially
given $dict.

Note that the hash is probably not perfect.

Without C<-false-positives> it checks if the index is correct,
otherwise it will return undef.
With C<-false-positives>, the key must have existed in
the given dictionary. If not, a wrong index will be returned.

=head2 false_positives

Returns 1 if the hash might return false positives,
i.e. will return the index of an existing key when
you searched for a non-existing key.

The default is undef, unless you created the hash with the option
C<-false-positives>.

=cut

# local testing: pb -d lib/Perfect/Hash/Pearson32.pm examples/words20
# or just: pb -d -MPerfect::Hash -e'new Perfect::Hash([split/\n/,`cat "examples/words20"`], "-pearson32")'
# Runs the self test only when the file is executed directly, not when loaded.
unless (caller) {
  &Perfect::Hash::_test(shift @ARGV, "-pearson32", @ARGV)
}

1;
--------------------------------------------------------------------------------
/lib/Perfect/Hash/XS.pm:
--------------------------------------------------------------------------------
package Perfect::Hash::XS;
use strict;
our $VERSION = '0.01';
use Perfect::Hash::C;
our @ISA = qw(Perfect::Hash::C Perfect::Hash);

use XSLoader;
XSLoader::load('Perfect::Hash', $VERSION);

=head1 NAME

Perfect::Hash::XS - Perfect Hash output formatter for XS - compiled perl extensions

=head1 SYNOPSIS

    pperf keyfile --for-xs --prefix=ph

    use Perfect::Hash;
    $hash->{chr($_)} = int rand(2) for 48..90;
    my $ph = new Perfect::Hash $hash;
    $ph->save_xs("ph.inc");

    my @dict = split/\n/,`cat /usr/share/dict/words`;
    my $ph2 = Perfect::Hash->new(\@dict, '-minimal', '-for-xs');
    $ph2->save_xs("ph1.inc");

=head1 DESCRIPTION

Optimized for sharedlib and PIC, and it can hold more and mixed value
types, not just strings and integers. With the help of Data::Compile
(planned) even any perl values, like code refs, magic, ...

This is a replacement for cdb databases or write-once or only daily
Storable containers.

=head1 METHODS

=over

=item save_xs filename, options

Generate XS code, with the perl values saved as perl types.

=back

=cut

# The XS formatter writes no separate header file: returns nothing.
sub save_h_header { }

# Opens $filename for writing, emits the include needed by the generated
# lookup code and returns the open filehandle. Dies with the OS error
# message when the file cannot be created.
sub save_c_header {
  my ($ph, $filename) = @_;
  my $FH;
  # "$!" is the OS error; the former "@!" never contained it
  open $FH, ">", $filename or die "$filename: $!";
  print $FH "#include <string.h>\n"; # for memcmp/strlen
  return $FH;
}

# Returns the C declaration (without body) of the "$base_lookup" function.
# With the -nul option the key length is an explicit second parameter.
sub c_funcdecl {
  my ($ph, $base) = @_;
  if ($ph->option('-nul')) {
    return "
long $base\_lookup(const char* s, int l)";
  } else {
    return "
long $base\_lookup(const char* s)";
  }
}

# Not yet implemented: always dies.
sub save_xs {
  my $ph = shift;
  my $file = shift;
  my @options = @_;
  die 'save_xs nyi';
}

# explicit true value, do not rely on the result of XSLoader::load
1;
--------------------------------------------------------------------------------
/script/pperf.PL:
--------------------------------------------------------------------------------
#! perl
use Config;
use File::Basename qw(&basename &dirname);
use File::Spec;
use Cwd;

# List explicitly here the variables you want Configure to
# generate. Metaconfig only looks for shell variables, so you
# have to mention them as if they were shell variables, not
# %Config entries. Thus you write
#  $startperl
# to ensure Configure will look for $Config{startperl}.
# Wanted: $archlibexp

# This forces PL files to create target in same directory as PL file.
# This is so that make depend always knows where to find PL derivatives.
$origdir = cwd;
chdir dirname($0);
$file = basename($0, '.PL');
$file .= '.com' if $^O eq 'VMS';

# three-arg open, so the mode can never be taken from the file name
open OUT, '>', $file or die "Can't create $file: $!";

print "Extracting $file (with variable substitutions)\n";

# In this section, perl variables will be expanded during extraction.
# You can use $Config{...} to use Configure variables.

print OUT <<"!GROK!THIS!";
$Config{startperl}
    eval 'exec $Config{perlpath} -S \$0 \${1+"\$@"}'
        if \$running_under_some_shell;
--\$running_under_some_shell;
!GROK!THIS!

# In the following, perl variables are not expanded during extraction.

print OUT <<'!NO!SUBS!';
use strict;
use Perfect::Hash;
use Getopt::Long;
use Pod::Usage;
my $options = {};

# Returns the value of a parsed command line option, or a false value
# when the option was not given.
sub opt(*) {
  my $opt = shift;
  return exists($options->{$opt}) && ($options->{$opt} || 0);
}

sub version {
  no warnings 'once';
  return "pperf $Perfect::Hash::VERSION\n";
}

# Prints the short usage text and exits.
sub _usage_and_die {
  print <<'EOF';
Usage:
pperf [method] [options...] keyfile

Methods:
--hanovpp
--hanov
--urban
--pearson --pearsonnp --pearson8
--cmph-bdz_ph --cmph-bdz --cmph-bmz --cmph-chm --cmph-fch --cmph-chd_ph --cmph-chd
...

Options:
--false-positives
--nul
--for-c (default)
--prefix=name (default: pperf)

Planned:
--minimal
--optimal-size
--optimal-speed
--for-xs
--hash=name
--pic
--inline
--null-strings
--7bit
--ignore-case
--unicode-ignore-case

EOF
  exit;
}

sub helpme {
  print version(),"\n";
  if (opt('v')) {
    pod2usage( -verbose => opt('v') );
  } else {
    pod2usage( -verbose => 0 );
  }
  _usage_and_die;
}

# Unknown or malformed options are reported by Getopt::Long itself;
# show the usage instead of silently continuing with a partial option set.
GetOptions
  ($options,
   'help|h',
   'hanovpp',
   'hanov',
   'urban',
   'pearson',
   'pearson8',
   'pearsonnp',
   'cmph-bdz_ph', 'cmph-bdz', 'cmph-bmz', 'cmph-chm', 'cmph-fch', 'cmph-chd_ph', 'cmph-chd',
   #'gperf', 'bob', 'switch', 'cuckoo',
   # and many more ... get them from %algo_methods?
   'false-positives!',
   'nul',
   '7bit',
   'prefix=s',
   'for-c!',
   'for-xs',
   # get them dynamically?
   'for-java',
   'for-ruby',
   'for-python',
   'for-php',
   'for-pecl',
   'ignore-case',
   'unicode-ignore-case',
   'optimal-speed',
   'optimal-size',
   'minimal',
  ) or _usage_and_die();
helpme() if opt('help'); # And exit
my $keyfile = shift or _usage_and_die();

# Translate the parsed options into the "-name [value]" list expected by
# Perfect::Hash. Only --prefix carries a value. A negated flag
# (--no-false-positives, --no-for-c) is stored as 0 and must not be
# forwarded, otherwise it would switch the option on.
my %takes_value = (prefix => 1);
my @options;
for my $name (keys %$options) {
  my $value = $options->{$name};
  next if !$takes_value{$name} and !$value;
  push @options, "-".$name, ($takes_value{$name} ? $value : ());
}

my $ph = new Perfect::Hash $keyfile, @options;
my @fmt = grep /^-for/, @options;
if (@fmt) {
  for (@fmt) {
    s/^-for-//;
    my $class = "Perfect::Hash::" . uc($_);
    eval "require $class;";
    my $save = "save_".$_;
    $ph->$save($options->{prefix});
  }
} else {
  $ph->save_c($options->{prefix});
}

# Local Variables:
#   mode: cperl
#   cperl-indent-level: 2
#   fill-column: 80
# End:
# vim: expandtab shiftwidth=2:
!NO!SUBS!

close OUT or die "Can't close $file: $!";
chmod 0755, $file or die "Can't reset permissions for $file: $!\n";
exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';
chdir $origdir;
--------------------------------------------------------------------------------
/t/00basic.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
use Test::More;
use Perfect::Hash;

use lib 't';
require "test.pl";

my ($default, $methods, $opts) = opt_parse_args();
$methods = [ grep(!/^-cmph/, @$methods) ];

plan tests => scalar(@$methods);

my %dict = map {chr $_ => $_-48} (48..64);
delete $dict{'\\'};
for my $m (@$methods) {
  my $ph = new Perfect::Hash \%dict, $m, @$opts;
  unless ($ph) {
    ok(1, "SKIP empty ph $m");
    next;
  }
 TODO: {
    local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
    my $ok = 1;
    for my $w (sort keys %dict) {
      my $o = ord $w;
      my $v =
$ph->perfecthash($w);
      # the expected value of each one-character key is its code point minus 48
      $ok = 0 if !defined($v) or $v != $o - 48;
      unless ($ok) {
        is(defined($v)?$v:"", $o - 48, "method '$m' for '$w' => ".(defined($v)?$v:""));
        last;
      }
    }
    # exactly one test per method: either the failing is() above or this ok()
    $ok ? ok($ok, "method '$m'") : 0;
  }
}
--------------------------------------------------------------------------------
/t/01words.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
use Test::More;
use Perfect::Hash;

use lib 't';
require "test.pl";

my ($default, $methods, $opts) = opt_parse_args();

# three passes per method: arrayref, hashref and keyfile input
plan tests => 3 * scalar(@$methods);

my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words20");
my @dict = @$dictarr;

# pass 1: dictionary given as arrayref, the expected value is the array index
for my $m (@$methods) {
  my $ph = new Perfect::Hash \@dict, $m, @$opts;
  unless ($ph) {
    ok(1, "SKIP empty pperf $m");
    next;
  }
 TODO: {
    local $TODO = "$m pure-perl" if exists $Perfect::Hash::algo_todo{$m};
    my $ok = 1;
    my $i = 0;
    for my $w (@dict) {
      my $v = $ph->perfecthash($w);
      $ok = 0 if !defined($v) or $v ne $i;
      unless ($ok) {
        is(defined($v)?$v:"", $i, "method $m with arrayref for '$w' => ".(defined($v)?$v:""));
        last;
      }
      $i++;
    }
    $ok ? ok($ok, "method $m with arrayref") : 0;
  }
}

# pass 2: dictionary given as hashref, the expected value is the stored value
my $line = 0;
my %dict = map { $_ => $line++ } @dict;
for my $m (@$methods) {
  my $ph = new Perfect::Hash \%dict, $m, @$opts;
  unless ($ph) {
    ok(1, "SKIP empty pperf $m");
    next;
  }
 TODO: {
    local $TODO = "$m pure-perl" if exists $Perfect::Hash::algo_todo{$m};
    my $ok = 1;
    for my $w (sort keys %dict) {
      my $v = $ph->perfecthash($w);
      $ok = 0 if !defined($v) or $v ne $dict{$w};
      unless ($ok) {
        is(defined($v)?$v:"", $dict{$w}, "method $m with hashref for '$w' => ".(defined($v)?$v:""));
        last;
      }
    }
    $ok ? ok($ok, "method $m with hashref") : 0;
  }
}

# pass 3: dictionary given as $dict, as returned by opt_dict_size()
for my $m (@$methods) {
  my $ph = new Perfect::Hash $dict, $m, @$opts;
  unless ($ph) {
    ok(1, "SKIP empty pperf $m");
    next;
  }
 TODO: {
    local $TODO = "$m pure-perl" if exists $Perfect::Hash::algo_todo{$m};
    my $ok = 1;
    my $i = 0;
    for my $w (@dict) {
      my $v = $ph->perfecthash($w);
      $ok = 0 if !defined($v) or $v ne $i;
      unless ($ok) {
        is(defined($v)?$v:"", $i, "method $m with keyfile for '$w' => ".(defined($v)?$v:""));
        last;
      }
      $i++;
    }
    $ok ? ok($ok, "method $m with keyfile") : 0;
  }
}
--------------------------------------------------------------------------------
/t/02sysdict.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
# pure perl only
use Test::More;
use Perfect::Hash;

use lib 't';
require "test.pl";

my ($dict, $dictarr, $size, $custom_size);
# take the first word list that exists on this system
for (qw(examples/words /usr/share/dict/words /usr/dict/words /opt/local/share/dict/words)) {
  if (-e $_) { $dict = $_; last }
}
# NOTE(review): $dict is still undef here when none of the files exists,
# so the -e below then tests an undefined value.
plan skip_all => "no system dict found" unless -e $dict;

my ($default, $methods, $opts) = opt_parse_args('-max-time',10);
$methods = [''] if $default;
plan tests => scalar(@$methods);
($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, $dict);

for my $m (@$methods) {
  diag "generating $m ph for $size entries in $dict..."
if $ENV{TEST_VERBOSE};
  my $t0 = [gettimeofday];
  my $ph = new Perfect::Hash $dict, $m, @$opts;
  diag "done in ",tv_interval($t0),"s\n" if $ENV{TEST_VERBOSE};
  unless ($ph) {
    ok(1, "SKIP empty pperf $m");
    next;
  }
 TODO: {
    local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
    my $ok = 1;
    my $i = 0;
    for my $w (@$dictarr) {
      my $v = $ph->perfecthash($w);
      $ok = 0 if !defined($v) or $v ne $i;
      unless ($ok) {
        # $v may be undef here, so do not concatenate it unguarded
        is($v, $i, "method $m for $i-th '$w' => ".(defined($v)?$v:""));
        last;
      }
      $i++;
    }
    $ok ? ok($ok, "checked all $size words with method $m") : 0;
  }
}
--------------------------------------------------------------------------------
/t/03false-positives.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
use Test::More;
use Perfect::Hash;

use lib 't';
require "test.pl";

my ($default, $methods, $opts) = opt_parse_args();
$methods = [ grep {$_ ne '-cuckoo'} @$methods ];
# two tests per method: without and with -false-positives
plan tests => 2*scalar(@$methods);
my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words20");
my $small_dict = $size > 255 ? "examples/words20" : $dict;

for my $m (@$methods) {
  my $ph = new Perfect::Hash($m eq '-pearson8' ? $small_dict : $dict, $m, @$opts);
  unless ($ph) {
    ok(1, "SKIP empty pperf $m");
    ok(1, "SKIP");
    next;
  }
  # 'good' is looked up as a key that is expected to be absent
  my $w = 'good';
  my $v = $ph->perfecthash($w);
 TODO: {
    local $TODO = "$m" if $m =~ /^-cmph/;
    my $vs = defined $v ? "$v" : 'undef';
    if ($ph->false_positives) {
      # this really should not happen!
      ok(defined($v) && $v >= 0, "method $m without false-positives '$w' => $vs");
    } else {
      is($v, undef, "method $m without false-positives '$w' => $vs");
    }
  }

  my $ph1 = new Perfect::Hash($m eq '-pearson8' ? $small_dict : $dict, $m, @$opts, '-false-positives');
  # the constructor may return nothing, exactly like above; keep the plan
  # intact instead of dying on a method call on undef
  unless ($ph1) {
    ok(1, "SKIP empty pperf $m -false-positives");
    next;
  }
  $v = $ph1->perfecthash($w);
 TODO: {
    local $TODO = "$m" if $m =~ /^-(cmph-|pearson)/;
    my $vs = defined $v ? "$v" : 'undef';
    if ($ph1->false_positives) {
      ok(defined($v) && $v >= 0, "method $m with false_positives '$w' => $vs");
    } else {
      is($v, undef, "method $m without false_positives '$w' => $vs");
    }
  }
}
--------------------------------------------------------------------------------
/t/04save_c.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
use Test::More;
use Perfect::Hash;

#use Config;
use lib 't';
require "test.pl";

my ($default, $methods, $opts) = opt_parse_args();

# five tests per method: generated, compiled, lookup exists,
# lookup notexists, could run
plan tests => 5 * scalar(@$methods);

my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words500");
my $small_dict = $size > 255 ? "examples/words20" : $dict;

# CHM passes pure-perl, but not compiled yet
$Perfect::Hash::algo_todo{'-cmph-chm'} = 1;
$Perfect::Hash::algo_todo{'-bob'} = 1;
$Perfect::Hash::algo_todo{'-pearson16'} = 1;

my $i = 0;
for my $m (@$methods) {
  my $used_dict = $m eq '-pearson8'
    ? $small_dict
    : ($m eq '-gperf' or $custom_size)
      ? $dictarr
      : $dict;
  my $ph = new Perfect::Hash($used_dict, $m, @$opts);
  unless ($ph) {
    ok(1, "SKIP empty pperf $m");
    ok(1) for 1..4;
    $i++;
    next;
  }
  if ($m =~ /^-cmph/) {
    ok(1, "SKIP nyi save_c for $m");
    ok(1) for 1..4;
    $i++;
    next;
  }
  my ($nul) = grep {$_ eq '-nul'} @$opts;
  my ($shared) = grep {$_ eq '-shared'} @$opts;
  my $suffix = $m eq "-bob" ? "_hash" : "";
  my $base = "pperf$suffix";
  my $out = "$base.c";
  test_wmain($m, 1, 'AOL', $ph->perfecthash('AOL'), $suffix, $nul);
  $i++;
  $ph->save_c($base);
  if (ok(-f "$base.c" && -f "$base.h", "$m generated pperf.c/.h")) {
    my ($cmd, $cmd1);
    if ($shared) {
      $cmd = compile_shared($ph, $suffix);
      $cmd1 = link_shared($ph, $suffix);
    } else {
      $cmd = compile_static($ph, $suffix);
    }
    diag($cmd) if $ENV{TEST_VERBOSE};
    my $retval = system($cmd);
    if (!($retval>>8) and $cmd1) {
      print "$cmd1\n" if $ENV{TEST_VERBOSE};
      $retval = system($cmd1);
    }
    if (ok(!($retval>>8), "could compile $m")) {
      my $callprefix = $^O eq 'MSWin32' ? ""
        : $^O eq 'darwin' ? "DYLD_LIBRARY_PATH=. ./"
        : "LD_LIBRARY_PATH=. ./";
      my $retstr = `${callprefix}$base`;
      $retval = $?;
     TODO: {
        local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m} and $m !~ /^-cmph/;
        like($retstr, qr/^ok - c lookup exists/m, "$m c lookup exists");
      }
     TODO: {
        local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
        like($retstr, qr/^ok - c lookup notexists/m, "$m c lookup notexists");
      }
    } else {
      ok(1, "SKIP") for 0..1;
    }
   TODO: {
      local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
      ok(!($retval>>8), "could run $m");
    }
  } else {
    ok(1, "SKIP") for 0..3;
  }
  # same cleanup list as the sibling save_c tests: the driver source carries
  # the suffix too (identical to main.c for the empty suffix)
  unlink($base,"$base.c","$base.h","main$suffix.c") if $default;
}
--------------------------------------------------------------------------------
/t/05save_c_nul.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
use Test::More;
use Perfect::Hash;

use lib 't';
require "test.pl";

# NOTE(review): the sibling tests call opt_parse_args(); confirm that
# t/test.pl really provides test_parse_args().
my ($default, $methods, $opts) = test_parse_args("-nul");

# five tests per method: generated, compiled, lookup exists,
# lookup notexists, could run
plan tests => 5 * scalar(@$methods);
my ($dict, $dictarr, $size, $custom_size) = opt_dict_size($opts, "examples/words500");
my $small_dict = $size > 255 ? "examples/words20" : $dict;

# CHM passes pure-perl, but not compiled yet
$Perfect::Hash::algo_todo{'-cmph-chm'} = 1;
$Perfect::Hash::algo_todo{'-bob'} = 1;
$Perfect::Hash::algo_todo{'-pearson16'} = 1;

my $i = 0;
my $key = "AOL";

for my $m (@$methods) {
  my $used_dict = $m eq '-pearson8'
    ? $small_dict
    : ($m eq '-gperf' or $custom_size)
      ? $dictarr
      : $dict;
  my $ph = new Perfect::Hash($used_dict, $m, @$opts, "-nul");
  unless ($ph) {
    ok(1, "SKIP empty pperf $m");
    ok(1) for 1..4;
    $i++;
    next;
  }
  if ($m =~ /^-cmph/) {
    ok(1, "SKIP nyi save_c for $m");
    ok(1) for 1..4;
    $i++;
    next;
  }
  my $suffix = $m eq "-bob" ? "_hash" : "_nul";
  my $base = "pperf$suffix";
  my $out = "$base.c";
  test_wmain($m, 1, $key, $ph->perfecthash($key), $suffix, 1);
  $i++;
  $ph->save_c($base);
  if (ok(-f "$base.c" && -f "$base.h", "$m generated $base.c/.h")) {
    my $cmd = compile_static($ph, $suffix);
    diag($cmd) if $ENV{TEST_VERBOSE};
    my $retval = system($cmd);
    if (ok(!($retval>>8), "could compile $m")) {
      my $retstr = $^O eq 'MSWin32' ? `$base` : `./$base`;
      $retval = $?;
     TODO: {
        local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m} and $m !~ /^-cmph/;
        like($retstr, qr/^ok - c lookup exists/m, "$m c lookup exists");
      }
     TODO: {
        local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m};
        like($retstr, qr/^ok - c lookup notexists/m, "$m c lookup notexists");
      }
    } else {
      ok(1, "SKIP") for 1..2;
    }
   TODO: {
      local $TODO = "$m" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
      ok(!($retval>>8), "could run $m");
    }
  } else {
    # four tests remain after the failed "generated" check; only three were
    # emitted before, which broke the plan of 5 per method
    ok(1, "SKIP") for 1..4;
  }
  unlink("$base","$base.c","$base.h","main$suffix.c") if $default;
}
--------------------------------------------------------------------------------
/t/06save_c_utf8.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
use Test::More;
use Perfect::Hash;

use bytes;
use lib 't';
require "test.pl";

my ($default, $methods, $opts) = opt_parse_args('-max-time', 10);

plan tests => 4 * scalar(@$methods);

my ($dict, $dictarr, $size) = opt_dict_size($opts, "examples/utf8");
my @dict = @$dictarr;

# CMPH worked fine for some time
#delete $Perfect::Hash::algo_todo{'-cmph-bdz_ph'};
#delete $Perfect::Hash::algo_todo{'-cmph-bdz'};
#delete $Perfect::Hash::algo_todo{'-cmph-bmz'};
delete $Perfect::Hash::algo_todo{'-cmph-chm'};
#delete $Perfect::Hash::algo_todo{'-cmph-fch'};
#delete $Perfect::Hash::algo_todo{'-cmph-chd_ph'};
#delete $Perfect::Hash::algo_todo{'-cmph-chd'};
$Perfect::Hash::algo_todo{'-bob'} = 1;
$Perfect::Hash::algo_todo{'-pearson16'} = 1;

my @small_dict = @dict[0..200];
my $i = 0;
#my $suffix = "_utf8";

for my $m (@$methods) {
  my $used_dict = $m eq '-pearson8'
    ? \@small_dict
    : $m eq '-gperf'
      ?
$dictarr
      : $dict;
  my $ph = new Perfect::Hash($used_dict, $m, @$opts);
  unless ($ph) {
    ok(1, "SKIP empty pperf $m") for 1..4;
    $i++;
    next;
  }
  my $suffix = $m eq "-bob" ? "_hash" : "_utf8";
  my $base = "pperf$suffix";
  test_wmain_all($m, \@dict, $opts, $suffix);
  $i++;
  $ph->save_c($base);
  # utf8 seqs being split on word boundaries with -switch in comments caused
  # emacs display a randomly wrong encoding - mojibake.
  # NOTE(review): the result of this open is not checked.
  open my $FH, ">>", "$base.c";
  print $FH "/*\nLocal variables:\n  mode: C\n  coding: utf-8-unix\nEnd:\n*/";
  close $FH;
  # four tests per method: generated, compiled, no lookup errors, could run
  if (ok(-f "$base.c" && -f "$base.h", "$m generated $base.c/.h")) {
    my $cmd = compile_static($ph, $suffix);
    diag($cmd) if $ENV{TEST_VERBOSE};
    my $retval = system($cmd);
    if (ok(!($retval>>8), "could compile $m")) {
      my $retstr = $^O eq 'MSWin32' ? `$base` : `./$base`;
      $retval = $?;
     TODO: {
        local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m};
        is($retval>>8, 0, "no c lookup errors $m");
        diag($retstr) if $retval>>8 and $ENV{TEST_VERBOSE};
      }
    } else {
      ok(1, "SKIP !compile");
    }
   TODO: {
      local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
      ok(!($retval>>8), "could run $m");
    }
  } else {
    ok(1, "SKIP !save_c") for 1..3;
  }
  unlink("$base","$base.c","$base.h","main$suffix.c") if $default;
}
--------------------------------------------------------------------------------
/t/07save_c_pic.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl -w
use Test::More;
use Perfect::Hash;

use bytes;
use lib 't';
require "test.pl";

my ($default, $methods, $opts) = opt_parse_args('-max-time', 10);

# four tests per method: generated, compiled+linked, no lookup errors, could run
plan tests => 4 * scalar(@$methods);

my ($dict, $dictarr, $size) = opt_dict_size($opts, "examples/words500");
my @dict = @$dictarr;

# CMPH worked fine for some time
#delete $Perfect::Hash::algo_todo{'-cmph-bdz_ph'};
#delete $Perfect::Hash::algo_todo{'-cmph-bdz'};
#delete $Perfect::Hash::algo_todo{'-cmph-bmz'};
delete $Perfect::Hash::algo_todo{'-cmph-chm'};
#delete $Perfect::Hash::algo_todo{'-cmph-fch'};
#delete $Perfect::Hash::algo_todo{'-cmph-chd_ph'};
#delete $Perfect::Hash::algo_todo{'-cmph-chd'};
$Perfect::Hash::algo_todo{'-bob'} = 1;
$Perfect::Hash::algo_todo{'-pearson16'} = 1;

my @small_dict = @dict[0..200];
my $i = 0;

for my $m (@$methods) {
  my $used_dict = $m eq '-pearson8'
    ? \@small_dict
    : $m eq '-gperf'
      ? $dictarr
      : $dict;
  my $ph = new Perfect::Hash($used_dict, $m, "-pic", @$opts);
  unless ($ph) {
    ok(1, "SKIP empty pperf $m") for 1..4;
    $i++;
    next;
  }
  my $suffix = $m eq "-bob" ? "_hash" : "_pic";
  my $base = "pperf$suffix";
  test_wmain_all($m, \@dict, $opts, $suffix);
  $i++;
  $ph->save_c($base);
  # utf8 seqs being split on word boundaries with -switch in comments caused
  # emacs display a randomly wrong encoding - mojibake.
  # NOTE(review): the result of this open is not checked.
  open my $FH, ">>", "$base.c";
  print $FH "/*\nLocal variables:\n  mode: C\n  coding: utf-8-unix\nEnd:\n*/";
  close $FH;
  if (ok(-f "$base.c" && -f "$base.h", "$m generated $base.c/.h")) {
    my $cmd = compile_shared($ph, $suffix);
    diag($cmd) if $ENV{TEST_VERBOSE};
    my $cmd1 = link_shared($ph, $suffix);
    my $retval = system($cmd);
    # link only when the compile step succeeded
    if (!($retval>>8)) {
      print "$cmd1\n" if $ENV{TEST_VERBOSE};
      $retval = system($cmd1);
    }
    if (ok(!($retval>>8), "could compile $m")) {
      # run from the current directory and let the loader look there as well
      my $callprefix = $^O eq 'MSWin32' ? ""
        : $^O eq 'darwin' ? "DYLD_LIBRARY_PATH=. ./"
        : "LD_LIBRARY_PATH=. ./";
      my $retstr = `${callprefix}$base`;
      $retval = $?;
     TODO: {
        local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m};
        is($retval>>8, 0, "no c lookup errors $m");
        diag($retstr) if $retval>>8 and $ENV{TEST_VERBOSE};
      }
    } else {
      ok(1, "SKIP !compile");
    }
   TODO: {
      local $TODO = "$m not yet" if exists $Perfect::Hash::algo_todo{$m}; # will return errcodes
      ok(!($retval>>8), "could run $m");
    }
  } else {
    ok(1, "SKIP !save_c") for 1..3;
  }
  unlink("$base","$base.c","$base.h","main$suffix.c") if $default;
}
--------------------------------------------------------------------------------
/t/z_kwalitee.t:
--------------------------------------------------------------------------------
use strict;
use warnings;

use Test::More;

# author-only test: needs a git checkout (or IS_MAINTAINER) and one of
# AUTHOR_TESTING / RELEASE_TESTING
plan skip_all => 'This test is only run for the module author'
  unless -d '.git' || $ENV{IS_MAINTAINER};
plan skip_all => 'This test requires RELEASE_TESTING or AUTHOR_TESTING'
  if !$ENV{AUTHOR_TESTING} and !$ENV{RELEASE_TESTING};

use File::Copy 'cp';
# make sure a META.yml exists, copied from MYMETA.yml when it is missing
cp('MYMETA.yml','META.yml') if -e 'MYMETA.yml' and !-e 'META.yml';

eval { require Test::Kwalitee; Test::Kwalitee->import(tests => ['-no_symlinks']) };
plan skip_all => "Test::Kwalitee needed for testing kwalitee"
  if $@;
--------------------------------------------------------------------------------
/t/z_meta.t:
--------------------------------------------------------------------------------
#!/usr/bin/perl

# Test that our META.yml file matches the current specification.
use strict;
BEGIN {
    $|  = 1;
    $^W = 1;
}

# Module name and minimum version, interpolated into the string eval below.
my $MODULE = 'Test::CPAN::Meta 0.12';

# Don't run tests for installs
use Test::More;
plan skip_all => 'This test is only run for the module author'
    unless -d '.git' || $ENV{IS_MAINTAINER};
plan skip_all => 'META is autogenerated'
    unless -e 'META.yml';

# Load the testing module
eval "use $MODULE;";
if ( $@ ) {
    # "plan skip_all" exits the script, so the hard failure has to be raised
    # before it.  It is limited to RELEASE_TESTING so that ordinary checkouts
    # without the module still skip.  $MODULE already carries the version.
    die "Failed to load required release-testing module $MODULE"
        if $ENV{RELEASE_TESTING};
    plan( skip_all => "$MODULE not available for testing" );
}
meta_yaml_ok();
--------------------------------------------------------------------------------
/t/z_pod-coverage.t:
--------------------------------------------------------------------------------
# Author-only POD coverage check: skipped unless run from a git checkout (or
# with IS_MAINTAINER set) and with AUTHOR_TESTING or RELEASE_TESTING enabled.
use strict;
use warnings;

use Test::More;

plan skip_all => 'This test is only run for the module author'
    unless -d '.git' || $ENV{IS_MAINTAINER};
plan skip_all => 'This test requires RELEASE_TESTING or AUTHOR_TESTING'
    if !$ENV{AUTHOR_TESTING} and !$ENV{RELEASE_TESTING};

eval "use Test::Pod::Coverage 1.04";
plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage"
    if $@;

# Names matching /constant/ are accepted without documentation.
all_pod_coverage_ok( { trustme => [ qr/constant/ ] } );
--------------------------------------------------------------------------------
/t/z_pod.t:
--------------------------------------------------------------------------------
# -*- perl -*-
# POD syntax check; skipped when Test::Pod 1.00 cannot be loaded.
use Test::More;
eval "use Test::Pod 1.00";
plan skip_all => "Test::Pod 1.00 required for testing POD" if $@;
all_pod_files_ok();
--------------------------------------------------------------------------------