├── perl ├── BUGS ├── examples │ ├── 0t.sh │ └── referers.pl ├── cat_ed │ ├── folder.gif │ ├── unknown.gif │ └── config.pl ├── INSTALL └── README ├── README.md ├── debian ├── compat ├── stamp-autotools-files ├── stamp-makefile-build ├── dirs ├── docs ├── cron.d.ex ├── menu.ex ├── dpsearch.500mod_dpsearch.info ├── dpsearch.postrm.debhelper ├── dpsearch.postinst.debhelper ├── README ├── README.Debian ├── dpsearch-default.ex ├── rules~ ├── semantic.cache ├── emacsen-remove.ex ├── dpsearch.doc-base.EX ├── watch.ex ├── preinst.ex ├── rules ├── prerm.ex ├── postrm.ex ├── postinst.ex └── copyright ├── create ├── ibase │ ├── crc-multi.txt │ ├── drop.crc-multi.txt │ ├── drop.cross-crc.txt │ ├── drop.multi.txt │ ├── cross-crc.txt │ └── drop.txt ├── mimer │ ├── crc-multi.txt │ ├── drop.crc-multi.txt │ ├── drop.cross-crc.txt │ ├── drop.multi.txt │ ├── cross-crc.txt │ └── drop.txt ├── oracle │ ├── drop.multi.txt │ ├── drop.crc-multi.txt │ ├── drop.cross-crc.txt │ ├── cross-crc.txt │ └── drop.txt ├── mysql │ ├── drop.crc.txt │ ├── drop.cross-crc.txt │ ├── crc.txt │ ├── drop.multi.txt │ ├── drop.crc-multi.txt │ ├── drop.txt │ └── cross-crc.txt ├── pgsql │ ├── drop.crc.txt │ ├── drop.cross.txt │ ├── drop.cross-crc.txt │ ├── crc.txt │ ├── drop.multi.txt │ ├── drop.crc-multi.txt │ ├── cross-crc.txt │ ├── grant.txt │ ├── cross.txt │ └── drop.txt ├── sqlite │ ├── drop.crc.txt │ ├── drop.cross-crc.txt │ ├── crc.txt │ ├── drop.multi.txt │ ├── drop.crc-multi.txt │ ├── drop.txt │ └── cross-crc.txt ├── mssql │ ├── drop.cross-crc.txt │ ├── drop.multi.txt │ ├── drop.crc-multi.txt │ ├── cross-crc.txt │ └── drop.txt └── Makefile.am ├── test ├── test-revalias │ ├── htdocs │ │ ├── a.txt │ │ ├── b.txt │ │ └── c.txt │ ├── url.tst │ ├── dict.tst │ ├── url0.res │ ├── indexer1.conf │ ├── url1.res │ ├── dict.res │ └── indexer0.conf ├── test-boolean │ ├── search.res │ ├── search5.res │ ├── search7.res │ ├── search8.res │ ├── htdocs │ │ ├── body2.txt │ │ ├── body1.txt │ │ ├── anyword.txt │ │ ├── 
body3.txt │ │ ├── body5.txt │ │ ├── body6.txt │ │ ├── body4.txt │ │ ├── body7.txt │ │ ├── body8.txt │ │ ├── body9.txt │ │ ├── body10.txt │ │ ├── body11.txt │ │ ├── body12.txt │ │ ├── body13.txt │ │ ├── body14.txt │ │ ├── body15.txt │ │ ├── body16.txt │ │ ├── body17.txt │ │ ├── body18.txt │ │ ├── body19.txt │ │ ├── body20.txt │ │ └── body-not-near.txt │ ├── search2.res │ ├── acr.conf │ ├── search6.res │ ├── search4.res │ ├── search3.res │ └── query.tst ├── test-sample │ ├── htdocs │ │ ├── test1.txt │ │ ├── z-clone1.txt │ │ ├── test3.html │ │ ├── test2.html │ │ ├── z-clone2.html │ │ ├── test1.html │ │ ├── test5.html │ │ └── test4.html │ └── query.tst ├── test-hrefonly │ ├── htdocs │ │ ├── test1.txt │ │ ├── test3.html │ │ ├── test1.html │ │ └── test2.html │ ├── query.tst │ ├── indexer.conf │ └── test.cmd ├── test-include │ ├── htdocs │ │ ├── test1.txt │ │ ├── test3.html │ │ ├── test1.html │ │ └── test2.html │ ├── indexer.conf │ ├── query.tst │ ├── include.conf │ └── test.cmd ├── test-notfound │ ├── htdocs │ │ ├── test1.txt │ │ ├── test1.html │ │ └── test2.html │ ├── query.tst │ ├── indexer.conf │ └── test.cmd ├── test-sections │ ├── match.sl │ ├── htdocs │ │ ├── test.pdf │ │ ├── directory │ │ │ ├── test-tz.html │ │ │ └── test.html │ │ ├── test2.html │ │ └── test1.html │ └── query.tst ├── svn-commit.tmp~ ├── test-complexbody │ ├── htdocs │ │ ├── test2.html │ │ └── test1.html │ ├── query.tst │ └── test.cmd ├── test-notmodified │ ├── htdocs │ │ ├── test1.html │ │ ├── test2.html │ │ ├── test3.html │ │ └── test4.html │ ├── query.tst │ ├── indexer.conf │ └── test.cmd ├── test-cache │ ├── htdocs │ │ ├── test.html │ │ ├── ispattern.html │ │ ├── testpage12.html │ │ ├── testpage7.html │ │ ├── testpage5.html │ │ ├── testpage13.html │ │ ├── testpage4.html │ │ ├── testpage6.html │ │ ├── testpage1.html │ │ ├── testpage3.html │ │ ├── testpage2.html │ │ ├── testpage10.html │ │ └── testpage11.html │ └── query.tst ├── test-charset │ ├── htdocs │ │ ├── ZH.html │ │ ├── JP.html │ │ ├── 
segmented.gb2312.txt │ │ ├── segmented.utf-8.txt │ │ ├── unsegmented.gb2312.txt │ │ └── unsegmented.utf-8.txt │ ├── query.tst │ └── test.cmd ├── test-multi │ ├── htdocs │ │ ├── test2.html │ │ ├── test4.html │ │ ├── test1.txt │ │ ├── test3.html │ │ └── test1.html │ └── indexer.conf ├── test-crc-multi │ ├── htdocs │ │ ├── test2.html │ │ ├── test4.html │ │ ├── test1.txt │ │ ├── test3.html │ │ └── test1.html │ └── indexer.conf ├── test-parsehtml │ ├── htdocs │ │ ├── index.html │ │ ├── accept.html │ │ ├── refresh.html │ │ ├── xhtml1-missing-doctype-and-xmlns.xhtml │ │ ├── display.html │ │ ├── no-newlines.html │ │ ├── ispattern.html │ │ ├── xhtml1-strict-missing-xmlns.xhtml │ │ ├── xhtml1-strict-minimal.xhtml │ │ ├── html40-strict.html │ │ ├── bogus-fpi.html │ │ ├── xhtml1-blank-1st-line.xhtml │ │ ├── html40-transitional.html │ │ ├── html40-frameset.html │ │ └── xhtml1-strict.xhtml │ ├── search-j.res │ ├── query.tst │ └── json.htm ├── test-mailto │ ├── htdocs │ │ └── test.html │ ├── indexer.conf │ ├── query.tst │ └── test.cmd ├── test-sections2 │ ├── htdocs │ │ ├── test2.html │ │ └── test3.html │ └── query.tst ├── test-follow │ ├── htdocs2 │ │ └── test.html │ ├── htdocs1 │ │ └── test.html │ ├── query.tst │ ├── indexer.conf │ └── test.cmd ├── test-quotes │ ├── htdocs │ │ └── test1.txt │ ├── query.tst │ ├── indexer.conf │ └── test.cmd ├── test-cached │ ├── htdocs │ │ ├── ispattern.html │ │ ├── testpage12.html │ │ ├── testpage7.html │ │ ├── testpage5.html │ │ ├── testpage13.html │ │ ├── testpage4.html │ │ ├── testpage6.html │ │ ├── testpage1.html │ │ ├── testpage3.html │ │ ├── testpage2.html │ │ ├── testpage10.html │ │ └── testpage11.html │ ├── cached.conf │ └── query.tst ├── test-revalias1 │ ├── query.tst │ ├── htdocs │ │ └── index.html │ ├── query.res │ ├── indexer.conf │ └── test.cmd ├── dps_test-run ├── test-searchd │ ├── query.tst │ ├── searchd.conf │ └── htdocs │ │ ├── testpage12.html │ │ ├── testpage7.html │ │ ├── testpage5.html │ │ ├── testpage13.html │ │ ├── 
testpage4.html │ │ ├── testpage6.html │ │ ├── testpage1.html │ │ ├── testpage3.html │ │ ├── testpage2.html │ │ ├── testpage10.html │ │ └── testpage11.html └── README ├── doc ├── catalog ├── book-ru.xml ├── bugs-ru.xml ├── cjk-ru.xml ├── data-ru.xml ├── db2-ru.xml ├── htdb-ru.xml ├── lib-ru.xml ├── misc-ru.xml ├── mp3-ru.xml ├── news-ru.xml ├── perf-ru.xml ├── syn-ru.xml ├── tags-ru.xml ├── vary-ru.xml ├── accent-ru.xml ├── aspell-ru.xml ├── cache-ru.xml ├── follow-ru.xml ├── groups-ru.xml ├── index-ru.xml ├── index.ru.html ├── intro-ru.xml ├── ispell-ru.xml ├── mirror-ru.xml ├── search-ru.xml ├── stored-ru.xml ├── syslog-ru.xml ├── acronym-ru.xml ├── aliases-ru.xml ├── body-after.html ├── cachemode-ru.xml ├── charset-ru.xml ├── datapark-ru.dsl ├── dbschema-ru.xml ├── exec-cgi-ru.xml ├── general-ru.xml ├── httpcodes-ru.xml ├── indexcmd-ru.xml ├── install-ru.xml ├── parsers-ru.xml ├── relevancy-ru.xml ├── samples │ ├── README │ ├── favicon.ico │ ├── news.conf │ ├── minimal.conf │ ├── ftpsearch.conf │ └── local.conf ├── searchd-ru.xml ├── sql-stor-ru.xml ├── templates-ru.xml ├── tracking-ru.xml ├── categories-ru.xml ├── content-enc-ru.xml ├── cs-aliases-ru.xml ├── html-design-ru.xml ├── htmlparser-ru.xml ├── negotiation-ru.xml ├── servertable-ru.xml ├── dpsearch-cjk.ru.html ├── dpsearch-get.ru.html ├── dpsearch-html.ru.html ├── dpsearch-lib.ru.html ├── dpsearch-misc.ru.html ├── dpsearch-pars.ru.html ├── dpsearch-perf.ru.html ├── dpsearch-rel.ru.html ├── dpsearch-vary.ru.html ├── mod_dpsearch-ru.xml ├── dpsearch-clones.ru.html ├── dpsearch-follow.ru.html ├── dpsearch-fuzzy.ru.html ├── dpsearch-intro.ru.html ├── dpsearch-opsys.ru.html ├── dpsearch-oracle.ru.html ├── dpsearch-stored.ru.html ├── dpsearch-syslog.ru.html ├── dpsearch-track.ru.html ├── dpsearch-aliases.ru.html ├── dpsearch-authors.ru.html ├── dpsearch-cachemode.ru.html ├── dpsearch-dbschema.ru.html ├── dpsearch-donations.ru.html ├── dpsearch-howstore.ru.html ├── dpsearch-index-ru.ru.html ├── 
dpsearch-indexcmd.ru.html ├── dpsearch-indexing.ru.html ├── dpsearch-install.ru.html ├── dpsearch-multilang.ru.html ├── dpsearch-register.ru.html ├── dpsearch-searchd.ru.html ├── dpsearch-srcache.ru.html ├── dpsearch-srvtable.ru.html ├── dpsearch-stopwords.ru.html ├── dpsearch-subdocs.ru.html ├── dpsearch-templates.ru.html ├── dpsearch-toolsreq.ru.html ├── dpsearch-categories.ru.html ├── dpsearch-content-enc.ru.html ├── dpsearch-disclaimer.ru.html ├── dpsearch-doingsearch.ru.html ├── dpsearch-htmlparser.ru.html ├── dpsearch-http-codes.ru.html ├── dpsearch-installing.ru.html ├── dpsearch-quick-usage.ru.html ├── dpsearch-subsections.ru.html ├── dpsearch-binarydistrib.ru.html ├── dpsearch-installproblem.ru.html ├── dpsearch-international.ru.html ├── dpsearch-mod_dpsearch.ru.html ├── dpsearch-htmlparser-links.ru.html ├── dpsearch-htmlparser-meta.ru.html ├── dpsearch-htmlparser-spec.ru.html ├── dpsearch-extended-indexing.ru.html ├── dpsearch-htmlparser-comments.ru.html ├── dpsearch-htmlparser-bodypatterns.ru.html ├── README.html ├── body-before.html ├── datapark.css ├── db2.xml ├── vary.xml ├── aspell.xml └── accent.xml ├── AUTHORS ├── scripts └── Makefile.am ├── src ├── boolean.c ├── timezones.inc ├── timezones.lst ├── svn-commit.tmp~ ├── dp-Makefile.am └── charset-Makefile.am ├── misc ├── Makefile.am ├── dpsearch.gif ├── dpsearch2.gif ├── dpsearch3.gif └── dpsearch4.gif ├── etc ├── acronym │ ├── en.acr │ ├── ru.acr │ └── fr.acr ├── stopwords │ ├── ar.sl │ ├── ca.sl │ ├── ca2.sl │ ├── da.sl │ ├── da2.sl │ ├── de.sl │ ├── de2.sl │ ├── es.sl │ ├── es2.sl │ ├── fi.sl │ ├── fr.sl │ ├── fr2.sl │ ├── he.sl │ ├── hu.sl │ ├── hu2.sl │ ├── is.sl │ ├── it2.sl │ ├── ja.sl │ ├── lt.sl │ ├── lv.sl │ ├── no.sl │ ├── pl.sl │ ├── pt.sl │ ├── ru.sl │ ├── ru2.sl │ ├── sk.sl │ ├── th.sl │ ├── tr.sl │ ├── uk.sl │ ├── de.top100.sl │ ├── de.top1000.sl │ ├── fr.top100.sl │ ├── fr.top1000.sl │ ├── nl.top100.sl │ ├── nl.top1000.sl │ ├── ro.sl │ └── nl.sl ├── synonym │ ├── francais.syn │ ├── 
russian.syn │ └── russian.big.syn ├── stored.conf-dist └── stopwords.conf-dist ├── INSTALL ├── .pre-commit-config.yaml ├── .gitignore ├── README.XML ├── .clang-format ├── BUGS ├── .travis.yml ├── charset-Makefile.am ├── include ├── dps_image.h ├── dps_search_tl.h ├── dps_execget.h ├── dps_filter.h ├── dps_mkind.h ├── dps_alias.h ├── dps_env.h ├── dps_http.h └── dps_xmalloc.h └── dp-Makefile.am /perl/BUGS: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | README -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 5 2 | -------------------------------------------------------------------------------- /create/ibase/crc-multi.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /create/mimer/crc-multi.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /create/oracle/drop.multi.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /debian/stamp-autotools-files: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /debian/stamp-makefile-build: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /create/ibase/drop.crc-multi.txt: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /create/mimer/drop.crc-multi.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /create/oracle/drop.crc-multi.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /debian/dirs: -------------------------------------------------------------------------------- 1 | usr/bin 2 | usr/sbin 3 | -------------------------------------------------------------------------------- /create/mysql/drop.crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ndict; 2 | -------------------------------------------------------------------------------- /create/pgsql/drop.crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ndict; 2 | -------------------------------------------------------------------------------- /create/sqlite/drop.crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ndict; 2 | -------------------------------------------------------------------------------- /test/test-revalias/htdocs/a.txt: -------------------------------------------------------------------------------- 1 | aa aaa aaaa 2 | -------------------------------------------------------------------------------- /create/pgsql/drop.cross.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE crossdict; 2 | -------------------------------------------------------------------------------- /test/test-boolean/search.res: -------------------------------------------------------------------------------- 1 | No results found. 
2 | -------------------------------------------------------------------------------- /test/test-boolean/search5.res: -------------------------------------------------------------------------------- 1 | No results found. 2 | -------------------------------------------------------------------------------- /test/test-boolean/search7.res: -------------------------------------------------------------------------------- 1 | No results found. 2 | -------------------------------------------------------------------------------- /test/test-boolean/search8.res: -------------------------------------------------------------------------------- 1 | No results found. 2 | -------------------------------------------------------------------------------- /create/ibase/drop.cross-crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ncrossdict; 2 | -------------------------------------------------------------------------------- /create/mimer/drop.cross-crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ncrossdict; 2 | -------------------------------------------------------------------------------- /create/mssql/drop.cross-crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ncrossdict; 2 | -------------------------------------------------------------------------------- /create/mysql/drop.cross-crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ncrossdict; 2 | -------------------------------------------------------------------------------- /create/pgsql/drop.cross-crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ncrossdict; 2 | -------------------------------------------------------------------------------- /debian/docs: -------------------------------------------------------------------------------- 1 | BUGS 2 | README 3 | 
README.XML 4 | TODO 5 | -------------------------------------------------------------------------------- /doc/catalog: -------------------------------------------------------------------------------- 1 | ;CATALOG "/usr/local/share/sgml/catalog" 2 | -------------------------------------------------------------------------------- /create/sqlite/drop.cross-crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ncrossdict; 2 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Developers: 2 | * Maxim Zakharov 3 | -------------------------------------------------------------------------------- /create/oracle/drop.cross-crc.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ncrossdict 2 | / 3 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body2.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | Sak Val 4 | -------------------------------------------------------------------------------- /test/test-revalias/htdocs/b.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | bb bbb bbbb 6 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body1.txt: -------------------------------------------------------------------------------- 1 | body1 2 | abasement 3 | Val Sak 4 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/test1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 
2 | -------------------------------------------------------------------------------- /scripts/Makefile.am: -------------------------------------------------------------------------------- 1 | bin_SCRIPTS = dps-config 2 | sbin_SCRIPTS = run-splitter 3 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/anyword.txt: -------------------------------------------------------------------------------- 1 | master: igor body 2 | actor: sergey head 3 | -------------------------------------------------------------------------------- /test/test-hrefonly/htdocs/test1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 2 | -------------------------------------------------------------------------------- /test/test-include/htdocs/test1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 2 | -------------------------------------------------------------------------------- /test/test-notfound/htdocs/test1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 2 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/z-clone1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 
2 | -------------------------------------------------------------------------------- /doc/book-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/book-ru.xml -------------------------------------------------------------------------------- /doc/bugs-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/bugs-ru.xml -------------------------------------------------------------------------------- /doc/cjk-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/cjk-ru.xml -------------------------------------------------------------------------------- /doc/data-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/data-ru.xml -------------------------------------------------------------------------------- /doc/db2-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/db2-ru.xml -------------------------------------------------------------------------------- /doc/htdb-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/htdb-ru.xml -------------------------------------------------------------------------------- /doc/lib-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/lib-ru.xml -------------------------------------------------------------------------------- /doc/misc-ru.xml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/misc-ru.xml -------------------------------------------------------------------------------- /doc/mp3-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/mp3-ru.xml -------------------------------------------------------------------------------- /doc/news-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/news-ru.xml -------------------------------------------------------------------------------- /doc/perf-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/perf-ru.xml -------------------------------------------------------------------------------- /doc/syn-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/syn-ru.xml -------------------------------------------------------------------------------- /doc/tags-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/tags-ru.xml -------------------------------------------------------------------------------- /doc/vary-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/vary-ru.xml -------------------------------------------------------------------------------- /src/boolean.c: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/src/boolean.c -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body3.txt: 
-------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | 5 | Val ibn Sak 6 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body5.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | -------------------------------------------------------------------------------- /doc/accent-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/accent-ru.xml -------------------------------------------------------------------------------- /doc/aspell-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/aspell-ru.xml -------------------------------------------------------------------------------- /doc/cache-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/cache-ru.xml -------------------------------------------------------------------------------- /doc/follow-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/follow-ru.xml -------------------------------------------------------------------------------- /doc/groups-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/groups-ru.xml -------------------------------------------------------------------------------- /doc/index-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/index-ru.xml 
-------------------------------------------------------------------------------- /doc/index.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/index.ru.html -------------------------------------------------------------------------------- /doc/intro-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/intro-ru.xml -------------------------------------------------------------------------------- /doc/ispell-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/ispell-ru.xml -------------------------------------------------------------------------------- /doc/mirror-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/mirror-ru.xml -------------------------------------------------------------------------------- /doc/search-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/search-ru.xml -------------------------------------------------------------------------------- /doc/stored-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/stored-ru.xml -------------------------------------------------------------------------------- /doc/syslog-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/syslog-ru.xml -------------------------------------------------------------------------------- /misc/Makefile.am: 
-------------------------------------------------------------------------------- 1 | 2 | EXTRA_DIST= dpsearch.gif dpsearch2.gif dpsearch3.gif dpsearch4.gif 3 | -------------------------------------------------------------------------------- /misc/dpsearch.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/misc/dpsearch.gif -------------------------------------------------------------------------------- /perl/examples/0t.sh: -------------------------------------------------------------------------------- 1 | QUERY_STRING=apache 2 | export QUERY_STRING 3 | ./search.pl > 0.html 4 | -------------------------------------------------------------------------------- /src/timezones.inc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/src/timezones.inc -------------------------------------------------------------------------------- /src/timezones.lst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/src/timezones.lst -------------------------------------------------------------------------------- /doc/acronym-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/acronym-ru.xml -------------------------------------------------------------------------------- /doc/aliases-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/aliases-ru.xml -------------------------------------------------------------------------------- /doc/body-after.html: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/body-after.html -------------------------------------------------------------------------------- /doc/cachemode-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/cachemode-ru.xml -------------------------------------------------------------------------------- /doc/charset-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/charset-ru.xml -------------------------------------------------------------------------------- /doc/datapark-ru.dsl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/datapark-ru.dsl -------------------------------------------------------------------------------- /doc/dbschema-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dbschema-ru.xml -------------------------------------------------------------------------------- /doc/exec-cgi-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/exec-cgi-ru.xml -------------------------------------------------------------------------------- /doc/general-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/general-ru.xml -------------------------------------------------------------------------------- /doc/httpcodes-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/httpcodes-ru.xml 
-------------------------------------------------------------------------------- /doc/indexcmd-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/indexcmd-ru.xml -------------------------------------------------------------------------------- /doc/install-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/install-ru.xml -------------------------------------------------------------------------------- /doc/parsers-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/parsers-ru.xml -------------------------------------------------------------------------------- /doc/relevancy-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/relevancy-ru.xml -------------------------------------------------------------------------------- /doc/samples/README: -------------------------------------------------------------------------------- 1 | Here are some samples of config files for indexer, search.cgi, searchd. 
2 | -------------------------------------------------------------------------------- /doc/searchd-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/searchd-ru.xml -------------------------------------------------------------------------------- /doc/sql-stor-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/sql-stor-ru.xml -------------------------------------------------------------------------------- /doc/templates-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/templates-ru.xml -------------------------------------------------------------------------------- /doc/tracking-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/tracking-ru.xml -------------------------------------------------------------------------------- /etc/acronym/en.acr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/acronym/en.acr -------------------------------------------------------------------------------- /etc/acronym/ru.acr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/acronym/ru.acr -------------------------------------------------------------------------------- /etc/stopwords/ar.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/ar.sl -------------------------------------------------------------------------------- /etc/stopwords/ca.sl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/ca.sl -------------------------------------------------------------------------------- /etc/stopwords/ca2.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/ca2.sl -------------------------------------------------------------------------------- /etc/stopwords/da.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/da.sl -------------------------------------------------------------------------------- /etc/stopwords/da2.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/da2.sl -------------------------------------------------------------------------------- /etc/stopwords/de.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/de.sl -------------------------------------------------------------------------------- /etc/stopwords/de2.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/de2.sl -------------------------------------------------------------------------------- /etc/stopwords/es.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/es.sl -------------------------------------------------------------------------------- /etc/stopwords/es2.sl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/es2.sl -------------------------------------------------------------------------------- /etc/stopwords/fi.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/fi.sl -------------------------------------------------------------------------------- /etc/stopwords/fr.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/fr.sl -------------------------------------------------------------------------------- /etc/stopwords/fr2.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/fr2.sl -------------------------------------------------------------------------------- /etc/stopwords/he.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/he.sl -------------------------------------------------------------------------------- /etc/stopwords/hu.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/hu.sl -------------------------------------------------------------------------------- /etc/stopwords/hu2.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/hu2.sl -------------------------------------------------------------------------------- /etc/stopwords/is.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/is.sl 
-------------------------------------------------------------------------------- /etc/stopwords/it2.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/it2.sl -------------------------------------------------------------------------------- /etc/stopwords/ja.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/ja.sl -------------------------------------------------------------------------------- /etc/stopwords/lt.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/lt.sl -------------------------------------------------------------------------------- /etc/stopwords/lv.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/lv.sl -------------------------------------------------------------------------------- /etc/stopwords/no.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/no.sl -------------------------------------------------------------------------------- /etc/stopwords/pl.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/pl.sl -------------------------------------------------------------------------------- /etc/stopwords/pt.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/pt.sl -------------------------------------------------------------------------------- /etc/stopwords/ru.sl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/ru.sl -------------------------------------------------------------------------------- /etc/stopwords/ru2.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/ru2.sl -------------------------------------------------------------------------------- /etc/stopwords/sk.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/sk.sl -------------------------------------------------------------------------------- /etc/stopwords/th.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/th.sl -------------------------------------------------------------------------------- /etc/stopwords/tr.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/tr.sl -------------------------------------------------------------------------------- /etc/stopwords/uk.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/uk.sl -------------------------------------------------------------------------------- /misc/dpsearch2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/misc/dpsearch2.gif -------------------------------------------------------------------------------- /misc/dpsearch3.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/misc/dpsearch3.gif -------------------------------------------------------------------------------- /misc/dpsearch4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/misc/dpsearch4.gif -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body6.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | -------------------------------------------------------------------------------- /test/test-revalias/htdocs/c.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | cc ccc cccc 10 | -------------------------------------------------------------------------------- /test/test-sections/match.sl: -------------------------------------------------------------------------------- 1 | Language: en 2 | Charset: latin1 3 | 4 | Match: regex ^\$\#\# 5 | -------------------------------------------------------------------------------- /doc/categories-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/categories-ru.xml -------------------------------------------------------------------------------- /doc/content-enc-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/content-enc-ru.xml -------------------------------------------------------------------------------- /doc/cs-aliases-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/cs-aliases-ru.xml 
-------------------------------------------------------------------------------- /doc/html-design-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/html-design-ru.xml -------------------------------------------------------------------------------- /doc/htmlparser-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/htmlparser-ru.xml -------------------------------------------------------------------------------- /doc/negotiation-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/negotiation-ru.xml -------------------------------------------------------------------------------- /doc/servertable-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/servertable-ru.xml -------------------------------------------------------------------------------- /perl/cat_ed/folder.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/perl/cat_ed/folder.gif -------------------------------------------------------------------------------- /src/svn-commit.tmp~: -------------------------------------------------------------------------------- 1 | 2 | --This line, and those below, will be ignored-- 3 | 4 | M searchtool.c 5 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body4.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | 6 | Val ibn del Sak 7 | -------------------------------------------------------------------------------- 
/test/test-boolean/search2.res: -------------------------------------------------------------------------------- 1 | Total results found: 1, Results: 1-1 2 | .1 -- http://site/body20.txt 3 | -------------------------------------------------------------------------------- /test/test-include/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DIR 2 | 3 | Include $(DPS_TEST_DIR)/include.conf 4 | -------------------------------------------------------------------------------- /test/test-revalias/url.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT url, docsize, crc32 FROM url ORDER by docsize; 3 | -------------------------------------------------------------------------------- /doc/dpsearch-cjk.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-cjk.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-get.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-get.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-html.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-html.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-lib.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-lib.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-misc.ru.html: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-misc.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-pars.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-pars.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-perf.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-perf.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-rel.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-rel.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-vary.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-vary.ru.html -------------------------------------------------------------------------------- /doc/mod_dpsearch-ru.xml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/mod_dpsearch-ru.xml -------------------------------------------------------------------------------- /doc/samples/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/samples/favicon.ico -------------------------------------------------------------------------------- /etc/synonym/francais.syn: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/synonym/francais.syn -------------------------------------------------------------------------------- /etc/synonym/russian.syn: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/synonym/russian.syn -------------------------------------------------------------------------------- /perl/cat_ed/unknown.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/perl/cat_ed/unknown.gif -------------------------------------------------------------------------------- /test/test-boolean/acr.conf: -------------------------------------------------------------------------------- 1 | Language: en 2 | Charset: latin1 3 | # 4 | body10 body9 5 | aba abasement 6 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body7.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | -------------------------------------------------------------------------------- /test/test-boolean/search6.res: -------------------------------------------------------------------------------- 1 | Total results found: 1, Results: 1-1 2 | .1 -- http://site/anyword.txt 3 | -------------------------------------------------------------------------------- /doc/dpsearch-clones.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-clones.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-follow.ru.html: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-follow.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-fuzzy.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-fuzzy.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-intro.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-intro.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-opsys.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-opsys.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-oracle.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-oracle.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-stored.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-stored.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-syslog.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-syslog.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-track.ru.html: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-track.ru.html -------------------------------------------------------------------------------- /etc/stopwords/de.top100.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/de.top100.sl -------------------------------------------------------------------------------- /etc/stopwords/de.top1000.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/de.top1000.sl -------------------------------------------------------------------------------- /etc/stopwords/fr.top100.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/fr.top100.sl -------------------------------------------------------------------------------- /etc/stopwords/fr.top1000.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/fr.top1000.sl -------------------------------------------------------------------------------- /etc/stopwords/nl.top100.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/nl.top100.sl -------------------------------------------------------------------------------- /etc/stopwords/nl.top1000.sl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/stopwords/nl.top1000.sl -------------------------------------------------------------------------------- /etc/synonym/russian.big.syn: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/etc/synonym/russian.big.syn -------------------------------------------------------------------------------- /test/test-boolean/search4.res: -------------------------------------------------------------------------------- 1 | Total results found: 1, Results: 1-1 2 | .1 -- http://site/body-not-near.txt 3 | -------------------------------------------------------------------------------- /debian/cron.d.ex: -------------------------------------------------------------------------------- 1 | # 2 | # Regular cron jobs for the dpsearch package 3 | # 4 | 0 4 * * * root dpsearch_maintenance 5 | -------------------------------------------------------------------------------- /doc/dpsearch-aliases.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-aliases.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-authors.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-authors.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-cachemode.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-cachemode.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-dbschema.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-dbschema.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-donations.ru.html: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-donations.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-howstore.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-howstore.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-index-ru.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-index-ru.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-indexcmd.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-indexcmd.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-indexing.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-indexing.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-install.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-install.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-multilang.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-multilang.ru.html -------------------------------------------------------------------------------- 
/doc/dpsearch-register.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-register.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-searchd.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-searchd.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-srcache.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-srcache.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-srvtable.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-srvtable.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-stopwords.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-stopwords.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-subdocs.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-subdocs.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-templates.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-templates.ru.html -------------------------------------------------------------------------------- 
/doc/dpsearch-toolsreq.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-toolsreq.ru.html -------------------------------------------------------------------------------- /test/svn-commit.tmp~: -------------------------------------------------------------------------------- 1 | 2 | --This line, and those below, will be ignored-- 3 | 4 | A test-parsehtml/search4.res 5 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body8.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | -------------------------------------------------------------------------------- /test/test-complexbody/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 3 | blabla 4 | blabla 5 | ... 
6 | 7 | 8 | -------------------------------------------------------------------------------- /test/test-notmodified/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | test1 test1 test1 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /doc/dpsearch-categories.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-categories.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-content-enc.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-content-enc.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-disclaimer.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-disclaimer.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-doingsearch.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-doingsearch.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-htmlparser.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-htmlparser.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-http-codes.ru.html: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-http-codes.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-installing.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-installing.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-quick-usage.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-quick-usage.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-subsections.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-subsections.ru.html -------------------------------------------------------------------------------- /test/test-cache/htdocs/test.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-cache/htdocs/test.html -------------------------------------------------------------------------------- /test/test-charset/htdocs/ZH.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-charset/htdocs/ZH.html -------------------------------------------------------------------------------- /test/test-notmodified/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | test2 test2 test2 test2 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /doc/dpsearch-binarydistrib.ru.html: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-binarydistrib.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-installproblem.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-installproblem.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-international.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-international.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-mod_dpsearch.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-mod_dpsearch.ru.html -------------------------------------------------------------------------------- /test/test-multi/htdocs/test2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-multi/htdocs/test2.html -------------------------------------------------------------------------------- /test/test-multi/htdocs/test4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-multi/htdocs/test4.html -------------------------------------------------------------------------------- /test/test-sections/htdocs/test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-sections/htdocs/test.pdf 
-------------------------------------------------------------------------------- /doc/dpsearch-htmlparser-links.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-htmlparser-links.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-htmlparser-meta.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-htmlparser-meta.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-htmlparser-spec.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-htmlparser-spec.ru.html -------------------------------------------------------------------------------- /test/test-crc-multi/htdocs/test2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-crc-multi/htdocs/test2.html -------------------------------------------------------------------------------- /test/test-crc-multi/htdocs/test4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-crc-multi/htdocs/test4.html -------------------------------------------------------------------------------- /test/test-notmodified/htdocs/test3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | test3 test3 test3 test3 test3 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/index.html: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-parsehtml/htdocs/index.html -------------------------------------------------------------------------------- /doc/dpsearch-extended-indexing.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-extended-indexing.ru.html -------------------------------------------------------------------------------- /doc/dpsearch-htmlparser-comments.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-htmlparser-comments.ru.html -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body9.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | -------------------------------------------------------------------------------- /test/test-multi/htdocs/test1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 
2 | abcdefghijklmnopqrstuvwxyz 3 | 4 | Ford motor company 5 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/accept.html: -------------------------------------------------------------------------------- 1 | 2 | Accept 3 | 4 | 5 | PROBB 6 | 7 | 8 | -------------------------------------------------------------------------------- /debian/menu.ex: -------------------------------------------------------------------------------- 1 | ?package(dpsearch):needs="X11|text|vc|wm" section="Apps/see-menu-manual"\ 2 | title="dpsearch" command="/usr/bin/dpsearch" 3 | -------------------------------------------------------------------------------- /test/test-crc-multi/htdocs/test1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 2 | abcdefghijklmnopqrstuvwxyz 3 | 4 | Ford motor company 5 | -------------------------------------------------------------------------------- /doc/dpsearch-htmlparser-bodypatterns.ru.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/doc/dpsearch-htmlparser-bodypatterns.ru.html -------------------------------------------------------------------------------- /test/test-charset/htdocs/JP.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 全文検索エンジン 4 | 5 | 6 | -------------------------------------------------------------------------------- /test/test-charset/htdocs/segmented.gb2312.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-charset/htdocs/segmented.gb2312.txt -------------------------------------------------------------------------------- /test/test-charset/htdocs/segmented.utf-8.txt: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-charset/htdocs/segmented.utf-8.txt -------------------------------------------------------------------------------- /test/test-charset/htdocs/unsegmented.gb2312.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-charset/htdocs/unsegmented.gb2312.txt -------------------------------------------------------------------------------- /test/test-charset/htdocs/unsegmented.utf-8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maxime2/dataparksearch/HEAD/test/test-charset/htdocs/unsegmented.utf-8.txt -------------------------------------------------------------------------------- /test/test-mailto/htdocs/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Report bugs here 4 | 5 | 6 | -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- 1 | DataparkSearch Installation notes 2 | ================================= 3 | 4 | See documentation in doc subdirectory in DataparkSearch distribution. 
5 | -------------------------------------------------------------------------------- /debian/dpsearch.500mod_dpsearch.info: -------------------------------------------------------------------------------- 1 | LoadModule dpsearch_module /usr/lib/apache/1.3/mod_dpsearch.so 2 | Description: Support for the dpsearch search engine 3 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body10.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | abasements 12 | -------------------------------------------------------------------------------- /test/test-sections2/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | test2 4 | 5 | 6 | Just a simple HTML file 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-sections2/htdocs/test3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | test3 4 | 5 | 6 | Just a simple HTML file 7 | 8 | 9 | -------------------------------------------------------------------------------- /debian/dpsearch.postrm.debhelper: -------------------------------------------------------------------------------- 1 | # Automatically added by dh_makeshlibs 2 | if [ "$1" = "remove" ]; then 3 | ldconfig 4 | fi 5 | # End automatically added section 6 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body11.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | aba 13 | -------------------------------------------------------------------------------- /test/test-include/query.tst: 
-------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | -------------------------------------------------------------------------------- /test/test-revalias/dict.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.docsize FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.docsize,dict.intag; 3 | -------------------------------------------------------------------------------- /create/Makefile.am: -------------------------------------------------------------------------------- 1 | #SUBDIRS= db2 ibase msql mssql mysql oracle pgsql sapdb solid sybase virtuoso sqlite mimer 2 | SUBDIRS= ibase mssql mysql oracle pgsql sqlite mimer 3 | -------------------------------------------------------------------------------- /debian/dpsearch.postinst.debhelper: -------------------------------------------------------------------------------- 1 | # Automatically added by dh_makeshlibs 2 | if [ "$1" = "configure" ]; then 3 | ldconfig 4 | fi 5 | # End automatically added section 6 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body12.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | -------------------------------------------------------------------------------- /test/test-hrefonly/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | -------------------------------------------------------------------------------- 
/doc/README.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | You should have jade/openjade installed on your system to build 4 | DataparkSearch documentation in html format. 5 | 6 | 7 | -------------------------------------------------------------------------------- /test/test-follow/htdocs2/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | S2 t2 4 | 5 | 6 | Ss2 b2. Site1. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body13.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | -------------------------------------------------------------------------------- /test/test-multi/htdocs/test3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 3 title 4 | 5 | 6 | This is the third test. No more tests available. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/refresh.html: -------------------------------------------------------------------------------- 1 | 2 | Redirect 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/test3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 3 title 4 | 5 | 6 | This is the third test. No more tests available. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-crc-multi/htdocs/test3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 3 title 4 | 5 | 6 | This is the third test. No more tests available. 
7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-hrefonly/htdocs/test3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 3 title 4 | 5 | 6 | This is the third test. No more tests available. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-include/htdocs/test3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 3 title 4 | 5 | 6 | This is the third test. No more tests available. 7 | 8 | 9 | -------------------------------------------------------------------------------- /debian/README: -------------------------------------------------------------------------------- 1 | The Debian Package dpsearch 2 | ---------------------------- 3 | 4 | Comments regarding the Package 5 | 6 | -- Software Tue, 14 Nov 2006 11:54:56 -0500 7 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body14.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | body14 15 | -------------------------------------------------------------------------------- /test/test-quotes/htdocs/test1.txt: -------------------------------------------------------------------------------- 1 | This is a text file. No title available. 2 | 3 | Some special characters: " '. ' . ' . ' . ' " . " . " . " \ \ \ \ \ 4 | 5 | Let's check how they get escaped. 
6 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body15.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | body14 15 | body15 16 | -------------------------------------------------------------------------------- /test/test-include/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 1 title 4 | 5 | 6 | This is the first test page. Here is the second one. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-include/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 2 title 4 | 5 | 6 | This is the second test page. Here is the third one. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 2 title 4 | 5 | 6 | This is the second test page. Here is the third one. 
7 | 8 | 9 | -------------------------------------------------------------------------------- /debian/README.Debian: -------------------------------------------------------------------------------- 1 | dpsearch for Debian 2 | ------------------- 3 | 4 | 5 | 6 | -- Software Tue, 14 Nov 2006 11:54:56 -0500 7 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body16.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | body14 15 | body15 16 | body16 17 | -------------------------------------------------------------------------------- /test/test-hrefonly/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 1 title 4 | 5 | 6 | This is the first test page. Here is the second one. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-hrefonly/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 2 title 4 | 5 | 6 | This is the second test page. Here is the third one. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-notfound/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 1 title 4 | 5 | 6 | This is the first test page. Here is the second one. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-notfound/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 2 title 4 | 5 | 6 | This is the second test page. Here is the third one. 
7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/z-clone2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 2 title 4 | 5 | 6 | This is the second test page. Here is the third one. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-notmodified/htdocs/test4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 4 4 | 5 | 6 | 7 | 8 | test4 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body17.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | body14 15 | body15 16 | body16 17 | body17 18 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/ispattern.html: -------------------------------------------------------------------------------- 1 | 2 | isPattern 3 | 4 | 5 | @twitter 6 | #trend 7 | ++k 8 | trend# 9 | twitter@gmail.com 10 | +1 11 | c++ 12 | l'orex 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/ispattern.html: -------------------------------------------------------------------------------- 1 | 2 | isPattern 3 | 4 | 5 | @twitter 6 | #trend 7 | ++k 8 | trend# 9 | twitter@gmail.com 10 | +1 11 | c++ 12 | l'orex 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body18.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | body14 
15 | body15 16 | body16 17 | body17 18 | body18 19 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 1 title 4 | 5 | 6 | This is the first test page.  & Here is the second one. 7 | 8 | 9 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body19.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | body14 15 | body15 16 | body16 17 | body17 18 | body18 19 | body19 20 | -------------------------------------------------------------------------------- /create/pgsql/crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ndict ( 2 | url_id int4 DEFAULT 0 NOT NULL, 3 | word_id int4 DEFAULT 0 NOT NULL, 4 | intag int4 DEFAULT 0 NOT NULL 5 | ); 6 | 7 | CREATE INDEX n_url ON ndict (url_id); 8 | CREATE INDEX n_word ON ndict (word_id); 9 | -------------------------------------------------------------------------------- /etc/acronym/fr.acr: -------------------------------------------------------------------------------- 1 | # 2 | # French acronyms 3 | # Compiled by Maxim Zakharov 4 | # 5 | Language: fr 6 | Charset: latin1 7 | # 8 | c' ca 9 | c' ce 10 | # 11 | l' la 12 | l' le 13 | # 14 | m' me 15 | # 16 | qu' que 17 | # 18 | t' te 19 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body20.txt: -------------------------------------------------------------------------------- 1 | body1 2 | body2 3 | body3 4 | body4 5 | body5 6 | body6 7 | body7 8 | body8 9 | body9 10 | body10 11 | body11 12 | body12 13 | body13 14 | body14 15 | body15 16 | body16 17 | body17 18 | body18 19 | 
body19 20 | body20 21 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/test5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 5 title 4 | 5 | 6 | THAT is out of BodyBrackets. 7 | 8 | THAT is inside BodyBrackets. 9 | 10 | And THAT is also out. 11 | 12 | 13 | -------------------------------------------------------------------------------- /create/mysql/crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ndict ( 2 | url_id int(11) DEFAULT 0 NOT NULL, 3 | word_id int(11) DEFAULT 0 NOT NULL, 4 | intag int(11) DEFAULT 0 NOT NULL 5 | ); 6 | 7 | CREATE INDEX n_url ON ndict (url_id); 8 | CREATE INDEX n_word ON ndict (word_id); 9 | -------------------------------------------------------------------------------- /create/sqlite/crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ndict ( 2 | url_id int DEFAULT 0 NOT NULL, 3 | word_id int DEFAULT 0 NOT NULL, 4 | intag int DEFAULT 0 NOT NULL 5 | ); 6 | CREATE INDEX key_ndict_url_id ON ndict (url_id); 7 | CREATE INDEX key_ndict_word ON ndict (word_id); 8 | -------------------------------------------------------------------------------- /test/test-boolean/htdocs/body-not-near.txt: -------------------------------------------------------------------------------- 1 | body19 2 | body1 3 | body2 4 | body3 5 | body4 6 | body5 7 | body6 8 | body7 9 | body8 10 | body9 11 | body10 12 | body11 13 | body12 14 | body13 15 | body14 16 | body15 17 | body16 18 | body17 19 | body18 20 | body20 21 | abasements 22 | -------------------------------------------------------------------------------- /doc/samples/news.conf: -------------------------------------------------------------------------------- 1 | # 2 | # This is a sample for NEWs indexing 3 | # It indexes newsgroup 'udm' 4 | # 5 | 6 | DBAddr 
mysql://foo:bar@localhost/search/ 7 | 8 | 9 | # Add udm hierarchy of your news server: 10 | Server news://news.my.domain.com/udm 11 | 12 | Include sections.conf 13 | -------------------------------------------------------------------------------- /test/test-cached/cached.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=cache 4 | VarDir $(DPS_TEST_DIR)/var 5 | 6 | WrdFiles 1 7 | URLDataFiles 1 8 | 9 | CacheLogWords 1024 10 | CacheLogDels 1024 11 | 12 | LogsOnly yes 13 | LogLevel 5 14 | -------------------------------------------------------------------------------- /test/test-follow/htdocs1/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | S1 t1 4 | 5 | 6 | Ss1 bb1. Site2. 7 |
8 | an OK link. 9 |
10 | a stange link. 11 | 12 | 13 | -------------------------------------------------------------------------------- /test/test-sample/htdocs/test4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 4 title 4 | 5 | 6 | 7 | Now the fourth test here. The page contains some magic word 8 | and it is in the end of the body. The word is THIS. 9 | 10 | 11 | -------------------------------------------------------------------------------- /debian/dpsearch-default.ex: -------------------------------------------------------------------------------- 1 | # Defaults for dpsearch initscript 2 | # sourced by /etc/init.d/dpsearch 3 | # installed at /etc/default/dpsearch by the maintainer scripts 4 | 5 | # 6 | # This is a POSIX shell fragment 7 | # 8 | 9 | # Additional options that are passed to the Daemon. 10 | DAEMON_OPTS="" 11 | -------------------------------------------------------------------------------- /test/test-revalias/url0.res: -------------------------------------------------------------------------------- 1 | SQL>'FIELDS=OFF' 2 | SQL>'SELECT url, docsize, crc32 FROM url ORDER by docsize' 3 | http://server/twiki/bin/view/a 12 531628026 4 | http://server/twiki/bin/view/b 16 141563372 5 | http://server/twiki/bin/view/c 20 -1603867750 6 | http://server/twiki/bin/view/ 189 -1817918939 7 | SQL> 8 | -------------------------------------------------------------------------------- /test/test-sections/htdocs/directory/test-tz.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test Timezone 4 | 5 | 6 | 7 | 8 | TZ 9 |
10 | insurance 11 | 12 | 13 | -------------------------------------------------------------------------------- /debian/rules~: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | DEB_CONFIGURE_USER_FLAGS=-with-openssl --with-mysql --with-pgsql --enable-apache-module --with-readline --enable-idn 4 | 5 | include /usr/share/cdbs/1/rules/debhelper.mk 6 | include /usr/share/cdbs/1/class/autotools.mk 7 | 8 | # Add here any variable or target overrides you need. 9 | -------------------------------------------------------------------------------- /test/test-revalias/indexer1.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 4 | 5 | CVSIgnore yes 6 | Disallow */CVS/* */.svn/* 7 | AddType text/plain * 8 | Section body 1 256 9 | 10 | Server http://server/twiki/bin/view/ file:$(DPS_TEST_DIR)/htdocs/ 11 | -------------------------------------------------------------------------------- /test/test-revalias/url1.res: -------------------------------------------------------------------------------- 1 | SQL>'FIELDS=OFF' 2 | SQL>'SELECT url, docsize, crc32 FROM url ORDER by docsize' 3 | http://server/twiki/bin/view/a.txt 12 531628026 4 | http://server/twiki/bin/view/b.txt 16 141563372 5 | http://server/twiki/bin/view/c.txt 20 -1603867750 6 | http://server/twiki/bin/view/ 189 -1817918939 7 | SQL> 8 | -------------------------------------------------------------------------------- /create/ibase/drop.multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE dict2; 2 | DROP TABLE dict3; 3 | DROP TABLE dict4; 4 | DROP TABLE dict5; 5 | DROP TABLE dict6; 6 | DROP TABLE dict7; 7 | DROP TABLE dict8; 8 | DROP TABLE dict9; 9 | DROP TABLE dict10; 10 | DROP TABLE dict11; 11 | DROP TABLE dict12; 12 | DROP TABLE dict16; 13 | DROP TABLE dict32; 14 | 
-------------------------------------------------------------------------------- /create/mimer/drop.multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE dict2; 2 | DROP TABLE dict3; 3 | DROP TABLE dict4; 4 | DROP TABLE dict5; 5 | DROP TABLE dict6; 6 | DROP TABLE dict7; 7 | DROP TABLE dict8; 8 | DROP TABLE dict9; 9 | DROP TABLE dict10; 10 | DROP TABLE dict11; 11 | DROP TABLE dict12; 12 | DROP TABLE dict16; 13 | DROP TABLE dict32; 14 | -------------------------------------------------------------------------------- /create/mssql/drop.multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE dict2; 2 | DROP TABLE dict3; 3 | DROP TABLE dict4; 4 | DROP TABLE dict5; 5 | DROP TABLE dict6; 6 | DROP TABLE dict7; 7 | DROP TABLE dict8; 8 | DROP TABLE dict9; 9 | DROP TABLE dict10; 10 | DROP TABLE dict11; 11 | DROP TABLE dict12; 12 | DROP TABLE dict16; 13 | DROP TABLE dict32; 14 | -------------------------------------------------------------------------------- /create/mysql/drop.multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE dict2; 2 | DROP TABLE dict3; 3 | DROP TABLE dict4; 4 | DROP TABLE dict5; 5 | DROP TABLE dict6; 6 | DROP TABLE dict7; 7 | DROP TABLE dict8; 8 | DROP TABLE dict9; 9 | DROP TABLE dict10; 10 | DROP TABLE dict11; 11 | DROP TABLE dict12; 12 | DROP TABLE dict16; 13 | DROP TABLE dict32; 14 | -------------------------------------------------------------------------------- /create/pgsql/drop.multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE dict2; 2 | DROP TABLE dict3; 3 | DROP TABLE dict4; 4 | DROP TABLE dict5; 5 | DROP TABLE dict6; 6 | DROP TABLE dict7; 7 | DROP TABLE dict8; 8 | DROP TABLE dict9; 9 | DROP TABLE dict10; 10 | DROP TABLE dict11; 11 | DROP TABLE dict12; 12 | DROP TABLE dict16; 13 | DROP TABLE dict32; 14 | 
-------------------------------------------------------------------------------- /create/sqlite/drop.multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE dict2; 2 | DROP TABLE dict3; 3 | DROP TABLE dict4; 4 | DROP TABLE dict5; 5 | DROP TABLE dict6; 6 | DROP TABLE dict7; 7 | DROP TABLE dict8; 8 | DROP TABLE dict9; 9 | DROP TABLE dict10; 10 | DROP TABLE dict11; 11 | DROP TABLE dict12; 12 | DROP TABLE dict16; 13 | DROP TABLE dict32; 14 | -------------------------------------------------------------------------------- /test/test-boolean/search3.res: -------------------------------------------------------------------------------- 1 | Total results found: 8, Results: 1-8 2 | .1 -- http://site/body1.txt 3 | 2 -- http://site/body5.txt 4 | 3 -- http://site/body6.txt 5 | 4 -- http://site/body7.txt 6 | 5 -- http://site/body4.txt 7 | 6 -- http://site/body2.txt 8 | 7 -- http://site/body8.txt 9 | 8 -- http://site/body3.txt 10 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.6.0 4 | hooks: 5 | - id: check-yaml 6 | - id: end-of-file-fixer 7 | - repo: https://github.com/pocc/pre-commit-hooks 8 | rev: v1.3.5 9 | hooks: 10 | - id: clang-format 11 | args: [-i] 12 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/xhtml1-missing-doctype-and-xmlns.xhtml: -------------------------------------------------------------------------------- 1 | 2 | 3 | xhtml1-missing-doctype-and-xmlns.html 4 | 5 | 6 |

This is a simple bogus HTML doc with neither a doctype nor 7 | an xmlns attribute on its root element. 8 |

9 | 10 | 11 | -------------------------------------------------------------------------------- /create/mssql/drop.crc-multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ndict2; 2 | DROP TABLE ndict3; 3 | DROP TABLE ndict4; 4 | DROP TABLE ndict5; 5 | DROP TABLE ndict6; 6 | DROP TABLE ndict7; 7 | DROP TABLE ndict8; 8 | DROP TABLE ndict9; 9 | DROP TABLE ndict10; 10 | DROP TABLE ndict11; 11 | DROP TABLE ndict12; 12 | DROP TABLE ndict16; 13 | DROP TABLE ndict32; 14 | -------------------------------------------------------------------------------- /create/mysql/drop.crc-multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ndict2; 2 | DROP TABLE ndict3; 3 | DROP TABLE ndict4; 4 | DROP TABLE ndict5; 5 | DROP TABLE ndict6; 6 | DROP TABLE ndict7; 7 | DROP TABLE ndict8; 8 | DROP TABLE ndict9; 9 | DROP TABLE ndict10; 10 | DROP TABLE ndict11; 11 | DROP TABLE ndict12; 12 | DROP TABLE ndict16; 13 | DROP TABLE ndict32; 14 | -------------------------------------------------------------------------------- /create/pgsql/drop.crc-multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ndict2; 2 | DROP TABLE ndict3; 3 | DROP TABLE ndict4; 4 | DROP TABLE ndict5; 5 | DROP TABLE ndict6; 6 | DROP TABLE ndict7; 7 | DROP TABLE ndict8; 8 | DROP TABLE ndict9; 9 | DROP TABLE ndict10; 10 | DROP TABLE ndict11; 11 | DROP TABLE ndict12; 12 | DROP TABLE ndict16; 13 | DROP TABLE ndict32; 14 | -------------------------------------------------------------------------------- /create/sqlite/drop.crc-multi.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE ndict2; 2 | DROP TABLE ndict3; 3 | DROP TABLE ndict4; 4 | DROP TABLE ndict5; 5 | DROP TABLE ndict6; 6 | DROP TABLE ndict7; 7 | DROP TABLE ndict8; 8 | DROP TABLE ndict9; 9 | DROP TABLE ndict10; 10 | DROP TABLE ndict11; 11 | DROP TABLE ndict12; 12 
| DROP TABLE ndict16; 13 | DROP TABLE ndict32; 14 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/display.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Displaytest 4 | 5 | 6 | test 7 | Displaytest 8 |
Test1.
9 |
Test2.
10 |
dummytext
11 | 12 | 13 | -------------------------------------------------------------------------------- /test/test-sections/htdocs/test2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test2 4 | 5 | 8 | 9 | 10 | 2 11 |
12 | Phone: (8622) 62-12-12 13 | $##a 14 | $$#b 15 | 16 | 17 | -------------------------------------------------------------------------------- /doc/samples/minimal.conf: -------------------------------------------------------------------------------- 1 | # This is a minimal sample indexer config file 2 | 3 | DBAddr mysql://foo:bar@localhost/search/ 4 | 5 | Server http://localhost/ 6 | 7 | # Allow some known extensions and directory index 8 | Allow *.html *.htm *.shtml *.txt */ 9 | 10 | # Disallow everything else 11 | Disallow * 12 | 13 | Include sections.conf 14 | -------------------------------------------------------------------------------- /test/test-sections/htdocs/directory/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test Subdirectory 4 | 5 | 6 | 7 | 8 | 9 | Subdir 10 |
11 | insurance press 12 | 13 | 14 | -------------------------------------------------------------------------------- /test/test-mailto/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 4 | 5 | 6 | CVSIgnore yes 7 | Disallow */CVS/* */.svn/* 8 | AddType text/plain *.txt 9 | AddType text/html *.html *.htm 10 | 11 | Section body 1 256 12 | Section title 2 128 13 | 14 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.bak 2 | *.bz2 3 | *.cgi 4 | *.deb 5 | *.la 6 | *.lo 7 | *.log 8 | *.o 9 | *.sql 10 | *~ 11 | .deps/ 12 | .libs/ 13 | .idea/ 14 | .svn 15 | Makefile 16 | autom4te.cache/ 17 | cached 18 | config.status 19 | dp.inc 20 | dpconv 21 | dpguesser 22 | dps_config.h 23 | dpurl2text 24 | indexer 25 | libtool 26 | searchd 27 | splitter 28 | stored 29 | dps-config 30 | run-splitter 31 | -------------------------------------------------------------------------------- /create/mysql/drop.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE url; 2 | DROP TABLE urlinfo; 3 | DROP TABLE dict; 4 | DROP TABLE crossdict; 5 | DROP TABLE categories; 6 | DROP TABLE qtrack; 7 | DROP TABLE qinfo; 8 | DROP TABLE server; 9 | DROP TABLE srvinfo; 10 | DROP TABLE links; 11 | DROP TABLE storedchk; 12 | DROP TABLE cachedchk; 13 | DROP TABLE cachedchk2; 14 | DROP TABLE robots; 15 | DROP TABLE cookies; 16 | -------------------------------------------------------------------------------- /create/sqlite/drop.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE url; 2 | DROP TABLE urlinfo; 3 | DROP TABLE dict; 4 | DROP TABLE crossdict; 5 | DROP TABLE categories; 6 | DROP 
TABLE qtrack; 7 | DROP TABLE qinfo; 8 | DROP TABLE server; 9 | DROP TABLE srvinfo; 10 | DROP TABLE links; 11 | DROP TABLE storedchk; 12 | DROP TABLE cachedchk; 13 | DROP TABLE cachedchk2; 14 | DROP TABLE robots; 15 | DROP TABLE cookies; 16 | -------------------------------------------------------------------------------- /debian/semantic.cache: -------------------------------------------------------------------------------- 1 | ;; Object debian/ 2 | ;; SEMANTICDB Tags save file 3 | (semanticdb-project-database-file "debian/" 4 | :tables (list 5 | (semanticdb-table "rules" 6 | :major-mode 'makefile-mode 7 | :tags 'nil 8 | :file "rules" 9 | ) 10 | ) 11 | :file "semantic.cache" 12 | :semantic-tag-version "2.0pre3" 13 | :semanticdb-version "2.0pre3" 14 | ) 15 | -------------------------------------------------------------------------------- /create/mssql/cross-crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ncrossdict ( 2 | url_id int DEFAULT '0' NOT NULL, 3 | ref_id int DEFAULT '0' NOT NULL, 4 | word_id int DEFAULT '0' NOT NULL, 5 | intag int DEFAULT '0' NOT NULL 6 | ); 7 | CREATE INDEX ncrossdict_url_id ON ncrossdict (url_id); 8 | CREATE INDEX ncrossdict_ref_id ON ncrossdict (ref_id); 9 | CREATE INDEX ncrossdict_word_id ON ncrossdict (word_id); 10 | -------------------------------------------------------------------------------- /create/sqlite/cross-crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ncrossdict ( 2 | url_id int DEFAULT '0' NOT NULL, 3 | ref_id int DEFAULT '0' NOT NULL, 4 | word_id int DEFAULT '0' NOT NULL, 5 | intag int DEFAULT '0' NOT NULL 6 | ); 7 | CREATE INDEX ncrossdict_url_id ON ncrossdict (url_id); 8 | CREATE INDEX ncrossdict_ref_id ON ncrossdict (ref_id); 9 | CREATE INDEX ncrossdict_word_id ON ncrossdict (word_id); 10 | -------------------------------------------------------------------------------- /test/test-mailto/query.tst: 
-------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | -------------------------------------------------------------------------------- /test/test-quotes/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | -------------------------------------------------------------------------------- /create/ibase/cross-crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ncrossdict ( 2 | url_id int4 DEFAULT '0' NOT NULL, 3 | ref_id int4 DEFAULT '0' NOT NULL, 4 | word_id int4 DEFAULT '0' NOT NULL, 5 | intag int4 DEFAULT '0' NOT NULL 6 | ); 7 | CREATE INDEX ncrossdict_url_id ON ncrossdict (url_id); 8 | CREATE INDEX ncrossdict_ref_id ON ncrossdict (ref_id); 9 | CREATE INDEX ncrossdict_word_id ON ncrossdict (word_id); 10 | -------------------------------------------------------------------------------- /create/mimer/cross-crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ncrossdict ( 2 | url_id int4 DEFAULT '0' NOT NULL, 3 | ref_id int4 DEFAULT '0' NOT NULL, 4 | word_id int4 DEFAULT '0' NOT NULL, 5 | intag int4 DEFAULT '0' NOT NULL 6 | ); 7 | 
CREATE INDEX ncrossdict_url_id ON ncrossdict (url_id); 8 | CREATE INDEX ncrossdict_ref_id ON ncrossdict (ref_id); 9 | CREATE INDEX ncrossdict_word_id ON ncrossdict (word_id); 10 | -------------------------------------------------------------------------------- /create/pgsql/cross-crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ncrossdict ( 2 | url_id int4 DEFAULT '0' NOT NULL, 3 | ref_id int4 DEFAULT '0' NOT NULL, 4 | word_id int4 DEFAULT '0' NOT NULL, 5 | intag int4 DEFAULT '0' NOT NULL 6 | ); 7 | CREATE INDEX ncrossdict_url_id ON ncrossdict (url_id); 8 | CREATE INDEX ncrossdict_ref_id ON ncrossdict (ref_id); 9 | CREATE INDEX ncrossdict_word_id ON ncrossdict (word_id); 10 | -------------------------------------------------------------------------------- /test/test-notfound/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | -------------------------------------------------------------------------------- /test/test-complexbody/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Test title 5 | 6 | 7 | This is the complex body test page. 
8 | 9 | 10 | -------------------------------------------------------------------------------- /test/test-follow/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, url FROM url ORDER BY status, crc32, hops; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | -------------------------------------------------------------------------------- /create/mysql/cross-crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ncrossdict ( 2 | url_id int(11) DEFAULT '0' NOT NULL, 3 | ref_id int(11) DEFAULT '0' NOT NULL, 4 | word_id int(11) DEFAULT '0' NOT NULL, 5 | intag int(11) DEFAULT '0' NOT NULL 6 | ); 7 | CREATE INDEX ncrossdict_url_id ON ncrossdict (url_id); 8 | CREATE INDEX ncrossdict_ref_id ON ncrossdict (ref_id); 9 | CREATE INDEX ncrossdict_word_id ON ncrossdict (word_id); 10 | -------------------------------------------------------------------------------- /create/oracle/cross-crc.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE ncrossdict ( 2 | url_id int DEFAULT '0' NOT NULL, 3 | ref_id int DEFAULT '0' NOT NULL, 4 | word_id int DEFAULT '0' NOT NULL, 5 | intag int DEFAULT '0' NOT NULL 6 | ) 7 | / 8 | CREATE INDEX ncrossdict_url_id ON ncrossdict (url_id) 9 | / 10 | CREATE INDEX ncrossdict_ref_id ON ncrossdict (ref_id) 11 | / 12 | CREATE INDEX ncrossdict_word_id ON ncrossdict (word_id) 13 | / 14 | -------------------------------------------------------------------------------- /create/pgsql/grant.txt: -------------------------------------------------------------------------------- 1 | GRANT ALL ON url,dict,robots,stopword,categories 
TO PUBLIC; 2 | 3 | GRANT ALL ON ndict TO PUBLIC; 4 | 5 | 6 | GRANT ALL ON ndict2,ndict3,ndict4,ndict5,ndict6,ndict7,ndict8,ndict9, 7 | ndict10,ndict11,ndict12,ndict16,ndict32 TO PUBLIC; 8 | 9 | GRANT ALL ON dict2,dict3,dict4,dict5,dict6,dict7,dict8,dict9,dict10, 10 | dict11,dict12,dict16,dict32 TO PUBLIC; 11 | 12 | 13 | GRANT ALL ON "qtrack" TO PUBLIC; 14 | -------------------------------------------------------------------------------- /perl/INSTALL: -------------------------------------------------------------------------------- 1 | INSTALLATION 2 | 3 | 1. Install DataparkSearch on your system. You need to enable the threadsless 4 | version of libdpsearch (use the --enable-threadsless option for configure). 5 | 6 | 7 | 2. Follow the standard installation procedure for Perl modules, which is to 8 | type the following commands: 9 | 10 | perl Makefile.PL 11 | make 12 | make test 13 | make install 14 | 15 | You'll probably need to do the last as root. 16 | -------------------------------------------------------------------------------- /src/dp-Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = . 
2 | 3 | AM_CFLAGS = @CFLAGS@ -DDPS_CONF_DIR=\"@sysconfdir@\" -DDPS_VAR_DIR=\"@localstatedir@\" 4 | 5 | lib_LTLIBRARIES = libdp.la 6 | libdp_la_SOURCES = dp.c 7 | libdp_la_LDFLAGS = -release @DPS_BASE_VERSION@ 8 | 9 | 10 | EXTRA_DIST = dps_memcpy.inc 11 | 12 | include_HEADERS = \ 13 | ../include/dps_config.h 14 | 15 | 16 | INCLUDES = -I$(srcdir)/../include 17 | 18 | LIBS = @STATIC_LFLAGS@ \ 19 | @LIBS@ 20 | -------------------------------------------------------------------------------- /test/test-quotes/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 4 | 5 | CVSIgnore yes 6 | Disallow */CVS/* */.svn/* 7 | AddType text/plain *.txt 8 | AddType text/html *.html *.htm 9 | 10 | Section body 1 256 11 | Section title 2 128 12 | 13 | #MakePrefixes yes 14 | #MakeSuffixes yes 15 | MinWordLength 1 16 | 17 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 18 | -------------------------------------------------------------------------------- /create/mssql/drop.txt: -------------------------------------------------------------------------------- 1 | set quoted_identifier on; 2 | 3 | DROP TABLE "url"; 4 | DROP TABLE "urlinfo"; 5 | DROP TABLE "dict"; 6 | DROP TABLE "server"; 7 | DROP TABLE "srvinfo"; 8 | DROP TABLE "links"; 9 | DROP TABLE "categories"; 10 | DROP TABLE "crossdict"; 11 | DROP TABLE "qinfo"; 12 | DROP TABLE "qtrack"; 13 | DROP TABLE "storedchk"; 14 | DROP TABLE "cachedchk"; 15 | DROP TABLE "cachedchk2"; 16 | DROP TABLE "robots"; 17 | DROP TABLE "cookies"; 18 | -------------------------------------------------------------------------------- /etc/stopwords/ro.sl: -------------------------------------------------------------------------------- 1 | # 2 | # Romanian stopwords 3 | # From punBB language packs 4 | 5 | Language: ro 6 | Charset: iso-8859-1 7 | 8 | caci 9 | cea 10 | cel 11 | cind 12 | cit 13 | cum 14 | este 15 | 
fara 16 | fie 17 | foarte 18 | inca 19 | lor 20 | lui 21 | mele 22 | nici 23 | prea 24 | sale 25 | sint 26 | tale 27 | toate 28 | toti 29 | the 30 | and 31 | you 32 | that 33 | was 34 | for 35 | are 36 | with 37 | his 38 | they 39 | have 40 | this 41 | -------------------------------------------------------------------------------- /test/test-revalias1/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT url FROM url WHERE url LIKE '%1.php%'; 3 | SELECT url FROM url WHERE url LIKE '%2l.php%'; 4 | SELECT url FROM url WHERE url LIKE '%2r.php%'; 5 | SELECT url FROM url WHERE url LIKE '%2m.php%'; 6 | SELECT url FROM url WHERE url LIKE '%3l.php%'; 7 | SELECT url FROM url WHERE url LIKE '%3r.php%'; 8 | SELECT url FROM url WHERE url LIKE '%3m.php%'; 9 | SELECT url FROM url WHERE url LIKE '%sid=%'; 10 | -------------------------------------------------------------------------------- /create/ibase/drop.txt: -------------------------------------------------------------------------------- 1 | DROP GENERATOR rec_id_GEN; 2 | DROP TABLE url; 3 | DROP TABLE urlinfo; 4 | DROP TABLE dict; 5 | DROP TABLE server; 6 | DROP TABLE srvinfo; 7 | DROP TABLE links; 8 | DROP TABLE qtrack; 9 | DROP TABLE categories; 10 | DROP TABLE storedchk; 11 | DROP TABLE robots; 12 | DROP TABLE cachedchk; 13 | DROP TABLE cachedchk2; 14 | 15 | -- DROP TABLE qinfo; 16 | -- DROP TABLE ndict; 17 | -- DROP TABLE crossdict; 18 | -- DROP TABLE ncrossdict; 19 | -------------------------------------------------------------------------------- /create/mimer/drop.txt: -------------------------------------------------------------------------------- 1 | DROP TABLE url; 2 | DROP TABLE urlinfo; 3 | DROP TABLE dict; 4 | DROP TABLE server; 5 | DROP TABLE srvinfo; 6 | DROP TABLE links; 7 | DROP TABLE qtrack; 8 | DROP TABLE categories; 9 | DROP TABLE storedchk; 10 | DROP TABLE cachedchk; 11 | DROP TABLE cachedchk2; 12 | DROP TABLE robots; 13 | 14 | -- DROP 
TABLE qinfo; 15 | -- DROP TABLE ndict; 16 | -- DROP TABLE crossdict; 17 | -- DROP TABLE ncrossdict; 18 | -- DROP TABLE categories; 19 | -------------------------------------------------------------------------------- /create/pgsql/cross.txt: -------------------------------------------------------------------------------- 1 | CREATE TABLE crossdict ( 2 | url_id int4 DEFAULT '0' NOT NULL, 3 | ref_id int4 DEFAULT '0' NOT NULL, 4 | intag int4 DEFAULT '0' NOT NULL, 5 | word text DEFAULT '0' NOT NULL 6 | ); 7 | 8 | CREATE INDEX crossdict_url_id ON crossdict (url_id); 9 | CREATE INDEX crossdict_ref_id ON crossdict (ref_id); 10 | CREATE INDEX crossdict_word ON crossdict (word); 11 | CREATE INDEX crossdict_word_url_id ON crossdict (word, url_id); 12 | -------------------------------------------------------------------------------- /test/test-complexbody/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | 6 | SELECT url FROM url WHERE url='http://site/'; 7 | -------------------------------------------------------------------------------- /test/test-follow/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | 4 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 5 | 6 | AddType text/html *.html *.htm 7 | 8 | Section body 1 256 9 | Section title 2 128 10 | 11 | DetectClones no 12 | 13 | CVSIgnore yes 14 | Disallow */CVS/* */.svn/* 15 | Alias http://site1/ file:$(DPS_TEST_DIR)/htdocs1/ 16 | Alias http://site2/ file:$(DPS_TEST_DIR)/htdocs2/ 17 | 18 | Server world 
http://site1/ 19 | -------------------------------------------------------------------------------- /test/test-charset/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY dict.word,url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | 6 | SELECT url FROM url WHERE url='http://site/'; 7 | -------------------------------------------------------------------------------- /test/test-parsehtml/search-j.res: -------------------------------------------------------------------------------- 1 | {"responseData": {"query":"пятерка ten", 2 | "found":"1","first":"1","last":"1", 3 | "stat":"пятерка: 1 / 1, ten: 1 / 1", 4 | "stat_extended":" пятерка: 1, ten: 1", 5 | "stat_all":" пятерка: 1, ten: 1", 6 | "stat_short":"пятерка: 1, ten: 1", 7 | "results":[{"title":"This is a title in the middle . I'm wondering if this is working as expected.","url":"http://site/index.html","content":"One two Five Six Nine Ten пятёрка c++, c--, c#. 
l'espoire "}]}} 8 | -------------------------------------------------------------------------------- /test/test-notmodified/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, site_id, server_id, last_mod_time, url FROM url ORDER BY status, crc32, hops; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | SELECT ot,k FROM links ORDER BY ot,k; 6 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/no-newlines.html: -------------------------------------------------------------------------------- 1 | Jesse blah blah. blah blah <A HREF="html40-strict.html"> blah </A> blah blah blah 2 | -------------------------------------------------------------------------------- /doc/body-before.html: -------------------------------------------------------------------------------- 1 | 4 |
5 |
6 |
7 |
8 |
9 |   10 |
11 |
12 | -------------------------------------------------------------------------------- /test/test-revalias/dict.res: -------------------------------------------------------------------------------- 1 | SQL>'FIELDS=OFF' 2 | SQL>'SELECT dict.word,dict.intag,url.docsize FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.docsize,dict.intag' 3 | aa 65794 12 4 | aaa 131331 12 5 | aaaa 196868 12 6 | bb 65794 16 7 | bbb 131331 16 8 | bbbb 196868 16 9 | cc 65794 20 10 | ccc 131331 20 11 | cccc 196868 20 12 | b 65793 189 13 | txt 65795 189 14 | b.txt 65797 189 15 | c 131329 189 16 | txt 131331 189 17 | c.txt 131333 189 18 | a 196865 189 19 | txt 196867 189 20 | a.txt 196869 189 21 | SQL> 22 | -------------------------------------------------------------------------------- /doc/datapark.css: -------------------------------------------------------------------------------- 1 | BODY {margin-left: 6em; margin-right: 6em;} 2 | P {text-align: justify;} 3 | TABLE.CALSTABLE { margin-left: 4em; width: 86%; } 4 | .SECT1 { text-align: left; } 5 | .SECT2 { text-align: left; } 6 | .SECT3 { text-align: left; } 7 | .SECT4 { text-align: left; } 8 | .chapter, .CHAPTER{ text-align: left; } 9 | .programlisting, .PROGRAMLISTING { text-align: left; color: #4444FF; font-size: small;} 10 | .synopsis, .SYNOPSIS { text-align: left; color: #4444FF; } 11 | .APPLICATION { font-style: italic;} 12 | .TOC {margin-left: 4em;} 13 | -------------------------------------------------------------------------------- /create/oracle/drop.txt: -------------------------------------------------------------------------------- 1 | delimiter=/ 2 | DROP TABLE dict 3 | / 4 | DROP TABLE url 5 | / 6 | DROP TABLE urlinfo 7 | / 8 | DROP TABLE server 9 | / 10 | DROP TABLE links 11 | / 12 | DROP TABLE srvinfo 13 | / 14 | DROP TABLE categories 15 | / 16 | DROP TABLE storedchk 17 | / 18 | DROP TABLE cachedchk 19 | / 20 | DROP TABLE cachedchk2 21 | / 22 | DROP TABLE robots 23 | / 24 | DROP TABLE cookies 25 | / 26 | DROP 
SEQUENCE categories_seq 27 | / 28 | DROP SEQUENCE qtrack_seq 29 | / 30 | DROP SEQUENCE next_url_id 31 | / 32 | DROP SEQUENCE server_seq 33 | / 34 | -------------------------------------------------------------------------------- /debian/emacsen-remove.ex: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | # /usr/lib/emacsen-common/packages/remove/dpsearch 3 | 4 | FLAVOR=$1 5 | PACKAGE=dpsearch 6 | 7 | if [ ${FLAVOR} != emacs ]; then 8 | if test -x /usr/sbin/install-info-altdir; then 9 | echo remove/${PACKAGE}: removing Info links for ${FLAVOR} 10 | install-info-altdir --quiet --remove --dirname=${FLAVOR} /usr/info/dpsearch.info.gz 11 | fi 12 | 13 | echo remove/${PACKAGE}: purging byte-compiled files for ${FLAVOR} 14 | rm -rf /usr/share/${FLAVOR}/site-lisp/${PACKAGE} 15 | fi 16 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/ispattern.html: -------------------------------------------------------------------------------- 1 | 2 | isPattern 3 | 4 | 5 | ("Go.") (He did.) 6 | ("Go?") (He did.) 7 | U.S.A. is. 8 | U.S.A.? He. 9 | 3.4. 10 | c.d. 11 | etc.)' the. 12 | etc.)' The. 13 | 14 | 15 | @twitter 16 | #trend 17 | ++k 18 | trend# 19 | twitter@gmail.com 20 | +1 21 | c++ 22 | l'orex 23 | <3 24 | #books 25 | #amwriting 26 | @momentum 27 | Samuel 28 | cheers
Rhyl 29 | cheers
rhyl 30 | 31 | 32 | -------------------------------------------------------------------------------- /test/dps_test-run: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | export DPS_TEST_ROOT=`pwd` 4 | export DPS_TEST="$DPS_TEST_ROOT/dps_test" 5 | export DPS_TEST_LOG="$DPS_TEST_ROOT/dps_test.log" 6 | export INDEXER="$DPS_TEST_ROOT/../src/indexer" 7 | export SEARCH="$DPS_TEST_ROOT/../src/search.cgi" 8 | export STOREDOC="$DPS_TEST_ROOT/../src/storedoc.cgi" 9 | export CACHED="$DPS_TEST_ROOT/../src/cached" 10 | export SPLITTER="$DPS_TEST_ROOT/../src/splitter" 11 | export SEARCHD="$DPS_TEST_ROOT/../src/searchd" 12 | export DPS_SHARE_DIR="$DPS_TEST_ROOT/../create/" 13 | 14 | $DPS_TEST $@ 15 | -------------------------------------------------------------------------------- /test/test-sections/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | TTT TTT TTT TTT 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | BBB BBB BBB BBB 12 | Second 13 |
Phone: +7 495 123 45 67 14 | 15 | 16 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/xhtml1-strict-missing-xmlns.xhtml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | Virtual Library 8 | 9 | 10 |

This is a "minimal XHTML document", copied here more or 11 | less from 12 | PR-xhtml1-19991210.

14 | 15 | 16 | -------------------------------------------------------------------------------- /create/pgsql/drop.txt: -------------------------------------------------------------------------------- 1 | DROP table "url"; 2 | DROP table "urlinfo"; 3 | DROP table "dict"; 4 | DROP TABLE server; 5 | DROP TABLE links; 6 | DROP TABLE categories; 7 | DROP TABLE srvinfo; 8 | -- DROP FUNCTION clean_srvinfo(); 9 | DROP TABLE "qtrack"; 10 | DROP TABLE "qinfo"; 11 | DROP TABLE storedchk; 12 | DROP TABLE cachedchk; 13 | DROP TABLE cachedchk2; 14 | DROP TABLE robots; 15 | DROP TABLE cookies; 16 | -- DROP SEQUENCE url_rec_id_seq; 17 | -- DROP SEQUENCE categories_rec_id_seq; 18 | -- DROP SEQUENCE qtrack_rec_id_seq; 19 | -- DROP TRIGGER srvdel ON server; 20 | -------------------------------------------------------------------------------- /test/test-parsehtml/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, referrer, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | SELECT url.url,server.url FROM url,server WHERE url.site_id=server.rec_id ORDER BY url.url,server.url; 6 | 7 | SELECT url FROM url WHERE url='http://site/'; 8 | -------------------------------------------------------------------------------- /README.XML: -------------------------------------------------------------------------------- 1 | 2 | 3 | In order to produce documentation from XML source, 4 | these packages need to be installed: 5 | 6 | openjade 7 | dsssl-docbook-modular 8 | docbook-4.1 9 | docbook-tdg 10 | docbook-xml-4.1.2 11 | 12 | Optionally, you may need to add this line: 13 | CATALOG "dsssl/modular/catalog" 14 | into 
file /usr/local/share/sgml/docbook/catalog. 15 | 16 | 17 | --------------- 18 | #!/bin/sh 19 | openjade -v -t sgml -wno-valid \ 20 | -d ./mnogo.dsl \ 21 | /usr/local/share/sgml/docbook/dsssl/modular/dtds/decls/xml.dcl \ 22 | book.xml 23 | -------------------------------------------------------------------------------- /test/test-include/include.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 4 | 5 | CVSIgnore yes 6 | Disallow */CVS/* */.svn/* 7 | 8 | AddType text/plain *.txt *.pl *.js *.h *.c *.pm *.e 9 | AddType text/html *.html *.htm 10 | 11 | Section body 1 256 12 | Section title 2 128 13 | Section meta.keywords 3 128 14 | Section meta.description 4 128 15 | 16 | Section url.file 6 0 17 | Section url.path 7 0 18 | Section url.host 8 0 19 | Section url.proto 9 0 20 | 21 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 22 | -------------------------------------------------------------------------------- /test/test-revalias1/htdocs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # See https://www.apertis.org/policies/coding_conventions/#code-formatting 2 | BasedOnStyle: GNU 3 | SortIncludes: Never 4 | AlwaysBreakAfterDefinitionReturnType: All 5 | BreakBeforeBinaryOperators: None 6 | BinPackParameters: false 7 | SpaceAfterCStyleCast: true 8 | PointerAlignment: Right 9 | # Our column limit is actually 80, but setting that results in clang-format 10 | # making a lot of dubious hanging-indent choices; disable it and assume the 11 | # developer will line wrap appropriately. clang-format will still check 12 | # existing hanging indents. 
13 | ColumnLimit: 0 14 | -------------------------------------------------------------------------------- /etc/stopwords/nl.sl: -------------------------------------------------------------------------------- 1 | # 2 | # Dutch stop-list 3 | # Theo Vosse (theo@dmo.com) and Stan P. van de Burgt (stan@dmo.com) 4 | # 5 | 6 | Charset: latin1 7 | Language: nl 8 | 9 | 10 | de 11 | het 12 | van 13 | en 14 | een 15 | in 16 | dat 17 | te 18 | ik 19 | hij 20 | die 21 | is 22 | met 23 | ze 24 | was 25 | als 26 | aan 27 | er 28 | je 29 | ook 30 | dan 31 | of 32 | had 33 | bij 34 | wat 35 | uit 36 | nog 37 | hem 38 | tot 39 | zo 40 | zij 41 | zou 42 | we 43 | al 44 | dit 45 | wel 46 | kan 47 | hun 48 | nu 49 | zei 50 | men 51 | me 52 | mij 53 | zal 54 | heb 55 | hoe 56 | ons 57 | wij 58 | af 59 | -------------------------------------------------------------------------------- /test/test-boolean/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, pop_rank, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | SELECT u1.docsize,u2.docsize,u1.url,u2.url FROM url u1,url u2, links l WHERE u1.rec_id=l.ot AND u2.rec_id=l.k ORDER BY u1.docsize,u2.docsize; 6 | SELECT url FROM url WHERE url='http://site/'; 7 | -------------------------------------------------------------------------------- /test/test-cached/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, pop_rank, url FROM url ORDER BY status, 
crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | SELECT u1.docsize,u2.docsize,u1.url,u2.url FROM url u1,url u2, links l WHERE u1.rec_id=l.ot AND u2.rec_id=l.k ORDER BY u1.docsize,u2.docsize; 6 | SELECT url FROM url WHERE url='http://site/'; 7 | -------------------------------------------------------------------------------- /test/test-hrefonly/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | 4 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 5 | 6 | CVSIgnore yes 7 | Disallow */CVS/* */.svn/* 8 | AddType text/plain *.txt *.pl *.js *.h *.c *.pm *.e 9 | AddType text/html *.html *.htm 10 | 11 | Section body 1 256 12 | Section title 2 128 13 | Section meta.keywords 3 128 14 | Section meta.description 4 128 15 | Section url.file 6 0 16 | Section url.path 7 0 17 | Section url.host 8 0 18 | Section url.proto 9 0 19 | 20 | HrefOnly *.html 21 | 22 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 23 | -------------------------------------------------------------------------------- /test/test-sample/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, pop_rank, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | SELECT u1.docsize,u2.docsize,u1.url,u2.url FROM url u1,url u2, links l WHERE u1.rec_id=l.ot AND u2.rec_id=l.k ORDER BY u1.docsize,u2.docsize; 6 | SELECT url FROM url WHERE url='http://site/'; 7 | 
-------------------------------------------------------------------------------- /test/test-searchd/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, pop_rank, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | SELECT u1.docsize,u2.docsize,u1.url,u2.url FROM url u1,url u2, links l WHERE u1.rec_id=l.ot AND u2.rec_id=l.k ORDER BY u1.docsize,u2.docsize; 6 | SELECT url FROM url WHERE url='http://site/'; 7 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/xhtml1-strict-minimal.xhtml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | Virtual Library 8 | 9 | 10 |

This is a "minimal XHTML document", copied here more or 11 | less from 12 | PR-xhtml1-19991210.

14 | 15 | 16 | -------------------------------------------------------------------------------- /doc/samples/ftpsearch.conf: -------------------------------------------------------------------------------- 1 | # This is indexer.conf sample for 'ftpsearch' mode. 2 | # Indexer will index only the URL but not the content 3 | # of the documents. 4 | # 5 | 6 | # Database parameters 7 | DBAddr mysql://foo:bar@localhost/search/ 8 | 9 | 10 | # Turn on indexing URL of the documents 11 | 12 | Section url.file 1 0 13 | Section url.path 2 0 14 | Section url.host 3 0 15 | Section url.proto 4 0 16 | 17 | 18 | # Add start URL 19 | Server ftp://ftp.cdrom.com/ 20 | 21 | 22 | # Retrieve only directory list, check other files. 23 | 24 | CheckOnly NoMatch */ 25 | HrefOnly */ 26 | 27 | Include sections.conf 28 | -------------------------------------------------------------------------------- /test/test-cache/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 3 | SELECT status, docsize, hops, crc32, pop_rank, site_id, charset_id, url FROM url ORDER BY status, crc32; 4 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 5 | SELECT u1.docsize,u2.docsize,u1.url,u2.url FROM url u1,url u2, links l WHERE u1.rec_id=l.ot AND u2.rec_id=l.k ORDER BY u1.docsize,u2.docsize; 6 | SELECT url FROM url WHERE url='http://site/'; 7 | -------------------------------------------------------------------------------- /BUGS: -------------------------------------------------------------------------------- 1 | * Occasionally coredumps on spell-checking some Russian words. This is an aspell bug. 2 | Please build without aspell to avoid.
Or try applying the patches to version 0.60.3, see: 3 | http://sourceforge.net/tracker/index.php?func=detail&aid=1276512&group_id=245&atid=100245 4 | http://sourceforge.net/tracker/index.php?func=detail&aid=1274670&group_id=245&atid=100245 5 | 6 | * PHP module is in deep pre-alpha and doesn't work properly. 7 | 8 | See also: http://code.google.com/p/dataparksearch/issues/list (preferred place to put a new bug report) 9 | The old bug tracking system: http://www.dataparksearch.org/cgi-bin/bt.pl 10 | -------------------------------------------------------------------------------- /test/test-notmodified/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 4 | 5 | CVSIgnore yes 6 | Disallow */CVS/* 7 | AddType text/html *.html *.htm 8 | 9 | CollectLinks yes 10 | PopRankSkipSameSite no 11 | LogLevel 5 12 | #UseDateHeader force 13 | UseDateHeader yes 14 | 15 | Section body 1 256 16 | Section title 2 128 17 | Section meta.keywords 3 128 18 | Section meta.description 4 128 19 | 20 | Section url.file 6 0 21 | Section url.path 7 0 22 | Section url.host 8 0 23 | Section url.proto 9 0 24 | 25 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 26 | -------------------------------------------------------------------------------- /debian/dpsearch.doc-base.EX: -------------------------------------------------------------------------------- 1 | Document: dpsearch 2 | Title: Debian dpsearch Manual 3 | Author: 4 | Abstract: This manual describes what dpsearch is 5 | and how it can be used to 6 | manage online manuals on Debian systems.
7 | Section: unknown 8 | 9 | Format: debiandoc-sgml 10 | Files: /usr/share/doc/dpsearch/dpsearch.sgml.gz 11 | 12 | Format: postscript 13 | Files: /usr/share/doc/dpsearch/dpsearch.ps.gz 14 | 15 | Format: text 16 | Files: /usr/share/doc/dpsearch/dpsearch.text.gz 17 | 18 | Format: HTML 19 | Index: /usr/share/doc/dpsearch/html/index.html 20 | Files: /usr/share/doc/dpsearch/html/*.html 21 | -------------------------------------------------------------------------------- /test/test-revalias/indexer0.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 4 | 5 | CVSIgnore yes 6 | Disallow */CVS/* */.svn/* 7 | AddType text/plain * 8 | Section body 1 256 9 | 10 | ReverseAlias regex ^http://server/twiki/bin/view/([^.])\.(.*)txt$ http://server/twiki/bin/view/$1 11 | 12 | # Alias directory index without removing extensions 13 | 14 | Alias regex http://server/twiki/bin/view/$ file:$(DPS_TEST_DIR)/htdocs/ 15 | 16 | # Alias other files removing extensions 17 | 18 | Alias regex http://server/twiki/bin/view/(.{1,}) file:$(DPS_TEST_DIR)/htdocs/$1.txt 19 | 20 | Server http://server/twiki/bin/view/ 21 | -------------------------------------------------------------------------------- /test/test-searchd/searchd.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=cache 4 | VarDir $(DPS_TEST_DIR)/var 5 | 6 | WrdFiles 1 7 | URLDataFiles 1 8 | StoredFiles 1 9 | 10 | DoStore yes 11 | 12 | LogLevel 5 13 | 14 | DetectClones no 15 | AccentExtensions yes 16 | PreloadURLData yes 17 | 18 | Section body 1 256 19 | Section title 2 128 20 | Section meta.keywords 3 128 21 | Section meta.description 4 128 22 | 23 | Section url.file 6 0 24 | Section url.path 7 0 25 | Section url.host 8 0 26 | Section url.proto 9 0 27 | 28 | Section adate 10 
256 "

(.*)

" "$1" 29 | Section none 11 256

(.*)

" "$1" 30 | -------------------------------------------------------------------------------- /doc/db2.xml: -------------------------------------------------------------------------------- 1 | 2 | IBM DB2 notes 3 | <indexterm><primary>DB2</primary><secondary>notes</secondary></indexterm> 4 | 5 | To successfully use 6 | DataparkSearch with IBM DB2 database, please 7 | add the following lines to Apache's 8 | httpd.conf: 9 | 10 | SetEnv DB2DIR /usr/IBMdb2/V7.1 11 | SetEnv DB2INSTANCE db2 12 | PassEnv DB2DIR DB2INSTANCE 13 | 14 | 15 | Where DB2DIR is the path to DB2 16 | installation. DB2INSTANCE is the DB2 instance that 17 | search.cgi should use. 18 | 19 | 20 | -------------------------------------------------------------------------------- /test/test-revalias1/query.res: -------------------------------------------------------------------------------- 1 | SQL>'FIELDS=OFF' 2 | SQL>'SELECT url FROM url WHERE url LIKE '%1.php%'' 3 | http://site/1.php 4 | SQL>'SELECT url FROM url WHERE url LIKE '%2l.php%'' 5 | http://site/2l.php?a=b 6 | SQL>'SELECT url FROM url WHERE url LIKE '%2r.php%'' 7 | http://site/2r.php?a=b 8 | SQL>'SELECT url FROM url WHERE url LIKE '%2m.php%'' 9 | http://site/2m.php?a=b&c=d 10 | SQL>'SELECT url FROM url WHERE url LIKE '%3l.php%'' 11 | http://site/3l.php?a=b&c=d 12 | SQL>'SELECT url FROM url WHERE url LIKE '%3r.php%'' 13 | http://site/3r.php?a=b&c=d 14 | SQL>'SELECT url FROM url WHERE url LIKE '%3m.php%'' 15 | http://site/3m.php?a=b&c=d 16 | SQL>'SELECT url FROM url WHERE url LIKE '%sid=%'' 17 | SQL> 18 | -------------------------------------------------------------------------------- /test/test-revalias1/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 4 | 5 | CVSIgnore yes 6 | Disallow */CVS/* 7 | 8 | AddType text/html *.html *.htm 9 | 10 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 11 | 12 | 
# sid is the only argument 13 | ReverseAlias regex (http://[^?]*)[?]sid=[^&]*$ $1 14 | 15 | # sid is the first argument but not the last 16 | ReverseAlias regex (http://[^?]*[?])sid=[^&]*&(.*) $1$2 17 | 18 | # sid is not the first argument and doesn't matter whether the last 19 | ReverseAlias regex (http://.*)&sid=[^&]*(.*) $1$2 20 | 21 | ReverseAlias regex file\=(http://.*\.mp3) $1 22 | ReverseAlias http://avril.ru/flashplayer.swf?file=http:// http:// 23 | -------------------------------------------------------------------------------- /test/README: -------------------------------------------------------------------------------- 1 | 2 | This directory contains a test suite for DataparkSearch Engine. 3 | To run it, please 4 | 5 | 1. Compile DataparkSearch with any SQL database support. 6 | 7 | 2. Set DPS_TEST_DBADDR environment variable specifying the database 8 | you want to run tests with, in format the same with DBAddr 9 | indexer.conf command. 10 | 11 | For example, 12 | 13 | export DPS_TEST_DBADDR=mysql://root@localhost/test/ 14 | 15 | 16 | or 17 | 18 | export DPS_TEST_DBADDR=sqlite://localhost/home/bar/sqlite.db/ 19 | 20 | 21 | You don't need to create tables, test program will do it itself. 22 | 23 | NOTE: all existing data will be lost in this database 24 | after running the tests. 25 | 26 | 27 | 3. Run "make check" 28 | -------------------------------------------------------------------------------- /debian/watch.ex: -------------------------------------------------------------------------------- 1 | # Example watch control file for uscan 2 | # Rename this file to "watch" and then you can run the "uscan" command 3 | # to check for upstream updates and more. 
4 | # See uscan(1) for format 5 | 6 | # Compulsory line, this is a version 3 file 7 | version=3 8 | 9 | # Uncomment to examine a Webpage 10 | # 11 | #http://www.example.com/downloads.php dpsearch-(.*)\.tar\.gz 12 | 13 | # Uncomment to examine a Webserver directory 14 | #http://www.example.com/pub/dpsearch-(.*)\.tar\.gz 15 | 16 | # Uncomment to examine an FTP server 17 | #ftp://ftp.example.com/pub/dpsearch-(.*)\.tar\.gz debian uupdate 18 | 19 | # Uncomment to find new files on sourceforge, for debscripts >= 2.9 20 | # http://sf.net/dpsearch/dpsearch-(.*)\.tar\.gz 21 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/html40-strict.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | test HTML 4.0 Strict document 6 | 7 | 9 | 10 | 11 | 12 | 13 |

14 | This is a test HTML 4.0 Strict document. 15 |

16 | 17 |

18 | See: W3C HTML Validation Service: Tests 19 |

20 | 21 |
22 | 23 |
24 | valid HTML
25 | Gerald Oskoboiny
26 | $Date: 1999/09/17 08:49:22 $ 27 |
28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/bogus-fpi.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | test document with a bogus FPI 6 | 7 | 9 | 10 | 11 | 12 | 13 |

14 | This is a test document with a bogus FPI. 15 |

16 | 17 |

18 | See: W3C HTML Validation Service: Tests 19 |

20 | 21 |
22 | 23 |
24 | valid HTML
25 | Gerald Oskoboiny
26 | $Date: 1999/09/27 10:04:58 $ 27 |
28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/xhtml1-blank-1st-line.xhtml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | Virtual Library 9 | 10 | 11 |

This is a "minimal XHTML document", copied here more or 12 | less from 13 | PR-xhtml1-19991210, 15 | with an extra blank line added at the top to test this 17 | case.

18 | 19 | 20 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/html40-transitional.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | test HTML 4.0 Transitional document 6 | 7 | 9 | 10 | 11 | 12 | 13 |

14 | This is a test HTML 4.0 Transitional document. 15 |

16 | 17 |

18 | See: W3C HTML Validation Service: Tests 19 |

20 | 21 |
22 | 23 |
24 | valid HTML
25 | Gerald Oskoboiny
26 | $Date: 1999/09/17 08:49:22 $ 27 |
28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /doc/vary.xml: -------------------------------------------------------------------------------- 1 | 2 | Multilingual servers support 3 | CommandVaryLang 4 | 5 | Some web servers can perform language negotiation on the document language. 6 | In this case, several copies in different languages exist for one URL. 7 | 8 | 9 | To index all pages of such servers, the VaryLang command is used. 10 | It specifies a list of languages separated by spaces. 11 | These languages will be used for indexing URLs with multi-language versions. 12 | 13 | 14 | Usage example: 15 | 16 | VaryLang "ru en fr" 17 | 18 | 19 | index will fetch all document copies in Russian, English and French languages. 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /test/test-notfound/indexer.conf: -------------------------------------------------------------------------------- 1 | # Bug id #365 2 | # I noticed that it fails after getting an Object Not Found HTTP Error. 3 | # It starts normally until it gets a not found object.
4 | 5 | # 6 | # There is no test3.html in this test 7 | # It is referenced to from test2.html 8 | # 9 | 10 | ImportEnv DPS_TEST_DBADDR0 11 | ImportEnv DPS_TEST_DIR 12 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=single 13 | 14 | AddType text/plain *.txt *.pl *.js *.h *.c *.pm *.e 15 | AddType text/html *.html *.htm 16 | 17 | CVSIgnore yes 18 | Disallow */CVS/* 19 | 20 | Section body 1 256 21 | Section title 2 128 22 | Section meta.keywords 3 128 23 | Section meta.description 4 128 24 | 25 | Section url.file 6 0 26 | Section url.path 7 0 27 | Section url.host 8 0 28 | Section url.proto 9 0 29 | 30 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 31 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | dist: 3 | - precise 4 | - trusty 5 | before_script: 6 | - sudo apt-get update 7 | # `sudo apt-get install postgresql` fails with `Refused to start PostgreSQL 9.5, because PostgreSQL 9.1 is currently running! 
You should first stop 9.1 instance...` 8 | - sudo service postgresql stop 9 | - sudo apt-get install --yes postgresql 10 | # necessary for packaging `make` targets: 11 | - sudo apt-get install dh-make libegl1-mesa:amd64 libgl1-mesa-glx:i386 libaspell15 sgml-data docbook docbook-xml opensp docbook-dsssl libc6-i386 libosp5 libc6-x32 libc6:i386 libbsd-dev metacity-common libaspell-dev 12 | script: 13 | - export DPS_TEST_DBADDR="mysql://localhost/test/;pgsql://root@/root/" 14 | - ./bootstrap && ./configure && make -j8 && make -j8 check && sudo make install 15 | - make bin-dist 16 | - make pkg-deb 17 | - make pkg-rpm 18 | -------------------------------------------------------------------------------- /charset-Makefile.am: -------------------------------------------------------------------------------- 1 | ## Process this file with automake to produce Makefile.in 2 | AUTOMAKE_OPTIONS = foreign no-installinfo 3 | SUBDIRS = src 4 | 5 | # copy dirs w/out Makefile.am into dist 6 | dist-hook: 7 | # mkdir $(distdir)/include 8 | cp -r $(srcdir)/include/ $(distdir)/include/ 9 | rm -f $(distdir)/include/dps_config.h $(distdir)/include/stamp.h 10 | 11 | install-data-local: 12 | $(mkinstalldirs) $(localstatedir) 13 | 14 | uninstall-local: 15 | -rmdir $(localstatedir) 16 | -rmdir $(DESTDIR)$(bindir) 17 | -rmdir $(DESTDIR)$(sbindir) 18 | -rmdir $(DESTDIR)$(libdir) 19 | -rmdir $(DESTDIR)$(includedir) 20 | -rmdir $(DESTDIR)$(prefix) 21 | 22 | EXTRA_DIST = LICENSE src/uconv-8bit.h src/sgml.entities src/uconv-8bit.inc src/sgml.inc bootstrap src/unidata.ch 23 | 24 | libtool: $(LIBTOOL_DEPS) 25 | $(SHELL) ./config.status --recheck 26 | -------------------------------------------------------------------------------- /perl/README: -------------------------------------------------------------------------------- 1 | !!! THIS IS NOT COMPLETE YET. DON'T USE !!! 2 | 3 | Dataparksearch-perl -- a libdpsearch interface for perl. 4 | 5 | Copyright (C) 2003-2004 Datapark corp. 
http://www.dataparksearch.org/ 6 | Copyright (C) 2000, Gecko. http://www.gecko.fr/ 7 | Copyright (C) 1999,2000 UdmSearch developers team 8 | 9 | You may distribute under the terms of the GNU General Public License 10 | as specified in the COPYING file. 11 | 12 | DESCRIPTION: 13 | ------------ 14 | 15 | Dataparksearch-perl provides a Perl interface for libdpsearch: 16 | 17 | - Full DBMode support, including crc-multi, single, crc, multi and cache. 18 | - Support TrackQuery ( UDM_TRACK_QUERIES ). 19 | - Support Sort modes by rate or by date. 20 | - Support Search modes 'all', 'any' and 'bool'. 21 | - Support Affix & Spell 22 | - Partial template support 23 | -------------------------------------------------------------------------------- /debian/preinst.ex: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # preinst script for dpsearch 3 | # 4 | # see: dh_installdeb(1) 5 | 6 | set -e 7 | 8 | # summary of how this script can be called: 9 | # * `install' 10 | # * `install' 11 | # * `upgrade' 12 | # * `abort-upgrade' 13 | # for details, see http://www.debian.org/doc/debian-policy/ or 14 | # the debian-policy package 15 | 16 | 17 | case "$1" in 18 | install|upgrade) 19 | ;; 20 | 21 | abort-upgrade) 22 | ;; 23 | 24 | *) 25 | echo "preinst called with unknown argument \`$1'" >&2 26 | exit 1 27 | ;; 28 | esac 29 | 30 | # dh_installdeb will replace this with shell code automatically 31 | # generated by other debhelper scripts.
32 | 33 | #DEBHELPER# 34 | 35 | exit 0 36 | -------------------------------------------------------------------------------- /test/test-follow/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -Eindex -v6 $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-hrefonly/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -Eindex $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff 
$(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-include/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -Eindex $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-mailto/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -Eindex $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec 
$(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/html40-frameset.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | test HTML 4.0 Frameset document 6 | 7 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | <p> 18 | This is a test HTML 4.0 Frameset document. 19 | </p> 20 | 21 | <p> 22 | See: <a href="./">W3C HTML Validation Service: Tests</a> 23 | </p> 24 | 25 | <hr> 26 | 27 | <address> 28 | <a href="http://validator.w3.org/check/referer">valid HTML</a><br> 29 | <a href="/feedback.html">Gerald Oskoboiny</A><br> 30 | $Date: 1999/09/17 08:45:42 $ 31 | </address> 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /test/test-quotes/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -Eindex $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f 
$(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-charset/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -Eindex -v 6 $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-notfound/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -Eindex -v5 $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < 
$(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-complexbody/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -v5 -Eindex $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 13 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 14 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 15 | -------------------------------------------------------------------------------- /test/test-notmodified/test.cmd: -------------------------------------------------------------------------------- 1 | # 2 | # Tests that status 304 is returned 3 | # 4 | 5 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 6 | 7 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 8 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -v5 -Eindex $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -v5 
-Eindex -a $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 12 | 13 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 14 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 15 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 16 | -------------------------------------------------------------------------------- /etc/stored.conf-dist: -------------------------------------------------------------------------------- 1 | # 2 | # You need to specify DBAddr command(s) to use stored db checkup 3 | #DBAddr mysql://localhost/search/?dbmode=cache 4 | # 5 | # Listen: Allows to bind searchd to specific IP address and/or port. 6 | # It is bound to all host addresses by default. 7 | # 8 | #Listen 7004 9 | #Listen 127.0.0.1:7004 10 | 11 | 12 | # Set non-standard /var directory 13 | # for cache mode and built-in database. 14 | # 15 | #VarDir /mnt/d/dpsearch/var/ 16 | 17 | 18 | # set number of file-stores in var/store for documents archiving 19 | # Note: this is *not* number of documents to store. 20 | # 21 | #StoredFiles 4096 22 | 23 | # try to optimize one base every OptimizeInterval seconds. 
24 | # default value: 600 seconds 25 | # 26 | #OptimizeInterval 600 27 | 28 | # do optimize if defragmentation is greater than OptimizeRatio percent 29 | # ATTN: this is an integer value 30 | # default value: 5 31 | #OptimizeRatio 5 32 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | DEB_CONFIGURE_USER_FLAGS=--with-pgsql --with-openssl --disable-apache-module --with-readline --enable-multidbaddr --datadir=$(DEB_CONFIGURE_DATADIR) --sbindir=$(DEB_CONFIGURE_SBINDIR) 4 | 5 | include /usr/share/cdbs/1/rules/debhelper.mk 6 | include /usr/share/cdbs/1/class/autotools.mk 7 | 8 | # Add here any variable or target overrides you need. 9 | DEB_CONFIGURE_SBINDIR = "\$${prefix}/bin" 10 | DEB_CONFIGURE_INCLUDEDIR = "\$${prefix}/include/dpsearch" 11 | DEB_CONFIGURE_SYSCONFDIR = "\$${prefix}/etc/dpsearch" 12 | DEB_CONFIGURE_LOCALSTATEDIR = "\$${prefix}/var/lib/dpsearch" 13 | DEB_CONFIGURE_DATADIR = "\$${prefix}/share/dpsearch" 14 | DEB_DH_STRIP_ARGS := --dbg-package=dpsearch 15 | 16 | # 17 | # Turned off the apache stuff. 
18 | # 19 | #install/dpsearch:: 20 | # cp debian/dpsearch.500mod_dpsearch.info \ 21 | # debian/dpsearch/`apxs -q LIBEXECDIR`/500mod_dpsearch.info 22 | -------------------------------------------------------------------------------- /perl/examples/referers.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | use Dataparksearch; 5 | 6 | print "Content-type: text/html\n\n"; 7 | print < 9 | 10 | 11 | 12 | Stats 13 | 14 | 15 | 16 | 17 | EOF 18 | 19 | # EDIT - ME 20 | 21 | my $DBAddr = "pgsql://user:passwd\@localhost/search/?dbmode=cache"; 22 | 23 | # 24 | 25 | my $search = new Dataparksearch('DBAddr' => $DBAddr); 26 | my $stats = $search->GetReferers(); 27 | 28 | foreach my $line ( @$stats ){ 29 | printf(" \n", @$line) 30 | } 31 | 32 | print < 34 | 35 | 36 | EOF 37 | 38 | # Free all allocations 39 | $search->Free(); 40 | 41 | exit; 42 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage12.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage12 Within Three Months 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Sun, 30 Apr 2006 23:06:00 EDT

11 |
12 |

rewrite 13 | 14 |

none 15 |

16 | 22 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage12.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage12 Within Three Months 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Sun, 30 Apr 2006 23:06:00 EDT

11 |
12 |

rewrite 13 | 14 |

none 15 |

16 | 22 | -------------------------------------------------------------------------------- /test/test-multi/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=multi 4 | 5 | LocalCharset utf-8 6 | MinWordLength 1 7 | 8 | CVSIgnore yes 9 | Disallow */CVS/* */.svn/* 10 | 11 | AddType text/plain *.txt *.pl *.js *.h *.c *.pm *.e 12 | AddType text/html *.html *.htm 13 | 14 | Section body 1 256 15 | Section title 2 128 16 | Section meta.keywords 3 128 17 | Section meta.description 4 128 18 | 19 | Section url.file 6 0 20 | Section url.path 7 0 21 | Section url.host 8 0 22 | Section url.proto 9 0 23 | 24 | Section Charset 10 128 25 | Section Content-Language 11 128 26 | 27 | #Include langmap.conf 28 | 29 | LangMapFile langmap/ru.koi8-r.lm 30 | #LangMapFile langmap/ru.cp1251.lm 31 | #LangMapFile langmap/ru.utf8.lm 32 | #LangMapFile langmap/en.ascii.lm 33 | 34 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 35 | # 36 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage12.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage12 Within Three Months 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Sun, 30 Apr 2006 23:06:00 EDT

11 |
12 |

rewrite 13 | 14 |

none 15 |

16 | 22 | -------------------------------------------------------------------------------- /test/test-crc-multi/indexer.conf: -------------------------------------------------------------------------------- 1 | ImportEnv DPS_TEST_DBADDR0 2 | ImportEnv DPS_TEST_DIR 3 | DBAddr $(DPS_TEST_DBADDR0)?dbmode=crc-multi 4 | 5 | LocalCharset utf-8 6 | MinWordLength 1 7 | 8 | CVSIgnore yes 9 | Disallow */CVS/* */.svn/* 10 | 11 | AddType text/plain *.txt *.pl *.js *.h *.c *.pm *.e 12 | AddType text/html *.html *.htm 13 | 14 | Section body 1 256 15 | Section title 2 128 16 | Section meta.keywords 3 128 17 | Section meta.description 4 128 18 | 19 | Section url.file 6 0 20 | Section url.path 7 0 21 | Section url.host 8 0 22 | Section url.proto 9 0 23 | 24 | Section Charset 10 128 25 | Section Content-Language 11 128 26 | 27 | #Include langmap.conf 28 | 29 | LangMapFile langmap/ru.koi8-r.lm 30 | LangMapFile langmap/ru.cp1251.lm 31 | LangMapFile langmap/ru.utf8.lm 32 | LangMapFile langmap/en.ascii.lm 33 | 34 | Server http://site/ file:$(DPS_TEST_DIR)/htdocs/ 35 | # 36 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage7.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage7 Search Body Content only 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Wed, 24 May 2006 23:06:00 EDT

11 |
12 |

title1 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage7.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage7 Search Body Content only 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Wed, 24 May 2006 23:06:00 EDT

11 |
12 |

title1 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage7.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage7 Search Body Content only 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Wed, 24 May 2006 23:06:00 EDT

11 |
12 |

title1 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage5 Phrase searching 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2006 23:06:00 EDT

11 |
12 |

American typo Insurance Group 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage5 Phrase searching 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2006 23:06:00 EDT

11 |
12 |

American typo Insurance Group 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage5.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage5 Phrase searching 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2006 23:06:00 EDT

11 |
12 |

American typo Insurance Group 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /doc/samples/local.conf: -------------------------------------------------------------------------------- 1 | # This is an indexer.conf sample for 'local' mode. 2 | # Indexer will index only the URL but not the content 3 | # of the documents. 4 | # 5 | 6 | # Database parameters 7 | DBAddr mysql://foo:bar@localhost/search/ 8 | 9 | 10 | # Turn on indexing body, title, path and filename of the documents 11 | 12 | Section body 1 0 13 | Section title 2 0 14 | Section url.file 3 0 15 | Section url.path 4 0 16 | 17 | 18 | # Add start URL (the root of FS) 19 | Server file:/// 20 | 21 | 22 | # Add Content-type for several extensions 23 | AddType regex text/plain \.txt\.? 24 | AddType regex text/html \.htm\.? \.html\.? \.shtml\.? 25 | AddType regex text/xml \.xml\.? 26 | AddType regex audio/mpeg \.mp3\.? 27 | 28 | # Allow indexing for extensions specified above. 29 | Allow regex \.txt\.? \.htm\.? \.html\.? \.shtml\.? \.xml\.? \.mp3\.? 30 | 31 | # Retrieve only directory list, check other files. 
32 | 33 | CheckOnly NoMatch */ 34 | HrefOnly */ 35 | -------------------------------------------------------------------------------- /test/test-revalias1/test.cmd: -------------------------------------------------------------------------------- 1 | #skip !0 testenv DPS_TEST_ROOT 2 | #skip !0 testenv DPS_TEST_DIR 3 | #skip !0 testenv DPS_TEST_DBADDR0 4 | #skip !0 testenv DPS_SHARE_DIR 5 | #skip !0 testenv INDEXER 6 | skip !0 exec $(INDEXER) -Echeck $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 7 | 8 | fail 20 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 9 | fail !0 exec $(INDEXER) -Ecreate $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 10 | fail !0 exec $(INDEXER) -v5 -Eindex $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 11 | fail !0 exec $(INDEXER) -v5 -Efilter $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 12 | fail !0 exec $(INDEXER) -Esqlmon $(DPS_TEST_DIR)/indexer.conf < $(DPS_TEST_DIR)/query.tst > $(DPS_TEST_DIR)/query.rej 2>&1 13 | fail !0 mdiff $(DPS_TEST_DIR)/query.rej $(DPS_TEST_DIR)/query.res 14 | fail !0 exec rm -f $(DPS_TEST_DIR)/query.rej 15 | pass 0 exec $(INDEXER) -Edrop $(DPS_TEST_DIR)/indexer.conf >> $(DPS_TEST_LOG) 2>&1 16 | -------------------------------------------------------------------------------- /include/dps_image.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2003 Datapark corp. All rights reserved. 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | */ 17 | 18 | #ifndef _DPS_IMAGE_H 19 | #define _DPS_IMAGE_H 20 | 21 | extern int DpsGIFParse (DPS_AGENT *A, DPS_DOCUMENT *D); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /test/test-parsehtml/htdocs/xhtml1-strict.xhtml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | test XHTML 1.0 Strict document 8 | 9 | 11 | 12 | 13 | 14 | 15 |

16 | This is a test XHTML 1.0 Strict document. 17 |

18 | 19 |

20 | See: W3C HTML Validation Service: Tests 21 |

22 | 23 |
24 | 25 |
26 | valid HTML
27 | Gerald Oskoboiny
28 | $Date: 1999/12/06 05:52:07 $ 29 |
30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /perl/cat_ed/config.pl: -------------------------------------------------------------------------------- 1 | ## 2 | # Database configuration parameters 3 | # $dbport=3306; 4 | $dbhost='localhost'; 5 | $dbname='udm'; 6 | $dbuser='udm'; 7 | $dbpass='udm'; 8 | 9 | # $dbtype can be mysql, psql or oracle 10 | # $dbtype='psql'; 11 | $dbtype='mysql'; 12 | 13 | 14 | # name of root to add if database is empty 15 | $root_name='Home'; 16 | 17 | # Relative path with leading slash where unknown.gif and folder.gif are located (may be empty) 18 | $images_path='/icons/'; 19 | 20 | # tree filling strategy : 0 - new records always inserted at the end of tree (old behavior) 21 | # 1 - new records inserted in first empty slot (new optimal strategy, DEFAULT) 22 | $fill=1; 23 | 24 | # debug output (0-no, 1-yes) 25 | $DEBUG=0; 26 | 27 | # arbitrary base for path and link field in categories table ,range: {9 to 36} 28 | # for example: 29 | # set to 16 (default) if you want 01..AA..FF values 30 | # set to 36 (this is max allowed value) if you want 01..AA..ZZ values 31 | $path_base=16; 32 | 33 | 1; 34 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage13.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage13 Within 6 months 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Tue, 31 Jan 2006 23:06:00 EDT

11 |
12 |

rewrite 13 |
This is a real-time news story and may be updated in the near future.
14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage13.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage13 Within 6 months 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Tue, 31 Jan 2006 23:06:00 EDT

11 |
12 |

rewrite 13 |
This is a real-time news story and may be updated in the near future.
14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage13.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage13 Within 6 months 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Tue, 31 Jan 2006 23:06:00 EDT

11 |
12 |

rewrite 13 |
This is a real-time news story and may be updated in the near future.
14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-multi/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 1 title 4 | 5 | 6 | This is the first test page. Here is the second one. 7 | 1 8 | 22 9 | 333 10 | 4444 11 | 55555 12 | 666666 13 | 7777777 14 | 88888888 15 | 999999999 16 | AAAAAAAAAA 17 | BBBBBBBBBBB 18 | CCCCCCCCCCCC 19 | DDDDDDDDDDDDD 20 | EEEEEEEEEEEEEE 21 | FFFFFFFFFFFFFFF 22 | GGGGGGGGGGGGGGGG 23 | HHHHHHHHHHHHHHHHH 24 | IIIIIIIIIIIIIIIIII 25 | JJJJJJJJJJJJJJJJJJJ 26 | KKKKKKKKKKKKKKKKKKKK 27 | LLLLLLLLLLLLLLLLLLLLL 28 | MMMMMMMMMMMMMMMMMMMMMM 29 | NNNNNNNNNNNNNNNNNNNNNNN 30 | OOOOOOOOOOOOOOOOOOOOOOOO 31 | PPPPPPPPPPPPPPPPPPPPPPPPP 32 | QQQQQQQQQQQQQQQQQQQQQQQQQQ 33 | RRRRRRRRRRRRRRRRRRRRRRRRRRR 34 | SSSSSSSSSSSSSSSSSSSSSSSSSSSS 35 | TTTTTTTTTTTTTTTTTTTTTTTTTTTTT 36 | UUUUUUUUUUUUUUUUUUUUUUUUUUUUUU 37 | VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV 38 | WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW 39 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 40 | YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY 41 | ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ 42 | 43 | 44 | -------------------------------------------------------------------------------- /doc/aspell.xml: -------------------------------------------------------------------------------- 1 | 2 | Aspell 3 | Aspell 4 | 5 | With Aspell support compiled, it's possible automatically extend search query by spelling 6 | suggestions for query words. To enable this feature, you need to install Aspell at your system 7 | before DataparkSearch build. Then you need to place 8 | CommandAspellExtensions 9 | 10 | AspellExtensions yes 11 | 12 | command into your indexer.conf and search.htm (or into searchd.conf, if 13 | searchd is used) files to activate this feature. 14 | 15 | 16 | Automatically spelling suggestion for search query words is going only if search parameter is set, see 17 | . 
18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /test/test-crc-multi/htdocs/test1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Test 1 title 4 | 5 | 6 | This is the first test page. Here is the second one. 7 | 1 8 | 22 9 | 333 10 | 4444 11 | 55555 12 | 666666 13 | 7777777 14 | 88888888 15 | 999999999 16 | AAAAAAAAAA 17 | BBBBBBBBBBB 18 | CCCCCCCCCCCC 19 | DDDDDDDDDDDDD 20 | EEEEEEEEEEEEEE 21 | FFFFFFFFFFFFFFF 22 | GGGGGGGGGGGGGGGG 23 | HHHHHHHHHHHHHHHHH 24 | IIIIIIIIIIIIIIIIII 25 | JJJJJJJJJJJJJJJJJJJ 26 | KKKKKKKKKKKKKKKKKKKK 27 | LLLLLLLLLLLLLLLLLLLLL 28 | MMMMMMMMMMMMMMMMMMMMMM 29 | NNNNNNNNNNNNNNNNNNNNNNN 30 | OOOOOOOOOOOOOOOOOOOOOOOO 31 | PPPPPPPPPPPPPPPPPPPPPPPPP 32 | QQQQQQQQQQQQQQQQQQQQQQQQQQ 33 | RRRRRRRRRRRRRRRRRRRRRRRRRRR 34 | SSSSSSSSSSSSSSSSSSSSSSSSSSSS 35 | TTTTTTTTTTTTTTTTTTTTTTTTTTTTT 36 | UUUUUUUUUUUUUUUUUUUUUUUUUUUUUU 37 | VVVVVVVVVVVVVVVVVVVVVVVVVVVVVVV 38 | WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW 39 | XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 40 | YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY 41 | ZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ 42 | 43 | 44 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Testpage4 Stopwords and Phrase Search Check 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 25 May 2006 23:06:00 EDT

11 |
12 |

The government has a million dollar system. 13 |

The policy comes from the American Insurance Group, or AIG. 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Testpage4 Stopwords and Phrase Search Check 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 25 May 2006 23:06:00 EDT

11 |
12 |

The government has a million dollar system. 13 |

The policy comes from the American Insurance Group, or AIG. 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage4.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Testpage4 Stopwords and Phrase Search Check 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 25 May 2006 23:06:00 EDT

11 |
12 |

The government has a million dollar system. 13 |

The policy comes from the American Insurance Group, or AIG. 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /dp-Makefile.am: -------------------------------------------------------------------------------- 1 | ## Process this file with automake to produce Makefile.in 2 | ACLOCAL_AMFLAGS = -I m4 3 | AUTOMAKE_OPTIONS = foreign no-installinfo dist-bzip2 no-dist-gzip 4 | SUBDIRS = src 5 | 6 | # copy dirs w/out Makefile.am into dist 7 | dist-hook: 8 | rm -f $(distdir)/include/dps_config.h $(distdir)/include/stamp.h 9 | 10 | install-data-local: 11 | $(mkinstalldirs) $(localstatedir) 12 | 13 | uninstall-local: 14 | -rmdir $(localstatedir) 15 | -rmdir $(DESTDIR)$(bindir) 16 | -rmdir $(DESTDIR)$(sbindir) 17 | -rmdir $(DESTDIR)$(libdir) 18 | -rmdir $(DESTDIR)$(includedir) 19 | -rmdir $(DESTDIR)$(prefix) 20 | 21 | bin-dist: 22 | $(MAKE) install DESTDIR=$(CDIR)/$(PACKAGE)-$(VERSION) 23 | cd $(CDIR)/$(PACKAGE)-$(VERSION)$(prefix) \ 24 | && tar -ycPf $(PACKAGE)-$(VERSION)-$(DBTYPE)-$(host_triplet)-$(LINKING).tar.bz2 ./ 25 | mv $(CDIR)/$(PACKAGE)-$(VERSION)$(prefix)/*.tar.bz2 $(CDIR) 26 | rm -r $(CDIR)/$(PACKAGE)-$(VERSION) 27 | 28 | EXTRA_DIST = LICENSE bootstrap 29 | 30 | libtool: $(LIBTOOL_DEPS) 31 | $(SHELL) ./config.status --recheck 32 | -------------------------------------------------------------------------------- /debian/prerm.ex: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # prerm script for dpsearch 3 | # 4 | # see: dh_installdeb(1) 5 | 6 | set -e 7 | 8 | # summary of how this script can be called: 9 | # * `remove' 10 | # * `upgrade' 11 | # * `failed-upgrade' 12 | # * `remove' `in-favour' 13 | # * `deconfigure' `in-favour' 14 | # `removing' 15 | # 16 | # for details, see http://www.debian.org/doc/debian-policy/ or 17 | # the debian-policy package 18 | 19 | 20 | case "$1" in 21 | remove|upgrade|deconfigure) 22 | ;; 23 | 24 | failed-upgrade) 25 | ;; 26 | 27 | *) 28 | echo "prerm called with unknown argument \`$1'" >&2 29 | exit 1 30 
| ;; 31 | esac 32 | 33 | # dh_installdeb will replace this with shell code automatically 34 | # generated by other debhelper scripts. 35 | 36 | #DEBHELPER# 37 | 38 | exit 0 39 | -------------------------------------------------------------------------------- /include/dps_search_tl.h: -------------------------------------------------------------------------------- 1 | #ifndef _DPS_SEARCH_TL_H 2 | #define _DPS_SEARCH_TL_H 3 | 4 | /* 5 | * udm_search_tl.h from UdmSearch 6 | * (C) 2000 Kir , UdmSearch Developers Team 7 | */ 8 | 9 | /* FIXME: should be taken from template somehow */ 10 | #define DEFAULT_DT "back" 11 | #define DEFAULT_DP "0" 12 | #define DEFAULT_DX "1" 13 | #define DEFAULT_DM "0" 14 | #define DEFAULT_DD "1" 15 | #define DEFAULT_DY "2000" 16 | #define DEFAULT_DB "01/01/1999" 17 | #define DEFAULT_DE "31/12/2001" 18 | 19 | #include 20 | 21 | struct dps_stl_info_t 22 | { 23 | int type; 24 | time_t t1; 25 | time_t t2; 26 | }; 27 | 28 | /* Function prototypes */ 29 | 30 | /* converts string representation of time search type (option) 31 | * to integer 32 | */ 33 | int getSTLType (char *type_str); 34 | 35 | /* converts string in the form dd/mm/yyyy to time_t 36 | */ 37 | time_t dmy2time_t (char *time_str); 38 | 39 | /* converts day, month, year to time_t 40 | */ 41 | time_t d_m_y2time_t (int d, int m, int y); 42 | 43 | #endif /* _DPS_SEARCH_TL_H */ 44 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage6.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage6 title1 title2 title3 title4 title5 title6 title7 title8 title9 title10 title11 title12 title13 title14 title15 title16 title17 title18 title19 title20 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Wed, 24 May 2006 23:06:00 EDT

11 |
12 |

machine1 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage6.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage6 title1 title2 title3 title4 title5 title6 title7 title8 title9 title10 title11 title12 title13 title14 title15 title16 title17 title18 title19 title20 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Wed, 24 May 2006 23:06:00 EDT

11 |
12 |

machine1 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage6.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage6 title1 title2 title3 title4 title5 title6 title7 title8 title9 title10 title11 title12 title13 title14 title15 title16 title17 title18 title19 title20 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Wed, 24 May 2006 23:06:00 EDT

11 |
12 |

machine1 13 | 14 | 15 |

none 16 |

17 | 23 | -------------------------------------------------------------------------------- /include/dps_execget.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2003 Datapark corp. All rights reserved. 2 | Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved. 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef _DPS_EXEC_GET_H 20 | #define _DPS_EXEC_GET_H 21 | 22 | int DpsExecGet (DPS_AGENT *Indexer, DPS_DOCUMENT *Doc); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /test/test-sections/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,last_mod_time,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag,dict.word; 3 | SELECT * FROM crossdict ORDER BY url_id,intag; 4 | SELECT dict.word,dict.intag,url.crc32,url.url,ref.url FROM crossdict dict, url, url ref WHERE url.rec_id=dict.url_id AND ref.rec_id=dict.ref_id ORDER BY url.crc32,dict.intag; 5 | 6 | SELECT dict.word,dict.intag,url.crc32,dict.url_id,dict.ref_id FROM crossdict dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 7 | 8 | #SELECT 
dict.word_id,dict.intag,url.crc32,url.url,ref.url FROM ncrossdict dict, url, url ref WHERE url.rec_id=dict.url_id AND ref.rec_id=dict.ref_id ORDER BY url.crc32,dict.itag; 9 | SELECT status, docsize, hops, crc32, last_mod_time, url FROM url ORDER BY status, crc32; 10 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 11 | 12 | SELECT url FROM url WHERE url='http://site/'; 13 | -------------------------------------------------------------------------------- /doc/accent.xml: -------------------------------------------------------------------------------- 1 | 2 | Accent insensitive search 3 | Accent insensitive search 4 | CommandAccentExtensions 5 | Since version 4.17 DataparkSearch also supports an 6 | accent insensitive search. 7 | 8 | To enable this extension, use the AccentExtensions command in your 9 | search.htm (or in searchd.conf, if searchd 10 | is used) to automatically make accent-free copies of query words, and in your indexer.conf 11 | config file to produce accent-free copies of words to store in the database. 12 | 13 | AccentExtensions yes 14 | 15 | 16 | If the AccentExtensions command is placed before the Spell and Affix commands, 17 | accent-free copies of that data will also be loaded automatically. 18 | 19 | 20 | -------------------------------------------------------------------------------- /include/dps_filter.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2012 DataPark Ltd. All rights reserved. 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 
7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | */ 17 | 18 | #ifndef _DPS_FILTER_H 19 | #define _DPS_FILTER_H 20 | 21 | #include "dps_common.h" 22 | 23 | int DpsFilterFind (int log_level, DPS_MATCHLIST *L, const char *newhref, char *reason, int default_method); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /include/dps_mkind.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2003 Datapark corp. All rights reserved. 2 | Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved. 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef _DPS_MKIND_H 20 | #define _DPS_MKIND_H 21 | 22 | extern __C_LINK int __DPSCALL DpsCacheMakeIndexes (DPS_AGENT *Indexer, DPS_DB *db); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /debian/postrm.ex: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # postrm script for dpsearch 3 | # 4 | # see: dh_installdeb(1) 5 | 6 | set -e 7 | 8 | # summary of how this script can be called: 9 | # * `remove' 10 | # * `purge' 11 | # * `upgrade' 12 | # * `failed-upgrade' 13 | # * `abort-install' 14 | # * `abort-install' 15 | # * `abort-upgrade' 16 | # * `disappear' 17 | # 18 | # for details, see http://www.debian.org/doc/debian-policy/ or 19 | # the debian-policy package 20 | 21 | 22 | case "$1" in 23 | purge|remove|upgrade|failed-upgrade|abort-install|abort-upgrade|disappear) 24 | ;; 25 | 26 | *) 27 | echo "postrm called with unknown argument \`$1'" >&2 28 | exit 1 29 | ;; 30 | esac 31 | 32 | # dh_installdeb will replace this with shell code automatically 33 | # generated by other debhelper scripts. 
34 | 35 | #DEBHELPER# 36 | 37 | exit 0 38 | -------------------------------------------------------------------------------- /test/test-sections2/query.tst: -------------------------------------------------------------------------------- 1 | FIELDS=OFF; 2 | SELECT dict.word,dict.intag,url.crc32,last_mod_time,url.url FROM dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag,dict.word; 3 | SELECT url_id,ref_id, intag, word FROM crossdict ORDER BY url_id,intag; 4 | SELECT dict.word,dict.intag,url.crc32,url.url,ref.url FROM crossdict dict, url, url ref WHERE url.rec_id=dict.url_id AND ref.rec_id=dict.ref_id ORDER BY url.crc32,dict.intag; 5 | 6 | SELECT dict.word,dict.intag,url.crc32,dict.url_id,dict.ref_id FROM crossdict dict, url WHERE url.rec_id=dict.url_id ORDER BY url.crc32,dict.intag; 7 | 8 | #SELECT dict.word_id,dict.intag,url.crc32,url.url,ref.url FROM ncrossdict dict, url, url ref WHERE url.rec_id=dict.url_id AND ref.rec_id=dict.ref_id ORDER BY url.crc32,dict.itag; 9 | SELECT status, docsize, hops, crc32, last_mod_time, rec_id, url FROM url ORDER BY status, crc32, rec_id; 10 | SELECT url.status,url.crc32,url.url,urlinfo.sname,urlinfo.sval FROM url,urlinfo WHERE url.rec_id=urlinfo.url_id ORDER BY url.status,url.crc32,lower(urlinfo.sname); 11 | 12 | SELECT url FROM url WHERE url='http://site/'; 13 | -------------------------------------------------------------------------------- /include/dps_alias.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved. 2 | 3 | This program is free software; you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation; either version 2 of the License, or 6 | (at your option) any later version. 
7 | 8 | This program is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with this program; if not, write to the Free Software 15 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 16 | */ 17 | 18 | #ifndef _UDM_ALIAS_H 19 | #define _UDM_ALIAS_H 20 | 21 | #include "dps_common.h" 22 | 23 | extern __C_LINK int __DPSCALL DpsAliasProg (DPS_AGENT *Indexer, const char *alias_prog, const char *argument, char *res, size_t rsize); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage1 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 25 May 2006 23:06:00 EDT

11 |
12 |

BODY1 BODY2 BODY3 BODY4 BODY5 BODY6 BODY7 BODY8 BODY9 BODY10 BODY11 BODY12 BODY13 BODY14 BODY15 BODY16 BODY17 BODY18 BODY19 BODY20 13 |

BODY21 BODY22 BODY23 BODY24 BODY25 BODY26 BODY27 BODY28 BODY29 BODY30 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage3 Even Number of BODY's until 30 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 26 May 2006 23:06:00 EDT

11 |
12 |

BODY2 BODY4 BODY6 BODY8 BODY10 BODY12 BODY14 BODY16 BODY18 BODY20 BODY22 BODY24 BODY26 BODY28 13 |

BODY30 BODY31 BODY32 BODY33 BODY34 BODY35 BODY36 BODY37 BODY38 BODY39 BODY40 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage1 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 25 May 2006 23:06:00 EDT

11 |
12 |

BODY1 BODY2 BODY3 BODY4 BODY5 BODY6 BODY7 BODY8 BODY9 BODY10 BODY11 BODY12 BODY13 BODY14 BODY15 BODY16 BODY17 BODY18 BODY19 BODY20 13 |

BODY21 BODY22 BODY23 BODY24 BODY25 BODY26 BODY27 BODY28 BODY29 BODY30 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage3 Even Number of BODY's until 30 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 26 May 2006 23:06:00 EDT

11 |
12 |

BODY2 BODY4 BODY6 BODY8 BODY10 BODY12 BODY14 BODY16 BODY18 BODY20 BODY22 BODY24 BODY26 BODY28 13 |

BODY30 BODY31 BODY32 BODY33 BODY34 BODY35 BODY36 BODY37 BODY38 BODY39 BODY40 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage1 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 25 May 2006 23:06:00 EDT

11 |
12 |

BODY1 BODY2 BODY3 BODY4 BODY5 BODY6 BODY7 BODY8 BODY9 BODY10 BODY11 BODY12 BODY13 BODY14 BODY15 BODY16 BODY17 BODY18 BODY19 BODY20 13 |

BODY21 BODY22 BODY23 BODY24 BODY25 BODY26 BODY27 BODY28 BODY29 BODY30 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage3 Even Number of BODY's until 30 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 26 May 2006 23:06:00 EDT

11 |
12 |

BODY2 BODY4 BODY6 BODY8 BODY10 BODY12 BODY14 BODY16 BODY18 BODY20 BODY22 BODY24 BODY26 BODY28 13 |

BODY30 BODY31 BODY32 BODY33 BODY34 BODY35 BODY36 BODY37 BODY38 BODY39 BODY40 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage2 Odd Number of BODY's until 30 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 25 May 2006 23:06:00 EDT

11 |
12 |

BODY1 BODY3 BODY5 BODY7 BODY9 BODY11 BODY13 BODY15 BODY17 BODY19 BODY21 BODY23 BODY25 BODY27 BODY29 13 |

BODY30 BODY31 BODY32 BODY33 BODY34 BODY35 BODY36 BODY37 BODY38 BODY39 BODY40 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage2 Odd Number of BODY's until 30 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 25 May 2006 23:06:00 EDT

11 |
12 |

BODY1 BODY3 BODY5 BODY7 BODY9 BODY11 BODY13 BODY15 BODY17 BODY19 BODY21 BODY23 BODY25 BODY27 BODY29 13 |

BODY30 BODY31 BODY32 BODY33 BODY34 BODY35 BODY36 BODY37 BODY38 BODY39 BODY40 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage2 Odd Number of BODY's until 30 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Thu, 25 May 2006 23:06:00 EDT

11 |
12 |

BODY1 BODY3 BODY5 BODY7 BODY9 BODY11 BODY13 BODY15 BODY17 BODY19 BODY21 BODY23 BODY25 BODY27 BODY29 13 |

BODY30 BODY31 BODY32 BODY33 BODY34 BODY35 BODY36 BODY37 BODY38 BODY39 BODY40 14 | 15 | 16 |

none 17 |

18 | 24 | -------------------------------------------------------------------------------- /include/dps_env.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2003-2005 Datapark corp. All rights reserved. 2 | Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved. 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef _DPS_ENV_H 20 | #define _DPS_ENV_H 21 | 22 | extern DPS_ENV *DpsEnvInit (DPS_ENV *); 23 | extern void DpsEnvFree (DPS_ENV *); 24 | extern char *DpsEnvErrMsg (DPS_ENV *); 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /debian/postinst.ex: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # postinst script for dpsearch 3 | # 4 | # see: dh_installdeb(1) 5 | 6 | set -e 7 | 8 | # summary of how this script can be called: 9 | # * `configure' 10 | # * `abort-upgrade' 11 | # * `abort-remove' `in-favour' 12 | # 13 | # * `abort-remove' 14 | # * `abort-deconfigure' `in-favour' 15 | # `removing' 16 | # 17 | # for details, see http://www.debian.org/doc/debian-policy/ or 18 | # the debian-policy package 19 | 20 | 21 | case "$1" in 22 | configure) 23 | ;; 24 | 25 | 
abort-upgrade|abort-remove|abort-deconfigure) 26 | ;; 27 | 28 | *) 29 | echo "postinst called with unknown argument \`$1'" >&2 30 | exit 1 31 | ;; 32 | esac 33 | 34 | # dh_installdeb will replace this with shell code automatically 35 | # generated by other debhelper scripts. 36 | 37 | #DEBHELPER# 38 | 39 | exit 0 40 | -------------------------------------------------------------------------------- /etc/stopwords.conf-dist: -------------------------------------------------------------------------------- 1 | StopwordFile stopwords/cz.sl 2 | StopwordFile stopwords/da.sl 3 | StopwordFile stopwords/de.sl 4 | #StopwordFile stopwords/de.top100.sl 5 | #StopwordFile stopwords/de.top1000.sl 6 | StopwordFile stopwords/en.big.sl 7 | StopwordFile stopwords/en.huge.sl 8 | StopwordFile stopwords/en.sl 9 | #StopwordFile stopwords/en.top100.sl 10 | #StopwordFile stopwords/en.top1000.sl 11 | StopwordFile stopwords/es.sl 12 | StopwordFile stopwords/fr.sl 13 | #StopwordFile stopwords/fr.top100.sl 14 | #StopwordFile stopwords/fr.top1000.sl 15 | StopwordFile stopwords/it.sl 16 | # Uncomment this if you have japanese charsets enabled 17 | #StopwordFile stopwords/ja.sl 18 | StopwordFile stopwords/lt.sl 19 | StopwordFile stopwords/nl.sl 20 | #StopwordFile stopwords/nl.top100.sl 21 | #StopwordFile stopwords/nl.top1000.sl 22 | StopwordFile stopwords/no.sl 23 | StopwordFile stopwords/pl.sl 24 | StopwordFile stopwords/pt.sl 25 | StopwordFile stopwords/ru.sl 26 | StopwordFile stopwords/sk.sl 27 | StopwordFile stopwords/tr.sl 28 | StopwordFile stopwords/uk.sl 29 | # Uncomment this if you have chinese charsets enabled 30 | #StopwordFile stopwords/zh.sl 31 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage10.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage10 Long Words 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2005 23:06:00 EDT

11 |
12 |

aaaaaaaaaabbbbbbbbbbcccccccccc 13 |

aaaaaaaaaabbbbbbbbbbccccccccccdd 14 |

aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd 15 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee 16 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffff 17 | 18 |

none 19 |

20 | 26 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage10.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage10 Long Words 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2005 23:06:00 EDT

11 |
12 |

aaaaaaaaaabbbbbbbbbbcccccccccc 13 |

aaaaaaaaaabbbbbbbbbbccccccccccdd 14 |

aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd 15 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee 16 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffff 17 | 18 |

none 19 |

20 | 26 | -------------------------------------------------------------------------------- /test/test-parsehtml/json.htm: -------------------------------------------------------------------------------- 1 | 22 | 23 | {"responseData": { 24 | 25 | 26 | 27 | "query":"$(q:json)", 28 | "found":"$(total:json)","first":"$(first:json)","last":"$(last:json)", 29 | "stat":"$(W:json)", 30 | "stat_extended":"$(WE:json)", 31 | "stat_all":"$(WA:json)", 32 | "stat_short":"$(WS:json)", 33 | "results":[ 34 | 35 | 36 | 37 | {"title":"$(title:json)","url":"$(url:json)","content":"$(body:json)"} 38 | 39 | 40 | 41 | ] 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | }} 58 | 59 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage10.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage10 Long Words 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2005 23:06:00 EDT

11 |
12 |

aaaaaaaaaabbbbbbbbbbcccccccccc 13 |

aaaaaaaaaabbbbbbbbbbccccccccccdd 14 |

aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd 15 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee 16 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffff 17 | 18 |

none 19 |

20 | 26 | -------------------------------------------------------------------------------- /src/charset-Makefile.am: -------------------------------------------------------------------------------- 1 | SUBDIRS = . 2 | 3 | sbin_PROGRAMS= dpconv 4 | 5 | dpconv_SOURCES = dpconv.c 6 | dpconv_LDADD = libdpcharset.la @PTHREAD_LDADD@ 7 | dpconv_LDFLAGS = @PTHREAD_LFLAGS@ 8 | 9 | CFLAGS = @CFLAGS@ -DDPS_CONF_DIR=\"@sysconfdir@\" -DDPS_VAR_DIR=\"@localstatedir@\" 10 | 11 | lib_LTLIBRARIES = libdpcharset.la 12 | libdpcharset_la_SOURCES = uconv.c unidata.c sgml.c unicode.c xmalloc.c \ 13 | uconv-8bit.c uconv-alias.c uconv-big5.c uconv-big5hkscs.c uconv-eucjp.c uconv-euckr.c \ 14 | uconv-gb2312.c uconv-gbk.c uconv-sjis.c uconv-sys.c uconv-utf8.c uconv-gujarati.c \ 15 | uconv-tscii.c charset-utils.c 16 | libdpcharset_la_LDFLAGS = -release @DPS_BASE_VERSION@ 17 | 18 | sgml.c: sgml.inc 19 | 20 | sgml.inc: sgml.entities 21 | cat sgml.entities | sort -k 2 > sgml.inc 22 | 23 | 24 | include_HEADERS = \ 25 | ../include/dps_config.h \ 26 | ../include/dps_unicode.h \ 27 | ../include/dps_unidata.h \ 28 | ../include/dps_uniconv.h \ 29 | ../include/dps_sgml.h \ 30 | ../include/dps_charsetutils.h \ 31 | ../include/dps_filence.h \ 32 | ../include/dps_xmalloc.h 33 | 34 | 35 | INCLUDES = -I$(srcdir)/../include 36 | 37 | LIBS = @STATIC_LFLAGS@ \ 38 | @LIBS@ 39 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | This is dpsearch, written and maintained by Software 2 | on Tue, 14 Nov 2006 11:54:56 -0500. 
3 | 4 | The original source can always be found at: 5 | ftp://ftp.debian.org/dists/unstable/main/source/ 6 | 7 | Copyright Holder: Software 8 | 9 | License: 10 | 11 | This program is free software; you can redistribute it and/or modify 12 | it under the terms of the GNU General Public License as published by 13 | the Free Software Foundation; either version 2 of the License, or 14 | (at your option) any later version. 15 | 16 | This program is distributed in the hope that it will be useful, 17 | but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | GNU General Public License for more details. 20 | 21 | You should have received a copy of the GNU General Public License 22 | along with this package; if not, write to the Free Software 23 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 | 25 | On Debian systems, the complete text of the GNU General 26 | Public License can be found in `/usr/share/common-licenses/GPL'. 27 | -------------------------------------------------------------------------------- /include/dps_http.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2003 Datapark corp. All rights reserved. 2 | Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved. 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 
13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef _DPS_HTTP_H 20 | #define _DPS_HTTP_H 21 | 22 | extern int DpsHTTPConnect (DPS_AGENT *Agent, DPS_CONN *connp, char *hostname, int port, int timeout); 23 | extern void DpsParseHTTPResponse (DPS_AGENT *Indexer, DPS_DOCUMENT *Doc); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /test/test-cache/htdocs/testpage11.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage11 Long Words 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2005 23:06:00 EDT

11 |
12 |

amit amit amit 13 |

aaaaaaaaaabbbbbbbbbbcccccccccc 14 |

aaaaaaaaaabbbbbbbbbbccccccccccdd 15 |

aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd 16 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee 17 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffff 18 | 19 |

none 20 |

21 | 27 | -------------------------------------------------------------------------------- /test/test-cached/htdocs/testpage11.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage11 Long Words 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2005 23:06:00 EDT

11 |
12 |

amit amit amit 13 |

aaaaaaaaaabbbbbbbbbbcccccccccc 14 |

aaaaaaaaaabbbbbbbbbbccccccccccdd 15 |

aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd 16 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee 17 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffff 18 | 19 |

none 20 |

21 | 27 | -------------------------------------------------------------------------------- /test/test-searchd/htdocs/testpage11.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | TestPage11 Long Words 6 | 7 | 8 |

The Wall Street Journal 9 |

10 |

Mon, 24 May 2005 23:06:00 EDT

11 |
12 |

amit amit amit 13 |

aaaaaaaaaabbbbbbbbbbcccccccccc 14 |

aaaaaaaaaabbbbbbbbbbccccccccccdd 15 |

aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd 16 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeee 17 |

aaaaaaaaaabbbbbbbbbbccccccccccddddddddddeeeeeeeeeeffffffffff 18 | 19 |

none 20 |

21 | 27 | -------------------------------------------------------------------------------- /include/dps_xmalloc.h: -------------------------------------------------------------------------------- 1 | /* Copyright (C) 2003-2011 DataPark Ltd. All rights reserved. 2 | Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved. 3 | 4 | This program is free software; you can redistribute it and/or modify 5 | it under the terms of the GNU General Public License as published by 6 | the Free Software Foundation; either version 2 of the License, or 7 | (at your option) any later version. 8 | 9 | This program is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License 15 | along with this program; if not, write to the Free Software 16 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 17 | */ 18 | 19 | #ifndef _DPS_XMALLOC_H 20 | #define _DPS_XMALLOC_H 21 | 22 | #ifndef EFENCE 23 | 24 | #include 25 | 26 | extern void *DpsXmalloc (size_t size); 27 | extern void *DpsXrealloc (void *ptr, size_t newsize); 28 | 29 | #endif 30 | 31 | #endif 32 | --------------------------------------------------------------------------------
URLs and referers
\%d\%s\%s