├── LICENSE.txt
├── README.md
├── composer.json
└── library
    └── ZendSearch
        ├── Exception
            └── ExceptionInterface.php
        └── Lucene
            ├── AbstractFSM.php
            ├── AbstractPriorityQueue.php
            ├── Analysis
                ├── Analyzer
                │   ├── AbstractAnalyzer.php
                │   ├── Analyzer.php
                │   ├── AnalyzerInterface.php
                │   └── Common
                │   │   ├── AbstractCommon.php
                │   │   ├── Text.php
                │   │   ├── Text
                │   │       └── CaseInsensitive.php
                │   │   ├── TextNum.php
                │   │   ├── TextNum
                │   │       └── CaseInsensitive.php
                │   │   ├── Utf8.php
                │   │   ├── Utf8
                │   │       └── CaseInsensitive.php
                │   │   ├── Utf8Num.php
                │   │   └── Utf8Num
                │   │       └── CaseInsensitive.php
                ├── Token.php
                └── TokenFilter
                │   ├── LowerCase.php
                │   ├── LowerCaseUtf8.php
                │   ├── ShortWords.php
                │   ├── StopWords.php
                │   └── TokenFilterInterface.php
            ├── Document.php
            ├── Document
                ├── AbstractOpenXML.php
                ├── Docx.php
                ├── Exception
                │   ├── ExceptionInterface.php
                │   └── InvalidArgumentException.php
                ├── Field.php
                ├── HTML.php
                ├── Pptx.php
                └── Xlsx.php
            ├── Exception
                ├── ExceptionInterface.php
                ├── ExtensionNotLoadedException.php
                ├── InvalidArgumentException.php
                ├── InvalidFileFormatException.php
                ├── OutOfBoundsException.php
                ├── OutOfRangeException.php
                ├── RuntimeException.php
                ├── UnexpectedValueException.php
                └── UnsupportedMethodCallException.php
            ├── FSMAction.php
            ├── Index.php
            ├── Index
                ├── DictionaryLoader.php
                ├── DocsFilter.php
                ├── FieldInfo.php
                ├── SegmentInfo.php
                ├── SegmentMerger.php
                ├── SegmentWriter
                │   ├── AbstractSegmentWriter.php
                │   ├── DocumentWriter.php
                │   └── StreamWriter.php
                ├── Term.php
                ├── TermInfo.php
                ├── TermsPriorityQueue.php
                ├── TermsStreamInterface.php
                └── Writer.php
            ├── LockManager.php
            ├── Lucene.php
            ├── MultiSearcher.php
            ├── Search
                ├── BooleanExpressionRecognizer.php
                ├── Exception
                │   ├── ExceptionInterface.php
                │   └── QueryParserException.php
                ├── Highlighter
                │   ├── DefaultHighlighter.php
                │   └── HighlighterInterface.php
                ├── Query
                │   ├── AbstractQuery.php
                │   ├── Boolean.php
                │   ├── EmptyResult.php
                │   ├── Fuzzy.php
                │   ├── Insignificant.php
                │   ├── MultiTerm.php
                │   ├── Phrase.php
                │   ├── Preprocessing
                │   │   ├── AbstractPreprocessing.php
                │   │   ├── Fuzzy.php
                │   │   ├── Phrase.php
                │   │   └── Term.php
                │   ├── Range.php
                │   ├── Term.php
                │   └── Wildcard.php
                ├── QueryEntry
                │   ├── AbstractQueryEntry.php
                │   ├── Phrase.php
                │   ├── Subquery.php
                │   └── Term.php
                ├── QueryHit.php
                ├── QueryLexer.php
                ├── QueryParser.php
                ├── QueryParserContext.php
                ├── QueryToken.php
                ├── Similarity
                │   ├── AbstractSimilarity.php
                │   └── DefaultSimilarity.php
                └── Weight
                │   ├── AbstractWeight.php
                │   ├── Boolean.php
                │   ├── EmptyResultWeight.php
                │   ├── MultiTerm.php
                │   ├── Phrase.php
                │   └── Term.php
            ├── SearchIndexInterface.php
            ├── Storage
                ├── Directory
                │   ├── DirectoryInterface.php
                │   └── Filesystem.php
                └── File
                │   ├── AbstractFile.php
                │   ├── FileInterface.php
                │   ├── Filesystem.php
                │   └── Memory.php
            └── TermStreamsPriorityQueue.php


/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2005-2012, Zend Technologies USA, Inc.
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without modification,
 5 | are permitted provided that the following conditions are met:
 6 | 
 7 |     * Redistributions of source code must retain the above copyright notice,
 8 |       this list of conditions and the following disclaimer.
 9 | 
10 |     * Redistributions in binary form must reproduce the above copyright notice,
11 |       this list of conditions and the following disclaimer in the documentation
12 |       and/or other materials provided with the distribution.
13 | 
14 |     * Neither the name of Zend Technologies USA, Inc. nor the names of its
15 |       contributors may be used to endorse or promote products derived from this
16 |       software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ZendSearch component
 2 | 
 3 | > ## UNMAINTAINED
 4 | >
 5 | > This package is no longer maintained.
 6 | 
 7 | You can install using:
 8 | 
 9 | ```
10 | curl -s https://getcomposer.org/installer | php
11 | php composer.phar install
12 | ```
13 | 
14 | At that point, follow the instructions in the documentation folder for actual
15 | usage of the component. (Documentation is forthcoming.)
16 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "zendframework/zendsearch",
 3 |     "description": "a general purpose text search engine written entirely in PHP 5",
 4 |     "type": "library",
 5 |     "license": "BSD-3-Clause",
 6 |     "keywords": [
 7 |         "zf2",
 8 |         "lucene"
 9 |     ],
10 |     "homepage": "http://packages.zendframework.com/",
11 |     "autoload": {
12 |         "psr-0": {
13 |             "ZendSearch": "library/"
14 |         }
15 |     },
16 |     "repositories": [
17 |         {
18 |             "type": "composer",
19 |             "url": "http://packages.zendframework.com/"
20 |         }
21 |     ],
22 |     "require": {
23 |         "php": ">=5.3.3",
24 |         "zendframework/zend-stdlib": "2.*"
25 |     }
26 | }
27 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Exception/ExceptionInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Exception;
12 | 
13 | /**
14 |  * Exception marker interface of the ZendSearch namespace.
15 |  *
16 |  * It declares no methods. NOTE(review): presumably implemented by the
17 |  * component's exception classes so they can be caught as one type - confirm.
18 |  *
19 |  * @category   Zend
20 |  * @package    Zend_Search
21 |  */
22 | interface ExceptionInterface
23 | {
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/AbstractPriorityQueue.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene;
 12 | 
 13 | /**
 14 |  * Abstract Priority Queue
 15 |  *
 16 |  * Array-backed binary heap whose element order is decided by the subclass
 17 |  * through _less(). For implementation details please go to
 18 |  * "Data Structures and Algorithms", Aho, Hopcroft, and Ullman,
 19 |  * Addison-Wesley, 1983 (corrected 1987 edition).
 20 |  *
 21 |  * put() and pop() take O(log(N)) time, where N is the size of the queue.
 22 |  *
 23 |  * @category   Zend
 24 |  * @package    Zend_Search_Lucene
 25 |  */
 26 | abstract class AbstractPriorityQueue
 27 | {
 28 |     /**
 29 |      * Queue heap
 30 |      *
 31 |      * Balanced, partially ordered binary tree flattened into an array:
 32 |      *   [0]       - top of the tree
 33 |      *   [2*n + 1] - first child of [n]
 34 |      *   [2*n + 2] - second child of [n]
 35 |      *
 36 |      * @var array
 37 |      */
 38 |     private $_heap = array();
 39 | 
 40 |     /**
 41 |      * Add element to the queue
 42 |      *
 43 |      * O(log(N)) time
 44 |      *
 45 |      * @param mixed $element
 46 |      */
 47 |     public function put($element)
 48 |     {
 49 |         // Open a hole in the first free slot and walk it towards the root
 50 |         // for as long as the new element sorts before the hole's parent.
 51 |         $hole = count($this->_heap);
 52 | 
 53 |         while ($hole > 0) {
 54 |             $parent = ($hole - 1) >> 1;   // floor( ($hole-1)/2 )
 55 | 
 56 |             if (!$this->_less($element, $this->_heap[$parent])) {
 57 |                 break;
 58 |             }
 59 | 
 60 |             // Parent drops into the hole, the hole rises one level
 61 |             $this->_heap[$hole] = $this->_heap[$parent];
 62 |             $hole = $parent;
 63 |         }
 64 | 
 65 |         // Put new node into the tree
 66 |         $this->_heap[$hole] = $element;
 67 |     }
 68 | 
 69 |     /**
 70 |      * Return least element of the queue without removing it
 71 |      *
 72 |      * Constant time
 73 |      *
 74 |      * @return mixed the top element, or null when the queue is empty
 75 |      */
 76 |     public function top()
 77 |     {
 78 |         return $this->_heap ? $this->_heap[0] : null;
 79 |     }
 80 | 
 81 |     /**
 82 |      * Remove and return least element of the queue
 83 |      *
 84 |      * O(log(N)) time
 85 |      *
 86 |      * @return mixed the removed top element, or null when the queue is empty
 87 |      */
 88 |     public function pop()
 89 |     {
 90 |         $size = count($this->_heap);
 91 |         if ($size < 1) {
 92 |             return null;
 93 |         }
 94 | 
 95 |         $least = $this->_heap[0];
 96 |         $tail  = $size - 1;   // last node; it gets re-inserted from the top
 97 | 
 98 |         // The root becomes a hole. Begin with its smaller child; index $tail
 99 |         // never counts as a child because that node is being moved.
100 |         $hole  = 0;
101 |         $child = 1;
102 |         if ($tail > 2  &&  $this->_less($this->_heap[2], $this->_heap[1])) {
103 |             $child = 2;
104 |         }
105 | 
106 |         while ($child < $tail  &&  $this->_less($this->_heap[$child], $this->_heap[$tail])) {
107 |             // Child rises into the hole, the hole sinks one level
108 |             $this->_heap[$hole] = $this->_heap[$child];
109 |             $hole = $child;
110 | 
111 |             // Continue with the smaller child of the new hole
112 |             $child   = 2 * $hole + 1;
113 |             $sibling = $child + 1;
114 |             if ($sibling < $tail  &&  $this->_less($this->_heap[$sibling], $this->_heap[$child])) {
115 |                 $child = $sibling;
116 |             }
117 |         }
118 | 
119 |         // Move last element to the new position
120 |         $this->_heap[$hole] = $this->_heap[$tail];
121 |         unset($this->_heap[$tail]);
122 | 
123 |         return $least;
124 |     }
125 | 
126 |     /**
127 |      * Clear queue
128 |      */
129 |     public function clear()
130 |     {
131 |         $this->_heap = array();
132 |     }
133 | 
134 |     /**
135 |      * Compare elements
136 |      *
137 |      * Returns true if $el1 is less than $el2, false otherwise. The element
138 |      * for which this holds against all others ends up at the top.
139 |      *
140 |      * @param mixed $el1
141 |      * @param mixed $el2
142 |      * @return boolean
143 |      */
144 |     abstract protected function _less($el1, $el2);
145 | }
146 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/AbstractAnalyzer.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer;
12 | 
13 | use ZendSearch\Lucene\Analysis\Analyzer\AnalyzerInterface as LuceneAnalyzer;
14 | 
15 | /**
16 |  * General analyzer implementation.
17 |  *
18 |  * Supplies tokenize() and setInput() on top of the reset()/nextToken()
19 |  * pair, which this class leaves to its subclasses.
20 |  *
21 |  * @category   Zend
22 |  * @package    Zend_Search_Lucene
23 |  * @subpackage Analysis
24 |  */
25 | abstract class AbstractAnalyzer implements LuceneAnalyzer
26 | {
27 |     /**
28 |      * Input string
29 |      *
30 |      * @var string
31 |      */
32 |     protected $_input = null;
33 | 
34 |     /**
35 |      * Input string encoding
36 |      *
37 |      * @var string
38 |      */
39 |     protected $_encoding = '';
40 | 
41 |     /**
42 |      * Tokenize text to terms
43 |      * Returns array of \ZendSearch\Lucene\Analysis\Token objects
44 |      *
45 |      * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding)
46 |      *
47 |      * @param string $data
48 |      * @param string $encoding encoding of $data
49 |      * @return array
50 |      */
51 |     public function tokenize($data, $encoding = '')
52 |     {
53 |         $this->setInput($data, $encoding);
54 | 
55 |         // Drain the token stream; nextToken() signals the end with null
56 |         $tokens = array();
57 |         for ($token = $this->nextToken(); $token !== null; $token = $this->nextToken()) {
58 |             $tokens[] = $token;
59 |         }
60 | 
61 |         return $tokens;
62 |     }
63 | 
64 |     /**
65 |      * Tokenization stream API
66 |      * Store the input and its encoding, then reset the token stream
67 |      *
68 |      * @param string $data
69 |      * @param string $encoding encoding of $data
70 |      */
71 |     public function setInput($data, $encoding = '')
72 |     {
73 |         $this->_encoding = $encoding;
74 |         $this->_input    = $data;
75 |         $this->reset();
76 |     }
77 | }
78 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Analyzer.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer;
12 | 
13 | use ZendSearch\Lucene\Analysis\Analyzer\AnalyzerInterface as LuceneAnalyzer;
14 | 
15 | /**
16 |  * AnalyzerInterface manager.
17 |  *
18 |  * Keeps the default analyzer in a static property, so it is shared by
19 |  * every caller of setDefault()/getDefault().
20 |  *
21 |  * @category   Zend
22 |  * @package    Zend_Search_Lucene
23 |  * @subpackage Analysis
24 |  */
25 | class Analyzer
26 | {
27 |     /**
28 |      * The AnalyzerInterface implementation used by default.
29 |      * Null until setDefault() or getDefault() has been called.
30 |      *
31 |      * @var \ZendSearch\Lucene\Analysis\Analyzer\AnalyzerInterface|null
32 |      */
33 |     private static $_defaultImpl = null;
34 | 
35 |     /**
36 |      * Set the default AnalyzerInterface implementation used by indexing code.
37 |      *
38 |      * @param \ZendSearch\Lucene\Analysis\Analyzer\AnalyzerInterface $analyzer
39 |      */
40 |     public static function setDefault(LuceneAnalyzer $analyzer)
41 |     {
42 |         self::$_defaultImpl = $analyzer;
43 |     }
44 | 
45 |     /**
46 |      * Return the default AnalyzerInterface implementation used by indexing code.
47 |      *
48 |      * When none has been set yet, a Common\Text\CaseInsensitive analyzer is
49 |      * created, remembered and returned.
50 |      *
51 |      * @return \ZendSearch\Lucene\Analysis\Analyzer\AnalyzerInterface
52 |      */
53 |     public static function getDefault()
54 |     {
55 |         if (null !== self::$_defaultImpl) {
56 |             return self::$_defaultImpl;
57 |         }
58 | 
59 |         return self::$_defaultImpl = new Common\Text\CaseInsensitive();
60 |     }
61 | }
62 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/AnalyzerInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer;
12 | 
13 | /**
14 |  * An AnalyzerInterface is used to analyze text: either in one call with
15 |  * tokenize(), or as a stream through setInput(), reset() and nextToken().
16 |  * @category   Zend
17 |  * @package    Zend_Search_Lucene
18 |  * @subpackage Analysis
19 |  */
20 | interface AnalyzerInterface
21 | {
22 |     /**
23 |      * Tokenize text to terms
24 |      * Returns array of ZendSearch\Lucene\Analysis\Token objects
25 |      *
26 |      * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding)
27 |      * @param string $data     text to tokenize
28 |      * @param string $encoding encoding of $data; optional, '' when omitted
29 |      * @return array
30 |      */
31 |     public function tokenize($data, $encoding = '');
32 | 
33 |     /**
34 |      * Tokenization stream API
35 |      * Set input
36 |      * @param string $data     text to tokenize
37 |      * @param string $encoding encoding of $data; optional, '' when omitted
38 |      */
39 |     public function setInput($data, $encoding = '');
40 | 
41 |     /**
42 |      * Reset token stream
43 |      */
44 |     public function reset();
45 | 
46 |     /**
47 |      * Tokenization stream API
48 |      * Get next token
49 |      * Returns null at the end of stream
50 |      *
51 |      * Tokens are returned in UTF-8 (internal Zend_Search_Lucene encoding)
52 |      *
53 |      * @return \ZendSearch\Lucene\Analysis\Token|null
54 |      */
55 |     public function nextToken();
56 | }
57 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/AbstractCommon.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common;
12 | 
13 | use ZendSearch\Lucene\Analysis;
14 | use ZendSearch\Lucene\Analysis\Analyzer\AnalyzerInterface;
15 | use ZendSearch\Lucene\Analysis\TokenFilter\TokenFilterInterface;
16 | 
17 | /**
18 |  * AbstractCommon implementation of the analyzer functionality.
19 |  *
20 |  * Adds a chain of token filters to the general analyzer. There are several
21 |  * standard subclasses provided by Analysis subpackage.
22 |  *
23 |  * @category   Zend
24 |  * @package    Zend_Search_Lucene
25 |  * @subpackage Analysis
26 |  */
27 | abstract class AbstractCommon extends Analysis\Analyzer\AbstractAnalyzer
28 | {
29 |     /**
30 |      * The set of Token filters applied to the Token stream, in the order
31 |      * in which they were added.
32 |      * Array of \ZendSearch\Lucene\Analysis\TokenFilter\TokenFilterInterface objects.
33 |      *
34 |      * @var array
35 |      */
36 |     private $_filters = array();
37 | 
38 |     /**
39 |      * Add Token filter to the AnalyzerInterface
40 |      *
41 |      * @param \ZendSearch\Lucene\Analysis\TokenFilter\TokenFilterInterface $filter
42 |      */
43 |     public function addFilter(TokenFilterInterface $filter)
44 |     {
45 |         array_push($this->_filters, $filter);
46 |     }
47 | 
48 |     /**
49 |      * Apply filters to the token. Can return null when the token was removed.
50 |      *
51 |      * Filters run in the order they were added; the chain stops at the
52 |      * first filter that returns null.
53 |      *
54 |      * @param \ZendSearch\Lucene\Analysis\Token $token
55 |      * @return \ZendSearch\Lucene\Analysis\Token|null
56 |      */
57 |     public function normalize(Analysis\Token $token)
58 |     {
59 |         foreach ($this->_filters as $filter) {
60 |             $token = $filter->normalize($token);
61 | 
62 |             if ($token === null) {
63 |                 // removed by this filter: skip the remaining ones
64 |                 break;
65 |             }
66 |         }
67 | 
68 |         return $token;
69 |     }
70 | }
71 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/Text.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common;
12 | 
13 | use ZendSearch\Lucene\Analysis;
14 | 
15 | /**
16 |  * Analyzer that splits its input into runs of ASCII letters (a-z, A-Z).
17 |  *
18 |  * @category   Zend
19 |  * @package    Zend_Search_Lucene
20 |  * @subpackage Analysis
21 |  */
22 | class Text extends AbstractCommon
23 | {
24 |     /**
25 |      * Current position in a stream
26 |      * (byte offset at which the next regex search starts)
27 |      *
28 |      * @var integer
29 |      */
30 |     private $_position;
31 | 
32 |     /**
33 |      * Reset token stream
34 |      *
35 |      * Rewinds to offset 0 and, when input is present, transliterates it to
36 |      * ASCII with iconv. The iconv call is skipped when PHP_OS is 'AIX'; the
37 |      * encoding is recorded as 'ASCII' either way.
38 |      */
39 |     public function reset()
40 |     {
41 |         $this->_position = 0;
42 | 
43 |         if ($this->_input !== null) {
44 |             // convert input into ascii
45 |             if (PHP_OS != 'AIX') {
46 |                 $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
47 |             }
48 |             $this->_encoding = 'ASCII';
49 |         }
50 |     }
51 | 
52 |     /**
53 |      * Tokenization stream API
54 |      * Get next token
55 |      * Returns null at the end of stream
56 |      *
57 |      * Matches rejected by the filter chain (normalize() returning null)
58 |      * are skipped.
59 |      *
60 |      * @return \ZendSearch\Lucene\Analysis\Token|null
61 |      */
62 |     public function nextToken()
63 |     {
64 |         if ($this->_input === null) {
65 |             return null;
66 |         }
67 | 
68 |         // preg_match() yields 0 when nothing is left and FALSE on error;
69 |         // both end the stream.
70 |         while (preg_match('/[a-zA-Z]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) {
71 |             list($word, $start) = $match[0];
72 |             $end = $start + strlen($word);
73 | 
74 |             // Resume the next search right behind this word
75 |             $this->_position = $end;
76 | 
77 |             $token = $this->normalize(new Analysis\Token($word, $start, $end));
78 |             if ($token !== null) {
79 |                 return $token;
80 |             }
81 |         }
82 | 
83 |         return null;
84 |     }
85 | }
86 | 
87 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common\Text;
12 | 
13 | use ZendSearch\Lucene\Analysis\Analyzer\Common;
14 | use ZendSearch\Lucene\Analysis\TokenFilter;
15 | 
16 | /**
17 |  * Text analyzer with a LowerCase token filter pre-registered.
18 |  *
19 |  * @category   Zend
20 |  * @package    Zend_Search_Lucene
21 |  * @subpackage Analysis
22 |  */
23 | class CaseInsensitive extends Common\Text
24 | {
25 |     /**
26 |      * Register the LowerCase filter on the inherited filter chain
27 |      */
28 |     public function __construct()
29 |     {
30 |         $lowerCase = new TokenFilter\LowerCase();
31 |         $this->addFilter($lowerCase);
32 |     }
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/TextNum.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common;
12 | 
13 | use ZendSearch\Lucene\Analysis;
14 | 
15 | /**
16 |  * Analyzer that splits its input into runs of ASCII letters and digits
17 |  * (a-z, A-Z, 0-9).
18 |  *
19 |  * @category   Zend
20 |  * @package    Zend_Search_Lucene
21 |  * @subpackage Analysis
22 |  */
23 | class TextNum extends AbstractCommon
24 | {
25 |     /**
26 |      * Current position in a stream
27 |      * (byte offset at which the next regex search starts)
28 |      *
29 |      * @var integer
30 |      */
31 |     private $_position;
32 | 
33 |     /**
34 |      * Reset token stream
35 |      *
36 |      * Rewinds to offset 0 and, when input is present, transliterates it to
37 |      * ASCII with iconv. The iconv call is skipped when PHP_OS is 'AIX'; the
38 |      * encoding is recorded as 'ASCII' either way.
39 |      */
40 |     public function reset()
41 |     {
42 |         $this->_position = 0;
43 | 
44 |         if ($this->_input !== null) {
45 |             // convert input into ascii
46 |             if (PHP_OS != 'AIX') {
47 |                 $this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input);
48 |             }
49 |             $this->_encoding = 'ASCII';
50 |         }
51 |     }
52 | 
53 |     /**
54 |      * Tokenization stream API
55 |      * Get next token
56 |      * Returns null at the end of stream
57 |      *
58 |      * Matches rejected by the filter chain (normalize() returning null)
59 |      * are skipped.
60 |      *
61 |      * @return \ZendSearch\Lucene\Analysis\Token|null
62 |      */
63 |     public function nextToken()
64 |     {
65 |         if ($this->_input === null) {
66 |             return null;
67 |         }
68 | 
69 |         // preg_match() yields 0 when nothing is left and FALSE on error;
70 |         // both end the stream.
71 |         while (preg_match('/[a-zA-Z0-9]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) {
72 |             list($word, $start) = $match[0];
73 |             $end = $start + strlen($word);
74 | 
75 |             // Resume the next search right behind this word
76 |             $this->_position = $end;
77 | 
78 |             $token = $this->normalize(new Analysis\Token($word, $start, $end));
79 |             if ($token !== null) {
80 |                 return $token;
81 |             }
82 |         }
83 | 
84 |         return null;
85 |     }
86 | }
87 | 
88 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/TextNum/CaseInsensitive.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common\TextNum;
12 | 
13 | use ZendSearch\Lucene\Analysis\Analyzer\Common;
14 | use ZendSearch\Lucene\Analysis\TokenFilter;
15 | 
16 | /**
17 |  * TextNum analyzer with a LowerCase token filter pre-registered.
18 |  *
19 |  * @category   Zend
20 |  * @package    Zend_Search_Lucene
21 |  * @subpackage Analysis
22 |  */
23 | class CaseInsensitive extends Common\TextNum
24 | {
25 |     /**
26 |      * Register the LowerCase filter on the inherited filter chain
27 |      */
28 |     public function __construct()
29 |     {
30 |         $lowerCase = new TokenFilter\LowerCase();
31 |         $this->addFilter($lowerCase);
32 |     }
33 | }
34 | 
35 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/Utf8.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Analysis;
 15 | use ZendSearch\Lucene\Exception\RuntimeException;
 16 | use Zend\Stdlib\ErrorHandler;
 17 | 
 18 | /**
 19 |  * @category   Zend
 20 |  * @package    Zend_Search_Lucene
 21 |  * @subpackage Analysis
 22 |  */
 23 | class Utf8 extends AbstractCommon
 24 | {
 25 |     /**
 26 |      * Current char position in an UTF-8 stream
 27 |      *
 28 |      * @var integer
 29 |      */
 30 |     private $_position;
 31 | 
 32 |     /**
 33 |      * Current binary position in an UTF-8 stream
 34 |      *
 35 |      * @var integer
 36 |      */
 37 |     private $_bytePosition;
 38 | 
 39 |     /**
 40 |      * Object constructor
 41 |      * Test-matches '/\pL/u' against 'a' to verify PCRE unicode support.
 42 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException when that match does not return 1
 43 |      */
 44 |     public function __construct()
 45 |     {
 46 |         ErrorHandler::start(E_WARNING);
 47 |         $probe = preg_match('/\pL/u', 'a');
 48 |         ErrorHandler::stop();
 49 |         if ($probe == 1) {
 50 |             return;
 51 |         }
 52 |         throw new RuntimeException('Utf8 analyzer needs PCRE unicode support to be enabled.');
 53 |     }
 54 | 
 55 |     /**
 56 |      * Reset token stream: rewind both cursors and convert pending input to UTF-8
 57 |      */
 58 |     public function reset()
 59 |     {
 60 |         $this->_position     = 0;
 61 |         $this->_bytePosition = 0;
 62 |         // convert input into UTF-8; skipped without input (same guard as Text/TextNum)
 63 |         if ($this->_input !== null  &&
 64 |             strcasecmp($this->_encoding, 'utf8' ) != 0  &&
 65 |             strcasecmp($this->_encoding, 'utf-8') != 0 ) {
 66 |                 $this->_input = iconv($this->_encoding, 'UTF-8', $this->_input);
 67 |                 $this->_encoding = 'UTF-8';
 68 |         }
 69 |     }
 70 | 
 71 |     /**
 72 |      * Tokenization stream API
 73 |      * Get next token
 74 |      * Returns null at the end of stream
 75 |      *
 76 |      * @return \ZendSearch\Lucene\Analysis\Token|null
 77 |      */
 78 |     public function nextToken()
 79 |     {
 80 |         if ($this->_input === null) {
 81 |             return null;
 82 |         }
 83 | 
 84 |         do {
 85 |             if (! preg_match('/[\p{L}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
 86 |                 // It covers both cases a) there are no matches (preg_match(...) === 0)
 87 |                 // b) error occured (preg_match(...) === FALSE)
 88 |                 return null;
 89 |             }
 90 | 
 91 |             // matched string
 92 |             $matchedWord = $match[0][0];
 93 | 
 94 |             // binary position of the matched word in the input stream
 95 |             $binStartPos = $match[0][1];
 96 | 
 97 |             // character position of the matched word in the input stream
 98 |             $startPos = $this->_position +
 99 |                         iconv_strlen(substr($this->_input,
100 |                                             $this->_bytePosition,
101 |                                             $binStartPos - $this->_bytePosition),
102 |                                      'UTF-8');
103 |             // character postion of the end of matched word in the input stream
104 |             $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');
105 | 
106 |             $this->_bytePosition = $binStartPos + strlen($matchedWord);
107 |             $this->_position     = $endPos;
108 | 
109 |             $token = $this->normalize(new Analysis\Token($matchedWord, $startPos, $endPos));
110 |         } while ($token === null); // try again if token is skipped
111 | 
112 |         return $token;
113 |     }
114 | }
115 | 
116 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/Utf8/CaseInsensitive.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8;
12 | 
13 | use ZendSearch\Lucene\Analysis\Analyzer\Common;
14 | use ZendSearch\Lucene\Analysis\TokenFilter;
15 | 
16 | /**
17 |  * @category   Zend
18 |  * @package    Zend_Search_Lucene
19 |  * @subpackage Analysis
20 |  */
class CaseInsensitive extends Common\Utf8
{
    /**
     * Object constructor
     *
     * Performs the Utf8 analyzer initialization and then registers a
     * UTF-8 aware lower case filter on this analyzer.
     *
     * @throws \ZendSearch\Lucene\Exception\RuntimeException
     * @throws \ZendSearch\Lucene\Exception\ExtensionNotLoadedException
     */
    public function __construct()
    {
        parent::__construct();

        $lowerCaseFilter = new TokenFilter\LowerCaseUtf8();
        $this->addFilter($lowerCaseFilter);
    }
}
30 | 
31 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/Utf8Num.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Analysis;
 15 | use ZendSearch\Lucene\Exception\RuntimeException;
 16 | use Zend\Stdlib\ErrorHandler;
 17 | 
 18 | /**
 19 |  * @category   Zend
 20 |  * @package    Zend_Search_Lucene
 21 |  * @subpackage Analysis
 22 |  */
 23 | class Utf8Num extends AbstractCommon
 24 | {
 25 |     /**
 26 |      * Current char position in an UTF-8 stream
 27 |      *
 28 |      * @var integer
 29 |      */
 30 |     private $_position;
 31 | 
 32 |     /**
 33 |      * Current binary position in an UTF-8 stream
 34 |      *
 35 |      * @var integer
 36 |      */
 37 |     private $_bytePosition;
 38 | 
 39 |     /**
 40 |      * Object constructor
 41 |      *
 42 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException
 43 |      */
 44 |     public function __construct()
 45 |     {
 46 |         ErrorHandler::start(E_WARNING);
 47 |         $result = preg_match('/\pL/u', 'a');
 48 |         ErrorHandler::stop();
 49 |         if ($result != 1) {
 50 |             // PCRE unicode support is turned off
 51 |             throw new RuntimeException('Utf8Num analyzer needs PCRE unicode support to be enabled.');
 52 |         }
 53 |     }
 54 | 
 55 |     /**
 56 |      * Reset token stream
 57 |      */
 58 |     public function reset()
 59 |     {
 60 |         $this->_position     = 0;
 61 |         $this->_bytePosition = 0;
 62 | 
 63 |         // convert input into UTF-8
 64 |         if (strcasecmp($this->_encoding, 'utf8' ) != 0  &&
 65 |             strcasecmp($this->_encoding, 'utf-8') != 0 ) {
 66 |                 $this->_input = iconv($this->_encoding, 'UTF-8', $this->_input);
 67 |                 $this->_encoding = 'UTF-8';
 68 |         }
 69 |     }
 70 | 
 71 |     /**
 72 |      * Tokenization stream API
 73 |      * Get next token
 74 |      * Returns null at the end of stream
 75 |      *
 76 |      * @return \ZendSearch\Lucene\Analysis\Token|null
 77 |      */
 78 |     public function nextToken()
 79 |     {
 80 |         if ($this->_input === null) {
 81 |             return null;
 82 |         }
 83 | 
 84 |         do {
 85 |             if (! preg_match('/[\p{L}\p{N}]+/u', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_bytePosition)) {
 86 |                 // It covers both cases a) there are no matches (preg_match(...) === 0)
 87 |                 // b) error occured (preg_match(...) === FALSE)
 88 |                 return null;
 89 |             }
 90 | 
 91 |             // matched string
 92 |             $matchedWord = $match[0][0];
 93 | 
 94 |             // binary position of the matched word in the input stream
 95 |             $binStartPos = $match[0][1];
 96 | 
 97 |             // character position of the matched word in the input stream
 98 |             $startPos = $this->_position +
 99 |                         iconv_strlen(substr($this->_input,
100 |                                             $this->_bytePosition,
101 |                                             $binStartPos - $this->_bytePosition),
102 |                                      'UTF-8');
103 |             // character postion of the end of matched word in the input stream
104 |             $endPos = $startPos + iconv_strlen($matchedWord, 'UTF-8');
105 | 
106 |             $this->_bytePosition = $binStartPos + strlen($matchedWord);
107 |             $this->_position     = $endPos;
108 | 
109 |             $token = $this->normalize(new Analysis\Token($matchedWord, $startPos, $endPos));
110 |         } while ($token === null); // try again if token is skipped
111 | 
112 |         return $token;
113 |     }
114 | }
115 | 
116 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Analyzer/Common/Utf8Num/CaseInsensitive.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\Analyzer\Common\Utf8Num;
12 | 
13 | use ZendSearch\Lucene\Analysis\Analyzer\Common;
14 | use ZendSearch\Lucene\Analysis\TokenFilter;
15 | 
16 | /**
17 |  * @category   Zend
18 |  * @package    Zend_Search_Lucene
19 |  * @subpackage Analysis
20 |  */
class CaseInsensitive extends Common\Utf8Num
{
    /**
     * Object constructor
     *
     * Performs the Utf8Num analyzer initialization and then registers a
     * UTF-8 aware lower case filter on this analyzer.
     *
     * @throws \ZendSearch\Lucene\Exception\RuntimeException
     * @throws \ZendSearch\Lucene\Exception\ExtensionNotLoadedException
     */
    public function __construct()
    {
        parent::__construct();

        $lowerCaseFilter = new TokenFilter\LowerCaseUtf8();
        $this->addFilter($lowerCaseFilter);
    }
}
30 | 
31 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/Token.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Analysis;
 12 | 
 13 | /**
 14 |  * @category   Zend
 15 |  * @package    Zend_Search_Lucene
 16 |  * @subpackage Analysis
 17 |  */
 18 | class Token
 19 | {
 20 |     /**
 21 |      * The text of the term.
 22 |      *
 23 |      * @var string
 24 |      */
 25 |     private $_termText;
 26 | 
 27 |     /**
 28 |      * Start in source text.
 29 |      *
 30 |      * @var integer
 31 |      */
 32 |     private $_startOffset;
 33 | 
 34 |     /**
 35 |      * End in source text
 36 |      *
 37 |      * @var integer
 38 |      */
 39 |     private $_endOffset;
 40 | 
 41 |     /**
 42 |      * The position of this token relative to the previous Token.
 43 |      *
 44 |      * The default value is one.
 45 |      *
 46 |      * Some common uses for this are:
 47 |      * Set it to zero to put multiple terms in the same position.  This is
 48 |      * useful if, e.g., a word has multiple stems.  Searches for phrases
 49 |      * including either stem will match.  In this case, all but the first stem's
 50 |      * increment should be set to zero: the increment of the first instance
 51 |      * should be one.  Repeating a token with an increment of zero can also be
 52 |      * used to boost the scores of matches on that token.
 53 |      *
 54 |      * Set it to values greater than one to inhibit exact phrase matches.
 55 |      * If, for example, one does not want phrases to match across removed stop
 56 |      * words, then one could build a stop word filter that removes stop words and
 57 |      * also sets the increment to the number of stop words removed before each
 58 |      * non-stop word.  Then exact phrase queries will only match when the terms
 59 |      * occur with no intervening stop words.
 60 |      *
 61 |      * @var integer
 62 |      */
 63 |     private $_positionIncrement;
 64 | 
 65 | 
 66 |     /**
 67 |      * Object constructor
 68 |      *
 69 |      * @param string  $text
 70 |      * @param integer $start
 71 |      * @param integer $end
 72 |      * @param string  $type
 73 |      */
 74 |     public function __construct($text, $start, $end)
 75 |     {
 76 |         $this->_termText    = $text;
 77 |         $this->_startOffset = $start;
 78 |         $this->_endOffset   = $end;
 79 | 
 80 |         $this->_positionIncrement = 1;
 81 |     }
 82 | 
 83 | 
 84 |     /**
 85 |      * positionIncrement setter
 86 |      *
 87 |      * @param integer $positionIncrement
 88 |      */
 89 |     public function setPositionIncrement($positionIncrement)
 90 |     {
 91 |         $this->_positionIncrement = $positionIncrement;
 92 |     }
 93 | 
 94 |     /**
 95 |      * Returns the position increment of this Token.
 96 |      *
 97 |      * @return integer
 98 |      */
 99 |     public function getPositionIncrement()
100 |     {
101 |         return $this->_positionIncrement;
102 |     }
103 | 
104 |     /**
105 |      * Returns the Token's term text.
106 |      *
107 |      * @return string
108 |      */
109 |     public function getTermText()
110 |     {
111 |         return $this->_termText;
112 |     }
113 | 
114 |     /**
115 |      * Returns this Token's starting offset, the position of the first character
116 |      * corresponding to this token in the source text.
117 |      *
118 |      * Note:
119 |      * The difference between getEndOffset() and getStartOffset() may not be equal
120 |      * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
121 |      * by a stemmer or some other filter.
122 |      *
123 |      * @return integer
124 |      */
125 |     public function getStartOffset()
126 |     {
127 |         return $this->_startOffset;
128 |     }
129 | 
130 |     /**
131 |      * Returns this Token's ending offset, one greater than the position of the
132 |      * last character corresponding to this token in the source text.
133 |      *
134 |      * @return integer
135 |      */
136 |     public function getEndOffset()
137 |     {
138 |         return $this->_endOffset;
139 |     }
140 | }
141 | 
142 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/TokenFilter/LowerCase.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\TokenFilter;
12 | 
13 | use ZendSearch\Lucene\Analysis\Token;
14 | 
15 | /**
16 |  * Lower case Token filter.
17 |  *
18 |  * @category   Zend
19 |  * @package    Zend_Search_Lucene
20 |  * @subpackage Analysis
21 |  */
class LowerCase implements TokenFilterInterface
{
    /**
     * Normalize Token or remove it (if null is returned)
     *
     * Builds a new Token whose term text is the strtolower() form of the
     * source text; offsets and position increment are copied from the source.
     *
     * @param \ZendSearch\Lucene\Analysis\Token $srcToken
     * @return \ZendSearch\Lucene\Analysis\Token
     */
    public function normalize(Token $srcToken)
    {
        $loweredText = strtolower($srcToken->getTermText());

        $lowered = new Token($loweredText, $srcToken->getStartOffset(), $srcToken->getEndOffset());
        $lowered->setPositionIncrement($srcToken->getPositionIncrement());

        return $lowered;
    }
}
41 | 
42 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/TokenFilter/LowerCaseUtf8.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\TokenFilter;
12 | 
13 | use ZendSearch\Lucene;
14 | use ZendSearch\Lucene\Analysis\Token;
15 | use ZendSearch\Lucene\Exception\ExtensionNotLoadedException;
16 | 
17 | /**
18 |  * Lower case Token filter.
19 |  *
20 |  * @category   Zend
21 |  * @package    Zend_Search_Lucene
22 |  * @subpackage Analysis
23 |  */
class LowerCaseUtf8 implements TokenFilterInterface
{
    /**
     * Object constructor
     *
     * Refuses to construct the filter when mb_strtolower() is not available.
     *
     * @throws \ZendSearch\Lucene\Exception\ExtensionNotLoadedException
     */
    public function __construct()
    {
        if (function_exists('mb_strtolower')) {
            return;
        }

        // mbstring extension is disabled
        throw new ExtensionNotLoadedException('Utf8 compatible lower case filter needs mbstring extension to be enabled.');
    }

    /**
     * Normalize Token or remove it (if null is returned)
     *
     * Builds a new Token whose term text is lower-cased with mb_strtolower()
     * in UTF-8; offsets and position increment are copied from the source.
     *
     * @param \ZendSearch\Lucene\Analysis\Token $srcToken
     * @return \ZendSearch\Lucene\Analysis\Token
     */
    public function normalize(Token $srcToken)
    {
        $loweredText = mb_strtolower($srcToken->getTermText(), 'UTF-8');

        $lowered = new Token($loweredText, $srcToken->getStartOffset(), $srcToken->getEndOffset());
        $lowered->setPositionIncrement($srcToken->getPositionIncrement());

        return $lowered;
    }
}
55 | 
56 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/TokenFilter/ShortWords.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\TokenFilter;
12 | 
13 | use ZendSearch\Lucene\Analysis\Token;
14 | 
15 | /**
16 |  * Token filter that removes short words. What is short word can be configured with constructor.
17 |  *
18 |  * @category   Zend
19 |  * @package    Zend_Search_Lucene
20 |  * @subpackage Analysis
21 |  */
class ShortWords implements TokenFilterInterface
{
    /**
     * Minimum allowed term length
     * @var integer
     */
    private $length;

    /**
     * Constructs new instance of this filter.
     *
     * @param integer $length  minimum allowed length of term which passes this filter (default 2)
     */
    public function __construct($length = 2)
    {
        $this->length = $length;
    }

    /**
     * Normalize Token or remove it (if null is returned)
     *
     * The term length is measured with strlen(), i.e. in bytes. Tokens
     * shorter than the configured minimum are removed, all other tokens
     * are passed through unchanged.
     *
     * @param \ZendSearch\Lucene\Analysis\Token $srcToken
     * @return \ZendSearch\Lucene\Analysis\Token|null
     */
    public function normalize(Token $srcToken)
    {
        $isTooShort = strlen($srcToken->getTermText()) < $this->length;

        return $isTooShort ? null : $srcToken;
    }
}
55 | 
56 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/TokenFilter/StopWords.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\TokenFilter;
12 | 
13 | use ZendSearch\Lucene;
14 | use ZendSearch\Lucene\Analysis\Token;
15 | use ZendSearch\Lucene\Exception\InvalidArgumentException;
16 | use ZendSearch\Lucene\Exception\RuntimeException;
17 | 
/**
 * Token filter that removes stop words.
 *
 * Stop words are passed to the constructor as a plain list of words, example:
 * $stopwords = array('the', 'an');
 * The list is flipped internally, so the words become the keys of a lookup set.
 *
 * We do recommend to provide all words in lowercase and concatenate this class after the lowercase filter.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 */
class StopWords implements TokenFilterInterface
{
    /**
     * Stop words set; the stop words are the array keys
     * @var array
     */
    private $_stopSet;

    /**
     * Constructs new instance of this filter.
     *
     * @param array $stopwords list of words that will be filtered out, e.g. array('the', 'an')
     */
    public function __construct($stopwords = array())
    {
        // Flip the list so that membership can be tested by key
        $this->_stopSet = array_flip($stopwords);
    }

    /**
     * Normalize Token or remove it (if null is returned)
     *
     * The term text is compared to the stop words exactly as given.
     *
     * @param \ZendSearch\Lucene\Analysis\Token $srcToken
     * @return \ZendSearch\Lucene\Analysis\Token|null
     */
    public function normalize(Token $srcToken)
    {
        if (array_key_exists($srcToken->getTermText(), $this->_stopSet)) {
            return null;
        } else {
            return $srcToken;
        }
    }

    /**
     * Fills stopwords set from a text file. Each line contains one stopword, lines with '#' in the first
     * column are ignored (as comments). Leading and trailing whitespace is trimmed and empty lines are skipped.
     *
     * You can call this method one or more times. New stopwords are always added to current set.
     *
     * @param string $filepath full path for text file with stopwords
     * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
     * @throws \ZendSearch\Lucene\Exception\RuntimeException
     */
    public function loadFromFile($filepath = null)
    {
        if (! $filepath || ! file_exists($filepath)) {
            throw new InvalidArgumentException('You have to provide valid file path');
        }
        $fd = fopen($filepath, "r");
        if (! $fd) {
            throw new RuntimeException('Cannot open file ' . $filepath);
        }
        // Loop on the result of fgets() rather than on feof(): fgets() returns
        // false both at end of file and on a read error, so the loop always
        // terminates, while feof() may never become true for an unreadable stream.
        while (($line = fgets($fd)) !== false) {
            $buffer = trim($line);
            if (strlen($buffer) > 0 && $buffer[0] != '#') {
                $this->_stopSet[$buffer] = 1;
            }
        }
        if (!fclose($fd)) {
            throw new RuntimeException('Cannot close file ' . $filepath);
        }
    }
}
91 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Analysis/TokenFilter/TokenFilterInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Analysis\TokenFilter;
12 | 
13 | use ZendSearch\Lucene\Analysis\Token;
14 | 
/**
 * Token filter converts (normalizes) Token or removes it from a token stream.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 */
interface TokenFilterInterface
{
    /**
     * Normalize Token or remove it (if null is returned)
     *
     * @param \ZendSearch\Lucene\Analysis\Token $srcToken  token to be processed by the filter
     * @return \ZendSearch\Lucene\Analysis\Token|null  resulting token, or null when the token is removed
     */
    public function normalize(Token $srcToken);
}
32 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Document.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene;
 12 | 
 13 | use ZendSearch\Lucene\Exception\InvalidArgumentException;
 14 | 
 15 | /**
 16 |  * A Document is a set of fields. Each field has a name and a textual value.
 17 |  *
 18 |  * @category   Zend
 19 |  * @package    Zend_Search_Lucene
 20 |  * @subpackage Document
 21 |  */
 22 | class Document
 23 | {
 24 | 
 25 |     /**
 26 |      * Associative array \ZendSearch\Lucene\Document\Field objects where the keys to the
 27 |      * array are the names of the fields.
 28 |      *
 29 |      * @var array
 30 |      */
 31 |     protected $_fields = array();
 32 | 
 33 |     /**
 34 |      * Field boost factor
 35 |      * It's not stored directly in the index, but affects on normalization factor
 36 |      *
 37 |      * @var float
 38 |      */
 39 |     public $boost = 1.0;
 40 | 
 41 |     /**
 42 |      * Magic method for checking the existence of a field
 43 |      *
 44 |      * @param string $offset
 45 |      * @return boolean TRUE if the field exists else FALSE
 46 |      */
 47 |     public function __isset($offset)
 48 |     {
 49 |         return in_array($offset, $this->getFieldNames());
 50 |     }
 51 | 
 52 |     /**
 53 |      * Proxy method for getFieldValue(), provides more convenient access to
 54 |      * the string value of a field.
 55 |      *
 56 |      * @param  $offset
 57 |      * @return string
 58 |      */
 59 |     public function __get($offset)
 60 |     {
 61 |         return $this->getFieldValue($offset);
 62 |     }
 63 | 
 64 | 
 65 |     /**
 66 |      * Add a field object to this document.
 67 |      *
 68 |      * @param \ZendSearch\Lucene\Document\Field $field
 69 |      * @return \ZendSearch\Lucene\Document
 70 |      */
 71 |     public function addField(Document\Field $field)
 72 |     {
 73 |         $this->_fields[$field->name] = $field;
 74 | 
 75 |         return $this;
 76 |     }
 77 | 
 78 | 
 79 |     /**
 80 |      * Return an array with the names of the fields in this document.
 81 |      *
 82 |      * @return array
 83 |      */
 84 |     public function getFieldNames()
 85 |     {
 86 |         return array_keys($this->_fields);
 87 |     }
 88 | 
 89 | 
 90 |     /**
 91 |      * Returns {@link \ZendSearch\Lucene\Document\Field} object for a named field in this document.
 92 |      *
 93 |      * @param string $fieldName
 94 |      * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
 95 |      * @return \ZendSearch\Lucene\Document\Field
 96 |      */
 97 |     public function getField($fieldName)
 98 |     {
 99 |         if (!array_key_exists($fieldName, $this->_fields)) {
100 |             throw new InvalidArgumentException("Field name \"$fieldName\" not found in document.");
101 |         }
102 |         return $this->_fields[$fieldName];
103 |     }
104 | 
105 | 
106 |     /**
107 |      * Returns the string value of a named field in this document.
108 |      *
109 |      * @see __get()
110 |      * @return string
111 |      */
112 |     public function getFieldValue($fieldName)
113 |     {
114 |         return $this->getField($fieldName)->value;
115 |     }
116 | 
117 |     /**
118 |      * Returns the string value of a named field in UTF-8 encoding.
119 |      *
120 |      * @see __get()
121 |      * @return string
122 |      */
123 |     public function getFieldUtf8Value($fieldName)
124 |     {
125 |         return $this->getField($fieldName)->getUtf8Value();
126 |     }
127 | }
128 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Document/AbstractOpenXML.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Document;
 12 | 
 13 | use ZendSearch\Lucene\Document;
 14 | 
 15 | /**
 16 |  * OpenXML document.
 17 |  *
 18 |  * @category   Zend
 19 |  * @package    Zend_Search_Lucene
 20 |  * @subpackage Document
 21 |  */
 22 | abstract class AbstractOpenXML extends Document
 23 | {
 24 |     /**
 25 |      * Xml Schema - Relationships
 26 |      *
 27 |      * @var string
 28 |      */
 29 |     const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';
 30 | 
 31 |     /**
 32 |      * Xml Schema - Office document
 33 |      *
 34 |      * @var string
 35 |      */
 36 |     const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';
 37 | 
 38 |     /**
 39 |      * Xml Schema - Core properties
 40 |      *
 41 |      * @var string
 42 |      */
 43 |     const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';
 44 | 
 45 |     /**
 46 |      * Xml Schema - Dublin Core
 47 |      *
 48 |      * @var string
 49 |      */
 50 |     const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';
 51 | 
 52 |     /**
 53 |      * Xml Schema - Dublin Core Terms
 54 |      *
 55 |      * @var string
 56 |      */
 57 |     const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';
 58 | 
 59 |     /**
 60 |      * Extract metadata from document
 61 |      *
 62 |      * @param \ZipArchive $package    ZipArchive AbstractOpenXML package
 63 |      * @return array    Key-value pairs containing document meta data
 64 |      */
 65 |     protected function extractMetaData(\ZipArchive $package)
 66 |     {
 67 |         // Data holders
 68 |         $coreProperties = array();
 69 | 
 70 |         // Prevent php from loading remote resources
 71 |         $loadEntities = libxml_disable_entity_loader(true);
 72 | 
 73 |         // Read relations and search for core properties
 74 |         $relations = simplexml_load_string($package->getFromName("_rels/.rels"));
 75 | 
 76 |         // Restore entity loader state
 77 |         libxml_disable_entity_loader($loadEntities);
 78 | 
 79 |         foreach ($relations->Relationship as $rel) {
 80 |             if ($rel["Type"] == self::SCHEMA_COREPROPERTIES) {
 81 |                 // Found core properties! Read in contents...
 82 |                 $contents = simplexml_load_string(
 83 |                     $package->getFromName(dirname($rel["Target"]) . "/" . basename($rel["Target"]))
 84 |                 );
 85 | 
 86 |                 foreach ($contents->children(self::SCHEMA_DUBLINCORE) as $child) {
 87 |                     $coreProperties[$child->getName()] = (string)$child;
 88 |                 }
 89 |                 foreach ($contents->children(self::SCHEMA_COREPROPERTIES) as $child) {
 90 |                     $coreProperties[$child->getName()] = (string)$child;
 91 |                 }
 92 |                 foreach ($contents->children(self::SCHEMA_DUBLINCORETERMS) as $child) {
 93 |                     $coreProperties[$child->getName()] = (string)$child;
 94 |                 }
 95 |             }
 96 |         }
 97 | 
 98 |         return $coreProperties;
 99 |     }
100 | 
101 |     /**
102 |      * Determine absolute zip path
103 |      *
104 |      * @param string $path
105 |      * @return string
106 |      */
107 |     protected function absoluteZipPath($path)
108 |     {
109 |         $path = str_replace(array('/', '\\'), DIRECTORY_SEPARATOR, $path);
110 |         $parts = array_filter(explode(DIRECTORY_SEPARATOR, $path), 'strlen');
111 |         $absolutes = array();
112 |         foreach ($parts as $part) {
113 |             if ('.' == $part) continue;
114 |             if ('..' == $part) {
115 |                 array_pop($absolutes);
116 |             } else {
117 |                 $absolutes[] = $part;
118 |             }
119 |         }
120 |         return implode('/', $absolutes);
121 |     }
122 | }
123 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Document/Docx.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Document;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Document\Exception\InvalidArgumentException;
 15 | use ZendSearch\Lucene\Exception\ExtensionNotLoadedException;
 16 | use ZendSearch\Lucene\Exception\RuntimeException;
 17 | 
 18 | /**
 19 |  * Docx document.
 20 |  *
 21 |  * @category   Zend
 22 |  * @package    Zend_Search_Lucene
 23 |  * @subpackage Document
 24 |  */
 25 | class Docx extends AbstractOpenXML
 26 | {
 27 |     /**
 28 |      * Xml Schema - WordprocessingML
 29 |      *
 30 |      * @var string
 31 |      */
 32 |     const SCHEMA_WORDPROCESSINGML = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';
 33 | 
 34 |     /**
 35 |      * Object constructor
 36 |      *
 37 |      * @param string  $fileName
 38 |      * @param boolean $storeContent
 39 |      * @throws \ZendSearch\Lucene\Exception\ExtensionNotLoadedException
 40 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException
 41 |      */
 42 |     private function __construct($fileName, $storeContent)
 43 |     {
 44 |         if (!class_exists('ZipArchive', false)) {
 45 |             throw new ExtensionNotLoadedException(
 46 |                 'MS Office documents processing functionality requires Zip extension to be loaded'
 47 |             );
 48 |         }
 49 | 
 50 |         // Document data holders
 51 |         $documentBody = array();
 52 |         $coreProperties = array();
 53 | 
 54 |         // Open AbstractOpenXML package
 55 |         $package = new \ZipArchive();
 56 |         $package->open($fileName);
 57 | 
 58 |         // Read relations and search for officeDocument
 59 |         $relationsXml = $package->getFromName('_rels/.rels');
 60 |         if ($relationsXml === false) {
 61 |             throw new RuntimeException('Invalid archive or corrupted .docx file.');
 62 |         }
 63 | 
 64 |         // Prevent php from loading remote resources
 65 |         $loadEntities = libxml_disable_entity_loader(true);
 66 | 
 67 |         $relations = simplexml_load_string($relationsXml);
 68 | 
 69 |         // Restore entity loader state
 70 |         libxml_disable_entity_loader($loadEntities);
 71 | 
 72 |         foreach($relations->Relationship as $rel) {
 73 |             if ($rel ["Type"] == AbstractOpenXML::SCHEMA_OFFICEDOCUMENT) {
 74 |                 // Found office document! Read in contents...
 75 |                 $contents = simplexml_load_string($package->getFromName(
 76 |                                                                 $this->absoluteZipPath(dirname($rel['Target'])
 77 |                                                               . '/'
 78 |                                                               . basename($rel['Target']))
 79 |                                                                        ));
 80 | 
 81 |                 $contents->registerXPathNamespace('w', self::SCHEMA_WORDPROCESSINGML);
 82 |                 $paragraphs = $contents->xpath('//w:body/w:p');
 83 | 
 84 |                 foreach ($paragraphs as $paragraph) {
 85 |                     $runs = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');
 86 | 
 87 |                     if ($runs === false) {
 88 |                         // Paragraph doesn't contain any text or breaks
 89 |                         continue;
 90 |                     }
 91 | 
 92 |                     foreach ($runs as $run) {
 93 |                      if ($run->getName() == 'br') {
 94 |                          // Break element
 95 |                          $documentBody[] = ' ';
 96 |                      } else {
 97 |                          $documentBody[] = (string)$run;
 98 |                      }
 99 |                     }
100 | 
101 |                     // Add space after each paragraph. So they are not bound together.
102 |                     $documentBody[] = ' ';
103 |                 }
104 | 
105 |                 break;
106 |             }
107 |         }
108 | 
109 |         // Read core properties
110 |         $coreProperties = $this->extractMetaData($package);
111 | 
112 |         // Close file
113 |         $package->close();
114 | 
115 |         // Store filename
116 |         $this->addField(Field::Text('filename', $fileName, 'UTF-8'));
117 | 
118 |         // Store contents
119 |         if ($storeContent) {
120 |             $this->addField(Field::Text('body', implode('', $documentBody), 'UTF-8'));
121 |         } else {
122 |             $this->addField(Field::UnStored('body', implode('', $documentBody), 'UTF-8'));
123 |         }
124 | 
125 |         // Store meta data properties
126 |         foreach ($coreProperties as $key => $value) {
127 |             $this->addField(Field::Text($key, $value, 'UTF-8'));
128 |         }
129 | 
130 |         // Store title (if not present in meta data)
131 |         if (! isset($coreProperties['title'])) {
132 |             $this->addField(Field::Text('title', $fileName, 'UTF-8'));
133 |         }
134 |     }
135 | 
136 |     /**
137 |      * Load Docx document from a file
138 |      *
139 |      * @param string  $fileName
140 |      * @param boolean $storeContent
141 |      * @throws \ZendSearch\Lucene\Document\Exception\InvalidArgumentException
142 |      * @return \ZendSearch\Lucene\Document\Docx
143 |      */
144 |     public static function loadDocxFile($fileName, $storeContent = false)
145 |     {
146 |         if (!is_readable($fileName)) {
147 |             throw new InvalidArgumentException('Provided file \'' . $fileName . '\' is not readable.');
148 |         }
149 | 
150 |         return new self($fileName, $storeContent);
151 |     }
152 | }
153 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Document/Exception/ExceptionInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Document\Exception;
12 | 
/**
 * Marker interface for exceptions declared in the Lucene Document namespace.
 *
 * Declares no methods of its own; it only narrows the Lucene-wide exception
 * interface so document-level failures can be caught as a group.
 */
interface ExceptionInterface extends \ZendSearch\Lucene\Exception\ExceptionInterface
{
}
15 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Document/Exception/InvalidArgumentException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Document\Exception;
12 | 
13 | use ZendSearch\Lucene\Exception;
14 | 
/**
 * Invalid-argument exception of the Lucene Document namespace.
 *
 * Extends the Lucene-level InvalidArgumentException and additionally carries
 * the Document ExceptionInterface marker, so it can be caught by either type.
 */
class InvalidArgumentException extends Exception\InvalidArgumentException implements ExceptionInterface
{
}
19 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Document/Field.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Document;
 12 | 
 13 | /**
 14 |  * A field is a section of a Document.  Each field has two parts,
 15 |  * a name and a value. Values may be free text or they may be atomic
 16 |  * keywords, which are not further processed. Such keywords may
 17 |  * be used to represent dates, urls, etc.  Fields are optionally
 18 |  * stored in the index, so that they may be returned with hits
 19 |  * on the document.
 20 |  *
 21 |  * @category   Zend
 22 |  * @package    Zend_Search_Lucene
 23 |  * @subpackage Document
 24 |  */
 25 | class Field
 26 | {
 27 |     /**
 28 |      * Field name
 29 |      *
 30 |      * @var string
 31 |      */
 32 |     public $name;
 33 | 
 34 |     /**
 35 |      * Field value
 36 |      *
 37 |      * @var boolean
 38 |      */
 39 |     public $value;
 40 | 
 41 |     /**
 42 |      * Field is to be stored in the index for return with search hits.
 43 |      *
 44 |      * @var boolean
 45 |      */
 46 |     public $isStored    = false;
 47 | 
 48 |     /**
 49 |      * Field is to be indexed, so that it may be searched on.
 50 |      *
 51 |      * @var boolean
 52 |      */
 53 |     public $isIndexed   = true;
 54 | 
 55 |     /**
 56 |      * Field should be tokenized as text prior to indexing.
 57 |      *
 58 |      * @var boolean
 59 |      */
 60 |     public $isTokenized = true;
 61 |     /**
 62 |      * Field is stored as binary.
 63 |      *
 64 |      * @var boolean
 65 |      */
 66 |     public $isBinary    = false;
 67 | 
 68 |     /**
 69 |      * Field are stored as a term vector
 70 |      *
 71 |      * @var boolean
 72 |      */
 73 |     public $storeTermVector = false;
 74 | 
 75 |     /**
 76 |      * Field boost factor
 77 |      * It's not stored directly in the index, but affects on normalization factor
 78 |      *
 79 |      * @var float
 80 |      */
 81 |     public $boost = 1.0;
 82 | 
 83 |     /**
 84 |      * Field value encoding.
 85 |      *
 86 |      * @var string
 87 |      */
 88 |     public $encoding;
 89 | 
 90 |     /**
 91 |      * Object constructor
 92 |      *
 93 |      * @param string $name
 94 |      * @param string $value
 95 |      * @param string $encoding
 96 |      * @param boolean $isStored
 97 |      * @param boolean $isIndexed
 98 |      * @param boolean $isTokenized
 99 |      * @param boolean $isBinary
100 |      */
101 |     public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
102 |     {
103 |         $this->name  = $name;
104 |         $this->value = $value;
105 | 
106 |         if (!$isBinary) {
107 |             $this->encoding    = $encoding;
108 |             $this->isTokenized = $isTokenized;
109 |         } else {
110 |             $this->encoding    = '';
111 |             $this->isTokenized = false;
112 |         }
113 | 
114 |         $this->isStored  = $isStored;
115 |         $this->isIndexed = $isIndexed;
116 |         $this->isBinary  = $isBinary;
117 | 
118 |         $this->storeTermVector = false;
119 |         $this->boost           = 1.0;
120 |     }
121 | 
122 | 
123 |     /**
124 |      * Constructs a String-valued Field that is not tokenized, but is indexed
125 |      * and stored.  Useful for non-text fields, e.g. date or url.
126 |      *
127 |      * @param string $name
128 |      * @param string $value
129 |      * @param string $encoding
130 |      * @return \ZendSearch\Lucene\Document\Field
131 |      */
132 |     public static function keyword($name, $value, $encoding = 'UTF-8')
133 |     {
134 |         return new self($name, $value, $encoding, true, true, false);
135 |     }
136 | 
137 | 
138 |     /**
139 |      * Constructs a String-valued Field that is not tokenized nor indexed,
140 |      * but is stored in the index, for return with hits.
141 |      *
142 |      * @param string $name
143 |      * @param string $value
144 |      * @param string $encoding
145 |      * @return \ZendSearch\Lucene\Document\Field
146 |      */
147 |     public static function unIndexed($name, $value, $encoding = 'UTF-8')
148 |     {
149 |         return new self($name, $value, $encoding, true, false, false);
150 |     }
151 | 
152 | 
153 |     /**
154 |      * Constructs a Binary String valued Field that is not tokenized nor indexed,
155 |      * but is stored in the index, for return with hits.
156 |      *
157 |      * @param string $name
158 |      * @param string $value
159 |      * @param string $encoding
160 |      * @return \ZendSearch\Lucene\Document\Field
161 |      */
162 |     public static function binary($name, $value)
163 |     {
164 |         return new self($name, $value, '', true, false, false, true);
165 |     }
166 | 
167 |     /**
168 |      * Constructs a String-valued Field that is tokenized and indexed,
169 |      * and is stored in the index, for return with hits.  Useful for short text
170 |      * fields, like "title" or "subject". Term vector will not be stored for this field.
171 |      *
172 |      * @param string $name
173 |      * @param string $value
174 |      * @param string $encoding
175 |      * @return \ZendSearch\Lucene\Document\Field
176 |      */
177 |     public static function text($name, $value, $encoding = 'UTF-8')
178 |     {
179 |         return new self($name, $value, $encoding, true, true, true);
180 |     }
181 | 
182 | 
183 |     /**
184 |      * Constructs a String-valued Field that is tokenized and indexed,
185 |      * but that is not stored in the index.
186 |      *
187 |      * @param string $name
188 |      * @param string $value
189 |      * @param string $encoding
190 |      * @return \ZendSearch\Lucene\Document\Field
191 |      */
192 |     public static function unStored($name, $value, $encoding = 'UTF-8')
193 |     {
194 |         return new self($name, $value, $encoding, false, true, true);
195 |     }
196 | 
197 |     /**
198 |      * Get field value in UTF-8 encoding
199 |      *
200 |      * @return string
201 |      */
202 |     public function getUtf8Value()
203 |     {
204 |         if (strcasecmp($this->encoding, 'utf8' ) == 0  ||
205 |             strcasecmp($this->encoding, 'utf-8') == 0 ) {
206 |                 return $this->value;
207 |         } else {
208 | 
209 |             return (PHP_OS != 'AIX') ? iconv($this->encoding, 'UTF-8', $this->value) : iconv('ISO8859-1', 'UTF-8', $this->value);
210 |         }
211 |     }
212 | }
213 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Document/Pptx.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Document;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Exception\ExtensionNotLoadedException;
 15 | use ZendSearch\Lucene\Exception\RuntimeException;
 16 | 
 17 | /**
 18 |  * Pptx document.
 19 |  *
 20 |  * @category   Zend
 21 |  * @package    Zend_Search_Lucene
 22 |  * @subpackage Document
 23 |  */
 24 | class Pptx extends AbstractOpenXML
 25 | {
 26 |     /**
 27 |      * Xml Schema - PresentationML
 28 |      *
 29 |      * @var string
 30 |      */
 31 |     const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';
 32 | 
 33 |     /**
 34 |      * Xml Schema - DrawingML
 35 |      *
 36 |      * @var string
 37 |      */
 38 |     const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
 39 | 
 40 |     /**
 41 |      * Xml Schema - Slide relation
 42 |      *
 43 |      * @var string
 44 |      */
 45 |     const SCHEMA_SLIDERELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide';
 46 | 
 47 |     /**
 48 |      * Xml Schema - Slide notes relation
 49 |      *
 50 |      * @var string
 51 |      */
 52 |     const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';
 53 | 
 54 |     /**
 55 |      * Object constructor
 56 |      *
 57 |      * @param string  $fileName
 58 |      * @param boolean $storeContent
 59 |      * @throws \ZendSearch\Lucene\Exception\ExtensionNotLoadedException
 60 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException
 61 |      */
 62 |     private function __construct($fileName, $storeContent)
 63 |     {
 64 |         if (!class_exists('ZipArchive', false)) {
 65 |             throw new ExtensionNotLoadedException('MS Office documents processing functionality requires Zip extension to be loaded');
 66 |         }
 67 | 
 68 |         // Document data holders
 69 |         $slides = array();
 70 |         $slideNotes = array();
 71 |         $documentBody = array();
 72 |         $coreProperties = array();
 73 | 
 74 |         // Open AbstractOpenXML package
 75 |         $package = new \ZipArchive();
 76 |         $package->open($fileName);
 77 | 
 78 |         // Read relations and search for officeDocument
 79 |         $relationsXml = $package->getFromName('_rels/.rels');
 80 |         if ($relationsXml === false) {
 81 |             throw new RuntimeException('Invalid archive or corrupted .pptx file.');
 82 |         }
 83 | 
 84 |         // Prevent php from loading remote resources
 85 |         $loadEntities = libxml_disable_entity_loader(true);
 86 | 
 87 |         $relations = simplexml_load_string($relationsXml);
 88 | 
 89 |         // Restore entity loader state
 90 |         libxml_disable_entity_loader($loadEntities);
 91 | 
 92 |         foreach ($relations->Relationship as $rel) {
 93 |             if ($rel["Type"] == AbstractOpenXML::SCHEMA_OFFICEDOCUMENT) {
 94 |                 // Found office document! Search for slides...
 95 |                 $slideRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/_rels/" . basename($rel["Target"]) . ".rels")) );
 96 |                 foreach ($slideRelations->Relationship as $slideRel) {
 97 |                     if ($slideRel["Type"] == self::SCHEMA_SLIDERELATION) {
 98 |                         // Found slide!
 99 |                         $slides[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string(
100 |                             $package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . basename($slideRel["Target"])) )
101 |                         );
102 | 
103 |                         // Search for slide notes
104 |                         $slideNotesRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/_rels/" . basename($slideRel["Target"]) . ".rels")) );
105 |                         foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
106 |                             if ($slideNoteRel["Type"] == self::SCHEMA_SLIDENOTESRELATION) {
107 |                                 // Found slide notes!
108 |                                 $slideNotes[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string(
109 |                                     $package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . dirname($slideNoteRel["Target"]) . "/" . basename($slideNoteRel["Target"])) )
110 |                                 );
111 | 
112 |                                 break;
113 |                             }
114 |                         }
115 |                     }
116 |                 }
117 | 
118 |                 break;
119 |             }
120 |         }
121 | 
122 |         // Sort slides
123 |         ksort($slides);
124 |         ksort($slideNotes);
125 | 
126 |         // Extract contents from slides
127 |         foreach ($slides as $slideKey => $slide) {
128 |             // Register namespaces
129 |             $slide->registerXPathNamespace("p", self::SCHEMA_PRESENTATIONML);
130 |             $slide->registerXPathNamespace("a", self::SCHEMA_DRAWINGML);
131 | 
132 |             // Fetch all text
133 |             $textElements = $slide->xpath('//a:t');
134 |             foreach ($textElements as $textElement) {
135 |                 $documentBody[] = (string)$textElement;
136 |             }
137 | 
138 |             // Extract contents from slide notes
139 |             if (isset($slideNotes[$slideKey])) {
140 |                 // Fetch slide note
141 |                 $slideNote = $slideNotes[$slideKey];
142 | 
143 |                 // Register namespaces
144 |                 $slideNote->registerXPathNamespace("p", self::SCHEMA_PRESENTATIONML);
145 |                 $slideNote->registerXPathNamespace("a", self::SCHEMA_DRAWINGML);
146 | 
147 |                 // Fetch all text
148 |                 $textElements = $slideNote->xpath('//a:t');
149 |                 foreach ($textElements as $textElement) {
150 |                     $documentBody[] = (string)$textElement;
151 |                 }
152 |             }
153 |         }
154 | 
155 |         // Read core properties
156 |         $coreProperties = $this->extractMetaData($package);
157 | 
158 |         // Close file
159 |         $package->close();
160 | 
161 |         // Store filename
162 |         $this->addField(Field::Text('filename', $fileName, 'UTF-8'));
163 | 
164 |             // Store contents
165 |         if ($storeContent) {
166 |             $this->addField(Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
167 |         } else {
168 |             $this->addField(Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
169 |         }
170 | 
171 |         // Store meta data properties
172 |         foreach ($coreProperties as $key => $value) {
173 |             $this->addField(Field::Text($key, $value, 'UTF-8'));
174 |         }
175 | 
176 |         // Store title (if not present in meta data)
177 |         if (!isset($coreProperties['title'])) {
178 |             $this->addField(Field::Text('title', $fileName, 'UTF-8'));
179 |         }
180 |     }
181 | 
182 |     /**
183 |      * Load Pptx document from a file
184 |      *
185 |      * @param string  $fileName
186 |      * @param boolean $storeContent
187 |      * @return \ZendSearch\Lucene\Document\Pptx
188 |      */
189 |     public static function loadPptxFile($fileName, $storeContent = false)
190 |     {
191 |         return new self($fileName, $storeContent);
192 |     }
193 | }
194 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/ExceptionInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Marker interface shared by the exceptions of the Lucene component.
 *
 * Declares no methods of its own; it extends the package-wide ZendSearch
 * exception interface so Lucene failures can be caught as a group.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 */
interface ExceptionInterface extends \ZendSearch\Exception\ExceptionInterface
{
}
19 | 
20 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/ExtensionNotLoadedException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Signals that a required PHP extension is not available, e.g. the OpenXML
 * document loaders throw it when the Zip extension is missing.
 *
 * An SPL \RuntimeException that also carries the Lucene ExceptionInterface
 * marker, so it can be caught by either type.
 */
class ExtensionNotLoadedException extends \RuntimeException implements ExceptionInterface
{
}
17 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/InvalidArgumentException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
13 | use ZendSearch\Lucene\Exception\ExceptionInterface;
14 | 
/**
 * Invalid-argument exception of the Lucene component.
 *
 * An SPL \InvalidArgumentException that also carries the Lucene
 * ExceptionInterface marker, so it can be caught by either type.
 */
class InvalidArgumentException extends \InvalidArgumentException implements ExceptionInterface
{
}
19 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/InvalidFileFormatException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Invalid-file-format exception of the Lucene component.
 *
 * An SPL \RuntimeException that also carries the Lucene ExceptionInterface
 * marker. NOTE(review): presumably raised for malformed index or input files;
 * no thrower is visible next to this declaration - confirm against callers.
 */
class InvalidFileFormatException extends \RuntimeException implements ExceptionInterface
{
}
17 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/OutOfBoundsException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Out-of-bounds exception of the Lucene component.
 *
 * An SPL \OutOfBoundsException that also carries the Lucene
 * ExceptionInterface marker, so it can be caught by either type.
 */
class OutOfBoundsException extends \OutOfBoundsException implements ExceptionInterface
{
}
17 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/OutOfRangeException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Out-of-range exception of the Lucene component.
 *
 * An SPL \OutOfRangeException that also carries the Lucene
 * ExceptionInterface marker, so it can be caught by either type.
 */
class OutOfRangeException extends \OutOfRangeException implements ExceptionInterface
{
}
17 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/RuntimeException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Runtime exception of the Lucene component.
 *
 * An SPL \RuntimeException that also carries the Lucene ExceptionInterface
 * marker, so it can be caught by either type.
 */
class RuntimeException extends \RuntimeException implements ExceptionInterface
{
}
17 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/UnexpectedValueException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Unexpected-value exception of the Lucene component.
 *
 * An SPL \UnexpectedValueException that also carries the Lucene
 * ExceptionInterface marker, so it can be caught by either type.
 */
class UnexpectedValueException extends \UnexpectedValueException implements ExceptionInterface
{
}
17 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Exception/UnsupportedMethodCallException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Exception;
12 | 
/**
 * Unsupported-method-call exception of the Lucene component.
 *
 * An SPL \BadMethodCallException that also carries the Lucene
 * ExceptionInterface marker, so it can be caught by either type.
 */
class UnsupportedMethodCallException extends \BadMethodCallException implements ExceptionInterface
{
}
17 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/FSMAction.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene;
12 | 
/**
 * Deferred method call: an object paired with the name of one of its methods.
 *
 * The pair is captured at construction time and the method is invoked,
 * without arguments, each time doAction() is called.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 */
class FSMAction
{
    /**
     * Object the method is invoked on
     *
     * @var object
     */
    private $_object;

    /**
     * Name of the method to invoke
     *
     * @var string
     */
    private $_method;

    /**
     * Remember the call target.
     *
     * @param object $object  receiver of the call
     * @param string $method  name of the method to call on $object
     */
    public function __construct($object, $method)
    {
        $this->_method = $method;
        $this->_object = $object;
    }

    /**
     * Invoke the stored method on the stored object.
     *
     * No arguments are passed and the method's return value is discarded.
     */
    public function doAction()
    {
        $this->_object->{$this->_method}();
    }
}
54 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/DocsFilter.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Index;
12 | 
/**
 * Container for per-segment document filters used while searching.
 *
 * It may or _may_not_ be honoured for actual filtering; it is only a hint that
 * an upper-level query limits the search result to the listed documents.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 */
class DocsFilter
{
    /**
     * Segment filters, keyed by segment name. Each filter is itself an array
     * keyed by document id; the values are not meaningful, only the keys are:
     *
     *   array(
     *       <segmentName> => array(<docId> => <undefined_value>, ...),
     *       <segmentName> => array(<docId> => <undefined_value>, ...),
     *       ...
     *   )
     *
     * @var array
     */
    public $segmentFilters = array();
}
46 | 
47 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/FieldInfo.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Index;
12 | 
/**
 * Plain value holder describing one field of a segment.
 *
 * All members are public and are simply copied from the constructor
 * arguments; no validation or conversion is performed.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 */
class FieldInfo
{
    /** @var mixed Field name */
    public $name;

    /** @var mixed "Field is indexed" flag */
    public $isIndexed;

    /** @var mixed Field number */
    public $number;

    /** @var mixed "Store term vector" flag */
    public $storeTermVector;

    /** @var mixed "Norms omitted" flag */
    public $normsOmitted;

    /** @var mixed "Payloads stored" flag */
    public $payloadsStored;

    /**
     * Object constructor.
     *
     * @param mixed $name
     * @param mixed $isIndexed
     * @param mixed $number
     * @param mixed $storeTermVector
     * @param mixed $normsOmitted    defaults to false
     * @param mixed $payloadsStored  defaults to false
     */
    public function __construct(
        $name,
        $isIndexed,
        $number,
        $storeTermVector,
        $normsOmitted = false,
        $payloadsStored = false
    ) {
        $this->name   = $name;
        $this->number = $number;

        $this->isIndexed       = $isIndexed;
        $this->storeTermVector = $storeTermVector;
        $this->normsOmitted    = $normsOmitted;
        $this->payloadsStored  = $payloadsStored;
    }
}
37 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/SegmentMerger.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Index;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Document;
 15 | use ZendSearch\Lucene\Exception\RuntimeException;
 16 | use ZendSearch\Lucene\Storage\Directory;
 17 | 
 18 | /**
 19 |  * @category   Zend
 20 |  * @package    Zend_Search_Lucene
 21 |  * @subpackage Index
 22 |  */
 23 | class SegmentMerger
 24 | {
 25 |     /**
 26 |      * Target segment writer
 27 |      *
 28 |      * @var \ZendSearch\Lucene\Index\SegmentWriter\StreamWriter
 29 |      */
 30 |     private $_writer;
 31 | 
 32 |     /**
 33 |      * Number of docs in a new segment
 34 |      *
 35 |      * @var integer
 36 |      */
 37 |     private $_docCount;
 38 | 
 39 |     /**
 40 |      * A set of segments to be merged
 41 |      *
 42 |      * @var array|\ZendSearch\Lucene\Index\SegmentInfo
 43 |      */
 44 |     private $_segmentInfos = array();
 45 | 
 46 |     /**
 47 |      * Flag to signal, that merge is already done
 48 |      *
 49 |      * @var boolean
 50 |      */
 51 |     private $_mergeDone = false;
 52 | 
 53 |     /**
 54 |      * Field map
 55 |      * [<segment_name>][<field_number>] => <target_field_number>
 56 |      *
 57 |      * @var array
 58 |      */
 59 |     private $_fieldsMap = array();
 60 | 
 61 | 
 62 | 
 63 |     /**
 64 |      * Object constructor.
 65 |      *
 66 |      * Creates new segment merger with $directory as target to merge segments into
 67 |      * and $name as a name of new segment
 68 |      *
 69 |      * @param \ZendSearch\Lucene\Storage\Directory\DirectoryInterface $directory
 70 |      * @param string $name
 71 |      */
 72 |     public function __construct(Directory\DirectoryInterface $directory, $name)
 73 |     {
 74 |         /** \ZendSearch\Lucene\Index\SegmentWriter\StreamWriter */
 75 |         $this->_writer = new SegmentWriter\StreamWriter($directory, $name);
 76 |     }
 77 | 
 78 | 
 79 |     /**
 80 |      * Add segmnet to a collection of segments to be merged
 81 |      *
 82 |      * @param \ZendSearch\Lucene\Index\SegmentInfo $segment
 83 |      */
 84 |     public function addSource(SegmentInfo $segmentInfo)
 85 |     {
 86 |         $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
 87 |     }
 88 | 
 89 | 
 90 |     /**
 91 |      * Do merge.
 92 |      *
 93 |      * Returns number of documents in newly created segment
 94 |      *
 95 |      * @return \ZendSearch\Lucene\Index\SegmentInfo
 96 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException
 97 |      */
 98 |     public function merge()
 99 |     {
100 |         if ($this->_mergeDone) {
101 |             throw new RuntimeException('Merge is already done.');
102 |         }
103 | 
104 |         if (count($this->_segmentInfos) < 1) {
105 |             throw new RuntimeException('Wrong number of segments to be merged ('
106 |                                                  . count($this->_segmentInfos)
107 |                                                  . ').');
108 |         }
109 | 
110 |         $this->_mergeFields();
111 |         $this->_mergeNorms();
112 |         $this->_mergeStoredFields();
113 |         $this->_mergeTerms();
114 | 
115 |         $this->_mergeDone = true;
116 | 
117 |         return $this->_writer->close();
118 |     }
119 | 
120 | 
121 |     /**
122 |      * Merge fields information
123 |      */
124 |     private function _mergeFields()
125 |     {
126 |         foreach ($this->_segmentInfos as $segName => $segmentInfo) {
127 |             foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
128 |                 $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
129 |             }
130 |         }
131 |     }
132 | 
133 |     /**
134 |      * Merge field's normalization factors
135 |      */
136 |     private function _mergeNorms()
137 |     {
138 |         foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
139 |             if ($fieldInfo->isIndexed) {
140 |                 foreach ($this->_segmentInfos as $segName => $segmentInfo) {
141 |                     if ($segmentInfo->hasDeletions()) {
142 |                         $srcNorm = $segmentInfo->normVector($fieldInfo->name);
143 |                         $norm    = '';
144 |                         $docs    = $segmentInfo->count();
145 |                         for ($count = 0; $count < $docs; $count++) {
146 |                             if (!$segmentInfo->isDeleted($count)) {
147 |                                 $norm .= $srcNorm[$count];
148 |                             }
149 |                         }
150 |                         $this->_writer->addNorm($fieldInfo->name, $norm);
151 |                     } else {
152 |                         $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
153 |                     }
154 |                 }
155 |             }
156 |         }
157 |     }
158 | 
159 |     /**
160 |      * Merge fields information
161 |      */
162 |     private function _mergeStoredFields()
163 |     {
164 |         $this->_docCount = 0;
165 | 
166 |         foreach ($this->_segmentInfos as $segName => $segmentInfo) {
167 |             $fdtFile = $segmentInfo->openCompoundFile('.fdt');
168 | 
169 |             for ($count = 0; $count < $segmentInfo->count(); $count++) {
170 |                 $fieldCount = $fdtFile->readVInt();
171 |                 $storedFields = array();
172 | 
173 |                 for ($count2 = 0; $count2 < $fieldCount; $count2++) {
174 |                     $fieldNum = $fdtFile->readVInt();
175 |                     $bits = $fdtFile->readByte();
176 |                     $fieldInfo = $segmentInfo->getField($fieldNum);
177 | 
178 |                     if (!($bits & 2)) { // Text data
179 |                         $storedFields[] =
180 |                                  new Document\Field($fieldInfo->name,
181 |                                                     $fdtFile->readString(),
182 |                                                     'UTF-8',
183 |                                                     true,
184 |                                                     $fieldInfo->isIndexed,
185 |                                                     $bits & 1 );
186 |                     } else {            // Binary data
187 |                         $storedFields[] =
188 |                                  new Document\Field($fieldInfo->name,
189 |                                                     $fdtFile->readBinary(),
190 |                                                     '',
191 |                                                     true,
192 |                                                     $fieldInfo->isIndexed,
193 |                                                     $bits & 1,
194 |                                                     true);
195 |                     }
196 |                 }
197 | 
198 |                 if (!$segmentInfo->isDeleted($count)) {
199 |                     $this->_docCount++;
200 |                     $this->_writer->addStoredFields($storedFields);
201 |                 }
202 |             }
203 |         }
204 |     }
205 | 
206 | 
207 |     /**
208 |      * Merge fields information
209 |      */
210 |     private function _mergeTerms()
211 |     {
212 |         $segmentInfoQueue = new TermsPriorityQueue();
213 | 
214 |         $segmentStartId = 0;
215 |         foreach ($this->_segmentInfos as $segName => $segmentInfo) {
216 |             $segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, SegmentInfo::SM_MERGE_INFO);
217 | 
218 |             // Skip "empty" segments
219 |             if ($segmentInfo->currentTerm() !== null) {
220 |                 $segmentInfoQueue->put($segmentInfo);
221 |             }
222 |         }
223 | 
224 |         $this->_writer->initializeDictionaryFiles();
225 | 
226 |         $termDocs = array();
227 |         while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
228 |             // Merge positions array
229 |             $termDocs += $segmentInfo->currentTermPositions();
230 | 
231 |             if ($segmentInfoQueue->top() === null ||
232 |                 $segmentInfoQueue->top()->currentTerm()->key() !=
233 |                             $segmentInfo->currentTerm()->key()) {
234 |                 // We got new term
235 |                 ksort($termDocs, SORT_NUMERIC);
236 | 
237 |                 // Add term if it's contained in any document
238 |                 if (count($termDocs) > 0) {
239 |                     $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
240 |                 }
241 |                 $termDocs = array();
242 |             }
243 | 
244 |             $segmentInfo->nextTerm();
245 |             // check, if segment dictionary is finished
246 |             if ($segmentInfo->currentTerm() !== null) {
247 |                 // Put segment back into the priority queue
248 |                 $segmentInfoQueue->put($segmentInfo);
249 |             }
250 |         }
251 | 
252 |         $this->_writer->closeDictionaryFiles();
253 |     }
254 | }
255 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/SegmentWriter/DocumentWriter.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Index\SegmentWriter;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Analysis\Analyzer;
 15 | use ZendSearch\Lucene\Document;
 16 | use ZendSearch\Lucene\Exception as LuceneException;
 17 | use ZendSearch\Lucene\Index;
 18 | use ZendSearch\Lucene\Search\Similarity\AbstractSimilarity;
 19 | use ZendSearch\Lucene\Storage\Directory;
 20 | 
/**
 * Segment writer that is fed whole documents.
 *
 * Terms and their positions are accumulated in memory by addDocument() and
 * written out as the term dictionary when the segment is closed.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 */
class DocumentWriter extends AbstractSegmentWriter
{
    /**
     * Term Dictionary
     * Array of \ZendSearch\Lucene\Index\Term objects, keyed by Term::key()
     *
     * @var array
     */
    protected $_termDictionary;

    /**
     * Documents, which contain the term
     * [<term_key>][<doc_id>] => array of term positions within the document
     *
     * @var array
     */
    protected $_termDocs;

    /**
     * Object constructor.
     *
     * @param Directory\DirectoryInterface $directory
     * @param string $name
     */
    public function __construct(Directory\DirectoryInterface $directory, $name)
    {
        parent::__construct($directory, $name);

        $this->_termDocs       = array();
        $this->_termDictionary = array();
    }


    /**
     * Adds a document to this segment.
     *
     * For every field of the document: indexes its terms (if the field is
     * indexed), computes the field norm for this document, collects the
     * field for storing (if it is stored) and registers it via addField().
     * Finally the norm vectors of all indexed fields are extended by one
     * byte and the stored fields are written.
     *
     * @param \ZendSearch\Lucene\Document $document
     * @throws LuceneException\UnsupportedMethodCallException if any field
     *         requests term vector storing
     */
    public function addDocument(Document $document)
    {
        $storedFields = array();
        $docNorms     = array();
        $similarity   = AbstractSimilarity::getDefault();

        foreach ($document->getFieldNames() as $fieldName) {
            $field = $document->getField($fieldName);

            if ($field->storeTermVector) {
                /**
                 * @todo term vector storing support
                 */
                throw new LuceneException\UnsupportedMethodCallException('Store term vector functionality is not supported yet.');
            }

            if ($field->isIndexed) {
                if ($field->isTokenized) {
                    $analyzer = Analyzer\Analyzer::getDefault();
                    $analyzer->setInput($field->value, $field->encoding);

                    $position     = 0;
                    $tokenCounter = 0;
                    while (($token = $analyzer->nextToken()) !== null) {
                        $tokenCounter++;

                        $term = new Index\Term($token->getTermText(), $field->name);
                        $termKey = $term->key();

                        // $this->_docCount serves as the id of the document being added
                        if (!isset($this->_termDictionary[$termKey])) {
                            // New term
                            $this->_termDictionary[$termKey] = $term;
                            $this->_termDocs[$termKey] = array();
                            $this->_termDocs[$termKey][$this->_docCount] = array();
                        } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                            // Existing term, but new term entry
                            $this->_termDocs[$termKey][$this->_docCount] = array();
                        }
                        // The increment is applied before the position is recorded
                        $position += $token->getPositionIncrement();
                        $this->_termDocs[$termKey][$this->_docCount][] = $position;
                    }

                    if ($tokenCounter == 0) {
                        // Field contains empty value. Treat it as non-indexed and non-tokenized
                        // (a clone is modified so the caller's field object stays untouched)
                        $field = clone($field);
                        $field->isIndexed = $field->isTokenized = false;
                    } else {
                        // Norm byte: length norm for the token count, scaled by document and field boosts
                        $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name,
                                                                                                       $tokenCounter)*
                                                                               $document->boost*
                                                                               $field->boost ));
                    }
                } elseif (($fieldUtf8Value = $field->getUtf8Value()) == '') {
                    // Field contains empty value. Treat it as non-indexed and non-tokenized
                    $field = clone($field);
                    $field->isIndexed = $field->isTokenized = false;
                } else {
                    // Non-tokenized field: the whole value is a single term
                    $term = new Index\Term($fieldUtf8Value, $field->name);
                    $termKey = $term->key();

                    if (!isset($this->_termDictionary[$termKey])) {
                        // New term
                        $this->_termDictionary[$termKey] = $term;
                        $this->_termDocs[$termKey] = array();
                        $this->_termDocs[$termKey][$this->_docCount] = array();
                    } elseif (!isset($this->_termDocs[$termKey][$this->_docCount])) {
                        // Existing term, but new term entry
                        $this->_termDocs[$termKey][$this->_docCount] = array();
                    }
                    $this->_termDocs[$termKey][$this->_docCount][] = 0; // position

                    $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, 1)*
                                                                           $document->boost*
                                                                           $field->boost ));
                }
            }

            if ($field->isStored) {
                $storedFields[] = $field;
            }

            $this->addField($field);
        }

        // Extend the norm vector of every indexed field of the segment by one
        // byte for this document.
        // NOTE(review): $this->_fields and $this->_norms are inherited state,
        // presumably maintained by addField() in the parent class - confirm.
        foreach ($this->_fields as $fieldName => $field) {
            if (!$field->isIndexed) {
                continue;
            }

            if (!isset($this->_norms[$fieldName])) {
                // First occurrence of the field in this segment: pad the vector
                // with the "zero tokens" norm for all previously added documents
                $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
                                                       $this->_docCount);
            }

            if (isset($docNorms[$fieldName])){
                $this->_norms[$fieldName] .= $docNorms[$fieldName];
            } else {
                // The document has no indexed value for this field
                $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) ));
            }
        }

        // NOTE(review): $this->_docCount is never changed in this class;
        // presumably addStoredFields() advances it - confirm in the parent.
        $this->addStoredFields($storedFields);
    }


    /**
     * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
     *
     * Terms are written in ascending string order of their keys.
     */
    protected function _dumpDictionary()
    {
        ksort($this->_termDictionary, SORT_STRING);

        $this->initializeDictionaryFiles();

        foreach ($this->_termDictionary as $termId => $term) {
            $this->addTerm($term, $this->_termDocs[$termId]);
        }

        $this->closeDictionaryFiles();
    }


    /**
     * Close segment, write it to disk and return segment info
     *
     * Nothing is written and null is returned if no documents were added.
     *
     * @return \ZendSearch\Lucene\Index\SegmentInfo|null
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_dumpDictionary();

        $this->_generateCFS();

        return new Index\SegmentInfo($this->_directory,
                                     $this->_name,
                                     $this->_docCount,
                                     -1,
                                     null,
                                     true,
                                     true);
    }

}
213 | 
214 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/SegmentWriter/StreamWriter.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Index\SegmentWriter;
12 | 
13 | use ZendSearch\Lucene\Index as LuceneIndex;
14 | use ZendSearch\Lucene\Storage\Directory;
15 | 
/**
 * Segment writer that receives ready-made segment data (norm vectors, stored
 * fields, terms) piece by piece instead of whole documents.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 */
class StreamWriter extends AbstractSegmentWriter
{
    /**
     * Object constructor.
     *
     * Delegates to the parent constructor without any additional setup.
     *
     * @param Directory\DirectoryInterface $directory
     * @param string $name
     */
    public function __construct(Directory\DirectoryInterface $directory, $name)
    {
        parent::__construct($directory, $name);
    }


    /**
     * Create the stored fields files ('.fdx' and '.fdt') in the target
     * directory and register both names in the segment's file list.
     */
    public function createStoredFieldsFiles()
    {
        $fdxName = $this->_name . '.fdx';
        $fdtName = $this->_name . '.fdt';

        $this->_fdxFile = $this->_directory->createFile($fdxName);
        $this->_fdtFile = $this->_directory->createFile($fdtName);

        $this->_files[] = $fdxName;
        $this->_files[] = $fdtName;
    }

    /**
     * Append a norm vector to the norms collected for a field.
     *
     * The first vector given for a field is stored as is; later ones are
     * concatenated to what is already there.
     *
     * @param string $fieldName
     * @param string $normVector
     */
    public function addNorm($fieldName, $normVector)
    {
        $this->_norms[$fieldName] = isset($this->_norms[$fieldName])
                                  ? $this->_norms[$fieldName] . $normVector
                                  : $normVector;
    }

    /**
     * Close segment, write it to disk and return segment info
     *
     * A segment without documents is not written; null is returned instead.
     *
     * @return \ZendSearch\Lucene\Index\SegmentInfo|null
     */
    public function close()
    {
        if ($this->_docCount == 0) {
            return null;
        }

        $this->_dumpFNM();
        $this->_generateCFS();

        $segmentInfo = new LuceneIndex\SegmentInfo(
            $this->_directory,
            $this->_name,
            $this->_docCount,
            -1,
            null,
            true,
            true
        );

        return $segmentInfo;
    }
}
79 | 
80 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/Term.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Index;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | 
 15 | /**
 16 |  * A Term represents a word from text.  This is the unit of search.  It is
 17 |  * composed of two elements, the text of the word, as a string, and the name of
 18 |  * the field that the text occured in, an interned string.
 19 |  *
 20 |  * Note that terms may represent more than words from text fields, but also
 21 |  * things like dates, email addresses, urls, etc.
 22 |  *
 23 |  * @category   Zend
 24 |  * @package    Zend_Search_Lucene
 25 |  * @subpackage Index
 26 |  */
 27 | class Term
 28 | {
 29 |     /**
 30 |      * Field name or field number (depending from context)
 31 |      *
 32 |      * @var mixed
 33 |      */
 34 |     public $field;
 35 | 
 36 |     /**
 37 |      * Term value
 38 |      *
 39 |      * @var string
 40 |      */
 41 |     public $text;
 42 | 
 43 | 
 44 |     /**
 45 |      * Object constructor
 46 |      */
 47 |     public function __construct($text, $field = null)
 48 |     {
 49 |         $this->field = ($field === null)?  Lucene\Lucene::getDefaultSearchField() : $field;
 50 |         $this->text  = $text;
 51 |     }
 52 | 
 53 | 
 54 |     /**
 55 |      * Returns term key
 56 |      *
 57 |      * @return string
 58 |      */
 59 |     public function key()
 60 |     {
 61 |         return $this->field . chr(0) . $this->text;
 62 |     }
 63 | 
 64 |     /**
 65 |      * Get term prefix
 66 |      *
 67 |      * @param string $str
 68 |      * @param integer $length
 69 |      * @return string
 70 |      */
 71 |     public static function getPrefix($str, $length)
 72 |     {
 73 |         /**
 74 |          * @todo !!!!!!! use mb_string or iconv functions if they are available
 75 |          */
 76 |         $prefixBytes = 0;
 77 |         $prefixChars = 0;
 78 |         while (isset($str[$prefixBytes])  &&  $prefixChars < $length) {
 79 |             $charBytes = 1;
 80 |             if ((ord($str[$prefixBytes]) & 0xC0) == 0xC0) {
 81 |                 $charBytes++;
 82 |                 if (ord($str[$prefixBytes]) & 0x20 ) {
 83 |                     $charBytes++;
 84 |                     if (ord($str[$prefixBytes]) & 0x10 ) {
 85 |                         $charBytes++;
 86 |                     }
 87 |                 }
 88 |             }
 89 | 
 90 |             if (! isset($str[$prefixBytes + $charBytes - 1])) {
 91 |                 // wrong character
 92 |                 break;
 93 |             }
 94 | 
 95 |             $prefixChars++;
 96 |             $prefixBytes += $charBytes;
 97 |         }
 98 | 
 99 |         return substr($str, 0, $prefixBytes);
100 |     }
101 | 
102 |     /**
103 |      * Get UTF-8 string length
104 |      *
105 |      * @param string $str
106 |      * @return string
107 |      */
108 |     public static function getLength($str)
109 |     {
110 |         $bytes = 0;
111 |         $chars = 0;
112 |         while ($bytes < strlen($str)) {
113 |             $charBytes = 1;
114 |             if ((ord($str[$bytes]) & 0xC0) == 0xC0) {
115 |                 $charBytes++;
116 |                 if (ord($str[$bytes]) & 0x20 ) {
117 |                     $charBytes++;
118 |                     if (ord($str[$bytes]) & 0x10 ) {
119 |                         $charBytes++;
120 |                     }
121 |                 }
122 |             }
123 | 
124 |             if ($bytes + $charBytes > strlen($str)) {
125 |                 // wrong character
126 |                 break;
127 |             }
128 | 
129 |             $chars++;
130 |             $bytes += $charBytes;
131 |         }
132 | 
133 |         return $chars;
134 |     }
135 | }
136 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/TermInfo.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Index;
12 | 
/**
 * Record of the information stored for a single term.
 *
 * A plain value holder: every constructor argument is copied unchanged into
 * the public member of the same name.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 */
class TermInfo
{
    /**
     * Number of documents containing the term.
     *
     * @var integer
     */
    public $docFreq;

    /**
     * Offset of the term's data in a Frequencies file.
     *
     * @var integer
     */
    public $freqPointer;

    /**
     * Offset of the term's data in a Positions file.
     *
     * @var integer
     */
    public $proxPointer;

    /**
     * Offset of the term's SkipData in a Frequencies file.
     *
     * @var integer
     */
    public $skipOffset;

    /**
     * Offset of the _next_ term in a TermDictionary file.
     * Used only for Term Index
     *
     * @var integer|null
     */
    public $indexPointer;

    /**
     * Object constructor.
     *
     * @param integer      $docFreq
     * @param integer      $freqPointer
     * @param integer      $proxPointer
     * @param integer      $skipOffset
     * @param integer|null $indexPointer  defaults to null
     */
    public function __construct($docFreq, $freqPointer, $proxPointer, $skipOffset, $indexPointer = null)
    {
        $this->indexPointer = $indexPointer;
        $this->skipOffset   = $skipOffset;
        $this->proxPointer  = $proxPointer;
        $this->freqPointer  = $freqPointer;
        $this->docFreq      = $docFreq;
    }
}
67 | 
68 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/TermsPriorityQueue.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Index;
12 | 
13 | use ZendSearch\Lucene;
14 | 
15 | /** @todo !!!!!! convert to SPL class usage */
16 | 
/**
 * Priority queue whose elements are ordered by the key of their current term.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 */
class TermsPriorityQueue extends Lucene\AbstractPriorityQueue
{
    /**
     * Element ordering predicate.
     *
     * Both arguments must expose currentTerm(), whose result must expose key().
     * The two keys are compared byte-wise with strcmp().
     *
     * @param mixed $termsStream1
     * @param mixed $termsStream2
     * @return boolean  True when the key of $termsStream1 sorts strictly before the key of $termsStream2
     */
    protected function _less($termsStream1, $termsStream2)
    {
        $leftKey  = $termsStream1->currentTerm()->key();
        $rightKey = $termsStream2->currentTerm()->key();

        return strcmp($leftKey, $rightKey) < 0;
    }
}
38 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Index/TermsStreamInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Index;
12 | 
/**
 * Contract for objects that expose their terms as a sequentially readable stream.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 */
interface TermsStreamInterface
{
    /**
     * Reset the terms stream.
     */
    public function resetTermsStream();

    /**
     * Advance the terms stream up to the specified term prefix.
     *
     * The prefix carries fully specified field info plus a portion of the searched term.
     *
     * @param \ZendSearch\Lucene\Index\Term $prefix
     */
    public function skipTo(Term $prefix);

    /**
     * Scan the terms dictionary and return the next term.
     *
     * @return \ZendSearch\Lucene\Index\Term|null
     */
    public function nextTerm();

    /**
     * Return the term at the current position.
     *
     * @return \ZendSearch\Lucene\Index\Term|null
     */
    public function currentTerm();

    /**
     * Close the terms stream.
     *
     * Should be called to clean up resources when the stream is not read up to the end.
     */
    public function closeTermsStream();
}
55 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/LockManager.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene;
 12 | 
 13 | use ZendSearch\Lucene\Exception\RuntimeException;
 14 | use ZendSearch\Lucene\Storage\Directory\DirectoryInterface as Directory;
 15 | 
 16 | /**
 17 |  * This is an utility class which provides index locks processing functionality
 18 |  *
 19 |  * @category   Zend
 20 |  * @package    Zend_Search_Lucene
 21 |  */
 22 | class LockManager
 23 | {
 24 |     /**
 25 |      * consts for name of file to show lock status
 26 |      */
 27 |     const WRITE_LOCK_FILE                = 'write.lock.file';
 28 |     const READ_LOCK_FILE                 = 'read.lock.file';
 29 |     const READ_LOCK_PROCESSING_LOCK_FILE = 'read-lock-processing.lock.file';
 30 |     const OPTIMIZATION_LOCK_FILE         = 'optimization.lock.file';
 31 | 
 32 |     /**
 33 |      * Obtain exclusive write lock on the index
 34 |      *
 35 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
 36 |      * @return \ZendSearch\Lucene\Storage\File\FileInterface
 37 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException
 38 |      */
 39 |     public static function obtainWriteLock(Directory $lockDirectory)
 40 |     {
 41 |         $lock = $lockDirectory->createFile(self::WRITE_LOCK_FILE);
 42 |         if (!$lock->lock(LOCK_EX)) {
 43 |             throw new RuntimeException('Can\'t obtain exclusive index lock');
 44 |         }
 45 |         return $lock;
 46 |     }
 47 | 
 48 |     /**
 49 |      * Release exclusive write lock
 50 |      *
 51 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
 52 |      */
 53 |     public static function releaseWriteLock(Directory $lockDirectory)
 54 |     {
 55 |         $lock = $lockDirectory->getFileObject(self::WRITE_LOCK_FILE);
 56 |         $lock->unlock();
 57 |     }
 58 | 
 59 |     /**
 60 |      * Obtain the exclusive "read escalation/de-escalation" lock
 61 |      *
 62 |      * Required to protect the escalate/de-escalate read lock process
 63 |      * on GFS (and potentially other) mounted filesystems.
 64 |      *
 65 |      * Why we need this:
 66 |      *  While GFS supports cluster-wide locking via flock(), it's
 67 |      *  implementation isn't quite what it should be.  The locking
 68 |      *  semantics that work consistently on a local filesystem tend to
 69 |      *  fail on GFS mounted filesystems.  This appears to be a design defect
 70 |      *  in the implementation of GFS.  How this manifests itself is that
 71 |      *  conditional promotion of a shared lock to exclusive will always
 72 |      *  fail, lock release requests are honored but not immediately
 73 |      *  processed (causing erratic failures of subsequent conditional
 74 |      *  requests) and the releasing of the exclusive lock before the
 75 |      *  shared lock is set when a lock is demoted (which can open a window
 76 |      *  of opportunity for another process to gain an exclusive lock when
 77 |      *  it shoudln't be allowed to).
 78 |      *
 79 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
 80 |      * @return \ZendSearch\Lucene\Storage\File\FileInterface
 81 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException
 82 |      */
 83 |     private static function _startReadLockProcessing(Directory $lockDirectory)
 84 |     {
 85 |         $lock = $lockDirectory->createFile(self::READ_LOCK_PROCESSING_LOCK_FILE);
 86 |         if (!$lock->lock(LOCK_EX)) {
 87 |             throw new RuntimeException('Can\'t obtain exclusive lock for the read lock processing file');
 88 |         }
 89 |         return $lock;
 90 |     }
 91 | 
 92 |     /**
 93 |      * Release the exclusive "read escalation/de-escalation" lock
 94 |      *
 95 |      * Required to protect the escalate/de-escalate read lock process
 96 |      * on GFS (and potentially other) mounted filesystems.
 97 |      *
 98 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
 99 |      */
100 |     private static function _stopReadLockProcessing(Directory $lockDirectory)
101 |     {
102 |         $lock = $lockDirectory->getFileObject(self::READ_LOCK_PROCESSING_LOCK_FILE);
103 |         $lock->unlock();
104 |     }
105 | 
106 | 
107 |     /**
108 |      * Obtain shared read lock on the index
109 |      *
110 |      * It doesn't block other read or update processes, but prevent index from the premature cleaning-up
111 |      *
112 |      * @param \ZendSearch\Lucene\Storage\Directory $defaultLockDirectory
113 |      * @return \ZendSearch\Lucene\Storage\File\FileInterface
114 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException
115 |      */
116 |     public static function obtainReadLock(Directory $lockDirectory)
117 |     {
118 |         $lock = $lockDirectory->createFile(self::READ_LOCK_FILE);
119 |         if (!$lock->lock(LOCK_SH)) {
120 |             throw new RuntimeException('Can\'t obtain shared reading index lock');
121 |         }
122 |         return $lock;
123 |     }
124 | 
125 |     /**
126 |      * Release shared read lock
127 |      *
128 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
129 |      */
130 |     public static function releaseReadLock(Directory $lockDirectory)
131 |     {
132 |         $lock = $lockDirectory->getFileObject(self::READ_LOCK_FILE);
133 |         $lock->unlock();
134 |     }
135 | 
136 |     /**
137 |      * Escalate Read lock to exclusive level
138 |      *
139 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
140 |      * @return boolean
141 |      */
142 |     public static function escalateReadLock(Directory $lockDirectory)
143 |     {
144 |         self::_startReadLockProcessing($lockDirectory);
145 | 
146 |         $lock = $lockDirectory->getFileObject(self::READ_LOCK_FILE);
147 | 
148 |         // First, release the shared lock for the benefit of GFS since
149 |         // it will fail the conditional request to promote the lock to
150 |         // "exclusive" while the shared lock is held (even when we are
151 |         // the only holder).
152 |         $lock->unlock();
153 | 
154 |         // GFS is really poor.  While the above "unlock" returns, GFS
155 |         // doesn't clean up it's tables right away (which will potentially
156 |         // cause the conditional locking for the "exclusive" lock to fail.
157 |         // We will retry the conditional lock request several times on a
158 |         // failure to get past this.  The performance hit is negligible
159 |         // in the grand scheme of things and only will occur with GFS
160 |         // filesystems or if another local process has the shared lock
161 |         // on local filesystems.
162 |         for ($retries = 0; $retries < 10; $retries++) {
163 |             if ($lock->lock(LOCK_EX, true)) {
164 |                 // Exclusive lock is obtained!
165 |                 self::_stopReadLockProcessing($lockDirectory);
166 |                 return true;
167 |             }
168 | 
169 |             // wait 1 microsecond
170 |             usleep(1);
171 |         }
172 | 
173 |         // Restore lock state
174 |         $lock->lock(LOCK_SH);
175 | 
176 |         self::_stopReadLockProcessing($lockDirectory);
177 |         return false;
178 |     }
179 | 
180 |     /**
181 |      * De-escalate Read lock to shared level
182 |      *
183 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
184 |      */
185 |     public static function deEscalateReadLock(Directory $lockDirectory)
186 |     {
187 |         $lock = $lockDirectory->getFileObject(self::READ_LOCK_FILE);
188 |         $lock->lock(LOCK_SH);
189 |     }
190 | 
191 |     /**
192 |      * Obtain exclusive optimization lock on the index
193 |      *
194 |      * Returns lock object on success and false otherwise (doesn't block execution)
195 |      *
196 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
197 |      * @return mixed
198 |      */
199 |     public static function obtainOptimizationLock(Directory $lockDirectory)
200 |     {
201 |         $lock = $lockDirectory->createFile(self::OPTIMIZATION_LOCK_FILE);
202 |         if (!$lock->lock(LOCK_EX, true)) {
203 |             return false;
204 |         }
205 |         return $lock;
206 |     }
207 | 
208 |     /**
209 |      * Release exclusive optimization lock
210 |      *
211 |      * @param \ZendSearch\Lucene\Storage\Directory $lockDirectory
212 |      */
213 |     public static function releaseOptimizationLock(Directory $lockDirectory)
214 |     {
215 |         $lock = $lockDirectory->getFileObject(self::OPTIMIZATION_LOCK_FILE);
216 |         $lock->unlock();
217 |     }
218 | }
219 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Lucene.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene;
 12 | 
 13 | use ZendSearch\Lucene\Exception\UnsupportedMethodCallException;
 14 | 
 15 | /**
 16 |  * @category   Zend
 17 |  * @package    Zend_Search_Lucene
 18 |  */
 19 | class Lucene
 20 | {
 21 |     /**
 22 |      * Default field name for search
 23 |      *
 24 |      * Null means search through all fields
 25 |      *
 26 |      * @var string
 27 |      */
 28 |     private static $_defaultSearchField = null;
 29 | 
 30 |     /**
 31 |      * Result set limit
 32 |      *
 33 |      * 0 means no limit
 34 |      *
 35 |      * @var integer
 36 |      */
 37 |     private static $_resultSetLimit = 0;
 38 | 
 39 |     /**
 40 |      * Terms per query limit
 41 |      *
 42 |      * 0 means no limit
 43 |      *
 44 |      * @var integer
 45 |      */
 46 |     private static $_termsPerQueryLimit = 1024;
 47 | 
 48 |     /**
 49 |      * Create index
 50 |      *
 51 |      * @param mixed $directory
 52 |      * @return \ZendSearch\Lucene\SearchIndexInterface
 53 |      */
 54 |     public static function create($directory)
 55 |     {
 56 |         return new Index($directory, true);
 57 |     }
 58 | 
 59 |     /**
 60 |      * Open index
 61 |      *
 62 |      * @param mixed $directory
 63 |      * @return \ZendSearch\Lucene\SearchIndexInterface
 64 |      */
 65 |     public static function open($directory)
 66 |     {
 67 |         return new Index($directory, false);
 68 |     }
 69 | 
 70 |     /**
 71 |      * @throws \ZendSearch\Lucene\Exception\UnsupportedMethodCallException
 72 |      */
 73 |     public function __construct()
 74 |     {
 75 |         throw new UnsupportedMethodCallException('\ZendSearch\Lucene class is the only container for static methods. Use Lucene::open() or Lucene::create() methods.');
 76 |     }
 77 | 
 78 |     /**
 79 |      * Set default search field.
 80 |      *
 81 |      * Null means, that search is performed through all fields by default
 82 |      *
 83 |      * Default value is null
 84 |      *
 85 |      * @param string $fieldName
 86 |      */
 87 |     public static function setDefaultSearchField($fieldName)
 88 |     {
 89 |         self::$_defaultSearchField = $fieldName;
 90 |     }
 91 | 
 92 |     /**
 93 |      * Get default search field.
 94 |      *
 95 |      * Null means, that search is performed through all fields by default
 96 |      *
 97 |      * @return string
 98 |      */
 99 |     public static function getDefaultSearchField()
100 |     {
101 |         return self::$_defaultSearchField;
102 |     }
103 | 
104 |     /**
105 |      * Set result set limit.
106 |      *
107 |      * 0 (default) means no limit
108 |      *
109 |      * @param integer $limit
110 |      */
111 |     public static function setResultSetLimit($limit)
112 |     {
113 |         self::$_resultSetLimit = $limit;
114 |     }
115 | 
116 |     /**
117 |      * Get result set limit.
118 |      *
119 |      * 0 means no limit
120 |      *
121 |      * @return integer
122 |      */
123 |     public static function getResultSetLimit()
124 |     {
125 |         return self::$_resultSetLimit;
126 |     }
127 | 
128 |     /**
129 |      * Set terms per query limit.
130 |      *
131 |      * 0 means no limit
132 |      *
133 |      * @param integer $limit
134 |      */
135 |     public static function setTermsPerQueryLimit($limit)
136 |     {
137 |         self::$_termsPerQueryLimit = $limit;
138 |     }
139 | 
140 |     /**
141 |      * Get result set limit.
142 |      *
143 |      * 0 (default) means no limit
144 |      *
145 |      * @return integer
146 |      */
147 |     public static function getTermsPerQueryLimit()
148 |     {
149 |         return self::$_termsPerQueryLimit;
150 |     }
151 | }
152 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Exception/ExceptionInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Exception;
12 | 
/**
 * Exception marker for the ZendSearch\Lucene\Search namespace.
 *
 * Declares no methods of its own; it only extends the Lucene-level
 * exception interface.
 */
interface ExceptionInterface extends \ZendSearch\Lucene\Exception\ExceptionInterface
{
}
16 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Exception/QueryParserException.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Exception;
12 | 
13 | use ZendSearch\Lucene\Exception;
14 | 
/**
 * Special exception type, which may be used to intercept wrong user input.
 *
 * Adds nothing to its parent class beyond implementing the search
 * exception marker interface.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 */
class QueryParserException extends Exception\UnexpectedValueException implements ExceptionInterface
{
}
26 | 
27 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Highlighter/DefaultHighlighter.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Highlighter;
12 | 
13 | use ZendSearch\Lucene\Document;
14 | 
/**
 * Highlighter that cycles through a fixed palette of background colors.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 */
class DefaultHighlighter implements HighlighterInterface
{
    /**
     * Palette of colors used for text highlighting
     *
     * @var array
     */
    protected $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
                                        '#ff8888', '#88ff88', '#8888ff',
                                        '#88dddd', '#dd88dd', '#dddd88',
                                        '#aaddff', '#aaffdd', '#ddaaff',
                                        '#ddffaa', '#ffaadd', '#ffddaa');

    /**
     * Position in the palette of the color the next highlight() call will use
     *
     * Advanced (with wrap-around) on every highlight() call, so terms matching
     * different queries are highlighted using different colors.
     *
     * @var integer
     */
    protected $_currentColorIndex = 0;

    /**
     * HTML document being highlighted
     *
     * @var \ZendSearch\Lucene\Document\HTML
     */
    protected $_doc;

    /**
     * Set document for highlighting.
     *
     * @param \ZendSearch\Lucene\Document\HTML $document
     */
    public function setDocument(Document\HTML $document)
    {
        $this->_doc = $document;
    }

    /**
     * Get document for highlighting.
     *
     * @return \ZendSearch\Lucene\Document\HTML $document
     */
    public function getDocument()
    {
        return $this->_doc;
    }

    /**
     * Highlight specified words with the current palette color, then move on
     * to the next color.
     *
     * @param string|array $words  Words to highlight. They could be organized using the array or string.
     */
    public function highlight($words)
    {
        $position = $this->_currentColorIndex;
        $color    = $this->_highlightColors[$position];

        // Wrap around to the first color once the palette is exhausted.
        $this->_currentColorIndex = ($position + 1) % count($this->_highlightColors);

        $this->_doc->highlight($words, $color);
    }
}
82 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Highlighter/HighlighterInterface.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Highlighter;
12 | 
13 | use ZendSearch\Lucene\Document;
14 | 
/**
 * Contract for objects that highlight words inside an HTML document.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 */
interface HighlighterInterface
{
    /**
     * Set the document to be highlighted.
     *
     * @param \ZendSearch\Lucene\Document\HTML $document
     */
    public function setDocument(Document\HTML $document);

    /**
     * Get the document to be highlighted.
     *
     * @return \ZendSearch\Lucene\Document\HTML $document
     */
    public function getDocument();

    /**
     * Highlight the specified words (the method is invoked once per subquery).
     *
     * @param string|array $words  Words to highlight. They could be organized using the array or string.
     */
    public function highlight($words);
}
43 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Query/AbstractQuery.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Query;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Document;
 15 | use ZendSearch\Lucene\Search\Highlighter\DefaultHighlighter;
 16 | use ZendSearch\Lucene\Search\Highlighter\HighlighterInterface as Highlighter;
 17 | 
 18 | /**
 19 |  * @category   Zend
 20 |  * @package    Zend_Search_Lucene
 21 |  * @subpackage Search
 22 |  */
 23 | abstract class AbstractQuery
 24 | {
 25 |     /**
 26 |      * query boost factor
 27 |      *
 28 |      * @var float
 29 |      */
 30 |     private $_boost = 1;
 31 | 
 32 |     /**
 33 |      * AbstractQuery weight
 34 |      *
 35 |      * @var \ZendSearch\Lucene\Search\Weight\AbstractWeight
 36 |      */
 37 |     protected $_weight = null;
 38 | 
 39 |     /**
 40 |      * Gets the boost for this clause.  Documents matching
 41 |      * this clause will (in addition to the normal weightings) have their score
 42 |      * multiplied by boost.   The boost is 1.0 by default.
 43 |      *
 44 |      * @return float
 45 |      */
 46 |     public function getBoost()
 47 |     {
 48 |         return $this->_boost;
 49 |     }
 50 | 
 51 |     /**
 52 |      * Sets the boost for this query clause to $boost.
 53 |      *
 54 |      * @param float $boost
 55 |      */
 56 |     public function setBoost($boost)
 57 |     {
 58 |         $this->_boost = $boost;
 59 |     }
 60 | 
 61 |     /**
 62 |      * Score specified document
 63 |      *
 64 |      * @param integer $docId
 65 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 66 |      * @return float
 67 |      */
 68 |     abstract public function score($docId, Lucene\SearchIndexInterface $reader);
 69 | 
 70 |     /**
 71 |      * Get document ids likely matching the query
 72 |      *
 73 |      * It's an array with document ids as keys (performance considerations)
 74 |      *
 75 |      * @return array
 76 |      */
 77 |     abstract public function matchedDocs();
 78 | 
 79 |     /**
 80 |      * Execute query in context of index reader
 81 |      * It also initializes necessary internal structures
 82 |      *
 83 |      * AbstractQuery specific implementation
 84 |      *
 85 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 86 |      * @param \ZendSearch\Lucene\Index\DocsFilter|null $docsFilter
 87 |      */
 88 |     abstract public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null);
 89 | 
 90 |     /**
 91 |      * Constructs an appropriate Weight implementation for this query.
 92 |      *
 93 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 94 |      * @return \ZendSearch\Lucene\Search\Weight\AbstractWeight
 95 |      */
 96 |     abstract public function createWeight(Lucene\SearchIndexInterface $reader);
 97 | 
 98 |     /**
 99 |      * Constructs an initializes a Weight for a _top-level_query_.
100 |      *
101 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
102 |      */
103 |     protected function _initWeight(Lucene\SearchIndexInterface $reader)
104 |     {
105 |         // Check, that it's a top-level query and query weight is not initialized yet.
106 |         if ($this->_weight !== null) {
107 |             return $this->_weight;
108 |         }
109 | 
110 |         $this->createWeight($reader);
111 |         $sum = $this->_weight->sumOfSquaredWeights();
112 |         $queryNorm = $reader->getSimilarity()->queryNorm($sum);
113 |         $this->_weight->normalize($queryNorm);
114 |     }
115 | 
116 |     /**
117 |      * Re-write query into primitive queries in the context of specified index
118 |      *
119 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
120 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
121 |      */
122 |     abstract public function rewrite(Lucene\SearchIndexInterface $index);
123 | 
124 |     /**
125 |      * Optimize query in the context of specified index
126 |      *
127 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
128 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
129 |      */
130 |     abstract public function optimize(Lucene\SearchIndexInterface $index);
131 | 
132 |     /**
133 |      * Reset query, so it can be reused within other queries or
134 |      * with other indeces
135 |      */
136 |     public function reset()
137 |     {
138 |         $this->_weight = null;
139 |     }
140 | 
141 | 
142 |     /**
143 |      * Print a query
144 |      *
145 |      * @return string
146 |      */
147 |     abstract public function __toString();
148 | 
149 |     /**
150 |      * Return query terms
151 |      *
152 |      * @return array
153 |      */
154 |     abstract public function getQueryTerms();
155 | 
156 |     /**
157 |      * AbstractQuery specific matches highlighting
158 |      *
159 |      * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
160 |      */
161 |     abstract protected function _highlightMatches(Highlighter $highlighter);
162 | 
163 |     /**
164 |      * Highlight matches in $inputHTML
165 |      *
166 |      * @param string $inputHTML
167 |      * @param string  $defaultEncoding   HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
168 |      * @param Highlighter|null $highlighter
169 |      * @return string
170 |      */
171 |     public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
172 |     {
173 |         if ($highlighter === null) {
174 |             $highlighter = new DefaultHighlighter();
175 |         }
176 | 
177 |         $doc = Document\HTML::loadHTML($inputHTML, false, $defaultEncoding);
178 |         $highlighter->setDocument($doc);
179 | 
180 |         $this->_highlightMatches($highlighter);
181 | 
182 |         return $doc->getHTML();
183 |     }
184 | 
185 |     /**
186 |      * Highlight matches in $inputHTMLFragment and return it (without HTML header and body tag)
187 |      *
188 |      * @param string $inputHTMLFragment
189 |      * @param string  $encoding   Input HTML string encoding
190 |      * @param Highlighter|null $highlighter
191 |      * @return string
192 |      */
193 |     public function htmlFragmentHighlightMatches($inputHTMLFragment, $encoding = 'UTF-8', $highlighter = null)
194 |     {
195 |         if ($highlighter === null) {
196 |             $highlighter = new DefaultHighlighter();
197 |         }
198 | 
199 |         $inputHTML = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
200 |                    . iconv($encoding, 'UTF-8//IGNORE', $inputHTMLFragment) . '</body></html>';
201 | 
202 |         $doc = Document\HTML::loadHTML($inputHTML);
203 |         $highlighter->setDocument($doc);
204 | 
205 |         $this->_highlightMatches($highlighter);
206 | 
207 |         return $doc->getHTMLBody();
208 |     }
209 | }
210 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Query/EmptyResult.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Query;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Search\Highlighter\HighlighterInterface as Highlighter;
 15 | use ZendSearch\Lucene\Search\Weight;
 16 | 
 17 | /**
 18 |  * @category   Zend
 19 |  * @package    Zend_Search_Lucene
 20 |  * @subpackage Search
 21 |  */
 22 | class EmptyResult extends AbstractQuery
 23 | {
 24 |     /**
 25 |      * Re-write query into primitive queries in the context of specified index
 26 |      *
 27 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 28 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 29 |      */
 30 |     public function rewrite(Lucene\SearchIndexInterface $index)
 31 |     {
 32 |         return $this;
 33 |     }
 34 | 
 35 |     /**
 36 |      * Optimize query in the context of specified index
 37 |      *
 38 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 39 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 40 |      */
 41 |     public function optimize(Lucene\SearchIndexInterface $index)
 42 |     {
 43 |         // "EmptyResult" query is a primitive query and don't need to be optimized
 44 |         return $this;
 45 |     }
 46 | 
 47 |     /**
 48 |      * Constructs an appropriate Weight implementation for this query.
 49 |      *
 50 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 51 |      * @return \ZendSearch\Lucene\Search\Weight\EmptyResultWeight
 52 |      */
 53 |     public function createWeight(Lucene\SearchIndexInterface $reader)
 54 |     {
 55 |         return new Weight\EmptyResultWeight();
 56 |     }
 57 | 
 58 |     /**
 59 |      * Execute query in context of index reader
 60 |      * It also initializes necessary internal structures
 61 |      *
 62 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 63 |      * @param \ZendSearch\Lucene\Index\DocsFilter|null $docsFilter
 64 |      */
 65 |     public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null)
 66 |     {
 67 |         // Do nothing
 68 |     }
 69 | 
 70 |     /**
 71 |      * Get document ids likely matching the query
 72 |      *
 73 |      * It's an array with document ids as keys (performance considerations)
 74 |      *
 75 |      * @return array
 76 |      */
 77 |     public function matchedDocs()
 78 |     {
 79 |         return array();
 80 |     }
 81 | 
 82 |     /**
 83 |      * Score specified document
 84 |      *
 85 |      * @param integer $docId
 86 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 87 |      * @return float
 88 |      */
 89 |     public function score($docId, Lucene\SearchIndexInterface $reader)
 90 |     {
 91 |         return 0;
 92 |     }
 93 | 
 94 |     /**
 95 |      * Return query terms
 96 |      *
 97 |      * @return array
 98 |      */
 99 |     public function getQueryTerms()
100 |     {
101 |         return array();
102 |     }
103 | 
104 |     /**
105 |      * Query specific matches highlighting
106 |      *
107 |      * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
108 |      */
109 |     protected function _highlightMatches(Highlighter $highlighter)
110 |     {
111 |         // Do nothing
112 |     }
113 | 
114 |     /**
115 |      * Print a query
116 |      *
117 |      * @return string
118 |      */
119 |     public function __toString()
120 |     {
121 |         return '<EmptyQuery>';
122 |     }
123 | }
124 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Query/Insignificant.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Query;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Search\Highlighter\HighlighterInterface as Highlighter;
 15 | use ZendSearch\Lucene\Search\Weight;
 16 | 
 17 | /**
 18 |  * The insignificant query returns empty result, but doesn't limit result set as a part of other queries
 19 |  *
 20 |  * @category   Zend
 21 |  * @package    Zend_Search_Lucene
 22 |  * @subpackage Search
 23 |  */
 24 | class Insignificant extends AbstractQuery
 25 | {
 26 |     /**
 27 |      * Re-write query into primitive queries in the context of specified index
 28 |      *
 29 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 30 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 31 |      */
 32 |     public function rewrite(Lucene\SearchIndexInterface $index)
 33 |     {
 34 |         return $this;
 35 |     }
 36 | 
 37 |     /**
 38 |      * Optimize query in the context of specified index
 39 |      *
 40 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 41 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 42 |      */
 43 |     public function optimize(Lucene\SearchIndexInterface $index)
 44 |     {
 45 |         return $this;
 46 |     }
 47 | 
 48 |     /**
 49 |      * Constructs an appropriate Weight implementation for this query.
 50 |      *
 51 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 52 |      * @return \ZendSearch\Lucene\Search\Weight\EmptyResultWeight
 53 |      */
 54 |     public function createWeight(Lucene\SearchIndexInterface $reader)
 55 |     {
 56 |         return new Weight\EmptyResultWeight();
 57 |     }
 58 | 
 59 |     /**
 60 |      * Execute query in context of index reader
 61 |      * It also initializes necessary internal structures
 62 |      *
 63 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 64 |      * @param \ZendSearch\Lucene\Index\DocsFilter|null $docsFilter
 65 |      */
 66 |     public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null)
 67 |     {
 68 |         // Do nothing
 69 |     }
 70 | 
 71 |     /**
 72 |      * Get document ids likely matching the query
 73 |      *
 74 |      * It's an array with document ids as keys (performance considerations)
 75 |      *
 76 |      * @return array
 77 |      */
 78 |     public function matchedDocs()
 79 |     {
 80 |         return array();
 81 |     }
 82 | 
 83 |     /**
 84 |      * Score specified document
 85 |      *
 86 |      * @param integer $docId
 87 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 88 |      * @return float
 89 |      */
 90 |     public function score($docId, Lucene\SearchIndexInterface $reader)
 91 |     {
 92 |         return 0;
 93 |     }
 94 | 
 95 |     /**
 96 |      * Return query terms
 97 |      *
 98 |      * @return array
 99 |      */
100 |     public function getQueryTerms()
101 |     {
102 |         return array();
103 |     }
104 | 
105 |     /**
106 |      * Query specific matches highlighting
107 |      *
108 |      * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
109 |      */
110 |     protected function _highlightMatches(Highlighter $highlighter)
111 |     {
112 |         // Do nothing
113 |     }
114 | 
115 |     /**
116 |      * Print a query
117 |      *
118 |      * @return string
119 |      */
120 |     public function __toString()
121 |     {
122 |         return '<InsignificantQuery>';
123 |     }
124 | }
125 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Query/Preprocessing/AbstractPreprocessing.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Query\Preprocessing;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Exception\UnsupportedMethodCallException;
 15 | use ZendSearch\Lucene\Search\Query;
 16 | 
 17 | /**
 18 |  * It's an internal abstract class intended to finalize ase a query processing after query parsing.
 19 |  * This type of query is not actually involved into query execution.
 20 |  *
 21 |  * @category   Zend
 22 |  * @package    Zend_Search_Lucene
 23 |  * @subpackage Search
 24 |  * @internal
 25 |  */
 26 | abstract class AbstractPreprocessing extends Query\AbstractQuery
 27 | {
 28 |     /**
 29 |      * Matched terms.
 30 |      *
 31 |      * Matched terms list.
 32 |      * It's filled during rewrite operation and may be used for search result highlighting
 33 |      *
 34 |      * Array of Zend_Search_Lucene_Index_Term objects
 35 |      *
 36 |      * @var array
 37 |      */
 38 |     protected $_matches = null;
 39 | 
 40 |     /**
 41 |      * Optimize query in the context of specified index
 42 |      *
 43 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 44 |      * @throws \ZendSearch\Lucene\Exception\UnsupportedMethodCallException
 45 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 46 |      */
 47 |     public function optimize(Lucene\SearchIndexInterface $index)
 48 |     {
 49 |         throw new UnsupportedMethodCallException('This query is not intended to be executed.');
 50 |     }
 51 | 
 52 |     /**
 53 |      * Constructs an appropriate Weight implementation for this query.
 54 |      *
 55 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 56 |      * @throws \ZendSearch\Lucene\Exception\UnsupportedMethodCallException
 57 |      */
 58 |     public function createWeight(Lucene\SearchIndexInterface $reader)
 59 |     {
 60 |         throw new UnsupportedMethodCallException('This query is not intended to be executed.');
 61 |     }
 62 | 
 63 |     /**
 64 |      * Execute query in context of index reader
 65 |      * It also initializes necessary internal structures
 66 |      *
 67 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 68 |      * @param \ZendSearch\Lucene\Index\DocsFilter|null $docsFilter
 69 |      * @throws \ZendSearch\Lucene\Exception\UnsupportedMethodCallException
 70 |      */
 71 |     public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null)
 72 |     {
 73 |         throw new UnsupportedMethodCallException('This query is not intended to be executed.');
 74 |     }
 75 | 
 76 |     /**
 77 |      * Get document ids likely matching the query
 78 |      *
 79 |      * It's an array with document ids as keys (performance considerations)
 80 |      *
 81 |      * @throws \ZendSearch\Lucene\Exception\UnsupportedMethodCallException
 82 |      * @return array
 83 |      */
 84 |     public function matchedDocs()
 85 |     {
 86 |         throw new UnsupportedMethodCallException('This query is not intended to be executed.');
 87 |     }
 88 | 
 89 |     /**
 90 |      * Score specified document
 91 |      *
 92 |      * @param integer $docId
 93 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
 94 |      * @throws \ZendSearch\Lucene\Exception\UnsupportedMethodCallException
 95 |      * @return float
 96 |      */
 97 |     public function score($docId, Lucene\SearchIndexInterface $reader)
 98 |     {
 99 |         throw new UnsupportedMethodCallException('This query is not intended to be executed.');
100 |     }
101 | 
102 |     /**
103 |      * Return query terms
104 |      *
105 |      * @throws \ZendSearch\Lucene\Exception\UnsupportedMethodCallException
106 |      * @return array
107 |      */
108 |     public function getQueryTerms()
109 |     {
110 |         throw new UnsupportedMethodCallException('Rewrite operation has to be done before retrieving query terms.');
111 |     }
112 | }
113 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Query/Preprocessing/Phrase.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Query\Preprocessing;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Analysis\Analyzer\Analyzer;
 15 | use ZendSearch\Lucene\Analysis\Analyzer\AnalyzerInterface;
 16 | use ZendSearch\Lucene\Index;
 17 | use ZendSearch\Lucene\Search\Highlighter\HighlighterInterface as Highlighter;
 18 | use ZendSearch\Lucene\Search\Query;
 19 | 
 20 | /**
 21 |  * It's an internal abstract class intended to finalize ase a query processing after query parsing.
 22 |  * This type of query is not actually involved into query execution.
 23 |  *
 24 |  * @category   Zend
 25 |  * @package    Zend_Search_Lucene
 26 |  * @subpackage Search
 27 |  * @internal
 28 |  */
 29 | class Phrase extends AbstractPreprocessing
 30 | {
 31 |     /**
 32 |      * Phrase to find.
 33 |      *
 34 |      * @var string
 35 |      */
 36 |     private $_phrase;
 37 | 
 38 |     /**
 39 |      * Phrase encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
 40 |      *
 41 |      * @var string
 42 |      */
 43 |     private $_phraseEncoding;
 44 | 
 45 | 
 46 |     /**
 47 |      * Field name.
 48 |      *
 49 |      * @var string
 50 |      */
 51 |     private $_field;
 52 | 
 53 |     /**
 54 |      * Sets the number of other words permitted between words in query phrase.
 55 |      * If zero, then this is an exact phrase search.  For larger values this works
 56 |      * like a WITHIN or NEAR operator.
 57 |      *
 58 |      * The slop is in fact an edit-distance, where the units correspond to
 59 |      * moves of terms in the query phrase out of position.  For example, to switch
 60 |      * the order of two words requires two moves (the first move places the words
 61 |      * atop one another), so to permit re-orderings of phrases, the slop must be
 62 |      * at least two.
 63 |      * More exact matches are scored higher than sloppier matches, thus search
 64 |      * results are sorted by exactness.
 65 |      *
 66 |      * The slop is zero by default, requiring exact matches.
 67 |      *
 68 |      * @var integer
 69 |      */
 70 |     private $_slop;
 71 | 
 72 |     /**
 73 |      * Class constructor.  Create a new preprocessing object for prase query.
 74 |      *
 75 |      * @param string $phrase          Phrase to search.
 76 |      * @param string $phraseEncoding  Phrase encoding.
 77 |      * @param string $fieldName       Field name.
 78 |      */
 79 |     public function __construct($phrase, $phraseEncoding, $fieldName)
 80 |     {
 81 |         $this->_phrase         = $phrase;
 82 |         $this->_phraseEncoding = $phraseEncoding;
 83 |         $this->_field          = $fieldName;
 84 |     }
 85 | 
 86 |     /**
 87 |      * Set slop
 88 |      *
 89 |      * @param integer $slop
 90 |      */
 91 |     public function setSlop($slop)
 92 |     {
 93 |         $this->_slop = $slop;
 94 |     }
 95 | 
 96 | 
 97 |     /**
 98 |      * Get slop
 99 |      *
100 |      * @return integer
101 |      */
102 |     public function getSlop()
103 |     {
104 |         return $this->_slop;
105 |     }
106 | 
107 |     /**
108 |      * Re-write query into primitive queries in the context of specified index
109 |      *
110 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
111 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
112 |      */
113 |     public function rewrite(Lucene\SearchIndexInterface $index)
114 |     {
115 | // Allow to use wildcards within phrases
116 | // They are either removed by text analyzer or used as a part of keyword for keyword fields
117 | //
118 | //        if (strpos($this->_phrase, '?') !== false || strpos($this->_phrase, '*') !== false) {
119 | //            require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
120 | //            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
121 | //        }
122 | 
123 |         // Split query into subqueries if field name is not specified
124 |         if ($this->_field === null) {
125 |             $query = new Query\Boolean();
126 |             $query->setBoost($this->getBoost());
127 | 
128 |             if (Lucene\Lucene::getDefaultSearchField() === null) {
129 |                 $searchFields = $index->getFieldNames(true);
130 |             } else {
131 |                 $searchFields = array(Lucene\Lucene::getDefaultSearchField());
132 |             }
133 | 
134 |             foreach ($searchFields as $fieldName) {
135 |                 $subquery = new Phrase($this->_phrase,
136 |                                        $this->_phraseEncoding,
137 |                                        $fieldName);
138 |                 $subquery->setSlop($this->getSlop());
139 | 
140 |                 $query->addSubquery($subquery->rewrite($index));
141 |             }
142 | 
143 |             $this->_matches = $query->getQueryTerms();
144 |             return $query;
145 |         }
146 | 
147 |         // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
148 |         // encoding is not used since we expect binary matching
149 |         $term = new Index\Term($this->_phrase, $this->_field);
150 |         if ($index->hasTerm($term)) {
151 |             $query = new Query\Term($term);
152 |             $query->setBoost($this->getBoost());
153 | 
154 |             $this->_matches = $query->getQueryTerms();
155 |             return $query;
156 |         }
157 | 
158 | 
159 |         // tokenize phrase using current analyzer and process it as a phrase query
160 |         $tokens = Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);
161 | 
162 |         if (count($tokens) == 0) {
163 |             $this->_matches = array();
164 |             return new Query\Insignificant();
165 |         }
166 | 
167 |         if (count($tokens) == 1) {
168 |             $term  = new Index\Term($tokens[0]->getTermText(), $this->_field);
169 |             $query = new Query\Term($term);
170 |             $query->setBoost($this->getBoost());
171 | 
172 |             $this->_matches = $query->getQueryTerms();
173 |             return $query;
174 |         }
175 | 
176 |         //It's non-trivial phrase query
177 |         $position = -1;
178 |         $query = new Query\Phrase();
179 |         foreach ($tokens as $token) {
180 |             $position += $token->getPositionIncrement();
181 |             $term = new Index\Term($token->getTermText(), $this->_field);
182 |             $query->addTerm($term, $position);
183 |             $query->setSlop($this->getSlop());
184 |         }
185 |         $this->_matches = $query->getQueryTerms();
186 |         return $query;
187 |     }
188 | 
189 |     /**
190 |      * Query specific matches highlighting
191 |      *
192 |      * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
193 |      */
194 |     protected function _highlightMatches(Highlighter $highlighter)
195 |     {
196 |         /** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */
197 | 
198 |         /** Skip exact term matching recognition, keyword fields highlighting is not supported */
199 | 
200 |         /** Skip wildcard queries recognition. Supported wildcards are removed by text analyzer */
201 | 
202 | 
203 |         // tokenize phrase using current analyzer and process it as a phrase query
204 |         $tokens = Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);
205 | 
206 |         if (count($tokens) == 0) {
207 |             // Do nothing
208 |             return;
209 |         }
210 | 
211 |         if (count($tokens) == 1) {
212 |             $highlighter->highlight($tokens[0]->getTermText());
213 |             return;
214 |         }
215 | 
216 |         //It's non-trivial phrase query
217 |         $words = array();
218 |         foreach ($tokens as $token) {
219 |             $words[] = $token->getTermText();
220 |         }
221 |         $highlighter->highlight($words);
222 |     }
223 | 
224 |     /**
225 |      * Print a query
226 |      *
227 |      * @return string
228 |      */
229 |     public function __toString()
230 |     {
231 |         // It's used only for query visualisation, so we don't care about characters escaping
232 |         if ($this->_field !== null) {
233 |             $query = $this->_field . ':';
234 |         } else {
235 |             $query = '';
236 |         }
237 | 
238 |         $query .= '"' . $this->_phrase . '"';
239 | 
240 |         if ($this->_slop != 0) {
241 |             $query .= '~' . $this->_slop;
242 |         }
243 | 
244 |         if ($this->getBoost() != 1) {
245 |             $query .= '^' . round($this->getBoost(), 4);
246 |         }
247 | 
248 |         return $query;
249 |     }
250 | }
251 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Query/Term.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Query;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Index;
 15 | use ZendSearch\Lucene\Search\Highlighter\HighlighterInterface as Highlighter;
 16 | use ZendSearch\Lucene\Search\Weight;
 17 | 
 18 | /**
 19 |  * @category   Zend
 20 |  * @package    Zend_Search_Lucene
 21 |  * @subpackage Search
 22 |  */
 23 | class Term extends AbstractQuery
 24 | {
 25 |     /**
 26 |      * Term to find.
 27 |      *
 28 |      * @var \ZendSearch\Lucene\Index\Term
 29 |      */
 30 |     private $_term;
 31 | 
 32 |     /**
 33 |      * Documents vector.
 34 |      *
 35 |      * @var array
 36 |      */
 37 |     private $_docVector = null;
 38 | 
 39 |     /**
 40 |      * Term freqs vector.
 41 |      * array(docId => freq, ...)
 42 |      *
 43 |      * @var array
 44 |      */
 45 |     private $_termFreqs;
 46 | 
 47 | 
 48 |     /**
 49 |      * Zend_Search_Lucene_Search_Query_Term constructor
 50 |      *
 51 |      * @param \ZendSearch\Lucene\Index\Term $term
 52 |      * @param boolean $sign
 53 |      */
 54 |     public function __construct(Index\Term $term)
 55 |     {
 56 |         $this->_term = $term;
 57 |     }
 58 | 
 59 |     /**
 60 |      * Re-write query into primitive queries in the context of specified index
 61 |      *
 62 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 63 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 64 |      */
 65 |     public function rewrite(Lucene\SearchIndexInterface $index)
 66 |     {
 67 |         if ($this->_term->field != null) {
 68 |             return $this;
 69 |         } else {
 70 |             $query = new MultiTerm();
 71 |             $query->setBoost($this->getBoost());
 72 | 
 73 |             foreach ($index->getFieldNames(true) as $fieldName) {
 74 |                 $term = new Index\Term($this->_term->text, $fieldName);
 75 | 
 76 |                 $query->addTerm($term);
 77 |             }
 78 | 
 79 |             return $query->rewrite($index);
 80 |         }
 81 |     }
 82 | 
 83 |     /**
 84 |      * Optimize query in the context of specified index
 85 |      *
 86 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 87 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 88 |      */
 89 |     public function optimize(Lucene\SearchIndexInterface $index)
 90 |     {
 91 |         // Check, that index contains specified term
 92 |         if (!$index->hasTerm($this->_term)) {
 93 |             return new EmptyResult();
 94 |         }
 95 | 
 96 |         return $this;
 97 |     }
 98 | 
 99 | 
100 |     /**
101 |      * Constructs an appropriate Weight implementation for this query.
102 |      *
103 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
104 |      * @return \ZendSearch\Lucene\Search\Weight\Term
105 |      */
106 |     public function createWeight(Lucene\SearchIndexInterface $reader)
107 |     {
108 |         $this->_weight = new Weight\Term($this->_term, $this, $reader);
109 |         return $this->_weight;
110 |     }
111 | 
112 |     /**
113 |      * Execute query in context of index reader
114 |      * It also initializes necessary internal structures
115 |      *
116 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
117 |      * @param \ZendSearch\Lucene\Index\DocsFilter|null $docsFilter
118 |      */
119 |     public function execute(Lucene\SearchIndexInterface $reader, $docsFilter = null)
120 |     {
121 |         $this->_docVector = array_flip($reader->termDocs($this->_term, $docsFilter));
122 |         $this->_termFreqs = $reader->termFreqs($this->_term, $docsFilter);
123 | 
124 |         // Initialize weight if it's not done yet
125 |         $this->_initWeight($reader);
126 |     }
127 | 
128 |     /**
129 |      * Get document ids likely matching the query
130 |      *
131 |      * It's an array with document ids as keys (performance considerations)
132 |      *
133 |      * @return array
134 |      */
135 |     public function matchedDocs()
136 |     {
137 |         return $this->_docVector;
138 |     }
139 | 
140 |     /**
141 |      * Score specified document
142 |      *
143 |      * @param integer $docId
144 |      * @param \ZendSearch\Lucene\SearchIndexInterface $reader
145 |      * @return float
146 |      */
147 |     public function score($docId, Lucene\SearchIndexInterface $reader)
148 |     {
149 |         if (isset($this->_docVector[$docId])) {
150 |             return $reader->getSimilarity()->tf($this->_termFreqs[$docId]) *
151 |                    $this->_weight->getValue() *
152 |                    $reader->norm($docId, $this->_term->field) *
153 |                    $this->getBoost();
154 |         } else {
155 |             return 0;
156 |         }
157 |     }
158 | 
159 |     /**
160 |      * Return query terms
161 |      *
162 |      * @return array
163 |      */
164 |     public function getQueryTerms()
165 |     {
166 |         return array($this->_term);
167 |     }
168 | 
169 |     /**
170 |      * Return query term
171 |      *
172 |      * @return \ZendSearch\Lucene\Index\Term
173 |      */
174 |     public function getTerm()
175 |     {
176 |         return $this->_term;
177 |     }
178 | 
179 |     /**
180 |      * Query specific matches highlighting
181 |      *
182 |      * @param Highlighter $highlighter  Highlighter object (also contains doc for highlighting)
183 |      */
184 |     protected function _highlightMatches(Highlighter $highlighter)
185 |     {
186 |         $highlighter->highlight($this->_term->text);
187 |     }
188 | 
189 |     /**
190 |      * Print a query
191 |      *
192 |      * @return string
193 |      */
194 |     public function __toString()
195 |     {
196 |         // It's used only for query visualisation, so we don't care about characters escaping
197 |         if ($this->_term->field !== null) {
198 |             $query = $this->_term->field . ':';
199 |         } else {
200 |             $query = '';
201 |         }
202 | 
203 |         $query .= $this->_term->text;
204 | 
205 |         if ($this->getBoost() != 1) {
206 |             $query = $query . '^' . round($this->getBoost(), 4);
207 |         }
208 | 
209 |         return $query;
210 |     }
211 | }
212 | 
213 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/QueryEntry/AbstractQueryEntry.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\QueryEntry;
12 | 
/**
 * Base class of query entries, which are transformed into subqueries with getQuery().
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 */
abstract class AbstractQueryEntry
{
    /**
     * Boost factor of the query entry.
     * It starts at 1.0 and is multiplied by each boost() call.
     *
     * @var float
     */
    protected $_boost = 1.0;


    /**
     * Process the '~' modifier
     *
     * @param mixed $parameter  Modifier parameter (null if it's omitted)
     */
    abstract public function processFuzzyProximityModifier($parameter = null);


    /**
     * Transform the entry into a subquery
     *
     * @param string $encoding
     * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
     */
    abstract public function getQuery($encoding);

    /**
     * Boost the query entry
     *
     * Factors of consecutive calls are accumulated by multiplication.
     *
     * @param float $boostFactor
     */
    public function boost($boostFactor)
    {
        $this->_boost = $this->_boost * $boostFactor;
    }
}
54 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/QueryEntry/Phrase.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\QueryEntry;
 12 | 
 13 | /**
 14 |  * @category   Zend
 15 |  * @package    Zend_Search_Lucene
 16 |  * @subpackage Search
 17 |  */
 18 | class Phrase extends AbstractQueryEntry
 19 | {
 20 |     /**
 21 |      * Phrase value
 22 |      *
 23 |      * @var string
 24 |      */
 25 |     private $_phrase;
 26 | 
 27 |     /**
 28 |      * Field
 29 |      *
 30 |      * @var string|null
 31 |      */
 32 |     private $_field;
 33 | 
 34 | 
 35 |     /**
 36 |      * Proximity phrase query
 37 |      *
 38 |      * @var boolean
 39 |      */
 40 |     private $_proximityQuery = false;
 41 | 
 42 |     /**
 43 |      * Words distance, used for proximiti queries
 44 |      *
 45 |      * @var integer
 46 |      */
 47 |     private $_wordsDistance = 0;
 48 | 
 49 | 
 50 |     /**
 51 |      * Object constractor
 52 |      *
 53 |      * @param string $phrase
 54 |      * @param string $field
 55 |      */
 56 |     public function __construct($phrase, $field)
 57 |     {
 58 |         $this->_phrase = $phrase;
 59 |         $this->_field  = $field;
 60 |     }
 61 | 
 62 |     /**
 63 |      * Process modifier ('~')
 64 |      *
 65 |      * @param mixed $parameter
 66 |      */
 67 |     public function processFuzzyProximityModifier($parameter = null)
 68 |     {
 69 |         $this->_proximityQuery = true;
 70 | 
 71 |         if ($parameter !== null) {
 72 |             $this->_wordsDistance = $parameter;
 73 |         }
 74 |     }
 75 | 
 76 |     /**
 77 |      * Transform entry to a subquery
 78 |      *
 79 |      * @param string $encoding
 80 |      * @throws \ZendSearch\Lucene\Search\Exception\QueryParserException
 81 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 82 |      */
 83 |     public function getQuery($encoding)
 84 |     {
 85 |         $query = new \ZendSearch\Lucene\Search\Query\Preprocessing\Phrase($this->_phrase,
 86 |                                                                           $encoding,
 87 |                                                                           ($this->_field !== null)?
 88 |                                                                               iconv($encoding, 'UTF-8', $this->_field) :
 89 |                                                                               null);
 90 | 
 91 |         if ($this->_proximityQuery) {
 92 |             $query->setSlop($this->_wordsDistance);
 93 |         }
 94 | 
 95 |         $query->setBoost($this->_boost);
 96 | 
 97 |         return $query;
 98 |     }
 99 | }
100 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/QueryEntry/Subquery.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\QueryEntry;
12 | 
13 | /**
14 |  * @category   Zend
15 |  * @package    Zend_Search_Lucene
16 |  * @subpackage Search
17 |  */
class Subquery extends AbstractQueryEntry
{
    /**
     * Wrapped query object returned by getQuery()
     *
     * @var \ZendSearch\Lucene\Search\Query\AbstractQuery
     */
    private $_query;

    /**
     * Object constructor
     *
     * @param \ZendSearch\Lucene\Search\Query\AbstractQuery $query
     */
    public function __construct(\ZendSearch\Lucene\Search\Query\AbstractQuery $query)
    {
        $this->_query = $query;
    }

    /**
     * Process modifier ('~')
     *
     * A subquery entry never accepts this modifier, so the call always throws.
     *
     * @param mixed $parameter
     * @throws \ZendSearch\Lucene\Search\Exception\QueryParserException
     */
    public function processFuzzyProximityModifier($parameter = null)
    {
        $message = '\'~\' sign must follow term or phrase';

        throw new \ZendSearch\Lucene\Search\Exception\QueryParserException($message);
    }


    /**
     * Transform entry to a subquery
     *
     * The entry's boost is written onto the wrapped query object, and that
     * same object (not a copy) is returned.
     *
     * @param string $encoding
     * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
     */
    public function getQuery($encoding)
    {
        $subquery = $this->_query;
        $subquery->setBoost($this->_boost);

        return $subquery;
    }
}
64 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/QueryEntry/Term.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\QueryEntry;
 12 | 
 13 | /**
 14 |  * @category   Zend
 15 |  * @package    Zend_Search_Lucene
 16 |  * @subpackage Search
 17 |  */
 18 | class Term extends AbstractQueryEntry
 19 | {
 20 |     /**
 21 |      * Term value
 22 |      *
 23 |      * @var string
 24 |      */
 25 |     private $_term;
 26 | 
 27 |     /**
 28 |      * Field
 29 |      *
 30 |      * @var string|null
 31 |      */
 32 |     private $_field;
 33 | 
 34 | 
 35 |     /**
 36 |      * Fuzzy search query
 37 |      *
 38 |      * @var boolean
 39 |      */
 40 |     private $_fuzzyQuery = false;
 41 | 
 42 |     /**
 43 |      * Similarity
 44 |      *
 45 |      * @var float
 46 |      */
 47 |     private $_similarity = 1.;
 48 | 
 49 | 
 50 |     /**
 51 |      * Object constractor
 52 |      *
 53 |      * @param string $term
 54 |      * @param string $field
 55 |      */
 56 |     public function __construct($term, $field)
 57 |     {
 58 |         $this->_term  = $term;
 59 |         $this->_field = $field;
 60 |     }
 61 | 
 62 |     /**
 63 |      * Process modifier ('~')
 64 |      *
 65 |      * @param mixed $parameter
 66 |      */
 67 |     public function processFuzzyProximityModifier($parameter = null)
 68 |     {
 69 |         $this->_fuzzyQuery = true;
 70 | 
 71 |         if ($parameter !== null) {
 72 |             $this->_similarity = $parameter;
 73 |         } else {
 74 |             $this->_similarity = \ZendSearch\Lucene\Search\Query\Fuzzy::DEFAULT_MIN_SIMILARITY;
 75 |         }
 76 |     }
 77 | 
 78 |     /**
 79 |      * Transform entry to a subquery
 80 |      *
 81 |      * @param string $encoding
 82 |      * @return \ZendSearch\Lucene\Search\Query\AbstractQuery
 83 |      * @throws \ZendSearch\Lucene\Search\Exception\QueryParserException
 84 |      */
 85 |     public function getQuery($encoding)
 86 |     {
 87 |         if ($this->_fuzzyQuery) {
 88 |             $query = new \ZendSearch\Lucene\Search\Query\Preprocessing\Fuzzy($this->_term,
 89 |                                                                              $encoding,
 90 |                                                                              ($this->_field !== null)?
 91 |                                                                                   iconv($encoding, 'UTF-8', $this->_field) :
 92 |                                                                                   null,
 93 |                                                                              $this->_similarity
 94 |                                                                              );
 95 |             $query->setBoost($this->_boost);
 96 |             return $query;
 97 |         }
 98 | 
 99 | 
100 |         $query = new \ZendSearch\Lucene\Search\Query\Preprocessing\Term($this->_term,
101 |                                                                         $encoding,
102 |                                                                         ($this->_field !== null)?
103 |                                                                               iconv($encoding, 'UTF-8', $this->_field) :
104 |                                                                               null
105 |                                                                         );
106 |         $query->setBoost($this->_boost);
107 |         return $query;
108 |     }
109 | }
110 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/QueryHit.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Document;
 15 | 
 16 | /**
 17 |  * @category   Zend
 18 |  * @package    Zend_Search_Lucene
 19 |  * @subpackage Search
 20 |  */
 21 | class QueryHit
 22 | {
 23 |     /**
 24 |      * Object handle of the index
 25 |      * @var \ZendSearch\Lucene\SearchIndexInterface
 26 |      */
 27 |     protected $_index = null;
 28 | 
 29 |     /**
 30 |      * Object handle of the document associated with this hit
 31 |      * @var \ZendSearch\Lucene\Document
 32 |      */
 33 |     protected $_document = null;
 34 | 
 35 |     /**
 36 |      * Unique hit id
 37 |      * @var integer
 38 |      */
 39 |     public $id;
 40 | 
 41 |     /**
 42 |      * Number of the document in the index
 43 |      * @var integer
 44 |      */
 45 |     public $document_id;
 46 | 
 47 |     /**
 48 |      * Score of the hit
 49 |      * @var float
 50 |      */
 51 |     public $score;
 52 | 
 53 | 
 54 |     /**
 55 |      * Constructor - pass object handle of Zend_Search_Lucene_Interface index that produced
 56 |      * the hit so the document can be retrieved easily from the hit.
 57 |      *
 58 |      * @param \ZendSearch\Lucene\SearchIndexInterface $index
 59 |      */
 60 | 
 61 |     public function __construct(Lucene\SearchIndexInterface $index)
 62 |     {
 63 |         $this->_index = $index;
 64 |     }
 65 | 
 66 |     /**
 67 |      * Magic method for checking the existence of a field
 68 |      *
 69 |      * @param string $offset
 70 |      * @return boolean TRUE if the field exists else FALSE
 71 |      */
 72 |     public function __isset($offset)
 73 |     {
 74 |         return isset($this->getDocument()->$offset);
 75 |     }
 76 | 
 77 | 
 78 |     /**
 79 |      * Convenience function for getting fields from the document
 80 |      * associated with this hit.
 81 |      *
 82 |      * @param string $offset
 83 |      * @return string
 84 |      */
 85 |     public function __get($offset)
 86 |     {
 87 |         return $this->getDocument()->getFieldValue($offset);
 88 |     }
 89 | 
 90 | 
 91 |     /**
 92 |      * Return the document object for this hit
 93 |      *
 94 |      * @return \ZendSearch\Lucene\Document
 95 |      */
 96 |     public function getDocument()
 97 |     {
 98 |         if (!$this->_document instanceof Document) {
 99 |             $this->_document = $this->_index->getDocument($this->document_id);
100 |         }
101 | 
102 |         return $this->_document;
103 |     }
104 | 
105 | 
106 |     /**
107 |      * Return the index object for this hit
108 |      *
109 |      * @return \ZendSearch\Lucene\SearchIndexInterface
110 |      */
111 |     public function getIndex()
112 |     {
113 |         return $this->_index;
114 |     }
115 | }
116 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/QueryToken.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | 
 15 | /**
 16 |  * @category   Zend
 17 |  * @package    Zend_Search_Lucene
 18 |  * @subpackage Search
 19 |  */
 20 | class QueryToken
 21 | {
 22 |     /**
 23 |      * Token types.
 24 |      */
 25 |     const TT_WORD                 = 0;  // Word
 26 |     const TT_PHRASE               = 1;  // Phrase (one or several quoted words)
 27 |     const TT_FIELD                = 2;  // Field name in 'field:word', field:<phrase> or field:(<subquery>) pairs
 28 |     const TT_FIELD_INDICATOR      = 3;  // ':'
 29 |     const TT_REQUIRED             = 4;  // '+'
 30 |     const TT_PROHIBITED           = 5;  // '-'
 31 |     const TT_FUZZY_PROX_MARK      = 6;  // '~'
 32 |     const TT_BOOSTING_MARK        = 7;  // '^'
 33 |     const TT_RANGE_INCL_START     = 8;  // '['
 34 |     const TT_RANGE_INCL_END       = 9;  // ']'
 35 |     const TT_RANGE_EXCL_START     = 10; // '{'
 36 |     const TT_RANGE_EXCL_END       = 11; // '}'
 37 |     const TT_SUBQUERY_START       = 12; // '('
 38 |     const TT_SUBQUERY_END         = 13; // ')'
 39 |     const TT_AND_LEXEME           = 14; // 'AND' or 'and'
 40 |     const TT_OR_LEXEME            = 15; // 'OR'  or 'or'
 41 |     const TT_NOT_LEXEME           = 16; // 'NOT' or 'not'
 42 |     const TT_TO_LEXEME            = 17; // 'TO'  or 'to'
 43 |     const TT_NUMBER               = 18; // Number, like: 10, 0.8, .64, ....
 44 | 
 45 | 
 46 |     /**
 47 |      * Returns all possible lexeme types.
 48 |      * It's used for syntax analyzer state machine initialization
 49 |      *
 50 |      * @return array
 51 |      */
 52 |     public static function getTypes()
 53 |     {
 54 |         return array(   self::TT_WORD,
 55 |                         self::TT_PHRASE,
 56 |                         self::TT_FIELD,
 57 |                         self::TT_FIELD_INDICATOR,
 58 |                         self::TT_REQUIRED,
 59 |                         self::TT_PROHIBITED,
 60 |                         self::TT_FUZZY_PROX_MARK,
 61 |                         self::TT_BOOSTING_MARK,
 62 |                         self::TT_RANGE_INCL_START,
 63 |                         self::TT_RANGE_INCL_END,
 64 |                         self::TT_RANGE_EXCL_START,
 65 |                         self::TT_RANGE_EXCL_END,
 66 |                         self::TT_SUBQUERY_START,
 67 |                         self::TT_SUBQUERY_END,
 68 |                         self::TT_AND_LEXEME,
 69 |                         self::TT_OR_LEXEME,
 70 |                         self::TT_NOT_LEXEME,
 71 |                         self::TT_TO_LEXEME,
 72 |                         self::TT_NUMBER
 73 |                      );
 74 |     }
 75 | 
 76 | 
 77 |     /**
 78 |      * TokenCategories
 79 |      */
 80 |     const TC_WORD           = 0;   // Word
 81 |     const TC_PHRASE         = 1;   // Phrase (one or several quoted words)
 82 |     const TC_NUMBER         = 2;   // Nubers, which are used with syntax elements. Ex. roam~0.8
 83 |     const TC_SYNTAX_ELEMENT = 3;   // +  -  ( )  [ ]  { }  !  ||  && ~ ^
 84 | 
 85 | 
 86 |     /**
 87 |      * Token type.
 88 |      *
 89 |      * @var integer
 90 |      */
 91 |     public $type;
 92 | 
 93 |     /**
 94 |      * Token text.
 95 |      *
 96 |      * @var integer
 97 |      */
 98 |     public $text;
 99 | 
100 |     /**
101 |      * Token position within query.
102 |      *
103 |      * @var integer
104 |      */
105 |     public $position;
106 | 
107 | 
108 |     /**
109 |      * IndexReader constructor needs token type and token text as a parameters.
110 |      *
111 |      * @param integer $tokenCategory
112 |      * @param string  $tokText
113 |      * @param integer $position
114 |      * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException
115 |      */
116 |     public function __construct($tokenCategory, $tokenText, $position)
117 |     {
118 |         $this->text     = $tokenText;
119 |         $this->position = $position + 1; // Start from 1
120 | 
121 |         switch ($tokenCategory) {
122 |             case self::TC_WORD:
123 |                 if (  strtolower($tokenText) == 'and') {
124 |                     $this->type = self::TT_AND_LEXEME;
125 |                 } elseif (strtolower($tokenText) == 'or') {
126 |                     $this->type = self::TT_OR_LEXEME;
127 |                 } elseif (strtolower($tokenText) == 'not') {
128 |                     $this->type = self::TT_NOT_LEXEME;
129 |                 } elseif (strtolower($tokenText) == 'to') {
130 |                     $this->type = self::TT_TO_LEXEME;
131 |                 } else {
132 |                     $this->type = self::TT_WORD;
133 |                 }
134 |                 break;
135 | 
136 |             case self::TC_PHRASE:
137 |                 $this->type = self::TT_PHRASE;
138 |                 break;
139 | 
140 |             case self::TC_NUMBER:
141 |                 $this->type = self::TT_NUMBER;
142 |                 break;
143 | 
144 |             case self::TC_SYNTAX_ELEMENT:
145 |                 switch ($tokenText) {
146 |                     case ':':
147 |                         $this->type = self::TT_FIELD_INDICATOR;
148 |                         break;
149 | 
150 |                     case '+':
151 |                         $this->type = self::TT_REQUIRED;
152 |                         break;
153 | 
154 |                     case '-':
155 |                         $this->type = self::TT_PROHIBITED;
156 |                         break;
157 | 
158 |                     case '~':
159 |                         $this->type = self::TT_FUZZY_PROX_MARK;
160 |                         break;
161 | 
162 |                     case '^':
163 |                         $this->type = self::TT_BOOSTING_MARK;
164 |                         break;
165 | 
166 |                     case '[':
167 |                         $this->type = self::TT_RANGE_INCL_START;
168 |                         break;
169 | 
170 |                     case ']':
171 |                         $this->type = self::TT_RANGE_INCL_END;
172 |                         break;
173 | 
174 |                     case '{':
175 |                         $this->type = self::TT_RANGE_EXCL_START;
176 |                         break;
177 | 
178 |                     case '}':
179 |                         $this->type = self::TT_RANGE_EXCL_END;
180 |                         break;
181 | 
182 |                     case '(':
183 |                         $this->type = self::TT_SUBQUERY_START;
184 |                         break;
185 | 
186 |                     case ')':
187 |                         $this->type = self::TT_SUBQUERY_END;
188 |                         break;
189 | 
190 |                     case '!':
191 |                         $this->type = self::TT_NOT_LEXEME;
192 |                         break;
193 | 
194 |                     case '&&':
195 |                         $this->type = self::TT_AND_LEXEME;
196 |                         break;
197 | 
198 |                     case '||':
199 |                         $this->type = self::TT_OR_LEXEME;
200 |                         break;
201 | 
202 |                     default:
203 |                         throw new Lucene\Exception\InvalidArgumentException(
204 |                             'Unrecognized query syntax lexeme: \'' . $tokenText . '\''
205 |                         );
206 |                 }
207 |                 break;
208 | 
209 |             case self::TC_NUMBER:
210 |                 $this->type = self::TT_NUMBER;
211 | 
212 |             default:
213 |                 throw new Lucene\Exception\InvalidArgumentException(
214 |                     'Unrecognized lexeme type: \'' . $tokenCategory . '\''
215 |                 );
216 |         }
217 |     }
218 | }
219 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Similarity/DefaultSimilarity.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Similarity;
12 | 
13 | use ZendSearch\Lucene\Search\Similarity\AbstractSimilarity;
14 | 
15 | /**
16 |  * @category   Zend
17 |  * @package    Zend_Search_Lucene
18 |  * @subpackage Search
19 |  */
class DefaultSimilarity extends AbstractSimilarity
{

    /**
     * Length normalization: 1/sqrt(numTerms), with the fixed value 1E10
     * returned when numTerms equals zero.
     *
     * @param string $fieldName not used by this implementation
     * @param integer $numTerms
     * @return float
     */
    public function lengthNorm($fieldName, $numTerms)
    {
        return ($numTerms == 0) ? 1E10 : 1.0 / sqrt($numTerms);
    }

    /**
     * Query normalization: 1/sqrt(sumOfSquaredWeights).
     *
     * @param float $sumOfSquaredWeights
     * @return float
     */
    public function queryNorm($sumOfSquaredWeights)
    {
        $root = sqrt($sumOfSquaredWeights);

        return 1.0 / $root;
    }

    /**
     * Term frequency factor: sqrt(freq).
     *
     * @param float $freq
     * @return float
     */
    public function tf($freq)
    {
        $factor = sqrt($freq);

        return $factor;
    }

    /**
     * Sloppy phrase frequency: 1/(distance + 1).
     *
     * @param integer $distance
     * @return float
     */
    public function sloppyFreq($distance)
    {
        $divisor = $distance + 1;

        return 1.0 / $divisor;
    }

    /**
     * Inverse document frequency: log(numDocs/(docFreq+1)) + 1.
     *
     * @param integer $docFreq
     * @param integer $numDocs
     * @return float
     */
    public function idfFreq($docFreq, $numDocs)
    {
        $ratio = $numDocs / (float)($docFreq + 1);

        return log($ratio) + 1.0;
    }

    /**
     * Coordination factor: overlap/maxOverlap.
     *
     * @param integer $overlap
     * @param integer $maxOverlap
     * @return float
     */
    public function coord($overlap, $maxOverlap)
    {
        $total = (float)$maxOverlap;

        return $overlap / $total;
    }
}
96 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Weight/AbstractWeight.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Weight;
12 | 
13 | /**
14 |  * Calculate query weights and build query scorers.
15 |  *
16 |  * An AbstractWeight is constructed by a query via Query->createWeight().
17 |  * The sumOfSquaredWeights() method is then called on the top-level
18 |  * query to compute the query normalization factor Similarity->queryNorm(float).
19 |  * This factor is then passed to normalize(float).  At this point the weighting
20 |  * is complete.
21 |  *
22 |  * @category   Zend
23 |  * @package    Zend_Search_Lucene
24 |  * @subpackage Search
25 |  */
abstract class AbstractWeight
{
    /**
     * Normalization factor.
     * Kept only for query explanation purposes; not used anywhere else.
     *
     * @var float
     */
    protected $_queryNorm;

    /**
     * Weight value
     *
     * May be initialized in sumOfSquaredWeights() or in normalize(): both
     * are invoked either in Query::_initWeight (for a top-level query) or
     * in the corresponding methods of the parent query's weights.
     *
     * @var float
     */
    protected $_value;


    /**
     * The weight for this query.
     *
     * @return float
     */
    public function getValue()
    {
        $value = $this->_value;

        return $value;
    }

    /**
     * The sum of squared weights of contained query clauses.
     *
     * @return float
     */
    abstract public function sumOfSquaredWeights();

    /**
     * Assigns the query normalization factor to this weight.
     *
     * @param float $norm
     */
    abstract public function normalize($norm);
}
72 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Weight/Boolean.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Weight;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Search\Query;
 15 | 
 16 | /**
 17 |  * @category   Zend
 18 |  * @package    Zend_Search_Lucene
 19 |  * @subpackage Search
 20 |  */
 21 | class Boolean extends AbstractWeight
 22 | {
 23 |     /**
 24 |      * IndexReader.
 25 |      *
 26 |      * @var \ZendSearch\Lucene\SearchIndexInterface
 27 |      */
 28 |     private $_reader;
 29 | 
 30 |     /**
 31 |      * The query that this concerns.
 32 |      *
 33 |      * @var \ZendSearch\Lucene\Search\Query\AbstractQuery
 34 |      */
 35 |     private $_query;
 36 | 
 37 |     /**
 38 |      * Queries weights
 39 |      * Array of Zend_Search_Lucene_Search_Weight
 40 |      *
 41 |      * @var array
 42 |      */
 43 |     private $_weights;
 44 | 
 45 | 
 46 |     /**
 47 |      * Zend_Search_Lucene_Search_Weight_Boolean constructor
 48 |      * query - the query that this concerns.
 49 |      * reader - index reader
 50 |      *
 51 |      * @param \ZendSearch\Lucene\Search\Query\AbstractQuery $query
 52 |      * @param \ZendSearch\Lucene\SearchIndexInterface    $reader
 53 |      */
 54 |     public function __construct(Query\AbstractQuery $query, Lucene\SearchIndexInterface    $reader)
 55 |     {
 56 |         $this->_query   = $query;
 57 |         $this->_reader  = $reader;
 58 |         $this->_weights = array();
 59 | 
 60 |         $signs = $query->getSigns();
 61 | 
 62 |         foreach ($query->getSubqueries() as $num => $subquery) {
 63 |             if ($signs === null || $signs[$num] === null || $signs[$num]) {
 64 |                 $this->_weights[$num] = $subquery->createWeight($reader);
 65 |             }
 66 |         }
 67 |     }
 68 | 
 69 | 
 70 |     /**
 71 |      * The weight for this query
 72 |      * Standard Weight::$_value is not used for boolean queries
 73 |      *
 74 |      * @return float
 75 |      */
 76 |     public function getValue()
 77 |     {
 78 |         return $this->_query->getBoost();
 79 |     }
 80 | 
 81 | 
 82 |     /**
 83 |      * The sum of squared weights of contained query clauses.
 84 |      *
 85 |      * @return float
 86 |      */
 87 |     public function sumOfSquaredWeights()
 88 |     {
 89 |         $sum = 0;
 90 |         foreach ($this->_weights as $weight) {
 91 |             // sum sub weights
 92 |             $sum += $weight->sumOfSquaredWeights();
 93 |         }
 94 | 
 95 |         // boost each sub-weight
 96 |         $sum *= $this->_query->getBoost() * $this->_query->getBoost();
 97 | 
 98 |         // check for empty query (like '-something -another')
 99 |         if ($sum == 0) {
100 |             $sum = 1.0;
101 |         }
102 |         return $sum;
103 |     }
104 | 
105 | 
106 |     /**
107 |      * Assigns the query normalization factor to this.
108 |      *
109 |      * @param float $queryNorm
110 |      */
111 |     public function normalize($queryNorm)
112 |     {
113 |         // incorporate boost
114 |         $queryNorm *= $this->_query->getBoost();
115 | 
116 |         foreach ($this->_weights as $weight) {
117 |             $weight->normalize($queryNorm);
118 |         }
119 |     }
120 | }
121 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Weight/EmptyResultWeight.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Weight;
12 | 
13 | /**
14 |  * @category   Zend
15 |  * @package    Zend_Search_Lucene
16 |  * @subpackage Search
17 |  */
class EmptyResultWeight extends AbstractWeight
{
    /**
     * The sum of squared weights of contained query clauses.
     * This implementation always reports the constant 1.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        $constantSum = 1;

        return $constantSum;
    }


    /**
     * Assigns the query normalization factor to this.
     * This implementation ignores the factor.
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        // intentionally empty
    }
}
40 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Weight/MultiTerm.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Weight;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Search\Query;
 15 | 
 16 | /**
 17 |  * @category   Zend
 18 |  * @package    Zend_Search_Lucene
 19 |  * @subpackage Search
 20 |  */
 21 | class MultiTerm extends AbstractWeight
 22 | {
 23 |     /**
 24 |      * Index reader handed on to the per-term weights.
 25 |      *
 26 |      * @var \ZendSearch\Lucene\SearchIndexInterface
 27 |      */
 28 |     private $_reader;
 29 | 
 30 |     /**
 31 |      * Query whose terms, signs and boost this weight is built from.
 32 |      *
 33 |      * @var \ZendSearch\Lucene\Search\Query\AbstractQuery
 34 |      */
 35 |     private $_query;
 36 | 
 37 |     /**
 38 |      * Weights of the individual query terms (Term weight objects),
 39 |      * stored under the same keys the query uses for its terms.
 40 |      *
 41 |      * @var array
 42 |      */
 43 |     private $_weights;
 44 | 
 45 | 
 46 |     /**
 47 |      * Creates a Term weight for each query term whose sign is null or truthy
 48 |      * (for every term if the query has no signs) and registers it with the query.
 49 |      *
 50 |      * @param \ZendSearch\Lucene\Search\Query\AbstractQuery $query  query that this weight concerns
 51 |      * @param \ZendSearch\Lucene\SearchIndexInterface       $reader index reader
 52 |      */
 53 |     public function __construct(Query\AbstractQuery $query, Lucene\SearchIndexInterface $reader)
 54 |     {
 55 |         $this->_weights = array();
 56 |         $this->_reader  = $reader;
 57 |         $this->_query   = $query;
 58 | 
 59 |         $termSigns = $query->getSigns();
 60 | 
 61 |         foreach ($query->getTerms() as $termId => $queryTerm) {
 62 |             // Terms carrying a non-null, falsy sign get no weight.
 63 |             if ($termSigns !== null && $termSigns[$termId] !== null && !$termSigns[$termId]) {
 64 |                 continue;
 65 |             }
 66 | 
 67 |             $termWeight = new Term($queryTerm, $query, $reader);
 68 | 
 69 |             $this->_weights[$termId] = $termWeight;
 70 |             $query->setWeight($termId, $termWeight);
 71 |         }
 72 |     }
 73 | 
 74 | 
 75 |     /**
 76 |      * Value of this weight: the boost of the query.
 77 |      * The standard Weight::$_value is not used for boolean queries.
 78 |      *
 79 |      * @return float
 80 |      */
 81 |     public function getValue()
 82 |     {
 83 |         return $this->_query->getBoost();
 84 |     }
 85 | 
 86 | 
 87 |     /**
 88 |      * Adds up the squared weights of all term weights and applies the squared query boost.
 89 |      *
 90 |      * @return float 1.0 when the boosted sum is zero (e.g. no term received a weight)
 91 |      */
 92 |     public function sumOfSquaredWeights()
 93 |     {
 94 |         $total = 0;
 95 |         foreach ($this->_weights as $termWeight) {
 96 |             $total += $termWeight->sumOfSquaredWeights();
 97 |         }
 98 | 
 99 |         // scale the accumulated sum by the squared boost
100 |         $total = $total * ($this->_query->getBoost() * $this->_query->getBoost());
101 | 
102 |         // an empty query (like '-something -another') must not yield zero
103 |         return ($total == 0) ? 1.0 : $total;
104 |     }
105 | 
106 | 
107 |     /**
108 |      * Passes the boost-adjusted normalization factor on to every term weight.
109 |      *
110 |      * @param float $queryNorm
111 |      */
112 |     public function normalize($queryNorm)
113 |     {
114 |         // fold the query boost into the normalization factor
115 |         $boostedNorm = $queryNorm * $this->_query->getBoost();
116 | 
117 |         foreach ($this->_weights as $termWeight) {
118 |             $termWeight->normalize($boostedNorm);
119 |         }
120 |     }
121 | }
122 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Weight/Phrase.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | /**
 3 |  * Zend Framework (http://framework.zend.com/)
 4 |  *
 5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
 6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
 7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
 8 |  * @package   Zend_Search
 9 |  */
10 | 
11 | namespace ZendSearch\Lucene\Search\Weight;
12 | 
13 | use ZendSearch\Lucene;
14 | use ZendSearch\Lucene\Search\Query;
15 | 
16 | /**
17 |  * @category   Zend
18 |  * @package    Zend_Search_Lucene
19 |  * @subpackage Search
20 |  */
21 | class Phrase extends AbstractWeight
22 | {
23 |     /**
24 |      * IndexReader.
25 |      *
26 |      * @var \ZendSearch\Lucene\SearchIndexInterface
27 |      */
28 |     private $_reader;
29 | 
30 |     /**
31 |      * The query that this concerns.
32 |      *
33 |      * @var \ZendSearch\Lucene\Search\Query\Phrase
34 |      */
35 |     private $_query;
36 | 
37 |     /**
38 |      * Score factor
39 |      *
40 |      * @var float
41 |      */
42 |     private $_idf;
43 | 
44 |     /**
45 |      * Query weight: idf * query boost, multiplied by the query norm in normalize().
46 |      * Declared explicitly so that it is not created as a dynamic property.
47 |      *
48 |      * @var float
49 |      */
50 |     private $_queryWeight;
51 | 
52 |     /**
53 |      * Zend_Search_Lucene_Search_Weight_Phrase constructor
54 |      *
55 |      * @param \ZendSearch\Lucene\Search\Query\Phrase $query
56 |      * @param \ZendSearch\Lucene\SearchIndexInterface      $reader
57 |      */
58 |     public function __construct(Query\Phrase $query, Lucene\SearchIndexInterface $reader)
59 |     {
60 |         $this->_query  = $query;
61 |         $this->_reader = $reader;
62 |     }
63 | 
64 |     /**
65 |      * The sum of squared weights of contained query clauses.
66 |      *
67 |      * @return float
68 |      */
69 |     public function sumOfSquaredWeights()
70 |     {
71 |         // compute idf
72 |         $this->_idf = $this->_reader->getSimilarity()->idf($this->_query->getTerms(), $this->_reader);
73 | 
74 |         // compute query weight
75 |         $this->_queryWeight = $this->_idf * $this->_query->getBoost();
76 | 
77 |         // square it
78 |         return $this->_queryWeight * $this->_queryWeight;
79 |     }
80 | 
81 | 
82 |     /**
83 |      * Assigns the query normalization factor to this.
84 |      *
85 |      * @param float $queryNorm
86 |      */
87 |     public function normalize($queryNorm)
88 |     {
89 |         $this->_queryNorm = $queryNorm;
90 | 
91 |         // normalize query weight
92 |         $this->_queryWeight *= $queryNorm;
93 | 
94 |         // idf for documents
95 |         $this->_value = $this->_queryWeight * $this->_idf;
96 |     }
97 | }
98 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Search/Weight/Term.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Search\Weight;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use ZendSearch\Lucene\Index;
 15 | use ZendSearch\Lucene\Search\Query;
 16 | 
 17 | /**
 18 |  * @category   Zend
 19 |  * @package    Zend_Search_Lucene
 20 |  * @subpackage Search
 21 |  */
 22 | class Term extends AbstractWeight
 23 | {
 24 |     /**
 25 |      * Index reader that provides the similarity used for the idf.
 26 |      *
 27 |      * @var \ZendSearch\Lucene\SearchIndexInterface
 28 |      */
 29 |     private $_reader;
 30 | 
 31 |     /**
 32 |      * Term this weight is calculated for.
 33 |      *
 34 |      * @var \ZendSearch\Lucene\Index\Term
 35 |      */
 36 |     private $_term;
 37 | 
 38 |     /**
 39 |      * Query the term belongs to; its boost enters the query weight.
 40 |      *
 41 |      * @var \ZendSearch\Lucene\Search\Query\AbstractQuery
 42 |      */
 43 |     private $_query;
 44 | 
 45 |     /**
 46 |      * Idf score factor, assigned by sumOfSquaredWeights().
 47 |      *
 48 |      * @var float
 49 |      */
 50 |     private $_idf;
 51 | 
 52 |     /**
 53 |      * Query weight: idf * boost, scaled by the query norm in normalize().
 54 |      *
 55 |      * @var float
 56 |      */
 57 |     private $_queryWeight;
 58 | 
 59 | 
 60 |     /**
 61 |      * Remembers the term, the query it comes from and the index reader.
 62 |      * No score factor is computed at construction time.
 63 |      *
 64 |      * @param \ZendSearch\Lucene\Index\Term                 $term
 65 |      * @param \ZendSearch\Lucene\Search\Query\AbstractQuery $query
 66 |      * @param \ZendSearch\Lucene\SearchIndexInterface       $reader index reader
 67 |      */
 68 |     public function __construct(Index\Term $term, Query\AbstractQuery $query, Lucene\SearchIndexInterface $reader)
 69 |     {
 70 |         $this->_reader = $reader;
 71 |         $this->_query  = $query;
 72 |         $this->_term   = $term;
 73 |     }
 74 | 
 75 | 
 76 |     /**
 77 |      * Computes the idf of the term and the query weight (idf * query boost),
 78 |      * keeps both for normalize() and returns the query weight squared.
 79 |      *
 80 |      * @return float
 81 |      */
 82 |     public function sumOfSquaredWeights()
 83 |     {
 84 |         $similarity = $this->_reader->getSimilarity();
 85 | 
 86 |         // idf first, the query weight is derived from it
 87 |         $this->_idf         = $similarity->idf($this->_term, $this->_reader);
 88 |         $this->_queryWeight = $this->_idf * $this->_query->getBoost();
 89 | 
 90 |         // squared query weight
 91 |         return $this->_queryWeight * $this->_queryWeight;
 92 |     }
 93 | 
 94 | 
 95 |     /**
 96 |      * Stores the query normalization factor, applies it to the query weight
 97 |      * and derives the weight value (normalized query weight * idf).
 98 |      *
 99 |      * @param float $queryNorm
100 |      */
101 |     public function normalize($queryNorm)
102 |     {
103 |         $this->_queryNorm = $queryNorm;
104 | 
105 |         // normalize query weight
106 |         $this->_queryWeight = $this->_queryWeight * $queryNorm;
107 | 
108 |         // idf for documents
109 |         $this->_value = $this->_queryWeight * $this->_idf;
110 |     }
111 | }
112 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Storage/Directory/DirectoryInterface.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Storage\Directory;
 12 | 
 13 | /**
 14 |  * @category   Zend
 15 |  * @package    Zend_Search_Lucene
 16 |  * @subpackage Storage
 17 |  */
 18 | interface DirectoryInterface
 19 | {
 20 | 
 21 |     /**
 22 |      * Closes the store.
 23 |      *
 24 |      * @return void
 25 |      */
 26 |     public function close();
 27 | 
 28 |     /**
 29 |      * Returns an array of strings, one for each file in the directory.
 30 |      *
 31 |      * @return array
 32 |      */
 33 |     public function fileList();
 34 | 
 35 |     /**
 36 |      * Creates a new, empty file in the directory with the given $filename.
 37 |      *
 38 |      * @param string $filename
 39 |      * @return \ZendSearch\Lucene\Storage\File\FileInterface
 40 |      */
 41 |     public function createFile($filename);
 42 | 
 43 | 
 44 |     /**
 45 |      * Removes an existing $filename in the directory.
 46 |      *
 47 |      * @param string $filename
 48 |      * @return void
 49 |      */
 50 |     public function deleteFile($filename);
 51 | 
 52 |     /**
 53 |      * Purges the file if it is cached by the directory object.
 54 |      *
 55 |      * The method is used to prevent the 'too many open files' error.
 56 |      *
 57 |      * @param string $filename
 58 |      * @return void
 59 |      */
 60 |     public function purgeFile($filename);
 61 | 
 62 |     /**
 63 |      * Returns true if a file with the given $filename exists.
 64 |      *
 65 |      * @param string $filename
 66 |      * @return boolean
 67 |      */
 68 |     public function fileExists($filename);
 69 | 
 70 | 
 71 |     /**
 72 |      * Returns the length of a $filename in the directory.
 73 |      *
 74 |      * @param string $filename
 75 |      * @return integer
 76 |      */
 77 |     public function fileLength($filename);
 78 | 
 79 | 
 80 |     /**
 81 |      * Returns the UNIX timestamp $filename was last modified.
 82 |      *
 83 |      * @param string $filename
 84 |      * @return integer
 85 |      */
 86 |     public function fileModified($filename);
 87 | 
 88 | 
 89 |     /**
 90 |      * Renames an existing file in the directory.
 91 |      *
 92 |      * @param string $from
 93 |      * @param string $to
 94 |      * @return void
 95 |      */
 96 |     public function renameFile($from, $to);
 97 | 
 98 | 
 99 |     /**
100 |      * Sets the modified time of $filename to now.
101 |      *
102 |      * @param string $filename
103 |      * @return void
104 |      */
105 |     public function touchFile($filename);
106 | 
107 | 
108 |     /**
109 |      * Returns a file object (see the @return type) for a given $filename in the directory.
110 |      *
111 |      * If the $shareHandler option is true, the file handler can be shared between file object
112 |      * requests. This speeds things up, but causes problems with the file position.
113 |      * Shared handlers are good for short atomic requests.
114 |      * Non-shared handlers are useful for stream file reading (especially for compound files).
115 |      *
116 |      * @param string $filename
117 |      * @param boolean $shareHandler
118 |      * @return \ZendSearch\Lucene\Storage\File\FileInterface
119 |      */
120 |     public function getFileObject($filename, $shareHandler = true);
121 | }
122 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Storage/File/FileInterface.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Storage\File;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | 
 15 | /**
 16 |  * @category   Zend
 17 |  * @package    Zend_Search_Lucene
 18 |  * @subpackage Storage
 19 |  */
 20 | interface FileInterface
 21 | {
 22 |     /**
 23 |      * Sets the file position indicator and advances the file pointer.
 24 |      * The new position, measured in bytes from the beginning of the file,
 25 |      * is obtained by adding offset to the position specified by whence,
 26 |      * whose values are defined as follows:
 27 |      * SEEK_SET - Set position equal to offset bytes.
 28 |      * SEEK_CUR - Set position to current location plus offset.
 29 |      * SEEK_END - Set position to end-of-file plus offset. (To move to
 30 |      * a position before the end-of-file, you need to pass a negative value
 31 |      * in offset.)
 32 |      * Upon success, returns 0; otherwise, returns -1
 33 |      *
 34 |      * @param integer $offset
 35 |      * @param integer $whence
 36 |      * @return integer
 37 |      */
 38 |     public function seek($offset, $whence=SEEK_SET);
 39 | 
 40 |     /**
 41 |      * Get file position.
 42 |      *
 43 |      * @return integer
 44 |      */
 45 |     public function tell();
 46 | 
 47 |     /**
 48 |      * Flush output.
 49 |      *
 50 |      * Returns true on success or false on failure.
 51 |      *
 52 |      * @return boolean
 53 |      */
 54 |     public function flush();
 55 | 
 56 |     /**
 57 |      * Lock file.
 58 |      * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock).
 59 |      *
 60 |      * @param integer $lockType
 61 |      * @param boolean $nonBlockinLock request a non-blocking lock (name spelled as in the signature)
 62 |      * @return boolean
 63 |      */
 64 |     public function lock($lockType, $nonBlockinLock = false);
 65 | 
 66 |     /**
 67 |      * Unlock file
 68 |      */
 69 |     public function unlock();
 70 | 
 71 |     /**
 72 |      * Reads a byte from the current position in the file
 73 |      * and advances the file pointer.
 74 |      *
 75 |      * @return integer
 76 |      */
 77 |     public function readByte();
 78 | 
 79 |     /**
 80 |      * Writes a byte to the end of the file.
 81 |      *
 82 |      * @param integer $byte
 83 |      */
 84 |     public function writeByte($byte);
 85 | 
 86 |     /**
 87 |      * Read num bytes from the current position in the file
 88 |      * and advances the file pointer.
 89 |      *
 90 |      * @param integer $num
 91 |      * @return string
 92 |      */
 93 |     public function readBytes($num);
 94 | 
 95 |     /**
 96 |      * Writes num bytes of data (all, if $num===null) to the end
 97 |      * of the file.
 98 |      *
 99 |      * @param string $data
100 |      * @param integer $num
101 |      */
102 |     public function writeBytes($data, $num=null);
103 | 
104 |     /**
105 |      * Reads an integer from the current position in the file
106 |      * and advances the file pointer.
107 |      *
108 |      * @return integer
109 |      */
110 |     public function readInt();
111 | 
112 |     /**
113 |      * Writes an integer to the end of file.
114 |      *
115 |      * @param integer $value
116 |      */
117 |     public function writeInt($value);
118 | 
119 |     /**
120 |      * Returns a long integer from the current position in the file
121 |      * and advances the file pointer.
122 |      *
123 |      * @return integer|float
124 |      */
125 |     public function readLong();
126 | 
127 |     /**
128 |      * Writes long integer to the end of file
129 |      *
130 |      * @param integer $value
131 |      */
132 |     public function writeLong($value);
133 | 
134 |     /**
135 |      * Returns a variable-length integer from the current
136 |      * position in the file and advances the file pointer.
137 |      *
138 |      * @return integer
139 |      */
140 |     public function readVInt();
141 | 
142 |     /**
143 |      * Writes a variable-length integer to the end of file.
144 |      *
145 |      * @param integer $value
146 |      */
147 |     public function writeVInt($value);
148 | 
149 |     /**
150 |      * Reads a string from the current position in the file
151 |      * and advances the file pointer.
152 |      *
153 |      * @return string
154 |      */
155 |     public function readString();
156 | 
157 |     /**
158 |      * Writes a string to the end of file.
159 |      *
160 |      * @param string $str
161 |      */
162 |     public function writeString($str);
163 | 
164 |     /**
165 |      * Reads binary data from the current position in the file
166 |      * and advances the file pointer.
167 |      *
168 |      * @return string
169 |      */
170 |     public function readBinary();
171 | }
172 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/Storage/File/Filesystem.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene\Storage\File;
 12 | 
 13 | use ZendSearch\Lucene;
 14 | use Zend\Stdlib\ErrorHandler;
 15 | 
 16 | /**
 17 |  * @category   Zend
 18 |  * @package    Zend_Search_Lucene
 19 |  * @subpackage Storage
 20 |  */
 21 | class Filesystem extends AbstractFile
 22 | {
 23 |     /**
 24 |      * Resource of the open file
 25 |      *
 26 |      * @var resource
 27 |      */
 28 |     protected $_fileHandle;
 29 | 
 30 | 
 31 |     /**
 32 |      * Class constructor.  Open the file.
 33 |      *
 34 |      * @param string $filename
 35 |      * @param string $mode
 36 |      * @throws \ZendSearch\Lucene\Exception\InvalidArgumentException if $mode has no 'w' and the file is not readable
 37 |      * @throws \ZendSearch\Lucene\Exception\RuntimeException if fopen() fails; carries the reported error text
 38 |      */
 39 |     public function __construct($filename, $mode='r+b')
 40 |     {
 41 |         if (strpos($mode, 'w') === false  &&  !is_readable($filename)) {
 42 |             // opening for reading non-readable file
 43 |             throw new Lucene\Exception\InvalidArgumentException('File \'' . $filename . '\' is not readable.');
 44 |         }
 45 | 
 46 |         // error_get_last() stands in for $php_errormsg/track_errors, which PHP 8.0 removed
 47 |         $this->_fileHandle = @fopen($filename, $mode);
 48 | 
 49 |         if ($this->_fileHandle === false) {
 50 |             $lastError = error_get_last();
 51 |             $reason    = ($lastError !== null) ? $lastError['message'] : 'Unable to open file \'' . $filename . '\'.';
 52 |             throw new Lucene\Exception\RuntimeException($reason);
 53 |         }
 54 |     }
 55 | 
 56 |     /**
 57 |      * Sets the file position indicator and advances the file pointer.
 58 |      * The new position, measured in bytes from the beginning of the file,
 59 |      * is obtained by adding offset to the position specified by whence,
 60 |      * whose values are defined as follows:
 61 |      * SEEK_SET - Set position equal to offset bytes.
 62 |      * SEEK_CUR - Set position to current location plus offset.
 63 |      * SEEK_END - Set position to end-of-file plus offset. (To move to
 64 |      * a position before the end-of-file, you need to pass a negative value
 65 |      * in offset.)
 66 |      * SEEK_CUR is the only supported offset type for compound files
 67 |      *
 68 |      * Upon success, returns 0; otherwise, returns -1
 69 |      *
 70 |      * @param integer $offset
 71 |      * @param integer $whence
 72 |      * @return integer
 73 |      */
 74 |     public function seek($offset, $whence=SEEK_SET)
 75 |     {
 76 |         return fseek($this->_fileHandle, $offset, $whence);
 77 |     }
 78 | 
 79 | 
 80 |     /**
 81 |      * Get file position.
 82 |      *
 83 |      * @return integer
 84 |      */
 85 |     public function tell()
 86 |     {
 87 |         return ftell($this->_fileHandle);
 88 |     }
 89 | 
 90 |     /**
 91 |      * Flush output.
 92 |      *
 93 |      * Returns true on success or false on failure.
 94 |      *
 95 |      * @return boolean
 96 |      */
 97 |     public function flush()
 98 |     {
 99 |         return fflush($this->_fileHandle);
100 |     }
101 | 
102 |     /**
103 |      * Close File object (does nothing when the handle was already released)
104 |      */
105 |     public function close()
106 |     {
107 |         if ($this->_fileHandle !== null ) {
108 |             ErrorHandler::start(E_WARNING);
109 |             fclose($this->_fileHandle);
110 |             ErrorHandler::stop();
111 |             $this->_fileHandle = null;
112 |         }
113 |     }
114 | 
115 |     /**
116 |      * Get the size of the already opened file (the file position is restored afterwards)
117 |      *
118 |      * @return integer
119 |      */
120 |     public function size()
121 |     {
122 |         $position = ftell($this->_fileHandle);
123 |         fseek($this->_fileHandle, 0, SEEK_END);
124 |         $size = ftell($this->_fileHandle);
125 |         fseek($this->_fileHandle,$position);
126 | 
127 |         return $size;
128 |     }
129 | 
130 |     /**
131 |      * Read a $length bytes from the file and advance the file pointer.
132 |      *
133 |      * @param integer $length
134 |      * @return string  for $length >= 1024 the blocks read until $length is reached, EOF or a read error
135 |      */
136 |     protected function _fread($length=1)
137 |     {
138 |         if ($length == 0) {
139 |             return '';
140 |         }
141 | 
142 |         if ($length < 1024) {
143 |             return fread($this->_fileHandle, $length);
144 |         }
145 | 
146 |         $data = '';
147 |         while ($length > 0) {
148 |             $nextBlock = fread($this->_fileHandle, $length);
149 |             // stop on a read error or EOF only; a block such as "0" is valid data (it is == false)
150 |             if ($nextBlock === false || $nextBlock === '') {
151 |                 break;
152 |             }
153 |             $data   .= $nextBlock;
154 |             $length -= strlen($nextBlock);
155 |         }
156 |         return $data;
157 |     }
158 | 
159 | 
160 |     /**
161 |      * Writes $length number of bytes (all, if $length===null) to the end
162 |      * of the file.
163 |      *
164 |      * @param string $data
165 |      * @param integer $length
166 |      */
167 |     protected function _fwrite($data, $length=null)
168 |     {
169 |         if ($length === null ) {
170 |             fwrite($this->_fileHandle, $data);
171 |         } else {
172 |             fwrite($this->_fileHandle, $data, $length);
173 |         }
174 |     }
175 | 
176 |     /**
177 |      * Lock file
178 |      *
179 |      * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
180 |      *
181 |      * @param integer $lockType
182 |      * @param boolean $nonBlockingLock  when true, LOCK_NB is added to the lock type
183 |      * @return boolean
184 |      */
185 |     public function lock($lockType, $nonBlockingLock = false)
186 |     {
187 |         if ($nonBlockingLock) {
188 |             return flock($this->_fileHandle, $lockType | LOCK_NB);
189 |         } else {
190 |             return flock($this->_fileHandle, $lockType);
191 |         }
192 |     }
193 | 
194 |     /**
195 |      * Unlock file
196 |      *
197 |      * Returns true on success (also when the file handle was already closed)
198 |      *
199 |      * @return boolean
200 |      */
201 |     public function unlock()
202 |     {
203 |         if ($this->_fileHandle !== null ) {
204 |             return flock($this->_fileHandle, LOCK_UN);
205 |         } else {
206 |             return true;
207 |         }
208 |     }
209 | }
210 | 


--------------------------------------------------------------------------------
/library/ZendSearch/Lucene/TermStreamsPriorityQueue.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | /**
  3 |  * Zend Framework (http://framework.zend.com/)
  4 |  *
  5 |  * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 |  * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  7 |  * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 |  * @package   Zend_Search
  9 |  */
 10 | 
 11 | namespace ZendSearch\Lucene;
 12 | 
 13 | /**
 14 |  * @category   Zend
 15 |  * @package    Zend_Search_Lucene
 16 |  * @subpackage Index
 17 |  */
 18 | class TermStreamsPriorityQueue implements Index\TermsStreamInterface
 19 | {
 20 |     /**
 21 |      * Term streams being merged (ZendSearch\Lucene\Index\TermsStreamInterface objects)
 22 |      *
 23 |      * @var array
 24 |      */
 25 |     protected $_termStreams;
 26 | 
 27 |     /**
 28 |      * Queue of the term streams that still have a current term
 29 |      *
 30 |      * @var \ZendSearch\Lucene\Index\TermsPriorityQueue
 31 |      */
 32 |     protected $_termsStreamQueue = null;
 33 | 
 34 |     /**
 35 |      * Term delivered by the latest nextTerm() call, null at the end of the stream
 36 |      *
 37 |      * @var \ZendSearch\Lucene\Index\Term
 38 |      */
 39 |     protected $_lastTerm = null;
 40 | 
 41 | 
 42 |     /**
 43 |      * Object constructor
 44 |      *
 45 |      * @param array $termStreams  array of term streams (\ZendSearch\Lucene\Index\TermsStreamInterface objects)
 46 |      */
 47 |     public function __construct(array $termStreams)
 48 |     {
 49 |         $this->_termStreams = $termStreams;
 50 | 
 51 |         $this->resetTermsStream();
 52 |     }
 53 | 
 54 |     /**
 55 |      * Reset terms stream.
 56 |      */
 57 |     public function resetTermsStream()
 58 |     {
 59 |         $this->_termsStreamQueue = new Index\TermsPriorityQueue();
 60 | 
 61 |         foreach ($this->_termStreams as $stream) {
 62 |             $stream->resetTermsStream();
 63 | 
 64 |             // "Empty" streams never enter the queue
 65 |             if ($stream->currentTerm() === null) {
 66 |                 continue;
 67 |             }
 68 | 
 69 |             $this->_termsStreamQueue->put($stream);
 70 |         }
 71 | 
 72 |         $this->nextTerm();
 73 |     }
 74 | 
 75 |     /**
 76 |      * Skip terms stream up to specified term prefix.
 77 |      *
 78 |      * Prefix contains fully specified field info and portion of searched term
 79 |      *
 80 |      * @param \ZendSearch\Lucene\Index\Term $prefix
 81 |      */
 82 |     public function skipTo(Index\Term $prefix)
 83 |     {
 84 |         // Empty the queue first; streams are queued again below once they have been moved
 85 |         $drained = array();
 86 |         while (true) {
 87 |             $stream = $this->_termsStreamQueue->pop();
 88 |             if ($stream === null) {
 89 |                 break;
 90 |             }
 91 |             $drained[] = $stream;
 92 |         }
 93 | 
 94 |         foreach ($drained as $stream) {
 95 |             $stream->skipTo($prefix);
 96 | 
 97 |             if ($stream->currentTerm() === null) {
 98 |                 continue;
 99 |             }
100 |             $this->_termsStreamQueue->put($stream);
101 |         }
102 | 
103 |         $this->nextTerm();
104 |     }
105 | 
106 |     /**
107 |      * Scans term streams and returns next term
108 |      *
109 |      * @return \ZendSearch\Lucene\Index\Term|null
110 |      */
111 |     public function nextTerm()
112 |     {
113 |         while (($stream = $this->_termsStreamQueue->pop()) !== null) {
114 |             $sameTermQueued = $this->_termsStreamQueue->top() !== null
115 |                 && $this->_termsStreamQueue->top()->currentTerm()->key() == $stream->currentTerm()->key();
116 | 
117 |             if ($sameTermQueued) {
118 |                 // The stream now on top holds the same term: advance this one and keep scanning
119 |                 if ($stream->nextTerm() !== null) {
120 |                     $this->_termsStreamQueue->put($stream);
121 |                 }
122 |                 continue;
123 |             }
124 | 
125 |             // We got new term
126 |             $this->_lastTerm = $stream->currentTerm();
127 | 
128 |             if ($stream->nextTerm() !== null) {
129 |                 // Put segment back into the priority queue
130 |                 $this->_termsStreamQueue->put($stream);
131 |             }
132 | 
133 |             return $this->_lastTerm;
134 |         }
135 | 
136 |         // End of stream
137 |         $this->_lastTerm = null;
138 | 
139 |         return null;
140 |     }
141 | 
142 |     /**
143 |      * Returns term in current position
144 |      *
145 |      * @return \ZendSearch\Lucene\Index\Term|null
146 |      */
147 |     public function currentTerm()
148 |     {
149 |         return $this->_lastTerm;
150 |     }
151 | 
152 |     /**
153 |      * Close terms stream
154 |      *
155 |      * Should be used for resources clean up if stream is not read up to the end
156 |      */
157 |     public function closeTermsStream()
158 |     {
159 |         while (true) {
160 |             $stream = $this->_termsStreamQueue->pop();
161 |             if ($stream === null) {
162 |                 break;
163 |             }
164 |             $stream->closeTermsStream();
165 |         }
166 | 
167 |         $this->_lastTerm         = null;
168 |         $this->_termsStreamQueue = null;
169 |     }
170 | }
171 | 


--------------------------------------------------------------------------------