import v1.1.0_beta1 | 2009-08-21
This commit is contained in:
@ -69,7 +69,7 @@ abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_L
|
||||
$token = $filter->normalize($token);
|
||||
|
||||
// resulting token can be null if the filter removes it
|
||||
if (is_null($token)) {
|
||||
if ($token === null) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -19,11 +19,8 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* Token filter that removes stop words. These words must be provided as array (set), example:
|
||||
@ -80,11 +77,13 @@ class Zend_Search_Lucene_Analysis_TokenFilter_StopWords extends Zend_Search_Luce
|
||||
*/
|
||||
public function loadFromFile($filepath = null) {
|
||||
if (! $filepath || ! file_exists($filepath)) {
|
||||
throw new Zend_Search_Exception('You have to provide valid file path');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('You have to provide valid file path');
|
||||
}
|
||||
$fd = fopen($filepath, "r");
|
||||
if (! $fd) {
|
||||
throw new Zend_Search_Exception('Cannot open file ' . $filepath);
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Cannot open file ' . $filepath);
|
||||
}
|
||||
while (!feof ($fd)) {
|
||||
$buffer = trim(fgets($fd));
|
||||
@ -93,7 +92,8 @@ class Zend_Search_Lucene_Analysis_TokenFilter_StopWords extends Zend_Search_Luce
|
||||
}
|
||||
}
|
||||
if (!fclose($fd)) {
|
||||
throw new Zend_Search_Exception('Cannot close file ' . $filepath);
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Cannot close file ' . $filepath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -22,98 +22,122 @@
|
||||
/** Zend_Search_Lucene_Document_OpenXml */
|
||||
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
|
||||
|
||||
if (class_exists ( 'ZipArchive' )) {
|
||||
|
||||
if (class_exists('ZipArchive', false)) {
|
||||
|
||||
/**
|
||||
* Docx document.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Document_Docx extends Zend_Search_Lucene_Document_OpenXml {
|
||||
/**
|
||||
* Docx document.
|
||||
* Xml Schema - WordprocessingML
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
* @var string
|
||||
*/
|
||||
class Zend_Search_Lucene_Document_Docx extends Zend_Search_Lucene_Document_OpenXml {
|
||||
/**
|
||||
* Xml Schema - WordprocessingML
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_WORDPROCESSINGML = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';
|
||||
|
||||
/**
|
||||
* Object constructor
|
||||
*
|
||||
* @param string $fileName
|
||||
* @param boolean $storeContent
|
||||
*/
|
||||
private function __construct($fileName, $storeContent) {
|
||||
// Document data holders
|
||||
$documentBody = array ( );
|
||||
$coreProperties = array ( );
|
||||
|
||||
// Open OpenXML package
|
||||
$package = new ZipArchive ( );
|
||||
$package->open ( $fileName );
|
||||
|
||||
// Read relations and search for officeDocument
|
||||
$relations = simplexml_load_string ( $package->getFromName ( "_rels/.rels" ) );
|
||||
foreach ( $relations->Relationship as $rel ) {
|
||||
if ($rel ["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
|
||||
// Found office document! Read in contents...
|
||||
$contents = simplexml_load_string ( $package->getFromName ( $this->absoluteZipPath ( dirname ( $rel ["Target"] ) . "/" . basename ( $rel ["Target"] ) ) ) );
|
||||
|
||||
$contents->registerXPathNamespace ( "w", Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML );
|
||||
$paragraphs = $contents->xpath ( '//w:body/w:p' );
|
||||
|
||||
foreach ( $paragraphs as $paragraph ) {
|
||||
$runs = $paragraph->xpath ( '//w:r/w:t' );
|
||||
foreach ( $runs as $run ) {
|
||||
$documentBody [] = ( string ) $run;
|
||||
}
|
||||
const SCHEMA_WORDPROCESSINGML = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';
|
||||
|
||||
/**
|
||||
* Object constructor
|
||||
*
|
||||
* @param string $fileName
|
||||
* @param boolean $storeContent
|
||||
*/
|
||||
private function __construct($fileName, $storeContent) {
|
||||
// Document data holders
|
||||
$documentBody = array();
|
||||
$coreProperties = array();
|
||||
|
||||
// Open OpenXML package
|
||||
$package = new ZipArchive();
|
||||
$package->open($fileName);
|
||||
|
||||
// Read relations and search for officeDocument
|
||||
$relations = simplexml_load_string($package->getFromName('_rels/.rels'));
|
||||
foreach($relations->Relationship as $rel) {
|
||||
if ($rel ["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
|
||||
// Found office document! Read in contents...
|
||||
$contents = simplexml_load_string($package->getFromName(
|
||||
$this->absoluteZipPath(dirname($rel['Target'])
|
||||
. '/'
|
||||
. basename($rel['Target']))
|
||||
));
|
||||
|
||||
$contents->registerXPathNamespace('w', Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
|
||||
$paragraphs = $contents->xpath('//w:body/w:p');
|
||||
|
||||
foreach ($paragraphs as $paragraph) {
|
||||
$runs = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');
|
||||
|
||||
if ($runs === false) {
|
||||
// Paragraph doesn't contain any text or breaks
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
foreach ($runs as $run) {
|
||||
if ($run->getName() == 'br') {
|
||||
// Break element
|
||||
$documentBody[] = ' ';
|
||||
} else {
|
||||
$documentBody[] = (string)$run;
|
||||
}
|
||||
}
|
||||
|
||||
// Add space after each paragraph. So they are not bound together.
|
||||
$documentBody[] = ' ';
|
||||
}
|
||||
}
|
||||
|
||||
// Read core properties
|
||||
$coreProperties = $this->extractMetaData ( $package );
|
||||
|
||||
// Close file
|
||||
$package->close ();
|
||||
|
||||
// Store filename
|
||||
$this->addField ( Zend_Search_Lucene_Field::Text ( 'filename', $fileName ) );
|
||||
|
||||
// Store contents
|
||||
if ($storeContent) {
|
||||
$this->addField ( Zend_Search_Lucene_Field::Text ( 'body', implode ( ' ', $documentBody ) ) );
|
||||
} else {
|
||||
$this->addField ( Zend_Search_Lucene_Field::UnStored ( 'body', implode ( ' ', $documentBody ) ) );
|
||||
}
|
||||
|
||||
// Store meta data properties
|
||||
foreach ( $coreProperties as $key => $value ) {
|
||||
$this->addField ( Zend_Search_Lucene_Field::Text ( $key, $value ) );
|
||||
}
|
||||
|
||||
// Store title (if not present in meta data)
|
||||
if (! isset ( $coreProperties ['title'] )) {
|
||||
$this->addField ( Zend_Search_Lucene_Field::Text ( 'title', $fileName ) );
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Load Docx document from a file
|
||||
*
|
||||
* @param string $fileName
|
||||
* @param boolean $storeContent
|
||||
* @return Zend_Search_Lucene_Document_Docx
|
||||
*/
|
||||
public static function loadDocxFile($fileName, $storeContent = false) {
|
||||
return new Zend_Search_Lucene_Document_Docx ( $fileName, $storeContent );
|
||||
|
||||
// Read core properties
|
||||
$coreProperties = $this->extractMetaData($package);
|
||||
|
||||
// Close file
|
||||
$package->close();
|
||||
|
||||
// Store filename
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
|
||||
|
||||
// Store contents
|
||||
if ($storeContent) {
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('body', implode('', $documentBody), 'UTF-8'));
|
||||
} else {
|
||||
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode('', $documentBody), 'UTF-8'));
|
||||
}
|
||||
|
||||
// Store meta data properties
|
||||
foreach ($coreProperties as $key => $value) {
|
||||
$this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
|
||||
}
|
||||
|
||||
// Store title (if not present in meta data)
|
||||
if (! isset($coreProperties['title'])) {
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Build a Docx document object from a file on disk.
 *
 * The file must be readable; otherwise no document is constructed and an
 * exception is raised instead.
 *
 * @param string  $fileName     Path of the .docx file, passed on to the constructor
 * @param boolean $storeContent Passed on to the constructor unchanged
 * @return Zend_Search_Lucene_Document_Docx
 * @throws Zend_Search_Lucene_Document_Exception When $fileName is not readable
 */
public static function loadDocxFile($fileName, $storeContent = false) {
    if (is_readable($fileName)) {
        return new Zend_Search_Lucene_Document_Docx($fileName, $storeContent);
    }

    // Exception class is loaded lazily, only on the failure path
    require_once 'Zend/Search/Lucene/Document/Exception.php';
    throw new Zend_Search_Lucene_Document_Exception('Provided file \'' . $fileName . '\' is not readable.');
}
|
||||
}
|
||||
|
||||
} // end if (class_exists('ZipArchive'))
|
||||
|
36
libs/Zend/Search/Lucene/Document/Exception.php
Normal file
36
libs/Zend/Search/Lucene/Document/Exception.php
Normal file
@ -0,0 +1,36 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Framework base exception
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Document_Exception extends Zend_Search_Lucene_Exception
|
||||
{}
|
||||
|
@ -69,11 +69,12 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
/**
|
||||
* Object constructor
|
||||
*
|
||||
* @param string $data
|
||||
* @param string $data HTML string (may be an HTML fragment)
|
||||
* @param boolean $isFile
|
||||
* @param boolean $storeContent
|
||||
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
|
||||
*/
|
||||
private function __construct($data, $isFile, $storeContent)
|
||||
private function __construct($data, $isFile, $storeContent, $defaultEncoding = '')
|
||||
{
|
||||
$this->_doc = new DOMDocument();
|
||||
$this->_doc->substituteEntities = true;
|
||||
@ -85,6 +86,37 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
}
|
||||
@$this->_doc->loadHTML($htmlData);
|
||||
|
||||
if ($this->_doc->encoding === null) {
    // Document encoding is not recognized: convert the markup from
    // $defaultEncoding to UTF-8 and parse it again with an explicit
    // UTF-8 Content-type meta tag.

    /** @todo improve HTML vs HTML fragment recognition */
    if (preg_match('/<html>/i', $htmlData, $matches, PREG_OFFSET_CAPTURE)) {
        // It's an HTML document
        // Add additional HEAD section and recognize document

        // With PREG_OFFSET_CAPTURE, $matches[0] is array(matched text, byte offset).
        // The split point is right after the <html> tag: its offset plus the
        // length of the matched tag text (not the length of the offset number).
        $htmlTagOffset = $matches[0][1] + strlen($matches[0][0]);

        @$this->_doc->loadHTML(iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, 0, $htmlTagOffset))
                             . '<head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head>'
                             . iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, $htmlTagOffset)));

        // Remove additional HEAD section
        $xpath = new DOMXPath($this->_doc);
        $head  = $xpath->query('/html/head')->item(0);
        $head->parentNode->removeChild($head);
    } else {
        // It's an HTML fragment: wrap it into a complete UTF-8 document
        @$this->_doc->loadHTML('<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
                             . iconv($defaultEncoding, 'UTF-8//IGNORE', $htmlData)
                             . '</body></html>');
    }

}
|
||||
/** @todo Add correction of wrong HTML encoding recognition processing
|
||||
* The case is:
|
||||
* Content-type HTTP-EQUIV meta tag is present, but ISO-8859-5 encoding is actually used,
|
||||
* even $this->_doc->encoding demonstrates another recognized encoding
|
||||
*/
|
||||
|
||||
$xpath = new DOMXPath($this->_doc);
|
||||
|
||||
$docTitle = '';
|
||||
@ -93,13 +125,13 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
// title should always have only one entry, but we process all nodeset entries
|
||||
$docTitle .= $titleNode->nodeValue . ' ';
|
||||
}
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('title', $docTitle, $this->_doc->actualEncoding));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('title', $docTitle, 'UTF-8'));
|
||||
|
||||
$metaNodes = $xpath->query('/html/head/meta[@name]');
|
||||
foreach ($metaNodes as $metaNode) {
|
||||
$this->addField(Zend_Search_Lucene_Field::Text($metaNode->getAttribute('name'),
|
||||
$metaNode->getAttribute('content'),
|
||||
$this->_doc->actualEncoding));
|
||||
'UTF-8'));
|
||||
}
|
||||
|
||||
$docBody = '';
|
||||
@ -109,9 +141,9 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
$this->_retrieveNodeText($bodyNode, $docBody);
|
||||
}
|
||||
if ($storeContent) {
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('body', $docBody, $this->_doc->actualEncoding));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('body', $docBody, 'UTF-8'));
|
||||
} else {
|
||||
$this->addField(Zend_Search_Lucene_Field::UnStored('body', $docBody, $this->_doc->actualEncoding));
|
||||
$this->addField(Zend_Search_Lucene_Field::UnStored('body', $docBody, 'UTF-8'));
|
||||
}
|
||||
|
||||
$linkNodes = $this->_doc->getElementsByTagName('a');
|
||||
@ -196,25 +228,27 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
/**
|
||||
* Load HTML document from a string
|
||||
*
|
||||
* @param string $data
|
||||
* @param string $data
|
||||
* @param boolean $storeContent
|
||||
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
|
||||
* @return Zend_Search_Lucene_Document_Html
|
||||
*/
|
||||
public static function loadHTML($data, $storeContent = false)
|
||||
public static function loadHTML($data, $storeContent = false, $defaultEncoding = '')
|
||||
{
|
||||
return new Zend_Search_Lucene_Document_Html($data, false, $storeContent);
|
||||
return new Zend_Search_Lucene_Document_Html($data, false, $storeContent, $defaultEncoding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Load HTML document from a file
|
||||
*
|
||||
* @param string $file
|
||||
* @param string $file
|
||||
* @param boolean $storeContent
|
||||
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
|
||||
* @return Zend_Search_Lucene_Document_Html
|
||||
*/
|
||||
public static function loadHTMLFile($file, $storeContent = false)
|
||||
public static function loadHTMLFile($file, $storeContent = false, $defaultEncoding = '')
|
||||
{
|
||||
return new Zend_Search_Lucene_Document_Html($file, true, $storeContent);
|
||||
return new Zend_Search_Lucene_Document_Html($file, true, $storeContent, $defaultEncoding);
|
||||
}
|
||||
|
||||
|
||||
@ -223,12 +257,14 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
*
|
||||
* @param DOMText $node
|
||||
* @param array $wordsToHighlight
|
||||
* @param string $color
|
||||
* @param callback $callback Callback method, used to transform (highlighting) text.
|
||||
* @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform)
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function _highlightTextNode(DOMText $node, $wordsToHighlight, $color)
|
||||
protected function _highlightTextNode(DOMText $node, $wordsToHighlight, $callback, $params)
|
||||
{
|
||||
$analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
|
||||
$analyzer->setInput($node->nodeValue, $this->_doc->encoding);
|
||||
$analyzer->setInput($node->nodeValue, 'UTF-8');
|
||||
|
||||
$matchedTokens = array();
|
||||
|
||||
@ -251,10 +287,32 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
// Cut matched node
|
||||
$matchedWordNode = $node->splitText($token->getStartOffset());
|
||||
|
||||
$highlightedNode = $this->_doc->createElement('b', $matchedWordNode->nodeValue);
|
||||
$highlightedNode->setAttribute('style', 'color:black;background-color:' . $color);
|
||||
// Retrieve HTML string representation for highlighted word
|
||||
$fullCallbackparamsList = $params;
|
||||
array_unshift($fullCallbackparamsList, $matchedWordNode->nodeValue);
|
||||
$highlightedWordNodeSetHtml = call_user_func_array($callback, $fullCallbackparamsList);
|
||||
|
||||
$node->parentNode->replaceChild($highlightedNode, $matchedWordNode);
|
||||
// Transform HTML string to a DOM representation and automatically transform retrieved string
// into valid XHTML (It's automatically done by loadHTML() method)
$highlightedWordNodeSetDomDocument = new DOMDocument('1.0', 'UTF-8');
$success = @$highlightedWordNodeSetDomDocument->
                    loadHTML('<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>'
                           . $highlightedWordNodeSetHtml
                           . '</body></html>');
if (!$success) {
    require_once 'Zend/Search/Lucene/Exception.php';
    // Report the fragment that was actually passed to loadHTML()
    throw new Zend_Search_Lucene_Exception("Error occured while loading highlighted text fragment: '$highlightedWordNodeSetHtml'.");
}
$highlightedWordNodeSetXpath = new DOMXPath($highlightedWordNodeSetDomDocument);
$highlightedWordNodeSet      = $highlightedWordNodeSetXpath->query('/html/body')->item(0)->childNodes;

// Copy every node produced by the callback into the main document,
// placing it immediately before the matched word node
for ($count = 0; $count < $highlightedWordNodeSet->length; $count++) {
    $nodeToImport = $highlightedWordNodeSet->item($count);
    $node->parentNode->insertBefore($this->_doc->importNode($nodeToImport, true /* deep copy */),
                                    $matchedWordNode);
}

// The original (unhighlighted) word node is now replaced by the imported nodes
$node->parentNode->removeChild($matchedWordNode);
|
||||
}
|
||||
}
|
||||
|
||||
@ -264,9 +322,10 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
*
|
||||
* @param DOMNode $contextNode
|
||||
* @param array $wordsToHighlight
|
||||
* @param string $color
|
||||
* @param callback $callback Callback method, used to transform (highlighting) text.
|
||||
* @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform)
|
||||
*/
|
||||
public function _highlightNode(DOMNode $contextNode, $wordsToHighlight, $color)
|
||||
protected function _highlightNodeRecursive(DOMNode $contextNode, $wordsToHighlight, $callback, $params)
|
||||
{
|
||||
$textNodes = array();
|
||||
|
||||
@ -279,38 +338,66 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
// process node later to leave childNodes structure untouched
|
||||
$textNodes[] = $childNode;
|
||||
} else {
|
||||
// Skip script nodes
|
||||
// Process node if it's not a script node
|
||||
if ($childNode->nodeName != 'script') {
|
||||
$this->_highlightNode($childNode, $wordsToHighlight, $color);
|
||||
$this->_highlightNodeRecursive($childNode, $wordsToHighlight, $callback, $params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($textNodes as $textNode) {
|
||||
$this->_highlightTextNode($textNode, $wordsToHighlight, $color);
|
||||
$this->_highlightTextNode($textNode, $wordsToHighlight, $callback, $params);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Default highlighting callback.
 *
 * Wraps the given text in a <b> element whose inline style sets black text
 * on the supplied background colour. Neither argument is escaped.
 *
 * @param string $stringToHighlight Text placed inside the <b> element
 * @param string $colour            Value appended after "background-color:" in the style attribute
 * @return string
 * @internal
 */
public function applyColour($stringToHighlight, $colour)
{
    $openingTag = '<b style="color:black;background-color:' . $colour . '">';

    return $openingTag . $stringToHighlight . '</b>';
}
|
||||
|
||||
/**
|
||||
* Highlight text with specified color
|
||||
*
|
||||
* @param string|array $words
|
||||
* @param string $color
|
||||
* @param string $colour
|
||||
* @return string
|
||||
*/
|
||||
public function highlight($words, $color = '#66ffff')
|
||||
public function highlight($words, $colour = '#66ffff')
|
||||
{
|
||||
return $this->highlightExtended($words, array($this, 'applyColour'), array($colour));
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Highlight text using specified View helper or callback function.
|
||||
*
|
||||
* @param string|array $words Words to highlight. Words could be organized using the array or string.
|
||||
* @param callback $callback Callback method, used to transform (highlighting) text.
|
||||
* @param array $params Array of additional callback parameters passed through into it
|
||||
* (first non-optional parameter is an HTML fragment for highlighting)
|
||||
* @return string
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function highlightExtended($words, $callback, $params = array())
|
||||
{
|
||||
if (!is_array($words)) {
|
||||
$words = array($words);
|
||||
}
|
||||
$wordsToHighlight = array();
|
||||
|
||||
$wordsToHighlightList = array();
|
||||
$analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
|
||||
foreach ($words as $wordString) {
|
||||
$wordsToHighlight = array_merge($wordsToHighlight, $analyzer->tokenize($wordString));
|
||||
$wordsToHighlightList[] = $analyzer->tokenize($wordString);
|
||||
}
|
||||
$wordsToHighlight = call_user_func_array('array_merge', $wordsToHighlightList);
|
||||
|
||||
if (count($wordsToHighlight) == 0) {
|
||||
return $this->_doc->saveHTML();
|
||||
@ -321,15 +408,20 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
$wordsToHighlightFlipped[$token->getTermText()] = $id;
|
||||
}
|
||||
|
||||
// Reject anything PHP cannot invoke before walking the DOM
if (!is_callable($callback)) {
    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('$callback parameter must be a valid callback.');
}
|
||||
|
||||
$xpath = new DOMXPath($this->_doc);
|
||||
|
||||
$matchedNodes = $xpath->query("/html/body");
|
||||
foreach ($matchedNodes as $matchedNode) {
|
||||
$this->_highlightNode($matchedNode, $wordsToHighlightFlipped, $color);
|
||||
$this->_highlightNodeRecursive($matchedNode, $wordsToHighlightFlipped, $callback, $params);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get HTML
|
||||
*
|
||||
@ -339,5 +431,23 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
|
||||
{
|
||||
return $this->_doc->saveHTML();
|
||||
}
|
||||
|
||||
/**
 * Get HTML body
 *
 * Serializes each child node of the document's /html/body element with
 * saveXML() and concatenates the results. When the document has no body
 * element an empty string is returned.
 *
 * @return string
 */
public function getHtmlBody()
{
    $xpath = new DOMXPath($this->_doc);
    $body  = $xpath->query('/html/body')->item(0);

    if ($body === null) {
        // No <body> element matched: nothing to serialize
        return '';
    }

    $outputFragments = array();
    foreach ($body->childNodes as $bodyNode) {
        $outputFragments[] = $this->_doc->saveXML($bodyNode);
    }

    return implode('', $outputFragments);
}
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
/** Zend_Search_Lucene_Document */
|
||||
require_once 'Zend/Search/Lucene/Document.php';
|
||||
|
||||
if (class_exists('ZipArchive')) {
|
||||
if (class_exists('ZipArchive', false)) {
|
||||
|
||||
/**
|
||||
* OpenXML document.
|
||||
|
@ -23,7 +23,7 @@
|
||||
/** Zend_Search_Lucene_Document_OpenXml */
|
||||
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
|
||||
|
||||
if (class_exists('ZipArchive')) {
|
||||
if (class_exists('ZipArchive', false)) {
|
||||
|
||||
/**
|
||||
* Pptx document.
|
||||
@ -42,14 +42,14 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';
|
||||
|
||||
|
||||
/**
|
||||
* Xml Schema - DrawingML
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
|
||||
|
||||
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
|
||||
|
||||
/**
|
||||
* Xml Schema - Slide relation
|
||||
*
|
||||
@ -63,7 +63,7 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';
|
||||
|
||||
|
||||
/**
|
||||
* Object constructor
|
||||
*
|
||||
@ -94,7 +94,7 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
|
||||
$slides[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string(
|
||||
$package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . basename($slideRel["Target"])) )
|
||||
);
|
||||
|
||||
|
||||
// Search for slide notes
|
||||
$slideNotesRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/_rels/" . basename($slideRel["Target"]) . ".rels")) );
|
||||
foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
|
||||
@ -103,27 +103,27 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
|
||||
$slideNotes[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string(
|
||||
$package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . dirname($slideNoteRel["Target"]) . "/" . basename($slideNoteRel["Target"])) )
|
||||
);
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sort slides
|
||||
ksort($slides);
|
||||
ksort($slideNotes);
|
||||
|
||||
|
||||
// Extract contents from slides
|
||||
foreach ($slides as $slideKey => $slide) {
|
||||
// Register namespaces
|
||||
$slide->registerXPathNamespace("p", Zend_Search_Lucene_Document_Pptx::SCHEMA_PRESENTATIONML);
|
||||
$slide->registerXPathNamespace("a", Zend_Search_Lucene_Document_Pptx::SCHEMA_DRAWINGML);
|
||||
|
||||
|
||||
// Fetch all text
|
||||
$textElements = $slide->xpath('//a:t');
|
||||
foreach ($textElements as $textElement) {
|
||||
@ -138,15 +138,15 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
|
||||
// Register namespaces
|
||||
$slideNote->registerXPathNamespace("p", Zend_Search_Lucene_Document_Pptx::SCHEMA_PRESENTATIONML);
|
||||
$slideNote->registerXPathNamespace("a", Zend_Search_Lucene_Document_Pptx::SCHEMA_DRAWINGML);
|
||||
|
||||
|
||||
// Fetch all text
|
||||
$textElements = $slideNote->xpath('//a:t');
|
||||
foreach ($textElements as $textElement) {
|
||||
$documentBody[] = (string)$textElement;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Read core properties
|
||||
$coreProperties = $this->extractMetaData($package);
|
||||
|
||||
@ -154,25 +154,25 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
|
||||
$package->close();
|
||||
|
||||
// Store filename
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
|
||||
|
||||
// Store contents
|
||||
if ($storeContent) {
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody)));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
|
||||
} else {
|
||||
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody)));
|
||||
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
|
||||
}
|
||||
|
||||
// Store meta data properties
|
||||
foreach ($coreProperties as $key => $value)
|
||||
{
|
||||
$this->addField(Zend_Search_Lucene_Field::Text($key, $value));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
|
||||
}
|
||||
|
||||
// Store title (if not present in meta data)
|
||||
if (!isset($coreProperties['title']))
|
||||
{
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
/** Zend_Search_Lucene_Document_OpenXml */
|
||||
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
|
||||
|
||||
if (class_exists('ZipArchive')) {
|
||||
if (class_exists('ZipArchive', false)) {
|
||||
|
||||
/**
|
||||
* Xlsx document.
|
||||
@ -42,21 +42,21 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
|
||||
|
||||
|
||||
/**
|
||||
* Xml Schema - DrawingML
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
|
||||
|
||||
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
|
||||
|
||||
/**
|
||||
* Xml Schema - Shared Strings
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
|
||||
|
||||
|
||||
/**
|
||||
* Xml Schema - Worksheet relation
|
||||
*
|
||||
@ -70,7 +70,7 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
* @var string
|
||||
*/
|
||||
const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';
|
||||
|
||||
|
||||
/**
|
||||
* Object constructor
|
||||
*
|
||||
@ -84,7 +84,7 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
$worksheets = array();
|
||||
$documentBody = array();
|
||||
$coreProperties = array();
|
||||
|
||||
|
||||
// Open OpenXML package
|
||||
$package = new ZipArchive();
|
||||
$package->open($fileName);
|
||||
@ -96,11 +96,11 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
// Found office document! Read relations for workbook...
|
||||
$workbookRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/_rels/" . basename($rel["Target"]) . ".rels")) );
|
||||
$workbookRelations->registerXPathNamespace("rel", Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP);
|
||||
|
||||
|
||||
// Read shared strings
|
||||
$sharedStringsPath = $workbookRelations->xpath("rel:Relationship[@Type='" . Zend_Search_Lucene_Document_Xlsx::SCHEMA_SHAREDSTRINGS . "']");
|
||||
$sharedStringsPath = (string)$sharedStringsPath[0]['Target'];
|
||||
$xmlStrings = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . $sharedStringsPath)) );
|
||||
$sharedStringsPath = (string)$sharedStringsPath[0]['Target'];
|
||||
$xmlStrings = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . $sharedStringsPath)) );
|
||||
if (isset($xmlStrings) && isset($xmlStrings->si)) {
|
||||
foreach ($xmlStrings->si as $val) {
|
||||
if (isset($val->t)) {
|
||||
@ -119,14 +119,14 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Sort worksheets
|
||||
ksort($worksheets);
|
||||
|
||||
|
||||
// Extract contents from worksheets
|
||||
foreach ($worksheets as $sheetKey => $worksheet) {
|
||||
foreach ($worksheet->sheetData->row as $row) {
|
||||
@ -143,7 +143,7 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case "b":
|
||||
// Value is boolean
|
||||
$value = (string)$c->v;
|
||||
@ -156,13 +156,13 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case "inlineStr":
|
||||
// Value is rich text inline
|
||||
$value = $this->_parseRichText($c->is);
|
||||
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case "e":
|
||||
// Value is an error message
|
||||
if ((string)$c->v != '') {
|
||||
@ -184,11 +184,11 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
elseif ($value == (double)$value) $value = (double)$value;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
$documentBody[] = $value;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Read core properties
|
||||
$coreProperties = $this->extractMetaData($package);
|
||||
@ -197,28 +197,28 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
|
||||
$package->close();
|
||||
|
||||
// Store filename
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
|
||||
|
||||
// Store contents
|
||||
if ($storeContent) {
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody)));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
|
||||
} else {
|
||||
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody)));
|
||||
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
|
||||
}
|
||||
|
||||
// Store meta data properties
|
||||
foreach ($coreProperties as $key => $value)
|
||||
{
|
||||
$this->addField(Zend_Search_Lucene_Field::Text($key, $value));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
|
||||
}
|
||||
|
||||
// Store title (if not present in meta data)
|
||||
if (!isset($coreProperties['title']))
|
||||
{
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName));
|
||||
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse rich text XML
|
||||
*
|
||||
|
@ -18,14 +18,9 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_FSMAction */
|
||||
require_once 'Zend/Search/Lucene/FSMAction.php';
|
||||
|
||||
/** Zend_Search_Exception */
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* Abstract Finite State Machine
|
||||
*
|
||||
@ -181,6 +176,7 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function setState($state)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('State \'' . $state . '\' is not on of the possible FSM states.');
|
||||
}
|
||||
|
||||
@ -251,12 +247,15 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function addRule($sourceState, $input, $targetState, $inputAction = null)
|
||||
{
|
||||
if (!isset($this->_states[$sourceState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined source state (' . $sourceState . ').');
|
||||
}
|
||||
if (!isset($this->_states[$targetState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined target state (' . $targetState . ').');
|
||||
}
|
||||
if (!isset($this->_inputAphabet[$input])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined input symbol (' . $input . ').');
|
||||
}
|
||||
|
||||
@ -264,6 +263,7 @@ abstract class Zend_Search_Lucene_FSM
|
||||
$this->_rules[$sourceState] = array();
|
||||
}
|
||||
if (isset($this->_rules[$sourceState][$input])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Rule for {state,input} pair (' . $sourceState . ', '. $input . ') is already defined.');
|
||||
}
|
||||
|
||||
@ -287,6 +287,7 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function addEntryAction($state, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
|
||||
}
|
||||
|
||||
@ -308,6 +309,7 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function addExitAction($state, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
|
||||
}
|
||||
|
||||
@ -330,9 +332,11 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function addInputAction($state, $inputSymbol, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$state])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
|
||||
}
|
||||
if (!isset($this->_inputAphabet[$inputSymbol])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined input symbol (' . $inputSymbol. ').');
|
||||
}
|
||||
|
||||
@ -358,9 +362,11 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function addTransitionAction($sourceState, $targetState, Zend_Search_Lucene_FSMAction $action)
|
||||
{
|
||||
if (!isset($this->_states[$sourceState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined source state (' . $sourceState. ').');
|
||||
}
|
||||
if (!isset($this->_states[$targetState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('Undefined source state (' . $targetState. ').');
|
||||
}
|
||||
|
||||
@ -384,9 +390,11 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function process($input)
|
||||
{
|
||||
if (!isset($this->_rules[$this->_currentState])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('There is no any rule for current state (' . $this->_currentState . ').');
|
||||
}
|
||||
if (!isset($this->_rules[$this->_currentState][$input])) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('There is no any rule for {current state, input} pair (' . $this->_currentState . ', ' . $input . ').');
|
||||
}
|
||||
|
||||
@ -424,6 +432,7 @@ abstract class Zend_Search_Lucene_FSM
|
||||
public function reset()
|
||||
{
|
||||
if (count($this->_states) == 0) {
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
throw new Zend_Search_Exception('There is no any state defined for FSM.');
|
||||
}
|
||||
|
||||
|
@ -19,11 +19,6 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* Dictionary loader
|
||||
*
|
||||
@ -63,7 +58,8 @@ class Zend_Search_Lucene_Index_DictionaryLoader
|
||||
$pos += 4;
|
||||
if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
|
||||
$tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
|
||||
throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
|
||||
}
|
||||
|
||||
// $indexTermCount = $tiiFile->readLong();
|
||||
@ -82,7 +78,8 @@ class Zend_Search_Lucene_Index_DictionaryLoader
|
||||
(ord($data[$pos+2]) != 0) ||
|
||||
(ord($data[$pos+3]) != 0) ||
|
||||
((ord($data[$pos+4]) & 0x80) != 0)) {
|
||||
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
|
||||
}
|
||||
|
||||
$indexTermCount = ord($data[$pos+4]) << 24 |
|
||||
@ -99,6 +96,7 @@ class Zend_Search_Lucene_Index_DictionaryLoader
|
||||
$skipInterval = ord($data[$pos]) << 24 | ord($data[$pos+1]) << 16 | ord($data[$pos+2]) << 8 | ord($data[$pos+3]);
|
||||
$pos += 4;
|
||||
if ($indexTermCount < 1) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong number of terms in a term dictionary index');
|
||||
}
|
||||
|
||||
@ -254,13 +252,16 @@ class Zend_Search_Lucene_Index_DictionaryLoader
|
||||
|
||||
// Check special index entry mark
|
||||
if ($termDictionary[0][0] != (int)0xFFFFFFFF) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
|
||||
} else if (PHP_INT_SIZE > 4){
|
||||
}
|
||||
|
||||
if (PHP_INT_SIZE > 4) {
|
||||
// Treat 64-bit 0xFFFFFFFF as -1
|
||||
$termDictionary[0][0] = -1;
|
||||
}
|
||||
|
||||
return array(&$termDictionary, &$termInfos);
|
||||
return array($termDictionary, $termInfos);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -22,16 +22,11 @@
|
||||
/** Zend_Search_Lucene_Index_DictionaryLoader */
|
||||
require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_LockManager */
|
||||
require_once 'Zend/Search/Lucene/LockManager.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_DocsFilter */
|
||||
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsStream_Interface */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
@ -40,7 +35,7 @@ require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_SegmentInfo
|
||||
class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
|
||||
{
|
||||
/**
|
||||
* "Full scan vs fetch" boundary.
|
||||
@ -261,6 +256,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
} else {
|
||||
// It's a pre-2.1 segment or isCompound is set to 'unknown'
|
||||
// Detect if segment uses compound file
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
try {
|
||||
// Try to open compound file
|
||||
$this->_directory->getFileObject($name . '.cfs');
|
||||
@ -321,110 +317,165 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
$this->_fieldsDicPositions = array_flip($fieldNums);
|
||||
|
||||
if ($this->_delGen == -2) {
|
||||
$this->_detectLatestDelGen();
|
||||
// SegmentInfo constructor is invoked from index writer
|
||||
// Autodetect current delete file generation number
|
||||
$this->_delGen = $this->_detectLatestDelGen();
|
||||
}
|
||||
|
||||
// Load deletions
|
||||
$this->_deleted = $this->_loadDelFile();
|
||||
}
|
||||
|
||||
/**
|
||||
* Load deletions file
|
||||
*
|
||||
* Returns bitset or an array depending on bitset extension availability
|
||||
*
|
||||
* @return mixed
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
private function _loadDelFile()
|
||||
{
|
||||
if ($this->_delGen == -1) {
|
||||
// There is no delete file for this segment
|
||||
// Do nothing
|
||||
return null;
|
||||
} else if ($this->_delGen == 0) {
|
||||
// It's a segment with pre-2.1 format delete file
|
||||
// Try to find delete file
|
||||
try {
|
||||
// '.del' files always stored in a separate file
|
||||
// Segment compound is not used
|
||||
$delFile = $this->_directory->getFileObject($this->_name . '.del');
|
||||
|
||||
$byteCount = $delFile->readInt();
|
||||
$byteCount = ceil($byteCount/8);
|
||||
$bitCount = $delFile->readInt();
|
||||
|
||||
if ($bitCount == 0) {
|
||||
$delBytes = '';
|
||||
} else {
|
||||
$delBytes = $delFile->readBytes($byteCount);
|
||||
}
|
||||
|
||||
if (extension_loaded('bitset')) {
|
||||
$this->_deleted = $delBytes;
|
||||
} else {
|
||||
$this->_deleted = array();
|
||||
for ($count = 0; $count < $byteCount; $count++) {
|
||||
$byte = ord($delBytes[$count]);
|
||||
for ($bit = 0; $bit < 8; $bit++) {
|
||||
if ($byte & (1<<$bit)) {
|
||||
$this->_deleted[$count*8 + $bit] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch(Zend_Search_Exception $e) {
|
||||
if (strpos($e->getMessage(), 'is not readable') === false ) {
|
||||
throw $e;
|
||||
}
|
||||
// There is no delete file
|
||||
// Do nothing
|
||||
}
|
||||
// Try to load deletions file
|
||||
return $this->_loadPre21DelFile();
|
||||
} else {
|
||||
// It's 2.1+ format delete file
|
||||
$delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
|
||||
// It's 2.1+ format deletions file
|
||||
return $this->_load21DelFile();
|
||||
}
|
||||
}
|
||||
|
||||
$format = $delFile->readInt();
|
||||
/**
|
||||
* Load pre-2.1 deletions file
|
||||
*
|
||||
* Returns bitset or an array depending on bitset extension availability
|
||||
*
|
||||
* @return mixed
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
private function _loadPre21DelFile()
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
try {
|
||||
// '.del' files always stored in a separate file
|
||||
// Segment compound is not used
|
||||
$delFile = $this->_directory->getFileObject($this->_name . '.del');
|
||||
|
||||
if ($format == (int)0xFFFFFFFF) {
|
||||
if (extension_loaded('bitset')) {
|
||||
$this->_deleted = bitset_empty();
|
||||
} else {
|
||||
$this->_deleted = array();
|
||||
}
|
||||
|
||||
$byteCount = $delFile->readInt();
|
||||
$bitCount = $delFile->readInt();
|
||||
|
||||
$delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
|
||||
$byteNum = 0;
|
||||
|
||||
do {
|
||||
$dgap = $delFile->readVInt();
|
||||
$nonZeroByte = $delFile->readByte();
|
||||
|
||||
$byteNum += $dgap;
|
||||
|
||||
for ($bit = 0; $bit < 8; $bit++) {
|
||||
if ($nonZeroByte & (1<<$bit)) {
|
||||
if (extension_loaded('bitset')) {
|
||||
bitset_incl($this->_deleted, $byteNum*8 + $bit);
|
||||
} else {
|
||||
$this->_deleted[$byteNum*8 + $bit] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while ($delFile->tell() < $delFileSize);
|
||||
$byteCount = $delFile->readInt();
|
||||
$byteCount = ceil($byteCount/8);
|
||||
$bitCount = $delFile->readInt();
|
||||
|
||||
if ($bitCount == 0) {
|
||||
$delBytes = '';
|
||||
} else {
|
||||
// $format is actually byte count
|
||||
$byteCount = ceil($format/8);
|
||||
$bitCount = $delFile->readInt();
|
||||
$delBytes = $delFile->readBytes($byteCount);
|
||||
}
|
||||
|
||||
if ($bitCount == 0) {
|
||||
$delBytes = '';
|
||||
} else {
|
||||
$delBytes = $delFile->readBytes($byteCount);
|
||||
}
|
||||
|
||||
if (extension_loaded('bitset')) {
|
||||
$this->_deleted = $delBytes;
|
||||
} else {
|
||||
$this->_deleted = array();
|
||||
for ($count = 0; $count < $byteCount; $count++) {
|
||||
$byte = ord($delBytes[$count]);
|
||||
for ($bit = 0; $bit < 8; $bit++) {
|
||||
if ($byte & (1<<$bit)) {
|
||||
$this->_deleted[$count*8 + $bit] = 1;
|
||||
}
|
||||
if (extension_loaded('bitset')) {
|
||||
return $delBytes;
|
||||
} else {
|
||||
$deletions = array();
|
||||
for ($count = 0; $count < $byteCount; $count++) {
|
||||
$byte = ord($delBytes[$count]);
|
||||
for ($bit = 0; $bit < 8; $bit++) {
|
||||
if ($byte & (1<<$bit)) {
|
||||
$deletions[$count*8 + $bit] = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $deletions;
|
||||
}
|
||||
} catch(Zend_Search_Lucene_Exception $e) {
|
||||
if (strpos($e->getMessage(), 'is not readable') === false) {
|
||||
throw $e;
|
||||
}
|
||||
// There is no deletion file
|
||||
$this->_delGen = -1;
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Load 2.1+ format deletions file
 *
 * Reads the '<segment name>_<delete generation in base 36>.del' file from the
 * segment directory. Two layouts are handled, selected by the first integer:
 *  - 0xFFFFFFFF: two further integers, then a sequence of
 *    (byte-number gap, non-zero byte) pairs up to the end of the file;
 *  - any other value: the number of bytes to read is ceil(value / 8), followed
 *    by a bit count and the raw bytes.
 *
 * Returns bitset or an array depending on bitset extension availability
 *
 * @return mixed  bitset data if the bitset extension is loaded; otherwise an array
 *                indexed by bit position (value 1 for each set bit), or null
 *                if no bit is set
 */
private function _load21DelFile()
{
    $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
    $delFile     = $this->_directory->getFileObject($delFileName);
    $useBitset   = extension_loaded('bitset');

    $format = $delFile->readInt();

    if ($format == (int)0xFFFFFFFF) {
        $deletions = $useBitset ? bitset_empty() : array();

        // Two integers follow the format marker. Their values are not used here,
        // but they have to be consumed to reach the first (gap, byte) pair.
        $delFile->readInt();
        $delFile->readInt();

        $delFileSize = $this->_directory->fileLength($delFileName);
        $byteNum     = 0;

        do {
            $dgap        = $delFile->readVInt();
            $nonZeroByte = $delFile->readByte();

            // Gaps are relative to the previously decoded byte number
            $byteNum += $dgap;

            for ($bit = 0; $bit < 8; $bit++) {
                if ($nonZeroByte & (1<<$bit)) {
                    if ($useBitset) {
                        bitset_incl($deletions, $byteNum*8 + $bit);
                    } else {
                        $deletions[$byteNum*8 + $bit] = 1;
                    }
                }
            }
        } while ($delFile->tell() < $delFileSize);

        // Return only after ALL pairs have been processed. Returning from inside
        // the loop would drop every record after the first one.
        if ($useBitset) {
            return $deletions;
        }

        return (count($deletions) > 0) ? $deletions : null;
    }

    // Otherwise $format is a size value: ceil($format / 8) bytes are to be read
    $byteCount = ceil($format/8);
    $bitCount  = $delFile->readInt();

    if ($bitCount == 0) {
        $delBytes = '';
    } else {
        $delBytes = $delFile->readBytes($byteCount);
    }

    if ($useBitset) {
        return $delBytes;
    }

    $deletions = array();
    // Iterate over the bytes actually loaded: $delBytes is empty when $bitCount is 0,
    // so looping up to $byteCount would read undefined string offsets.
    $loadedBytes = strlen($delBytes);
    for ($count = 0; $count < $loadedBytes; $count++) {
        $byte = ord($delBytes[$count]);
        for ($bit = 0; $bit < 8; $bit++) {
            if ($byte & (1<<$bit)) {
                $deletions[$count*8 + $bit] = 1;
            }
        }
    }

    return (count($deletions) > 0) ? $deletions : null;
}
|
||||
@ -462,10 +513,12 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
}
|
||||
|
||||
if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
|
||||
. $fdxFName . ' file.' );
|
||||
}
|
||||
if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
|
||||
. $fdtFName . ' file.' );
|
||||
}
|
||||
@ -500,6 +553,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
}
|
||||
|
||||
if( !isset($this->_segFiles[$filename]) ) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
|
||||
. $filename . ' file.' );
|
||||
}
|
||||
@ -525,6 +579,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
}
|
||||
|
||||
if( !isset($this->_sharedDocStoreOptions['fileSizes'][$filename]) ) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
|
||||
. $filename . ' file.' );
|
||||
}
|
||||
@ -541,6 +596,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
}
|
||||
|
||||
if( !isset($this->_segFileSizes[$filename]) ) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
|
||||
. $filename . ' file.' );
|
||||
}
|
||||
@ -811,6 +867,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
$tiVersion = $tisFile->readInt();
|
||||
if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
|
||||
$tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
|
||||
}
|
||||
|
||||
@ -890,6 +947,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
|
||||
if ($docsFilter !== null) {
|
||||
if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
|
||||
}
|
||||
|
||||
@ -1012,6 +1070,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
|
||||
if ($docsFilter !== null) {
|
||||
if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
|
||||
}
|
||||
|
||||
@ -1136,6 +1195,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
|
||||
if ($docsFilter !== null) {
|
||||
if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
|
||||
}
|
||||
|
||||
@ -1304,6 +1364,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
$headerFormatVersion = $normfFile->readByte();
|
||||
|
||||
if ($header != 'NRM' || $headerFormatVersion != (int)0xFF) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
|
||||
}
|
||||
|
||||
@ -1439,13 +1500,14 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Detect latest delete generation
|
||||
*
|
||||
* Is actually used from writeChanges() method or from the constructor if it's invoked from
|
||||
* Index writer. In both cases index write lock is already obtained, so we shouldn't care
|
||||
* about it
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
private function _detectLatestDelGen()
|
||||
{
|
||||
@ -1462,12 +1524,12 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
|
||||
if (count($delFileList) == 0) {
|
||||
// There is no deletions file for current segment in the directory
|
||||
// Set detetions file generation number to 1
|
||||
$this->_delGen = -1;
|
||||
// Return -1 (no deletions file) as the generation number
|
||||
return -1;
|
||||
} else {
|
||||
// There are some deletions files for current segment in the directory
|
||||
// Set deletions file generation number to the highest number
|
||||
$this->_delGen = max($delFileList);
|
||||
return max($delFileList);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1478,11 +1540,43 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
* so index Write lock has to be already obtained.
|
||||
*
|
||||
* @internal
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function writeChanges()
|
||||
{
|
||||
// Get new generation number
|
||||
$latestDelGen = $this->_detectLatestDelGen();
|
||||
|
||||
if (!$this->_deletedDirty) {
|
||||
return;
|
||||
// There was no deletions by current process
|
||||
|
||||
if ($latestDelGen == $this->_delGen) {
|
||||
// Delete file hasn't been updated by any concurrent process
|
||||
return;
|
||||
} else if ($latestDelGen > $this->_delGen) {
|
||||
// Delete file has been updated by some concurrent process
|
||||
// Reload deletions file
|
||||
$this->_delGen = $latestDelGen;
|
||||
$this->_deleted = $this->_loadDelFile();
|
||||
|
||||
return;
|
||||
} else {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
|
||||
}
|
||||
}
|
||||
|
||||
if ($latestDelGen > $this->_delGen) {
|
||||
// Merge current deletions with latest deletions file
|
||||
$this->_delGen = $latestDelGen;
|
||||
|
||||
$latestDelete = $this->_loadDelFile();
|
||||
|
||||
if (extension_loaded('bitset')) {
|
||||
$this->_deleted = bitset_union($this->_deleted, $latestDelete);
|
||||
} else {
|
||||
$this->_deleted += $latestDelete;
|
||||
}
|
||||
}
|
||||
|
||||
if (extension_loaded('bitset')) {
|
||||
@ -1503,10 +1597,6 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
$bitCount = count($this->_deleted);
|
||||
}
|
||||
|
||||
|
||||
// Get new generation number
|
||||
$this->_detectLatestDelGen();
|
||||
|
||||
if ($this->_delGen == -1) {
|
||||
// Set delete file generation number to 1
|
||||
$this->_delGen = 1;
|
||||
@ -1524,7 +1614,6 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Term Dictionary File object for stream like terms reading
|
||||
*
|
||||
@ -1664,8 +1753,28 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
* @return integer
|
||||
*/
|
||||
public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
|
||||
public function resetTermsStream(/** $startId = 0, $mode = self::SM_TERMS_ONLY */)
|
||||
{
|
||||
/**
|
||||
* SegmentInfo->resetTermsStream() method actually takes two optional parameters:
|
||||
* $startId (default value is 0)
|
||||
* $mode (default value is self::SM_TERMS_ONLY)
|
||||
*/
|
||||
$argList = func_get_args();
|
||||
if (count($argList) > 2) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong number of arguments');
|
||||
} else if (count($argList) == 2) {
|
||||
$startId = $argList[0];
|
||||
$mode = $argList[1];
|
||||
} else if (count($argList) == 1) {
|
||||
$startId = $argList[0];
|
||||
$mode = self::SM_TERMS_ONLY;
|
||||
} else {
|
||||
$startId = 0;
|
||||
$mode = self::SM_TERMS_ONLY;
|
||||
}
|
||||
|
||||
if ($this->_tisFile !== null) {
|
||||
$this->_tisFile = null;
|
||||
}
|
||||
@ -1676,6 +1785,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
$tiVersion = $this->_tisFile->readInt();
|
||||
if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
|
||||
$tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
|
||||
}
|
||||
|
||||
@ -1723,6 +1833,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
|
||||
break;
|
||||
|
||||
default:
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
|
||||
break;
|
||||
}
|
||||
|
@ -19,19 +19,14 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsPriorityQueue */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
@ -117,10 +112,12 @@ class Zend_Search_Lucene_Index_SegmentMerger
|
||||
public function merge()
|
||||
{
|
||||
if ($this->_mergeDone) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Merge is already done.');
|
||||
}
|
||||
|
||||
if (count($this->_segmentInfos) < 1) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
|
||||
. count($this->_segmentInfos)
|
||||
. ').');
|
||||
@ -228,11 +225,11 @@ class Zend_Search_Lucene_Index_SegmentMerger
|
||||
*/
|
||||
private function _mergeTerms()
|
||||
{
|
||||
$segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
|
||||
$segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();
|
||||
|
||||
$segmentStartId = 0;
|
||||
foreach ($this->_segmentInfos as $segName => $segmentInfo) {
|
||||
$segmentStartId = $segmentInfo->reset($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);
|
||||
$segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);
|
||||
|
||||
// Skip "empty" segments
|
||||
if ($segmentInfo->currentTerm() !== null) {
|
||||
|
@ -19,14 +19,9 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
|
@ -19,17 +19,12 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentWriter */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -90,6 +85,7 @@ class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_
|
||||
/**
|
||||
* @todo term vector storing support
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
|
||||
}
|
||||
|
||||
|
@ -19,17 +19,12 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentWriter */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
|
48
libs/Zend/Search/Lucene/Index/TermsPriorityQueue.php
Normal file
48
libs/Zend/Search/Lucene/Index/TermsPriorityQueue.php
Normal file
@ -0,0 +1,48 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_PriorityQueue */
|
||||
require_once 'Zend/Search/Lucene/PriorityQueue.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_TermsPriorityQueue extends Zend_Search_Lucene_PriorityQueue
{
    /**
     * Ordering predicate for queue elements.
     *
     * Two terms streams are compared by the key of the term each of them
     * currently points to (byte-wise string comparison).
     *
     * @param mixed $termsStream1
     * @param mixed $termsStream2
     * @return boolean  true if $termsStream1 is "less" than $termsStream2, false otherwise
     */
    protected function _less($termsStream1, $termsStream2)
    {
        $firstKey  = $termsStream1->currentTerm()->key();
        $secondKey = $termsStream2->currentTerm()->key();

        return strcmp($firstKey, $secondKey) < 0;
    }
}
|
65
libs/Zend/Search/Lucene/Index/TermsStream/Interface.php
Normal file
65
libs/Zend/Search/Lucene/Index/TermsStream/Interface.php
Normal file
@ -0,0 +1,65 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
interface Zend_Search_Lucene_Index_TermsStream_Interface
{
    /**
     * Reset the terms stream.
     */
    public function resetTermsStream();

    /**
     * Skip the terms stream up to the specified term prefix.
     *
     * The prefix carries fully specified field info and a portion of the searched term.
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix);

    /**
     * Scan the terms dictionary and return the next term.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm();

    /**
     * Return the term at the current position.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm();

    /**
     * Close the terms stream.
     *
     * Should be used for resources clean up if the stream is not read up to the end.
     */
    public function closeTermsStream();
}
|
@ -189,11 +189,8 @@ class Zend_Search_Lucene_Index_Writer
|
||||
$segmentsFile = $directory->createFile('segments');
|
||||
$segmentsFile->writeInt((int)0xFFFFFFFF);
|
||||
|
||||
// write version (is initialized by current time
|
||||
// $segmentsFile->writeLong((int)microtime(true));
|
||||
$version = microtime(true);
|
||||
$segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
|
||||
$segmentsFile->writeInt((int)($version & 0xFFFFFFFF));
|
||||
// write version (initialized by current time)
|
||||
$segmentsFile->writeLong(round(microtime(true)));
|
||||
|
||||
// write name counter
|
||||
$segmentsFile->writeInt($nameCount);
|
||||
@ -214,11 +211,8 @@ class Zend_Search_Lucene_Index_Writer
|
||||
$segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
|
||||
$segmentsFile->writeInt((int)0xFFFFFFFD);
|
||||
|
||||
// write version (is initialized by current time
|
||||
// $segmentsFile->writeLong((int)microtime(true));
|
||||
$version = microtime(true);
|
||||
$segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
|
||||
$segmentsFile->writeInt((int)($version & 0xFFFFFFFF));
|
||||
// write version (initialized by current time)
|
||||
$segmentsFile->writeLong(round(microtime(true)));
|
||||
|
||||
// write name counter
|
||||
$segmentsFile->writeInt($nameCount);
|
||||
@ -435,9 +429,9 @@ class Zend_Search_Lucene_Index_Writer
|
||||
|
||||
try {
|
||||
// Write format marker
|
||||
if ($this->_targetFormatVersion == Zend_Search_lucene::FORMAT_2_1) {
|
||||
if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
|
||||
$newSegmentFile->writeInt((int)0xFFFFFFFD);
|
||||
} else if ($this->_targetFormatVersion == Zend_Search_lucene::FORMAT_2_3) {
|
||||
} else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
|
||||
$newSegmentFile->writeInt((int)0xFFFFFFFC);
|
||||
}
|
||||
|
||||
@ -453,16 +447,9 @@ class Zend_Search_Lucene_Index_Writer
|
||||
throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
|
||||
}
|
||||
|
||||
// $version = $segmentsFile->readLong() + $this->_versionUpdate;
|
||||
// Process version on 32-bit platforms
|
||||
$versionHigh = $segmentsFile->readInt();
|
||||
$versionLow = $segmentsFile->readInt();
|
||||
$version = $versionHigh * ((double)0xFFFFFFFF + 1) +
|
||||
(($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
|
||||
$version += $this->_versionUpdate;
|
||||
$version = $segmentsFile->readLong() + $this->_versionUpdate;
|
||||
$this->_versionUpdate = 0;
|
||||
$newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
|
||||
$newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));
|
||||
$newSegmentFile->writeLong($version);
|
||||
|
||||
// Write segment name counter
|
||||
$newSegmentFile->writeInt($segmentsFile->readInt());
|
||||
@ -482,21 +469,18 @@ class Zend_Search_Lucene_Index_Writer
|
||||
|
||||
if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
|
||||
// pre-2.1 index format
|
||||
$delGenHigh = 0;
|
||||
$delGenLow = 0;
|
||||
$delGen = 0;
|
||||
$hasSingleNormFile = false;
|
||||
$numField = (int)0xFFFFFFFF;
|
||||
$isCompoundByte = 0;
|
||||
$docStoreOptions = null;
|
||||
} else {
|
||||
//$delGen = $segmentsFile->readLong();
|
||||
$delGenHigh = $segmentsFile->readInt();
|
||||
$delGenLow = $segmentsFile->readInt();
|
||||
$delGen = $segmentsFile->readLong();
|
||||
|
||||
if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
|
||||
$docStoreOffset = $segmentsFile->readInt();
|
||||
|
||||
if ($docStoreOffset != -1) {
|
||||
if ($docStoreOffset != (int)0xFFFFFFFF) {
|
||||
$docStoreSegment = $segmentsFile->readString();
|
||||
$docStoreIsCompoundFile = $segmentsFile->readByte();
|
||||
|
||||
@ -525,8 +509,6 @@ class Zend_Search_Lucene_Index_Writer
|
||||
if (!in_array($segName, $this->_segmentsToDelete)) {
|
||||
// Load segment if necessary
|
||||
if (!isset($this->_segmentInfos[$segName])) {
|
||||
$delGen = $delGenHigh * ((double)0xFFFFFFFF + 1) +
|
||||
(($delGenLow < 0)? (double)0xFFFFFFFF - (-1 - $delGenLow) : $delGenLow);
|
||||
if ($isCompoundByte == 0xFF) {
|
||||
// The segment is not a compound file
|
||||
$isCompound = false;
|
||||
@ -549,19 +531,11 @@ class Zend_Search_Lucene_Index_Writer
|
||||
} else {
|
||||
// Retrieve actual deletions file generation number
|
||||
$delGen = $this->_segmentInfos[$segName]->getDelGen();
|
||||
|
||||
if ($delGen >= 0) {
|
||||
$delGenHigh = (int)($delGen/((double)0xFFFFFFFF + 1));
|
||||
$delGenLow =(int)($delGen & 0xFFFFFFFF);
|
||||
} else {
|
||||
$delGenHigh = $delGenLow = (int)0xFFFFFFFF;
|
||||
}
|
||||
}
|
||||
|
||||
$newSegmentFile->writeString($segName);
|
||||
$newSegmentFile->writeInt($segSize);
|
||||
$newSegmentFile->writeInt($delGenHigh);
|
||||
$newSegmentFile->writeInt($delGenLow);
|
||||
$newSegmentFile->writeLong($delGen);
|
||||
if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
|
||||
if ($docStoreOptions !== null) {
|
||||
$newSegmentFile->writeInt($docStoreOffset);
|
||||
|
@ -18,6 +18,9 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsStream_Interface */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
@ -25,7 +28,7 @@
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
interface Zend_Search_Lucene_Interface
|
||||
interface Zend_Search_Lucene_Interface extends Zend_Search_Lucene_Index_TermsStream_Interface
|
||||
{
|
||||
/**
|
||||
* Get current generation number
|
||||
@ -376,43 +379,6 @@ interface Zend_Search_Lucene_Interface
|
||||
*/
|
||||
public function terms();
|
||||
|
||||
|
||||
/**
|
||||
* Reset terms stream.
|
||||
*/
|
||||
public function resetTermsStream();
|
||||
|
||||
/**
|
||||
* Skip terms stream up to specified term preffix.
|
||||
*
|
||||
* Prefix contains fully specified field info and portion of searched term
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $prefix
|
||||
*/
|
||||
public function skipTo(Zend_Search_Lucene_Index_Term $prefix);
|
||||
|
||||
/**
|
||||
* Scans terms dictionary and returns next term
|
||||
*
|
||||
* @return Zend_Search_Lucene_Index_Term|null
|
||||
*/
|
||||
public function nextTerm();
|
||||
|
||||
/**
|
||||
* Returns term in current position
|
||||
*
|
||||
* @return Zend_Search_Lucene_Index_Term|null
|
||||
*/
|
||||
public function currentTerm();
|
||||
|
||||
/**
|
||||
* Close terms stream
|
||||
*
|
||||
* Should be used for resources clean up if stream is not read up to the end
|
||||
*/
|
||||
public function closeTermsStream();
|
||||
|
||||
|
||||
/**
|
||||
* Undeletes all documents currently marked as deleted in this index.
|
||||
*/
|
||||
|
@ -18,18 +18,12 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Storage_Directory */
|
||||
require_once 'Zend/Search/Lucene/Storage/Directory.php';
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File */
|
||||
require_once 'Zend/Search/Lucene/Storage/File.php';
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* This is an utility class which provides index locks processing functionality
|
||||
*
|
||||
@ -59,6 +53,7 @@ class Zend_Search_Lucene_LockManager
|
||||
{
|
||||
$lock = $lockDirectory->createFile(self::WRITE_LOCK_FILE);
|
||||
if (!$lock->lock(LOCK_EX)) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
|
||||
}
|
||||
return $lock;
|
||||
@ -99,11 +94,11 @@ class Zend_Search_Lucene_LockManager
|
||||
* @return Zend_Search_Lucene_Storage_File
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
|
||||
private static function _startReadLockProcessing(Zend_Search_Lucene_Storage_Directory $lockDirectory)
|
||||
{
|
||||
$lock = $lockDirectory->createFile(self::READ_LOCK_PROCESSING_LOCK_FILE);
|
||||
if (!$lock->lock(LOCK_EX)) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive lock for the read lock processing file');
|
||||
}
|
||||
return $lock;
|
||||
@ -137,7 +132,7 @@ class Zend_Search_Lucene_LockManager
|
||||
{
|
||||
$lock = $lockDirectory->createFile(self::READ_LOCK_FILE);
|
||||
if (!$lock->lock(LOCK_SH)) {
|
||||
self::_stopReadLockProcessing($lockDirectory);
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Can\'t obtain shared reading index lock');
|
||||
}
|
||||
return $lock;
|
||||
|
962
libs/Zend/Search/Lucene/MultiSearcher.php
Normal file
962
libs/Zend/Search/Lucene/MultiSearcher.php
Normal file
@ -0,0 +1,962 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_TermStreamsPriorityQueue */
|
||||
require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';
|
||||
|
||||
/** Zend_Search_Lucene_Interface */
|
||||
require_once 'Zend/Search/Lucene/Interface.php';
|
||||
|
||||
/**
|
||||
* Multisearcher allows to search through several independent indexes.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Interface_MultiSearcher implements Zend_Search_Lucene_Interface
|
||||
{
|
||||
/**
|
||||
* List of indices for searching.
|
||||
* Array of Zend_Search_Lucene_Interface objects
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $_indices;
|
||||
|
||||
/**
 * Object constructor.
 *
 * Stores the given list of indices and checks that every entry
 * implements Zend_Search_Lucene_Interface.
 *
 * @param array $indices   Array of indices to search through
 * @throws Zend_Search_Lucene_Exception  if an entry does not implement Zend_Search_Lucene_Interface
 */
public function __construct($indices = array())
{
    $this->_indices = $indices;

    foreach ($this->_indices as $candidate) {
        if ($candidate instanceof Zend_Search_Lucene_Interface) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('sub-index objects have to implement Zend_Search_Lucene_Interface.');
    }
}
|
||||
|
||||
/**
 * Append one more index to the list of searched indices.
 *
 * @param Zend_Search_Lucene_Interface $index  Index to add
 */
public function addIndex(Zend_Search_Lucene_Interface $index)
{
    // New indices always go to the end of the list
    $this->_indices[] = $index;
}
|
||||
|
||||
|
||||
/**
 * Get current generation number.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @return integer
 * @throws Zend_Search_Lucene_Exception  always
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = "Generation number can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Get segments file name.
 *
 * Delegates to Zend_Search_Lucene::getSegmentFileName().
 *
 * @param integer $generation
 * @return string
 */
public static function getSegmentFileName($generation)
{
    $segmentsFileName = Zend_Search_Lucene::getSegmentFileName($generation);

    return $segmentsFileName;
}
|
||||
|
||||
/**
 * Get index format version.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  always
 */
public function getFormatVersion()
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = "Format version can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Set index format version.
 *
 * The requested version is passed on to every sub-index.
 * Index is converted to this format at the nearest update time.
 *
 * @param int $formatVersion
 */
public function setFormatVersion($formatVersion)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setFormatVersion($formatVersion);
    }
}
|
||||
|
||||
/**
 * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return Zend_Search_Lucene_Storage_Directory
 * @throws Zend_Search_Lucene_Exception  always
 */
public function getDirectory()
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = "Index directory can't be retrieved for multi-searcher";
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Returns the total number of documents in all sub-indices (including deleted documents).
 *
 * @return integer  Sum of count() over every sub-index; 0 if there are no sub-indices
 */
public function count()
{
    $count = 0;

    foreach ($this->_indices as $index) {
        // Each sub-index reports its own size. $this->_indices is the list
        // of indices itself and has no count() method to call.
        $count += $index->count();
    }

    return $count;
}
|
||||
|
||||
/**
 * Returns one greater than the largest possible document number.
 *
 * This may be used to, e.g., determine how big to allocate a structure which will have
 * an element for every document number in an index.
 * For the multi-searcher this is the same value count() returns.
 *
 * @return integer
 */
public function maxDoc()
{
    $documentsTotal = $this->count();

    return $documentsTotal;
}
|
||||
|
||||
/**
 * Returns the total number of non-deleted documents in all sub-indices.
 *
 * @return integer  Sum of numDocs() over every sub-index; 0 if there are no sub-indices
 */
public function numDocs()
{
    $docs = 0;

    foreach ($this->_indices as $index) {
        // Each sub-index reports its own number of live documents. $this->_indices
        // is the list of indices itself and has no numDocs() method to call.
        $docs += $index->numDocs();
    }

    return $docs;
}
|
||||
|
||||
/**
 * Checks whether the document is deleted.
 *
 * The id is treated as a position in the concatenation of all sub-indices:
 * sub-indices are walked in order and the size of every index already
 * passed is subtracted to obtain the id local to the owning sub-index.
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function isDeleted($id)
{
    $localId = $id;

    foreach ($this->_indices as $subIndex) {
        $subIndexSize = $subIndex->count();

        if ($localId < $subIndexSize) {
            return $subIndex->isDeleted($localId);
        }

        $localId -= $subIndexSize;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
|
||||
|
||||
/**
 * Set default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * Default value is null
 *
 * The body forwards $fieldName to setDefaultSearchField() of every sub-index.
 *
 * NOTE(review): this method is declared static but its body reads $this->_indices.
 * PHP does not provide $this inside static methods, so the call is expected to
 * fail at runtime. Dropping `static` would have to be coordinated with the
 * declaration in Zend_Search_Lucene_Interface (not visible here) - confirm
 * before changing the signature.
 *
 * @param string $fieldName
 */
|
||||
public static function setDefaultSearchField($fieldName)
|
||||
{
|
||||
foreach ($this->_indices as $index) {
|
||||
$index->setDefaultSearchField($fieldName);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Get default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * The body takes the value reported by the first sub-index and throws if
 * any sub-index reports a different one (strict comparison).
 *
 * NOTE(review): this method is declared static but its body reads $this->_indices.
 * PHP does not provide $this inside static methods, so the call is expected to
 * fail at runtime. Dropping `static` would have to be coordinated with the
 * declaration in Zend_Search_Lucene_Interface (not visible here) - confirm
 * before changing the signature.
 *
 * @return string
 * @throws Zend_Search_Lucene_Exception
 */
|
||||
public static function getDefaultSearchField()
|
||||
{
|
||||
if (count($this->_indices) == 0) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Indices list is empty');
|
||||
}
|
||||
|
||||
$defaultSearchField = reset($this->_indices)->getDefaultSearchField();
|
||||
|
||||
foreach ($this->_indices as $index) {
|
||||
if ($index->getDefaultSearchField() !== $defaultSearchField) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Indices have different default search field.');
|
||||
}
|
||||
}
|
||||
|
||||
return $defaultSearchField;
|
||||
}
|
||||
|
||||
/**
 * Set result set limit.
 *
 * 0 (default) means no limit
 *
 * The body forwards $limit to setResultSetLimit() of every sub-index.
 *
 * NOTE(review): this method is declared static but its body reads $this->_indices.
 * PHP does not provide $this inside static methods, so the call is expected to
 * fail at runtime. Dropping `static` would have to be coordinated with the
 * declaration in Zend_Search_Lucene_Interface (not visible here) - confirm
 * before changing the signature.
 *
 * @param integer $limit
 */
|
||||
public static function setResultSetLimit($limit)
|
||||
{
|
||||
foreach ($this->_indices as $index) {
|
||||
$index->setResultSetLimit($limit);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Get result set limit.
 *
 * 0 means no limit
 *
 * The body takes the value reported by the first sub-index and throws if
 * any sub-index reports a different one (strict comparison).
 *
 * NOTE(review): this method is declared static but its body reads $this->_indices.
 * PHP does not provide $this inside static methods, so the call is expected to
 * fail at runtime. Dropping `static` would have to be coordinated with the
 * declaration in Zend_Search_Lucene_Interface (not visible here) - confirm
 * before changing the signature.
 *
 * NOTE(review): the mismatch exception message talks about the "default search field"
 * although result set limits are compared - looks copy-pasted.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception
 */
|
||||
public static function getResultSetLimit()
|
||||
{
|
||||
if (count($this->_indices) == 0) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Indices list is empty');
|
||||
}
|
||||
|
||||
$defaultResultSetLimit = reset($this->_indices)->getResultSetLimit();
|
||||
|
||||
foreach ($this->_indices as $index) {
|
||||
if ($index->getResultSetLimit() !== $defaultResultSetLimit) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Indices have different default search field.');
|
||||
}
|
||||
}
|
||||
|
||||
return $defaultResultSetLimit;
|
||||
}
|
||||
|
||||
/**
 * Retrieve index maxBufferedDocs option
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * The value is taken from the first sub-index; all sub-indices have to
 * report exactly the same value (strict comparison).
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or their values differ
 */
public function getMaxBufferedDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxBufferedDocs = reset($this->_indices)->getMaxBufferedDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxBufferedDocs() !== $maxBufferedDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Name the option actually being compared
            throw new Zend_Search_Lucene_Exception('Indices have different maxBufferedDocs option values.');
        }
    }

    return $maxBufferedDocs;
}
|
||||
|
||||
/**
 * Set index maxBufferedDocs option
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * The value is passed on to every sub-index.
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxBufferedDocs($maxBufferedDocs);
    }
}
|
||||
|
||||
/**
 * Retrieve index maxMergeDocs option
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * The value is taken from the first sub-index; all sub-indices have to
 * report exactly the same value (strict comparison).
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or their values differ
 */
public function getMaxMergeDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxMergeDocs = reset($this->_indices)->getMaxMergeDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxMergeDocs() !== $maxMergeDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Name the option actually being compared
            throw new Zend_Search_Lucene_Exception('Indices have different maxMergeDocs option values.');
        }
    }

    return $maxMergeDocs;
}
|
||||
|
||||
/**
 * Set index maxMergeDocs option
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * The value is passed on to every sub-index.
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->setMaxMergeDocs($maxMergeDocs);
    }
}
|
||||
|
||||
/**
 * Retrieve index mergeFactor option
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * The value is taken from the first sub-index; all sub-indices have to
 * report exactly the same value (strict comparison).
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or their values differ
 */
public function getMergeFactor()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $mergeFactor = reset($this->_indices)->getMergeFactor();

    foreach ($this->_indices as $index) {
        if ($index->getMergeFactor() !== $mergeFactor) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Name the option actually being compared
            throw new Zend_Search_Lucene_Exception('Indices have different mergeFactor option values.');
        }
    }

    return $mergeFactor;
}
|
||||
|
||||
/**
 * Set index mergeFactor option
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * The value is passed on to every sub-index.
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    foreach ($this->_indices as $index) {
        // Forward the merge factor itself; the previous code called
        // setMaxMergeDocs() with an undefined variable instead.
        $index->setMergeFactor($mergeFactor);
    }
}
|
||||
|
||||
/**
 * Performs a query against every sub-index and returns an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * Hit ids coming from the second and later sub-indices are shifted by the
 * total size of the preceding sub-indices. The per-index hit lists are
 * concatenated in sub-index order.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    // array_merge() needs at least one argument on older PHP versions,
    // so an empty multi-searcher is answered directly.
    if (count($this->_indices) == 0) {
        return array();
    }

    $hitsList = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        $hits = $index->find($query);

        if ($indexShift != 0) {
            foreach ($hits as $hit) {
                $hit->id += $indexShift;
            }
        }

        $indexShift += $index->count();
        $hitsList[] = $hits;
    }

    /** @todo Implement advanced sorting */

    return call_user_func_array('array_merge', $hitsList);
}
|
||||
|
||||
/**
 * Returns a list of all unique field names that exist in the sub-indices.
 *
 * @param boolean $indexed  Passed through to getFieldNames() of every sub-index
 * @return array
 */
public function getFieldNames($indexed = false)
{
    // array_merge() needs at least one argument on older PHP versions,
    // so an empty multi-searcher is answered directly.
    if (count($this->_indices) == 0) {
        return array();
    }

    $fieldNamesList = array();

    foreach ($this->_indices as $index) {
        $fieldNamesList[] = $index->getFieldNames($indexed);
    }

    return array_unique(call_user_func_array('array_merge', $fieldNamesList));
}
|
||||
|
||||
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in this index.
 *
 * A query hit may be passed instead of a number; its id property is used then.
 * The id is treated as a position in the concatenation of all sub-indices:
 * sub-indices are walked in order and the size of every index already
 * passed is subtracted to obtain the id local to the owning sub-index.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function getDocument($id)
{
    /* @var $id Zend_Search_Lucene_Search_QueryHit */
    $localId = ($id instanceof Zend_Search_Lucene_Search_QueryHit) ? $id->id : $id;

    foreach ($this->_indices as $subIndex) {
        $subIndexSize = $subIndex->count();

        if ($localId < $subIndexSize) {
            return $subIndex->getDocument($localId);
        }

        $localId -= $subIndexSize;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
|
||||
|
||||
/**
 * Returns true if at least one sub-index contains documents with the specified term.
 *
 * Is used for query optimization.
 * Sub-indices are asked in order; the search stops at the first positive answer.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    $termFound = false;

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->hasTerm($term)) {
            $termFound = true;
            break;
        }
    }

    return $termFound;
}
|
||||
|
||||
/**
 * Returns IDs of all the documents containing term.
 *
 * Ids coming from the second and later sub-indices are shifted by the
 * total size of the preceding sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  Not supported, has to be null
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is given
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    // array_merge() needs at least one argument on older PHP versions,
    // so an empty multi-searcher is answered directly.
    if (count($this->_indices) == 0) {
        return array();
    }

    $docsList = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        $docs = $index->termDocs($term);

        if ($indexShift != 0) {
            foreach ($docs as $id => $docId) {
                $docs[$id] += $indexShift;
            }
        }

        $indexShift += $index->count();
        $docsList[] = $docs;
    }

    return call_user_func_array('array_merge', $docsList);
}
|
||||
|
||||
/**
 * Returns documents filter for all documents containing term.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return Zend_Search_Lucene_Index_DocsFilter
 * @throws Zend_Search_Lucene_Exception  always
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    require_once 'Zend/Search/Lucene/Exception.php';

    $message = 'Document filters could not used with multi-searcher';
    throw new Zend_Search_Lucene_Exception($message);
}
|
||||
|
||||
/**
 * Returns an array of all term freqs.
 * Return array structure: array( docId => freq, ...)
 *
 * Document ids coming from the second and later sub-indices are shifted by
 * the total size of the preceding sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  Not supported, has to be null
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is given
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $freqsList = array();

    $indexShift = 0;
    foreach ($this->_indices as $index) {
        // Copy entry by entry so the (shifted) document ids survive as keys;
        // array_merge() would renumber integer keys from zero.
        foreach ($index->termFreqs($term) as $docId => $freq) {
            $freqsList[$docId + $indexShift] = $freq;
        }

        $indexShift += $index->count();
    }

    return $freqsList;
}
|
||||
|
||||
/**
 * Returns an array of all term positions in the documents.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Every sub-index is queried in order; the document ids it reports are
 * shifted by the combined count() of the indices that precede it, and the
 * shifted id is used as the key of the returned array.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  must be null
 * @return array  docId => positions; empty when no index reports the term
 * @throws Zend_Search_Lucene_Exception  if a documents filter is supplied
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $termPositions = array();
    $indexShift    = 0;

    foreach ($this->_indices as $index) {
        // Results are keyed by the shifted document id directly. The previous
        // implementation stored shifted entries in the wrong variable, then
        // replaced the result with an empty array, and finally merged the
        // last index's data instead of the collected list; array_merge()
        // would additionally have renumbered the integer document ids.
        foreach ($index->termPositions($term) as $docId => $positions) {
            $termPositions[$docId + $indexShift] = $positions;
        }

        $indexShift += $index->count();
    }

    return $termPositions;
}
|
||||
|
||||
/**
 * Counts the documents containing the $term.
 *
 * The value is the sum of docFreq($term) over every wrapped index.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_indices as $subIndex) {
        $total = $total + $subIndex->docFreq($term);
    }

    return $total;
}
|
||||
|
||||
/**
 * Retrieve similarity used by index reader
 *
 * The similarity of the first index is taken as the reference; every index
 * must return that very same object (identity comparison).
 *
 * @return Zend_Search_Lucene_Search_Similarity
 * @throws Zend_Search_Lucene_Exception  if there are no indices, or if the
 *                                       indices do not share one similarity
 */
public function getSimilarity()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $reference = reset($this->_indices)->getSimilarity();

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->getSimilarity() === $reference) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices have different similarity.');
    }

    return $reference;
}
|
||||
|
||||
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * The id is resolved by walking the indices in order and subtracting each
 * index's count() until the remaining id falls inside an index; that index
 * is then asked for the norm using the remaining (local) id.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|null  null when $id lies beyond the last index
 */
public function norm($id, $fieldName)
{
    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($id < $size) {
            return $subIndex->norm($id, $fieldName);
        }

        // Not in this index: make the id relative to the next one.
        $id -= $size;
    }

    return null;
}
|
||||
|
||||
/**
 * Reports whether at least one wrapped index has deletions.
 *
 * Indices are checked in order and the scan stops at the first one whose
 * hasDeletions() is true.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $anyDeleted = false;

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->hasDeletions()) {
            $anyDeleted = true;
            break;
        }
    }

    return $anyDeleted;
}
|
||||
|
||||
/**
 * Deletes a document from the index.
 * $id is an internal document id
 *
 * The id is resolved by walking the indices in order and subtracting each
 * index's count() until the remaining id falls inside an index; the
 * document is then deleted from that index using the remaining (local) id.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception  if the id is beyond the last index
 */
public function delete($id)
{
    // The documented contract accepts a query hit, but the arithmetic below
    // needs a plain number, so unwrap it first.
    // NOTE(review): assumes QueryHit exposes the document id as a public
    // $id property expressed in this searcher's id space - confirm.
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        $id = $id->id;
    }

    foreach ($this->_indices as $index) {
        $indexCount = $index->count();

        if ($indexCount > $id) {
            $index->delete($id);
            return;
        }

        // Not in this index: make the id relative to the next one.
        $id -= $indexCount;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
|
||||
|
||||
|
||||
/**
|
||||
* Callback used to choose target index for new documents
|
||||
*
|
||||
* Function/method signature:
|
||||
* Zend_Search_Lucene_Interface callbackFunction(Zend_Search_Lucene_Document $document, array $indices);
|
||||
*
|
||||
* null means "default documents distributing algorithm"
|
||||
*
|
||||
* @var callback
|
||||
*/
|
||||
protected $_documentDistributorCallBack = null;
|
||||
|
||||
/**
 * Set callback for choosing target index.
 *
 * Passing null clears the callback; addDocument() then picks a random
 * index instead.
 *
 * @param callback|null $callback
 * @throws Zend_Search_Lucene_Exception  if $callback is neither null nor callable
 */
public function setDocumentDistributorCallback($callback)
{
    // Previously a brace-less "if" guarded the assignment itself, so only
    // invalid values were ever stored and valid callbacks were dropped.
    if ($callback !== null && !is_callable($callback)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('$callback parameter must be null or valid callback.');
    }

    $this->_documentDistributorCallBack = $callback;
}
|
||||
|
||||
/**
 * Get callback for choosing target index.
 *
 * @return callback|null  the stored callback; null when none is set
 */
public function getDocumentDistributorCallback()
{
    $distributor = $this->_documentDistributorCallBack;

    return $distributor;
}
|
||||
|
||||
/**
 * Adds a document to one of the wrapped indices.
 *
 * When a distributor callback is set it is called with the document and
 * the list of indices, and its return value is used as the target index.
 * Otherwise the target is picked with array_rand().
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $distributor = $this->_documentDistributorCallBack;

    if ($distributor === null) {
        $target = $this->_indices[ array_rand($this->_indices) ];
    } else {
        $target = call_user_func($distributor, $document, $this->_indices);
    }

    $target->addDocument($document);
}
|
||||
|
||||
/**
 * Commit pending changes by calling commit() on every wrapped index,
 * in list order.
 */
public function commit()
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->commit();
    }
}
|
||||
|
||||
/**
 * Optimize index.
 *
 * Delegates to optimize() on every wrapped index, in list order.
 */
public function optimize()
{
    foreach ($this->_indices as $index) {
        // Was "_optimise()", which matches neither this method's name nor
        // the same-name delegation used by the other methods of this class.
        $index->optimize();
    }
}
|
||||
|
||||
/**
 * Returns an array of all terms in the wrapped indices.
 *
 * The terms() result of every index is merged into one list, which is
 * then passed through array_unique().
 *
 * @return array
 */
public function terms()
{
    $perIndexTerms = array();

    foreach ($this->_indices as $subIndex) {
        $perIndexTerms[] = $subIndex->terms();
    }

    $merged = call_user_func_array('array_merge', $perIndexTerms);

    return array_unique($merged);
}
|
||||
|
||||
|
||||
/**
|
||||
* Terms stream priority queue object
|
||||
*
|
||||
* @var Zend_Search_Lucene_TermStreamsPriorityQueue
|
||||
*/
|
||||
private $_termsStream = null;
|
||||
|
||||
/**
 * Reset terms stream.
 *
 * An existing stream object is reset in place; when there is none yet, a
 * new priority queue is created over the wrapped indices.
 */
public function resetTermsStream()
{
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_indices);
}
|
||||
|
||||
/**
 * Skip terms stream up to specified term prefix.
 *
 * Prefix contains fully specified field info and portion of searched term.
 * The call is forwarded to the current terms stream object.
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
    $stream = $this->_termsStream;

    $stream->skipTo($prefix);
}
|
||||
|
||||
/**
 * Scans terms dictionary and returns next term
 *
 * The call is forwarded to the current terms stream object.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    $next = $this->_termsStream->nextTerm();

    return $next;
}
|
||||
|
||||
/**
 * Returns term in current position
 *
 * The call is forwarded to the current terms stream object.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    $current = $this->_termsStream->currentTerm();

    return $current;
}
|
||||
|
||||
/**
 * Close terms stream
 *
 * Should be used for resources clean up if stream is not read up to the end.
 * The stream object is closed and then dropped, so the next
 * resetTermsStream() call builds a new one.
 */
public function closeTermsStream()
{
    $stream = $this->_termsStream;
    $stream->closeTermsStream();

    $this->_termsStream = null;
}
|
||||
|
||||
|
||||
/**
 * Undeletes all documents currently marked as deleted, by calling
 * undeleteAll() on every wrapped index in list order.
 */
public function undeleteAll()
{
    foreach ($this->_indices as $subIndex) {
        $subIndex->undeleteAll();
    }
}
|
||||
|
||||
|
||||
/**
 * Add reference to the index object
 *
 * Intentionally a no-op for this class.
 *
 * @internal
 */
public function addReference()
{
    // Nothing to track: this object is never referenced by indices.
}
|
||||
|
||||
/**
 * Remove reference from the index object
 *
 * Intentionally a no-op for this class: no reference count is kept here
 * and nothing is closed or cleaned up by this call.
 *
 * @internal
 */
public function removeReference()
{
    // Nothing to release: this object is never referenced by indices.
}
|
||||
}
|
@ -29,10 +29,6 @@ require_once 'Zend/Search/Lucene/Search/QueryToken.php';
|
||||
/** Zend_Search_Lucene_Search_QueryParser */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParser.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -203,6 +199,7 @@ class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_
|
||||
public function finishExpression()
|
||||
{
|
||||
if ($this->getState() != self::ST_LITERAL) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Literal expected.');
|
||||
}
|
||||
|
||||
|
93
libs/Zend/Search/Lucene/Search/Highlighter/Default.php
Normal file
93
libs/Zend/Search/Lucene/Search/Highlighter/Default.php
Normal file
@ -0,0 +1,93 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Search_Highlighter_Interface */
|
||||
require_once 'Zend/Search/Lucene/Search/Highlighter/Interface.php';
|
||||
/**
 * Default highlighter: cycles through a fixed palette of background colors,
 * using the next color for each highlight() call.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Highlighter_Default implements Zend_Search_Lucene_Search_Highlighter_Interface
{
    /**
     * Palette used for text highlighting
     *
     * @var array
     */
    protected $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
                                        '#ff8888', '#88ff88', '#8888ff',
                                        '#88dddd', '#dd88dd', '#dddd88',
                                        '#aaddff', '#aaffdd', '#ddaaff',
                                        '#ddffaa', '#ffaadd', '#ffddaa');

    /**
     * Position in the palette of the color the next highlight() call uses
     *
     * It advances by one on every highlight() call and wraps around at the
     * end of the palette.
     *
     * @var integer
     */
    protected $_currentColorIndex = 0;

    /**
     * HTML document that receives the highlighting
     *
     * @var Zend_Search_Lucene_Document_Html
     */
    protected $_doc;

    /**
     * Set document for highlighting.
     *
     * @param Zend_Search_Lucene_Document_Html $document
     */
    public function setDocument(Zend_Search_Lucene_Document_Html $document)
    {
        $this->_doc = $document;
    }

    /**
     * Get document for highlighting.
     *
     * @return Zend_Search_Lucene_Document_Html $document
     */
    public function getDocument()
    {
        return $this->_doc;
    }

    /**
     * Highlight specified words
     *
     * The words are passed to the document's highlight() together with the
     * current palette color; the palette position is advanced first.
     *
     * @param string|array $words  Words to highlight. They could be organized using the array or string.
     */
    public function highlight($words)
    {
        $colorIndex = $this->_currentColorIndex;

        // Move to the next palette entry, wrapping around at the end.
        $this->_currentColorIndex = ($colorIndex + 1) % count($this->_highlightColors);

        $this->_doc->highlight($words, $this->_highlightColors[$colorIndex]);
    }

}
|
52
libs/Zend/Search/Lucene/Search/Highlighter/Interface.php
Normal file
52
libs/Zend/Search/Lucene/Search/Highlighter/Interface.php
Normal file
@ -0,0 +1,52 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Contract for highlighter objects: they hold an HTML document and are
 * asked to highlight groups of words in it.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
interface Zend_Search_Lucene_Search_Highlighter_Interface
{
    /**
     * Set document for highlighting.
     *
     * @param Zend_Search_Lucene_Document_Html $document
     */
    public function setDocument(Zend_Search_Lucene_Document_Html $document);

    /**
     * Get document for highlighting.
     *
     * @return Zend_Search_Lucene_Document_Html $document
     */
    public function getDocument();

    /**
     * Highlight specified words (method is invoked once per subquery)
     *
     * @param string|array $words  Words to highlight. They could be organized using the array or string.
     */
    public function highlight($words);
}
|
@ -25,6 +25,9 @@ require_once 'Zend/Search/Lucene/Document/Html.php';
|
||||
/** Zend_Search_Lucene_Index_DocsFilter */
|
||||
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Highlighter_Default */
|
||||
require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
@ -35,7 +38,6 @@ require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_Query
|
||||
{
|
||||
|
||||
/**
|
||||
* query boost factor
|
||||
*
|
||||
@ -57,17 +59,6 @@ abstract class Zend_Search_Lucene_Search_Query
|
||||
*/
|
||||
private $_currentColorIndex = 0;
|
||||
|
||||
/**
|
||||
* List of colors for text highlighting
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
|
||||
'#ff8888', '#88ff88', '#8888ff',
|
||||
'#88dddd', '#dd88dd', '#dddd88',
|
||||
'#aaddff', '#aaffdd', '#ddaaff', '#ddffaa', '#ffaadd', '#ffddaa');
|
||||
|
||||
|
||||
/**
|
||||
* Gets the boost for this clause. Documents matching
|
||||
* this clause will (in addition to the normal weightings) have their score
|
||||
@ -186,42 +177,57 @@ abstract class Zend_Search_Lucene_Search_Query
|
||||
abstract public function getQueryTerms();
|
||||
|
||||
/**
|
||||
* Get highlight color and shift to next
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @return string
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
protected function _getHighlightColor(&$colorIndex)
|
||||
{
|
||||
$color = $this->_highlightColors[$colorIndex++];
|
||||
|
||||
$colorIndex %= count($this->_highlightColors);
|
||||
|
||||
return $color;
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
*/
|
||||
abstract public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex);
|
||||
abstract protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter);
|
||||
|
||||
/**
|
||||
* Highlight matches in $inputHTML
|
||||
*
|
||||
* @param string $inputHTML
|
||||
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
|
||||
* @return string
|
||||
*/
|
||||
public function highlightMatches($inputHTML)
|
||||
public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
|
||||
{
|
||||
$doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
|
||||
if ($highlighter === null) {
|
||||
$highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
|
||||
}
|
||||
|
||||
$colorIndex = 0;
|
||||
$this->highlightMatchesDOM($doc, $colorIndex);
|
||||
$doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML, false, $defaultEncoding);
|
||||
$highlighter->setDocument($doc);
|
||||
|
||||
$this->_highlightMatches($highlighter);
|
||||
|
||||
return $doc->getHTML();
|
||||
}
|
||||
|
||||
/**
 * Highlight matches in $inputHtmlFragment and return it (without HTML header and body tag)
 *
 * The fragment is converted to UTF-8, wrapped into a minimal HTML page
 * that declares UTF-8, loaded as an HTML document and handed to the
 * highlighter; the body of the resulting document is returned.
 *
 * @param string $inputHtmlFragment
 * @param string $encoding   Input HTML string encoding
 * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
 *                           a default highlighter is created when null
 * @return string
 */
public function htmlFragmentHighlightMatches($inputHtmlFragment, $encoding = 'UTF-8', $highlighter = null)
{
    if ($highlighter === null) {
        $highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
    }

    $utf8Fragment = iconv($encoding, 'UTF-8//IGNORE', $inputHtmlFragment);

    $pageOpen  = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>';
    $pageClose = '</body></html>';

    $htmlDocument = Zend_Search_Lucene_Document_Html::loadHTML($pageOpen . $utf8Fragment . $pageClose);
    $highlighter->setDocument($htmlDocument);

    $this->_highlightMatches($highlighter);

    return $htmlDocument->getHtmlBody();
}
|
||||
}
|
||||
|
||||
|
@ -757,16 +757,15 @@ class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
foreach ($this->_subqueries as $id => $subquery) {
|
||||
if ($this->_signs === null || $this->_signs[$id] !== false) {
|
||||
$subquery->highlightMatchesDOM($doc, $colorIndex);
|
||||
$subquery->_highlightMatches($highlighter);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -794,10 +793,10 @@ class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_
|
||||
}
|
||||
|
||||
$query .= '(' . $subquery->__toString() . ')';
|
||||
}
|
||||
|
||||
if ($subquery->getBoost() != 1) {
|
||||
$query .= '^' . round($subquery->getBoost(), 4);
|
||||
}
|
||||
if ($this->getBoost() != 1) {
|
||||
$query = '(' . $query . ')^' . round($this->getBoost(), 4);
|
||||
}
|
||||
|
||||
return $query;
|
||||
|
@ -117,12 +117,11 @@ class Zend_Search_Lucene_Search_Query_Empty extends Zend_Search_Lucene_Search_Qu
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
// Do nothing
|
||||
}
|
||||
|
@ -106,27 +106,59 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
private $_termKeys = null;
|
||||
|
||||
/**
|
||||
* Default non-fuzzy prefix length
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private static $_defaultPrefixLength = 3;
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Query_Wildcard constructor.
|
||||
*
|
||||
* @param Zend_Search_Lucene_Index_Term $pattern
|
||||
* @param Zend_Search_Lucene_Index_Term $term
|
||||
* @param float $minimumSimilarity
|
||||
* @param integer $prefixLength
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = 0)
|
||||
public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = null)
|
||||
{
|
||||
if ($minimumSimilarity < 0) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be less than 0');
|
||||
}
|
||||
if ($minimumSimilarity >= 1) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be greater than or equal to 1');
|
||||
}
|
||||
if ($prefixLength < 0) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('prefixLength cannot be less than 0');
|
||||
}
|
||||
|
||||
$this->_term = $term;
|
||||
$this->_minimumSimilarity = $minimumSimilarity;
|
||||
$this->_prefixLength = $prefixLength;
|
||||
$this->_prefixLength = ($prefixLength !== null)? $prefixLength : self::$_defaultPrefixLength;
|
||||
}
|
||||
|
||||
/**
 * Get default non-fuzzy prefix length
 *
 * @return integer  current value of the class-wide default
 */
public static function getDefaultPrefixLength()
{
    $length = self::$_defaultPrefixLength;

    return $length;
}
|
||||
|
||||
/**
 * Set default non-fuzzy prefix length
 *
 * The value is stored as given (no validation) in the class-wide default.
 *
 * @param integer $defaultPrefixLength
 */
public static function setDefaultPrefixLength($defaultPrefixLength)
{
    $length = $defaultPrefixLength;

    self::$_defaultPrefixLength = $length;
}
|
||||
|
||||
/**
|
||||
@ -148,6 +180,7 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
*
|
||||
* @param Zend_Search_Lucene_Interface $index
|
||||
* @return Zend_Search_Lucene_Search_Query
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function rewrite(Zend_Search_Lucene_Interface $index)
|
||||
{
|
||||
@ -174,6 +207,7 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
|
||||
$scaleFactor = 1/(1 - $this->_minimumSimilarity);
|
||||
|
||||
$maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
|
||||
foreach ($fields as $field) {
|
||||
$index->resetTermsStream();
|
||||
|
||||
@ -212,6 +246,11 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
$this->_matches[] = $index->currentTerm();
|
||||
$this->_termKeys[] = $index->currentTerm()->key();
|
||||
$this->_scores[] = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
|
||||
|
||||
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
|
||||
}
|
||||
}
|
||||
|
||||
$index->nextTerm();
|
||||
@ -243,6 +282,11 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
$this->_matches[] = $index->currentTerm();
|
||||
$this->_termKeys[] = $index->currentTerm()->key();
|
||||
$this->_scores[] = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
|
||||
|
||||
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
|
||||
}
|
||||
}
|
||||
|
||||
$index->nextTerm();
|
||||
@ -288,7 +332,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function optimize(Zend_Search_Lucene_Interface $index)
|
||||
{
|
||||
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
/**
|
||||
@ -300,7 +345,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
public function getQueryTerms()
|
||||
{
|
||||
if ($this->_matches === null) {
|
||||
throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
|
||||
}
|
||||
|
||||
return $this->_matches;
|
||||
@ -315,7 +361,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function createWeight(Zend_Search_Lucene_Interface $reader)
|
||||
{
|
||||
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
|
||||
@ -329,7 +376,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
|
||||
{
|
||||
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
/**
|
||||
@ -342,7 +390,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function matchedDocs()
|
||||
{
|
||||
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
/**
|
||||
@ -355,24 +404,70 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function score($docId, Zend_Search_Lucene_Interface $reader)
|
||||
{
|
||||
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
$words = array();
|
||||
|
||||
foreach ($this->_matches as $term) {
|
||||
$words[] = $term->text;
|
||||
$prefix = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
|
||||
$prefixByteLength = strlen($prefix);
|
||||
$prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
|
||||
|
||||
$termLength = Zend_Search_Lucene_Index_Term::getLength($this->_term->text);
|
||||
|
||||
$termRest = substr($this->_term->text, $prefixByteLength);
|
||||
// we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
|
||||
$termRestLength = strlen($termRest);
|
||||
|
||||
$scaleFactor = 1/(1 - $this->_minimumSimilarity);
|
||||
|
||||
|
||||
$docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
|
||||
foreach ($tokens as $token) {
|
||||
$termText = $token->getTermText();
|
||||
|
||||
if (substr($termText, 0, $prefixByteLength) == $prefix) {
|
||||
// Calculate similarity
|
||||
$target = substr($termText, $prefixByteLength);
|
||||
|
||||
$maxDistance = isset($this->_maxDistances[strlen($target)])?
|
||||
$this->_maxDistances[strlen($target)] :
|
||||
$this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, strlen($target));
|
||||
|
||||
if ($termRestLength == 0) {
|
||||
// we don't have anything to compare. That means if we just add
|
||||
// the letters for current term we get the new word
|
||||
$similarity = (($prefixUtf8Length == 0)? 0 : 1 - strlen($target)/$prefixUtf8Length);
|
||||
} else if (strlen($target) == 0) {
|
||||
$similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
|
||||
} else if ($maxDistance < abs($termRestLength - strlen($target))){
|
||||
//just adding the characters of term to target or vice-versa results in too many edits
|
||||
//for example "pre" length is 3 and "prefixes" length is 8. We can see that
|
||||
//given this optimal circumstance, the edit distance cannot be less than 5.
|
||||
//which is 8-3 or more precisesly abs(3-8).
|
||||
//if our maximum edit distance is 4, then we can discard this word
|
||||
//without looking at it.
|
||||
$similarity = 0;
|
||||
} else {
|
||||
$similarity = 1 - levenshtein($termRest, $target)/($prefixUtf8Length + min($termRestLength, strlen($target)));
|
||||
}
|
||||
|
||||
if ($similarity > $this->_minimumSimilarity) {
|
||||
$words[] = $termText;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
|
||||
$highlighter->highlight($words);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -385,7 +480,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
|
||||
// It's used only for query visualisation, so we don't care about characters escaping
|
||||
return (($this->_term->field === null)? '' : $this->_term->field . ':')
|
||||
. $this->_term->text . '~'
|
||||
. (($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY)? round($this->_minimumSimilarity, 4) : '');
|
||||
. (($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY)? round($this->_minimumSimilarity, 4) : '')
|
||||
. (($this->getBoost() != 1)? '^' . round($this->getBoost(), 4) : '');
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -118,12 +118,11 @@ class Zend_Search_Lucene_Search_Query_Insignificant extends Zend_Search_Lucene_S
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
// Do nothing
|
||||
}
|
||||
|
@ -103,10 +103,15 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
|
||||
*
|
||||
* @param array $terms Array of Zend_Search_Lucene_Index_Term objects
|
||||
* @param array $signs Array of signs. Sign is boolean|null.
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function __construct($terms = null, $signs = null)
|
||||
{
|
||||
if (is_array($terms)) {
|
||||
if (count($terms) > Zend_Search_Lucene::getTermsPerQueryLimit()) {
|
||||
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
|
||||
}
|
||||
|
||||
$this->_terms = $terms;
|
||||
|
||||
$this->_signs = null;
|
||||
@ -594,12 +599,11 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
$words = array();
|
||||
|
||||
@ -615,7 +619,7 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
|
||||
}
|
||||
}
|
||||
|
||||
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
|
||||
$highlighter->highlight($words);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -647,7 +651,7 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
|
||||
}
|
||||
|
||||
if ($this->getBoost() != 1) {
|
||||
$query = '(' . $query . ')^' . $this->getBoost();
|
||||
$query = '(' . $query . ')^' . round($this->getBoost(), 4);
|
||||
}
|
||||
|
||||
return $query;
|
||||
|
@ -26,7 +26,7 @@
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Weight_MultiTerm
|
||||
* Zend_Search_Lucene_Search_Weight_Phrase
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
|
||||
|
||||
@ -517,19 +517,18 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
$words = array();
|
||||
foreach ($this->_terms as $term) {
|
||||
$words[] = $term->text;
|
||||
}
|
||||
|
||||
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
|
||||
$highlighter->highlight($words);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -540,11 +539,10 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
|
||||
public function __toString()
|
||||
{
|
||||
// It's used only for query visualisation, so we don't care about characters escaping
|
||||
|
||||
$query = '';
|
||||
|
||||
if (isset($this->_terms[0]) && $this->_terms[0]->field !== null) {
|
||||
$query .= $this->_terms[0]->field . ':';
|
||||
$query = $this->_terms[0]->field . ':';
|
||||
} else {
|
||||
$query = '';
|
||||
}
|
||||
|
||||
$query .= '"';
|
||||
@ -562,6 +560,10 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
|
||||
$query .= '~' . $this->_slop;
|
||||
}
|
||||
|
||||
if ($this->getBoost() != 1) {
|
||||
$query .= '^' . round($this->getBoost(), 4);
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
}
|
||||
|
133
libs/Zend/Search/Lucene/Search/Query/Preprocessing.php
Normal file
133
libs/Zend/Search/Lucene/Search/Query/Preprocessing.php
Normal file
@ -0,0 +1,133 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Query
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Weight
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
|
||||
* It's an internal abstract class intended to finalize query processing after query parsing.
|
||||
* This type of query is not actually involved into query execution.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @internal
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_Query_Preprocessing extends Zend_Search_Lucene_Search_Query
{
    /**
     * Matched terms list.
     *
     * It's filled during the rewrite operation and may be used for search result highlighting.
     * Holds an array of Zend_Search_Lucene_Index_Term objects (null until a rewrite assigns it).
     *
     * @var array
     */
    protected $_matches = null;

    /**
     * Optimize query in the context of specified index.
     *
     * Always fails: a preprocessing query takes no part in query execution.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception always
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        // The exception class is loaded lazily, only when it is actually needed
        require_once 'Zend/Search/Lucene/Exception.php';

        throw new Zend_Search_Lucene_Exception('This query is not intended to be executed.');
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * Always fails: a preprocessing query takes no part in query execution.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     * @throws Zend_Search_Lucene_Exception always
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        throw new Zend_Search_Lucene_Exception('This query is not intended to be executed.');
    }

    /**
     * Execute query in context of index reader.
     *
     * Always fails: a preprocessing query takes no part in query execution.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @throws Zend_Search_Lucene_Exception always
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        throw new Zend_Search_Lucene_Exception('This query is not intended to be executed.');
    }

    /**
     * Get document ids likely matching the query.
     *
     * Always fails: a preprocessing query takes no part in query execution.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception always
     */
    public function matchedDocs()
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        throw new Zend_Search_Lucene_Exception('This query is not intended to be executed.');
    }

    /**
     * Score specified document.
     *
     * Always fails: a preprocessing query takes no part in query execution.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     * @throws Zend_Search_Lucene_Exception always
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        throw new Zend_Search_Lucene_Exception('This query is not intended to be executed.');
    }

    /**
     * Return query terms.
     *
     * Always fails: the query has to be rewritten before its terms can be retrieved.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception always
     */
    public function getQueryTerms()
    {
        require_once 'Zend/Search/Lucene/Exception.php';

        throw new Zend_Search_Lucene_Exception('Rewrite operation has to be done before retrieving query terms.');
    }
}
|
||||
|
286
libs/Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php
Normal file
286
libs/Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php
Normal file
@ -0,0 +1,286 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Insignificant */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Empty */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
|
||||
/**
|
||||
* It's an internal class intended to finalize fuzzy query processing after query parsing.
|
||||
* This type of query is not actually involved into query execution.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @internal
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy extends Zend_Search_Lucene_Search_Query_Preprocessing
|
||||
{
|
||||
/**
|
||||
* word (query parser lexeme) to find.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_word;
|
||||
|
||||
/**
|
||||
* Word encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_encoding;
|
||||
|
||||
|
||||
/**
|
||||
* Field name.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_field;
|
||||
|
||||
/**
|
||||
* A value between 0 and 1 to set the required similarity
|
||||
* between the query term and the matching terms. For example, for a
|
||||
* _minimumSimilarity of 0.5 a term of the same length
|
||||
* as the query term is considered similar to the query term if the edit distance
|
||||
* between both terms is less than length(term)*0.5
|
||||
*
|
||||
* @var float
|
||||
*/
|
||||
private $_minimumSimilarity;
|
||||
|
||||
/**
 * Class constructor. Create a new preprocessing object for a fuzzy query.
 *
 * The arguments are only stored; no analysis or index access happens here.
 *
 * @param string $word               Non-tokenized word (query parser lexeme) to search.
 * @param string $encoding           Word encoding.
 * @param string $fieldName          Field name.
 * @param float  $minimumSimilarity  Minimum similarity.
 */
public function __construct($word, $encoding, $fieldName, $minimumSimilarity)
{
    $this->_field             = $fieldName;
    $this->_word              = $word;
    $this->_encoding          = $encoding;
    $this->_minimumSimilarity = $minimumSimilarity;
}
|
||||
|
||||
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Without a field name the word is rewritten once per search field (the default
 * search field if one is configured, otherwise every name returned by
 * $index->getFieldNames(true)) and the significant results are combined.
 * With a field name the word becomes a fuzzy query: taken verbatim if it exists
 * in the index as an exact term, otherwise after passing through the default analyzer.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException if the word contains wildcards
 *         or is tokenized into several terms
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if ($this->_field === null) {
        // The boolean query class is not loaded by this file's top-level requires
        require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
        $query = new Zend_Search_Lucene_Search_Query_Boolean();

        $hasInsignificantSubqueries = false;

        if (Zend_Search_Lucene::getDefaultSearchField() === null) {
            $searchFields = $index->getFieldNames(true);
        } else {
            $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
        }

        foreach ($searchFields as $fieldName) {
            $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy($this->_word,
                                                                                $this->_encoding,
                                                                                $fieldName,
                                                                                $this->_minimumSimilarity);

            $rewrittenSubquery = $subquery->rewrite($index);

            // Only subqueries that can actually match something are kept
            if ( !($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant  ||
                   $rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Empty) ) {
                $query->addSubquery($rewrittenSubquery);
            }

            if ($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                $hasInsignificantSubqueries = true;
            }
        }

        $subqueries = $query->getSubqueries();

        if (count($subqueries) == 0) {
            $this->_matches = array();
            if ($hasInsignificantSubqueries) {
                return new Zend_Search_Lucene_Search_Query_Insignificant();
            } else {
                return new Zend_Search_Lucene_Search_Query_Empty();
            }
        }

        // A single subquery doesn't need a boolean wrapper
        if (count($subqueries) == 1) {
            $query = reset($subqueries);
        }

        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // The fuzzy query class is not loaded by this file's top-level requires either
    require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';

    // -------------------------------------
    // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
    // encoding is not used since we expect binary matching
    $term = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
    if ($index->hasTerm($term)) {
        $query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);
        $query->setBoost($this->getBoost());

        // Get rewritten query. Important! It also fills terms matching container.
        $rewrittenQuery = $query->rewrite($index);
        $this->_matches = $query->getQueryTerms();

        return $rewrittenQuery;
    }


    // -------------------------------------
    // Recognize wildcard queries

    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    if (@preg_match('/\pL/u', 'a') == 1) {
        $subPatterns = preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word));
    } else {
        $subPatterns = preg_split('/[*?]/', $this->_word);
    }
    // More than one piece means the word contained at least one '*' or '?'
    if (count($subPatterns) > 1) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search doesn\'t support wildcards (except within Keyword fields).');
    }


    // -------------------------------------
    // Recognize one-term multi-term and "insignificant" queries
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

    if (count($tokens) == 0) {
        $this->_matches = array();
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($tokens) == 1) {
        $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        $query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);
        $query->setBoost($this->getBoost());

        // Get rewritten query. Important! It also fills terms matching container.
        $rewrittenQuery = $query->rewrite($index);
        $this->_matches = $query->getQueryTerms();

        return $rewrittenQuery;
    }

    // Word is tokenized into several tokens
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
    throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
}
|
||||
|
||||
/**
 * Query specific matches highlighting
 *
 * Nothing is highlighted when the word contains wildcards, yields no tokens, or is
 * tokenized into several terms. A single-token word is delegated to a fuzzy query
 * built from that token.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    // Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them

    // Skip exact term matching recognition, keyword fields highlighting is not supported

    // -------------------------------------
    // Recognize wildcard queries

    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    if (@preg_match('/\pL/u', 'a') == 1) {
        $subPatterns = preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word));
    } else {
        $subPatterns = preg_split('/[*?]/', $this->_word);
    }
    // More than one piece means the word contained at least one '*' or '?'
    if (count($subPatterns) > 1) {
        // Do nothing
        return;
    }

    // -------------------------------------
    // Recognize one-term multi-term and "insignificant" queries
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
    if (count($tokens) == 0) {
        // Do nothing
        return;
    }
    if (count($tokens) == 1) {
        // The fuzzy query class is not loaded by this file's top-level requires
        require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';

        $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        $query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_minimumSimilarity);

        $query->_highlightMatches($highlighter);
        return;
    }

    // Word is tokenized into several tokens
    // But fuzzy search is supported only for non-multiple word terms
    // Do nothing
}
|
||||
|
||||
/**
 * Print a query
 *
 * Produces "[field:]word~[similarity][^boost]". The similarity is printed only when
 * it differs from the fuzzy query default and the boost only when it differs from 1,
 * mirroring Zend_Search_Lucene_Search_Query_Fuzzy::__toString().
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping
    if ($this->_field !== null) {
        $query = $this->_field . ':';
    } else {
        $query = '';
    }

    // The '~' marks the query as fuzzy; without it the output looks like a plain term query
    $query .= $this->_word . '~';

    // Needed for the DEFAULT_MIN_SIMILARITY constant; not loaded by the top-level requires
    require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
    if ($this->_minimumSimilarity != Zend_Search_Lucene_Search_Query_Fuzzy::DEFAULT_MIN_SIMILARITY) {
        $query .= round($this->_minimumSimilarity, 4);
    }

    if ($this->getBoost() != 1) {
        $query .= '^' . round($this->getBoost(), 4);
    }

    return $query;
}
|
||||
}
|
273
libs/Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php
Normal file
273
libs/Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php
Normal file
@ -0,0 +1,273 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Insignificant */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Empty */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
|
||||
/**
|
||||
* It's an internal class intended to finalize phrase query processing after query parsing.
|
||||
* This type of query is not actually involved into query execution.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @internal
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Preprocessing_Phrase extends Zend_Search_Lucene_Search_Query_Preprocessing
|
||||
{
|
||||
/**
|
||||
* Phrase to find.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_phrase;
|
||||
|
||||
/**
|
||||
* Phrase encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_phraseEncoding;
|
||||
|
||||
|
||||
/**
|
||||
* Field name.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_field;
|
||||
|
||||
/**
|
||||
* Sets the number of other words permitted between words in query phrase.
|
||||
* If zero, then this is an exact phrase search. For larger values this works
|
||||
* like a WITHIN or NEAR operator.
|
||||
*
|
||||
* The slop is in fact an edit-distance, where the units correspond to
|
||||
* moves of terms in the query phrase out of position. For example, to switch
|
||||
* the order of two words requires two moves (the first move places the words
|
||||
* atop one another), so to permit re-orderings of phrases, the slop must be
|
||||
* at least two.
|
||||
* More exact matches are scored higher than sloppier matches, thus search
|
||||
* results are sorted by exactness.
|
||||
*
|
||||
* The slop is zero by default, requiring exact matches.
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_slop;
|
||||
|
||||
/**
 * Class constructor. Create a new preprocessing object for phrase query.
 *
 * The slop starts at zero (exact phrase matching) and can be changed with setSlop().
 *
 * @param string $phrase          Phrase to search.
 * @param string $phraseEncoding  Phrase encoding.
 * @param string $fieldName       Field name.
 */
public function __construct($phrase, $phraseEncoding, $fieldName)
{
    $this->_phrase         = $phrase;
    $this->_phraseEncoding = $phraseEncoding;
    $this->_field          = $fieldName;

    // Documented default; without it getSlop() would return null instead of an integer
    $this->_slop = 0;
}
|
||||
|
||||
/**
 * Set slop
 *
 * Stores the number of other words permitted between the words of the phrase.
 *
 * @param integer $slop
 */
public function setSlop($slop)
{
    $this->_slop = $slop;
}
|
||||
|
||||
|
||||
/**
 * Get slop
 *
 * Returns the value most recently stored for the slop property.
 *
 * @return integer
 */
public function getSlop()
{
    return $this->_slop;
}
|
||||
|
||||
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * Without a field name the phrase is rewritten once per search field (the default
 * search field if one is configured, otherwise every name returned by
 * $index->getFieldNames(true)) and the results are combined into a boolean query.
 * With a field name the phrase becomes a term query (exact index term or single
 * token), an "insignificant" query (no tokens) or a phrase query (several tokens).
 * The boost of this object is propagated to the returned query in every case.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    // Wildcards are allowed within phrases:
    // they are either removed by text analyzer or used as a part of keyword for keyword fields

    // Split query into subqueries if field name is not specified
    if ($this->_field === null) {
        // The boolean query class is not loaded by this file's top-level requires
        require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
        $query = new Zend_Search_Lucene_Search_Query_Boolean();
        $query->setBoost($this->getBoost());

        if (Zend_Search_Lucene::getDefaultSearchField() === null) {
            $searchFields = $index->getFieldNames(true);
        } else {
            $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
        }

        foreach ($searchFields as $fieldName) {
            $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
                                                                                 $this->_phraseEncoding,
                                                                                 $fieldName);
            $subquery->setSlop($this->getSlop());

            $query->addSubquery($subquery->rewrite($index));
        }

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
    // encoding is not used since we expect binary matching
    $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
    if ($index->hasTerm($term)) {
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }


    // tokenize phrase using current analyzer and process it as a phrase query
    $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

    if (count($tokens) == 0) {
        $this->_matches = array();
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if (count($tokens) == 1) {
        $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
        $query = new Zend_Search_Lucene_Search_Query_Term($term);
        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    //It's non-trivial phrase query
    $position = -1;
    $query = new Zend_Search_Lucene_Search_Query_Phrase();
    foreach ($tokens as $token) {
        // Position increments are accumulated to place each term within the phrase
        $position += $token->getPositionIncrement();
        $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
        $query->addTerm($term, $position);
    }
    // Slop is the same for every token, so it is applied once after the loop
    $query->setSlop($this->getSlop());
    // Propagate the boost, as the term and boolean branches above already do
    $query->setBoost($this->getBoost());

    $this->_matches = $query->getQueryTerms();
    return $query;
}
|
||||
|
||||
/**
 * Query specific matches highlighting
 *
 * The phrase is tokenized with the default analyzer and the resulting term texts are
 * handed to the highlighter: a single string for one token, an array for several.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    // Fields detection is skipped: all fields are expected to be presented in the HTML body
    // and are not differentiated.
    // Exact term matching recognition is skipped: keyword fields highlighting is not supported.
    // Wildcard queries recognition is skipped: supported wildcards are removed by text analyzer.

    // tokenize phrase using current analyzer and process it as a phrase query
    $analyzer   = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
    $tokens     = $analyzer->tokenize($this->_phrase, $this->_phraseEncoding);
    $tokenCount = count($tokens);

    if ($tokenCount == 0) {
        // Nothing to highlight
        return;
    }

    if ($tokenCount == 1) {
        // A lone token is passed as a plain string
        $highlighter->highlight($tokens[0]->getTermText());
        return;
    }

    // Non-trivial phrase query: collect every token text and highlight them together
    $termTexts = array();
    foreach ($tokens as $phraseToken) {
        $termTexts[] = $phraseToken->getTermText();
    }

    $highlighter->highlight($termTexts);
}
|
||||
|
||||
/**
 * Print a query
 *
 * Produces [field:]"phrase"[~slop][^boost]; the slop is printed only when it is
 * non-zero and the boost only when it differs from 1.
 *
 * @return string
 */
public function __toString()
{
    // It's used only for query visualisation, so we don't care about characters escaping
    $fieldPrefix = ($this->_field !== null) ? $this->_field . ':' : '';

    $text = $fieldPrefix . '"' . $this->_phrase . '"';

    if ($this->_slop != 0) {
        $text .= '~' . $this->_slop;
    }

    if ($this->getBoost() != 1) {
        $text .= '^' . round($this->getBoost(), 4);
    }

    return $text;
}
|
||||
}
|
334
libs/Zend/Search/Lucene/Search/Query/Preprocessing/Term.php
Normal file
334
libs/Zend/Search/Lucene/Search/Query/Preprocessing/Term.php
Normal file
@ -0,0 +1,334 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Insignificant */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Empty */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
|
||||
/**
 * It's an internal class intended to finalize query processing of a single
 * term lexeme after query parsing.
 * This type of query is not actually involved into query execution: rewrite()
 * replaces it with a primitive query.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @internal
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_Preprocessing_Term extends Zend_Search_Lucene_Search_Query_Preprocessing
{
    /**
     * word (query parser lexeme) to find.
     *
     * @var string
     */
    private $_word;

    /**
     * Word encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
     *
     * @var string
     */
    private $_encoding;


    /**
     * Field name (null means "search through default/all fields").
     *
     * @var string
     */
    private $_field;

    /**
     * Class constructor.  Create a new preprocessing object for a term query.
     *
     * @param string $word       Non-tokenized word (query parser lexeme) to search.
     * @param string $encoding   Word encoding.
     * @param string $fieldName  Field name.
     */
    public function __construct($word, $encoding, $fieldName)
    {
        $this->_word     = $word;
        $this->_encoding = $encoding;
        $this->_field    = $fieldName;
    }

    /**
     * Build the analyzed wildcard pattern for the word, if it contains wildcards.
     *
     * The word is split on '*' and '?'. Every fragment between wildcards is
     * passed through the default analyzer and the wildcard characters are put
     * back between the analyzed fragments.
     *
     * @return string|null|false  Pattern text when the word contains wildcards,
     *                            null when it contains none,
     *                            false when some fragment is tokenized into more than one token.
     */
    private function _buildWildcardPattern()
    {
        /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
        if (@preg_match('/\pL/u', 'a') == 1) {
            // PCRE unicode support is available: work with UTF-8 representation of the word
            $word                = iconv($this->_encoding, 'UTF-8', $this->_word);
            $wildcardsPattern    = '/[*?]/u';
            $subPatternsEncoding = 'UTF-8';
        } else {
            $word                = $this->_word;
            $wildcardsPattern    = '/[*?]/';
            $subPatternsEncoding = $this->_encoding;
        }

        $subPatterns = preg_split($wildcardsPattern, $word, -1, PREG_SPLIT_OFFSET_CAPTURE);

        if (count($subPatterns) <= 1) {
            // No wildcard characters in the word
            return null;
        }

        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();

        $pattern = '';

        foreach ($subPatterns as $id => $subPattern) {
            // Append corresponding wildcard character to the pattern before each sub-pattern (except first).
            // $subPattern[1] is the byte offset of the fragment, so the wildcard is the byte just before it.
            if ($id != 0) {
                $pattern .= $word[ $subPattern[1] - 1 ];
            }

            // Check if each subpattern is a single word in terms of current analyzer
            $tokens = $analyzer->tokenize($subPattern[0], $subPatternsEncoding);
            if (count($tokens) > 1) {
                return false;
            }
            foreach ($tokens as $token) {
                $pattern .= $token->getTermText();
            }
        }

        return $pattern;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException  when a wildcard word splits into multiple-word fragments
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        if ($this->_field === null) {
            // No field specified: rewrite the word against each search field and merge resulting terms
            require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
            $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
            $query->setBoost($this->getBoost());

            $hasInsignificantSubqueries = false;

            require_once 'Zend/Search/Lucene.php';
            if (Zend_Search_Lucene::getDefaultSearchField() === null) {
                $searchFields = $index->getFieldNames(true);
            } else {
                $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
            }

            foreach ($searchFields as $fieldName) {
                $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Term($this->_word,
                                                                                   $this->_encoding,
                                                                                   $fieldName);
                $rewrittenSubquery = $subquery->rewrite($index);
                foreach ($rewrittenSubquery->getQueryTerms() as $term) {
                    $query->addTerm($term);
                }

                if ($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                    $hasInsignificantSubqueries = true;
                }
            }

            if (count($query->getTerms()) == 0) {
                $this->_matches = array();
                if ($hasInsignificantSubqueries) {
                    return new Zend_Search_Lucene_Search_Query_Insignificant();
                } else {
                    return new Zend_Search_Lucene_Search_Query_Empty();
                }
            }

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // -------------------------------------
        // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
        // encoding is not used since we expect binary matching
        $term = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
        if ($index->hasTerm($term)) {
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }


        // -------------------------------------
        // Recognize wildcard queries
        $pattern = $this->_buildWildcardPattern();

        if ($pattern === false) {
            require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
            throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');
        }

        if ($pattern !== null) {
            // Wildcard query is recognized
            $term  = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
            $query = new Zend_Search_Lucene_Search_Query_Wildcard($term);
            $query->setBoost($this->getBoost());

            // Get rewritten query. Important! It also fills terms matching container.
            $rewrittenQuery = $query->rewrite($index);
            $this->_matches = $query->getQueryTerms();

            return $rewrittenQuery;
        }


        // -------------------------------------
        // Recognize one-term multi-term and "insignificant" queries
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

        if (count($tokens) == 0) {
            $this->_matches = array();
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if (count($tokens) == 1) {
            $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        //It's not insignificant or one term query
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $query = new Zend_Search_Lucene_Search_Query_MultiTerm();

        /**
         * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
         * analyzer design features
         */
        foreach ($tokens as $token) {
            $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
            $query->addTerm($term, true); // all subterms are required
        }

        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    /**
     * Query specific matches highlighting
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        /** Skip fields detection. We don't need it, since we expect all fields presented in the HTML body and don't differentiate them */

        /** Skip exact term matching recognition, keyword fields highlighting is not supported */

        // -------------------------------------
        // Recognize wildcard queries
        $pattern = $this->_buildWildcardPattern();

        if ($pattern === false) {
            // Some wildcard fragment is a multiple-word one. Do nothing (nothing is highlighted)
            return;
        }

        if ($pattern !== null) {
            // Wildcard query is recognized: delegate highlighting to the wildcard query
            $term  = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
            $query = new Zend_Search_Lucene_Search_Query_Wildcard($term);

            $query->_highlightMatches($highlighter);
            return;
        }

        // -------------------------------------
        // Recognize one-term multi-term and "insignificant" queries
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

        if (count($tokens) == 0) {
            // Do nothing
            return;
        }

        if (count($tokens) == 1) {
            $highlighter->highlight($tokens[0]->getTermText());
            return;
        }

        //It's not insignificant or one term query
        $words = array();
        foreach ($tokens as $token) {
            $words[] = $token->getTermText();
        }
        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * @return string
     */
    public function __toString()
    {
        // It's used only for query visualisation, so we don't care about characters escaping
        if ($this->_field !== null) {
            $query = $this->_field . ':';
        } else {
            $query = '';
        }

        $query .= $this->_word;

        if ($this->getBoost() != 1) {
            $query .= '^' . round($this->getBoost(), 4);
        }

        return $query;
    }
}
|
@ -76,7 +76,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_matches;
|
||||
private $_matches = null;
|
||||
|
||||
|
||||
/**
|
||||
@ -90,9 +90,11 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
public function __construct($lowerTerm, $upperTerm, $inclusive)
|
||||
{
|
||||
if ($lowerTerm === null && $upperTerm === null) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('At least one term must be non-null');
|
||||
}
|
||||
if ($lowerTerm !== null && $upperTerm !== null && $lowerTerm->field != $upperTerm->field) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Both terms must be for the same field');
|
||||
}
|
||||
|
||||
@ -159,6 +161,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
$fields = array($this->_field);
|
||||
}
|
||||
|
||||
$maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
|
||||
foreach ($fields as $field) {
|
||||
$index->resetTermsStream();
|
||||
|
||||
@ -185,6 +188,12 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
$index->currentTerm()->field == $field &&
|
||||
$index->currentTerm()->text < $upperTerm->text) {
|
||||
$this->_matches[] = $index->currentTerm();
|
||||
|
||||
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
|
||||
}
|
||||
|
||||
$index->nextTerm();
|
||||
}
|
||||
|
||||
@ -196,6 +205,12 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
// Walk up to the end of field data
|
||||
while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
|
||||
$this->_matches[] = $index->currentTerm();
|
||||
|
||||
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
|
||||
}
|
||||
|
||||
$index->nextTerm();
|
||||
}
|
||||
}
|
||||
@ -226,6 +241,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function optimize(Zend_Search_Lucene_Interface $index)
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
@ -238,7 +254,8 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
public function getQueryTerms()
|
||||
{
|
||||
if ($this->_matches === null) {
|
||||
throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
|
||||
}
|
||||
|
||||
return $this->_matches;
|
||||
@ -253,6 +270,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function createWeight(Zend_Search_Lucene_Interface $reader)
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
@ -267,6 +285,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
@ -280,6 +299,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function matchedDocs()
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
@ -293,24 +313,44 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
*/
|
||||
public function score($docId, Zend_Search_Lucene_Interface $reader)
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
$words = array();
|
||||
|
||||
foreach ($this->_matches as $term) {
|
||||
$words[] = $term->text;
|
||||
$docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
|
||||
|
||||
$lowerTermText = ($this->_lowerTerm !== null)? $this->_lowerTerm->text : null;
|
||||
$upperTermText = ($this->_upperTerm !== null)? $this->_upperTerm->text : null;
|
||||
|
||||
if ($this->_inclusive) {
|
||||
foreach ($tokens as $token) {
|
||||
$termText = $token->getTermText();
|
||||
if (($lowerTermText == null || $lowerTermText <= $termText) &&
|
||||
($upperTermText == null || $termText <= $upperTermText)) {
|
||||
$words[] = $termText;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
foreach ($tokens as $token) {
|
||||
$termText = $token->getTermText();
|
||||
if (($lowerTermText == null || $lowerTermText < $termText) &&
|
||||
($upperTermText == null || $termText < $upperTermText)) {
|
||||
$words[] = $termText;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
|
||||
$highlighter->highlight($words);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -326,7 +366,8 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
|
||||
. (($this->_lowerTerm !== null)? $this->_lowerTerm->text : 'null')
|
||||
. ' TO '
|
||||
. (($this->_upperTerm !== null)? $this->_upperTerm->text : 'null')
|
||||
. (($this->_inclusive)? ']' : '}');
|
||||
. (($this->_inclusive)? ']' : '}')
|
||||
. (($this->getBoost() != 1)? '^' . round($this->getBoost(), 4) : '');
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -191,24 +191,13 @@ class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Que
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns query term
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @return array
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function getTerms()
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
return $this->_terms;
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
{
|
||||
$doc->highlight($this->_term->text, $this->_getHighlightColor($colorIndex));
|
||||
$highlighter->highlight($this->_term->text);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -219,7 +208,19 @@ class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Que
|
||||
public function __toString()
|
||||
{
|
||||
// It's used only for query visualisation, so we don't care about characters escaping
|
||||
return (($this->_term->field === null)? '':$this->_term->field . ':') . $this->_term->text;
|
||||
if ($this->_term->field !== null) {
|
||||
$query = $this->_term->field . ':';
|
||||
} else {
|
||||
$query = '';
|
||||
}
|
||||
|
||||
$query .= $this->_term->text;
|
||||
|
||||
if ($this->getBoost() != 1) {
|
||||
$query = $query . '^' . round($this->getBoost(), 4);
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,6 +59,13 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
*/
|
||||
private $_matches = null;
|
||||
|
||||
/**
|
||||
* Minimum term prefix length (number of minimum non-wildcard characters)
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private static $_minPrefixLength = 3;
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Query_Wildcard constructor.
|
||||
*
|
||||
@ -69,6 +76,26 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
$this->_pattern = $pattern;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get minimum prefix length (minimum number of non-wildcard characters) shared by all wildcard queries
|
||||
*
|
||||
* @return integer Current value of the static setting (initially 3)
|
||||
*/
|
||||
public static function getMinPrefixLength()
|
||||
{
|
||||
return self::$_minPrefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set minimum prefix length (minimum number of non-wildcard characters); static, so it applies to all wildcard queries
|
||||
*
|
||||
* @param integer $minPrefixLength New minimum; stored as given, without validation
|
||||
*/
|
||||
public static function setMinPrefixLength($minPrefixLength)
|
||||
{
|
||||
self::$_minPrefixLength = $minPrefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get terms prefix
|
||||
*
|
||||
@ -98,6 +125,7 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
*
|
||||
* @param Zend_Search_Lucene_Interface $index
|
||||
* @return Zend_Search_Lucene_Search_Query
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function rewrite(Zend_Search_Lucene_Interface $index)
|
||||
{
|
||||
@ -114,6 +142,10 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
$prefixLength = strlen($prefix);
|
||||
$matchExpression = '/^' . str_replace(array('\\?', '\\*'), array('.', '.*') , preg_quote($this->_pattern->text, '/')) . '$/';
|
||||
|
||||
if ($prefixLength < self::$_minPrefixLength) {
|
||||
throw new Zend_Search_Lucene_Exception('At least ' . self::$_minPrefixLength . ' non-wildcard terms are required.');
|
||||
}
|
||||
|
||||
/** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
|
||||
if (@preg_match('/\pL/u', 'a') == 1) {
|
||||
// PCRE unicode support is turned on
|
||||
@ -121,7 +153,7 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
$matchExpression .= 'u';
|
||||
}
|
||||
|
||||
|
||||
$maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
|
||||
foreach ($fields as $field) {
|
||||
$index->resetTermsStream();
|
||||
|
||||
@ -133,6 +165,10 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
substr($index->currentTerm()->text, 0, $prefixLength) == $prefix) {
|
||||
if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
|
||||
$this->_matches[] = $index->currentTerm();
|
||||
|
||||
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
|
||||
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
|
||||
}
|
||||
}
|
||||
|
||||
$index->nextTerm();
|
||||
@ -143,6 +179,10 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
|
||||
if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
|
||||
$this->_matches[] = $index->currentTerm();
|
||||
|
||||
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
|
||||
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
|
||||
}
|
||||
}
|
||||
|
||||
$index->nextTerm();
|
||||
@ -258,12 +298,11 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
}
|
||||
|
||||
/**
|
||||
* Highlight query terms
|
||||
* Query specific matches highlighting
|
||||
*
|
||||
* @param integer &$colorIndex
|
||||
* @param Zend_Search_Lucene_Document_Html $doc
|
||||
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
|
||||
*/
|
||||
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
|
||||
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
|
||||
{
|
||||
$words = array();
|
||||
|
||||
@ -274,14 +313,15 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
$matchExpression .= 'u';
|
||||
}
|
||||
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($doc->getFieldUtf8Value('body'), 'UTF-8');
|
||||
$docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
|
||||
foreach ($tokens as $token) {
|
||||
if (preg_match($matchExpression, $token->getTermText()) === 1) {
|
||||
$words[] = $token->getTermText();
|
||||
}
|
||||
}
|
||||
|
||||
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
|
||||
$highlighter->highlight($words);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -292,7 +332,19 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
|
||||
public function __toString()
|
||||
{
|
||||
// It's used only for query visualisation, so we don't care about characters escaping
|
||||
return (($this->_pattern->field === null)? '' : $this->_pattern->field . ':') . $this->_pattern->text;
|
||||
if ($this->_pattern->field !== null) {
|
||||
$query = $this->_pattern->field . ':';
|
||||
} else {
|
||||
$query = '';
|
||||
}
|
||||
|
||||
$query .= $this->_pattern->text;
|
||||
|
||||
if ($this->getBoost() != 1) {
|
||||
$query = $query . '^' . round($this->getBoost(), 4);
|
||||
}
|
||||
|
||||
return $query;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -19,13 +19,9 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry/Term.php';
|
||||
|
||||
@ -35,11 +31,6 @@ require_once 'Zend/Search/Lucene/Search/QueryEntry/Phrase.php';
|
||||
/** Zend_Search_Lucene_Search_QueryEntry_Subquery */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry/Subquery.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserException */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
|
@ -19,24 +19,15 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserException */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -111,32 +102,11 @@ class Zend_Search_Lucene_Search_QueryEntry_Phrase extends Zend_Search_Lucene_Sea
|
||||
*/
|
||||
public function getQuery($encoding)
|
||||
{
|
||||
if (strpos($this->_phrase, '?') !== false || strpos($this->_phrase, '*') !== false) {
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
|
||||
}
|
||||
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $encoding);
|
||||
|
||||
if (count($tokens) == 0) {
|
||||
return new Zend_Search_Lucene_Search_Query_Insignificant();
|
||||
}
|
||||
|
||||
if (count($tokens) == 1) {
|
||||
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
|
||||
$query = new Zend_Search_Lucene_Search_Query_Term($term);
|
||||
$query->setBoost($this->_boost);
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
//It's not empty or one term query
|
||||
$position = -1;
|
||||
$query = new Zend_Search_Lucene_Search_Query_Phrase();
|
||||
foreach ($tokens as $token) {
|
||||
$position += $token->getPositionIncrement();
|
||||
$term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
|
||||
$query->addTerm($term, $position);
|
||||
}
|
||||
$query = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
|
||||
$encoding,
|
||||
($this->_field !== null)?
|
||||
iconv($encoding, 'UTF-8', $this->_field) :
|
||||
null);
|
||||
|
||||
if ($this->_proximityQuery) {
|
||||
$query->setSlop($this->_wordsDistance);
|
||||
|
@ -19,20 +19,12 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserException */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -67,6 +59,7 @@ class Zend_Search_Lucene_Search_QueryEntry_Subquery extends Zend_Search_Lucene_S
|
||||
*/
|
||||
public function processFuzzyProximityModifier($parameter = null)
|
||||
{
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' sign must follow term or phrase');
|
||||
}
|
||||
|
||||
|
@ -19,24 +19,15 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserException */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -113,91 +104,26 @@ class Zend_Search_Lucene_Search_QueryEntry_Term extends Zend_Search_Lucene_Searc
|
||||
*/
|
||||
public function getQuery($encoding)
|
||||
{
|
||||
if (strpos($this->_term, '?') !== false || strpos($this->_term, '*') !== false) {
|
||||
if ($this->_fuzzyQuery) {
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is not supported for terms with wildcards.');
|
||||
}
|
||||
|
||||
$pattern = '';
|
||||
|
||||
$subPatterns = explode('*', $this->_term);
|
||||
|
||||
$astericFirstPass = true;
|
||||
foreach ($subPatterns as $subPattern) {
|
||||
if (!$astericFirstPass) {
|
||||
$pattern .= '*';
|
||||
} else {
|
||||
$astericFirstPass = false;
|
||||
}
|
||||
|
||||
$subPatternsL2 = explode('?', $subPattern);
|
||||
|
||||
$qMarkFirstPass = true;
|
||||
foreach ($subPatternsL2 as $subPatternL2) {
|
||||
if (!$qMarkFirstPass) {
|
||||
$pattern .= '?';
|
||||
} else {
|
||||
$qMarkFirstPass = false;
|
||||
}
|
||||
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($subPatternL2, $encoding);
|
||||
if (count($tokens) > 1) {
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');
|
||||
}
|
||||
|
||||
foreach ($tokens as $token) {
|
||||
$pattern .= $token->getTermText();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
$term = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
|
||||
$query = new Zend_Search_Lucene_Search_Query_Wildcard($term);
|
||||
if ($this->_fuzzyQuery) {
|
||||
$query = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy($this->_term,
|
||||
$encoding,
|
||||
($this->_field !== null)?
|
||||
iconv($encoding, 'UTF-8', $this->_field) :
|
||||
null,
|
||||
$this->_similarity
|
||||
);
|
||||
$query->setBoost($this->_boost);
|
||||
|
||||
return $query;
|
||||
}
|
||||
}
|
||||
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_term, $encoding);
|
||||
|
||||
if (count($tokens) == 0) {
|
||||
return new Zend_Search_Lucene_Search_Query_Insignificant();
|
||||
}
|
||||
|
||||
if (count($tokens) == 1 && !$this->_fuzzyQuery) {
|
||||
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
|
||||
$query = new Zend_Search_Lucene_Search_Query_Term($term);
|
||||
$query->setBoost($this->_boost);
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
if (count($tokens) == 1 && $this->_fuzzyQuery) {
|
||||
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
|
||||
$query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_similarity);
|
||||
$query->setBoost($this->_boost);
|
||||
|
||||
return $query;
|
||||
}
|
||||
|
||||
if ($this->_fuzzyQuery) {
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
|
||||
}
|
||||
|
||||
//It's not empty or one term query
|
||||
$query = new Zend_Search_Lucene_Search_Query_MultiTerm();
|
||||
|
||||
/**
|
||||
* @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
|
||||
* analyzer design features
|
||||
*/
|
||||
foreach ($tokens as $token) {
|
||||
$term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
|
||||
$query->addTerm($term, true); // all subterms are required
|
||||
}
|
||||
|
||||
$query->setBoost($this->_boost);
|
||||
|
||||
$query = new Zend_Search_Lucene_Search_Query_Preprocessing_Term($this->_term,
|
||||
$encoding,
|
||||
($this->_field !== null)?
|
||||
iconv($encoding, 'UTF-8', $this->_field) :
|
||||
null
|
||||
);
|
||||
$query->setBoost($this->_boost);
|
||||
return $query;
|
||||
}
|
||||
}
|
||||
|
@ -19,20 +19,12 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_FSM */
|
||||
require_once 'Zend/Search/Lucene/FSM.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParser */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserException */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -364,6 +356,7 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
|
||||
$this->process(self::IN_WHITE_SPACE);
|
||||
|
||||
if ($this->getState() != self::ST_WHITE_SPACE) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
|
||||
}
|
||||
|
||||
@ -397,6 +390,7 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
|
||||
// check,
|
||||
if ($this->_queryStringPosition == count($this->_queryString) ||
|
||||
$this->_queryString[$this->_queryStringPosition] != $lexeme) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
|
||||
}
|
||||
|
||||
@ -413,6 +407,7 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
|
||||
if ($token->type == Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
|
||||
$token = array_pop($this->_lexemes);
|
||||
if ($token === null || $token->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
|
||||
}
|
||||
|
||||
@ -497,14 +492,17 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
|
||||
*********************************************************************/
|
||||
/**
 * Raises a query parser exception for a lexeme modifier character that is
 * followed by something other than a number, white space or a query syntax
 * element.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function lexModifierErrException()
{
    // The exception class is loaded lazily, only on this error path.
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    // _positionMsg() is defined elsewhere in the class; presumably it reports
    // the current position within the query string -- confirm.
    $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. ' . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
|
||||
/**
 * Raises a query parser exception for an unescaped quote found inside a lexeme.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function quoteWithinLexemeErrException()
{
    // The exception class is loaded lazily, only on this error path.
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    // _positionMsg() is defined elsewhere in the class; presumably it reports
    // the current position within the query string -- confirm.
    $message = 'Quote within lexeme must be escaped by \'\\\' char. ' . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
|
||||
/**
 * Raises a query parser exception for a malformed number in the query.
 *
 * The message text now ends with '. ' before the position message, matching
 * the other error actions of this class; previously the two sentences were
 * concatenated without a separating space.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException always
 */
public function wrongNumberErrException()
{
    // The exception class is loaded lazily, only on this error path.
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    // _positionMsg() is defined elsewhere in the class; presumably it reports
    // the current position within the query string -- confirm.
    $message = 'Wrong number syntax. ' . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
|
||||
}
|
||||
|
@ -19,7 +19,6 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
@ -32,8 +31,14 @@ require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
|
||||
/** Zend_Search_Lucene_Search_Query_Boolean */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Wildcard */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
|
||||
@ -50,24 +55,15 @@ require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
|
||||
/** Zend_Search_Lucene_Search_Query_Insignificant */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryLexer */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryLexer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserContext */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserContext.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_FSM */
|
||||
require_once 'Zend/Search/Lucene/FSM.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserException */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -153,21 +149,21 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
|
||||
/**
|
||||
* Defines query parsing mode.
|
||||
*
|
||||
*
|
||||
* If this option is turned on, then the query parser suppresses query parser exceptions
|
||||
* and constructs multi-term query using all words from a query.
|
||||
*
|
||||
*
|
||||
* That helps to avoid exceptions caused by queries, which don't conform to query language,
|
||||
* but limits possibilities to check, that query entered by user has some inconsistencies.
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* Default is true.
|
||||
*
|
||||
*
|
||||
* Use {@link Zend_Search_Lucene::suppressQueryParsingExceptions()},
|
||||
* {@link Zend_Search_Lucene::dontSuppressQueryParsingExceptions()} and
|
||||
* {@link Zend_Search_Lucene::checkQueryParsingExceptionsSuppressMode()} to operate
|
||||
* with this setting.
|
||||
*
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_suppressQueryParsingExceptions = true;
|
||||
@ -290,7 +286,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
|
||||
/**
|
||||
* Get query parser instance
|
||||
*
|
||||
*
|
||||
* @return Zend_Search_Lucene_Search_QueryParser
|
||||
*/
|
||||
private static function _getInstance()
|
||||
@ -363,9 +359,9 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
{
|
||||
return self::_getInstance()->_suppressQueryParsingExceptions;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Parses a query string
|
||||
*
|
||||
@ -377,42 +373,43 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
public static function parse($strQuery, $encoding = null)
|
||||
{
|
||||
self::_getInstance();
|
||||
|
||||
// Reset FSM if previous parse operation didn't return it into a correct state
|
||||
|
||||
// Reset FSM if previous parse operation didn't return it into a correct state
|
||||
self::$_instance->reset();
|
||||
|
||||
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
try {
|
||||
self::$_instance->_encoding = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
|
||||
self::$_instance->_lastToken = null;
|
||||
self::$_instance->_context = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
|
||||
self::$_instance->_contextStack = array();
|
||||
self::$_instance->_tokens = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
|
||||
|
||||
|
||||
// Empty query
|
||||
if (count(self::$_instance->_tokens) == 0) {
|
||||
return new Zend_Search_Lucene_Search_Query_Insignificant();
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
foreach (self::$_instance->_tokens as $token) {
|
||||
try {
|
||||
self::$_instance->_currentToken = $token;
|
||||
self::$_instance->process($token->type);
|
||||
|
||||
|
||||
self::$_instance->_lastToken = $token;
|
||||
} catch (Exception $e) {
|
||||
if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
|
||||
}
|
||||
|
||||
|
||||
throw $e;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (count(self::$_instance->_contextStack) != 0) {
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
|
||||
}
|
||||
|
||||
|
||||
return self::$_instance->_context->getQuery();
|
||||
} catch (Zend_Search_Lucene_Search_QueryParserException $e) {
|
||||
if (self::$_instance->_suppressQueryParsingExceptions) {
|
||||
@ -421,12 +418,12 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
$query = new Zend_Search_Lucene_Search_Query_MultiTerm();
|
||||
$termsSign = (self::$_instance->_defaultOperator == self::B_AND) ? true /* required term */ :
|
||||
null /* optional term */;
|
||||
|
||||
|
||||
foreach ($queryTokens as $token) {
|
||||
$query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText()), $termsSign);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
return $query;
|
||||
} else {
|
||||
throw $e;
|
||||
@ -434,7 +431,6 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*********************************************************************
|
||||
* Actions implementation
|
||||
*
|
||||
@ -492,6 +488,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
public function processModifierParameter()
|
||||
{
|
||||
if ($this->_lastToken === null) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
|
||||
}
|
||||
|
||||
@ -506,6 +503,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
|
||||
default:
|
||||
// It's not a user input exception
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
|
||||
}
|
||||
}
|
||||
@ -526,6 +524,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
public function subqueryEnd()
|
||||
{
|
||||
if (count($this->_contextStack) == 0) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing. Char position ' . $this->_currentToken->position . '.' );
|
||||
}
|
||||
|
||||
@ -560,6 +559,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
{
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
|
||||
if (count($tokens) > 1) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
|
||||
} else if (count($tokens) == 1) {
|
||||
$from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
|
||||
@ -569,6 +569,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
|
||||
if (count($tokens) > 1) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
|
||||
} else if (count($tokens) == 1) {
|
||||
$to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
|
||||
@ -577,6 +578,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
}
|
||||
|
||||
if ($from === null && $to === null) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
|
||||
}
|
||||
|
||||
@ -602,6 +604,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
{
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
|
||||
if (count($tokens) > 1) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
|
||||
} else if (count($tokens) == 1) {
|
||||
$from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
|
||||
@ -611,6 +614,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
|
||||
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
|
||||
if (count($tokens) > 1) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
|
||||
} else if (count($tokens) == 1) {
|
||||
$to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
|
||||
@ -619,6 +623,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
|
||||
}
|
||||
|
||||
if ($from === null && $to === null) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,6 @@
|
||||
/** Zend_Search_Lucene_FSM */
|
||||
require_once 'Zend/Search/Lucene/FSM.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
@ -41,19 +40,12 @@ require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
|
||||
/** Zend_Search_Lucene_Search_Query_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParserException */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
|
||||
require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryEntry */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -171,6 +163,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
public function setNextEntrySign($sign)
|
||||
{
|
||||
if ($this->_mode === self::GM_BOOLEAN) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
|
||||
}
|
||||
|
||||
@ -181,6 +174,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
} else if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
|
||||
$this->_nextEntrySign = false;
|
||||
} else {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
|
||||
}
|
||||
}
|
||||
@ -213,6 +207,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
{
|
||||
// Check that the modifier comes directly after a word or phrase
|
||||
if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
|
||||
}
|
||||
|
||||
@ -220,6 +215,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
|
||||
if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
|
||||
// there are no entries or last entry is boolean operator
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
|
||||
}
|
||||
|
||||
@ -237,6 +233,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
{
|
||||
// Check that the modifier comes directly after a word or phrase
|
||||
if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
|
||||
}
|
||||
|
||||
@ -244,6 +241,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
|
||||
if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
|
||||
// there are no entries or last entry is boolean operator
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
|
||||
}
|
||||
|
||||
@ -260,6 +258,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
public function addLogicalOperator($operator)
|
||||
{
|
||||
if ($this->_mode === self::GM_SIGNS) {
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
|
||||
}
|
||||
|
||||
@ -316,6 +315,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
|
||||
$expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
|
||||
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
try {
|
||||
foreach ($this->_entries as $entry) {
|
||||
if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
|
||||
@ -345,6 +345,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
|
||||
// throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
|
||||
// $e->getMessage() . '\'.' );
|
||||
// It's query syntax error message and it should be user friendly. So FSM message is omitted
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
|
||||
throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
|
||||
}
|
||||
|
||||
|
@ -19,11 +19,6 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -213,6 +208,7 @@ class Zend_Search_Lucene_Search_QueryToken
|
||||
break;
|
||||
|
||||
default:
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
|
||||
}
|
||||
break;
|
||||
@ -221,8 +217,8 @@ class Zend_Search_Lucene_Search_QueryToken
|
||||
$this->type = self::TT_NUMBER;
|
||||
|
||||
default:
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -94,7 +94,7 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
|
||||
|
||||
public static function mkdirs($dir, $mode = 0777, $recursive = true)
|
||||
{
|
||||
if (is_null($dir) || $dir === '') {
|
||||
if (($dir === null) || $dir === '') {
|
||||
return false;
|
||||
}
|
||||
if (is_dir($dir) || $dir === '/') {
|
||||
|
@ -19,12 +19,6 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -184,18 +178,18 @@ abstract class Zend_Search_Lucene_Storage_File
|
||||
* Returns a long integer from the current position in the file
|
||||
* and advances the file pointer.
|
||||
*
|
||||
* @return integer
|
||||
* @return integer|float
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function readLong()
|
||||
{
|
||||
$str = $this->_fread(8);
|
||||
|
||||
/**
|
||||
* Check, that we work in 64-bit mode.
|
||||
* fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
|
||||
*/
|
||||
if (PHP_INT_SIZE > 4) {
|
||||
$str = $this->_fread(8);
|
||||
|
||||
return ord($str[0]) << 56 |
|
||||
ord($str[1]) << 48 |
|
||||
ord($str[2]) << 40 |
|
||||
@ -205,18 +199,7 @@ abstract class Zend_Search_Lucene_Storage_File
|
||||
ord($str[6]) << 8 |
|
||||
ord($str[7]);
|
||||
} else {
|
||||
if ((ord($str[0]) != 0) ||
|
||||
(ord($str[1]) != 0) ||
|
||||
(ord($str[2]) != 0) ||
|
||||
(ord($str[3]) != 0) ||
|
||||
((ord($str[0]) & 0x80) != 0)) {
|
||||
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
|
||||
}
|
||||
|
||||
return ord($str[4]) << 24 |
|
||||
ord($str[5]) << 16 |
|
||||
ord($str[6]) << 8 |
|
||||
ord($str[7]);
|
||||
return $this->readLong32Bit();
|
||||
}
|
||||
}
|
||||
|
||||
@ -243,19 +226,80 @@ abstract class Zend_Search_Lucene_Storage_File
|
||||
chr($value>>8 & 0xFF) .
|
||||
chr($value & 0xFF), 8 );
|
||||
} else {
|
||||
if ($value > 0x7FFFFFFF) {
|
||||
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
|
||||
}
|
||||
|
||||
$this->_fwrite( "\x00\x00\x00\x00" .
|
||||
chr($value>>24 & 0xFF) .
|
||||
chr($value>>16 & 0xFF) .
|
||||
chr($value>>8 & 0xFF) .
|
||||
chr($value & 0xFF), 8 );
|
||||
$this->writeLong32Bit($value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reads a long value as two consecutive readInt() words (high word first)
 * and combines them without relying on 64-bit integer arithmetic.
 *
 * Results by case:
 *  - high word has its top bit set: accepted only when the high word equals
 *    (int)0xFFFFFFFF and the low word also has its top bit set; the low word
 *    is then returned unchanged. Any other combination throws.
 *  - high word is 0: the low word is returned (converted to a float first if
 *    readInt() reported it as negative).
 *  - otherwise: high * 2^32 + low is returned as a float.
 *
 * NOTE(review): presumably readInt() yields a signed 32-bit integer and
 * advances the file pointer by four bytes per call -- confirm.
 *
 * @return integer|float
 * @throws Zend_Search_Lucene_Exception
 */
public function readLong32Bit()
{
    $upperWord = $this->readInt();
    $lowerWord = $this->readInt();

    if ($upperWord & (int)0x80000000) {
        // Top bit of the high word is set, so the stored value is negative.
        // It is representable only if the low word alone carries the value.
        $fitsInLowerWord = ($upperWord == (int)0xFFFFFFFF) && ($lowerWord & (int)0x80000000);

        if (!$fitsInLowerWord) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        return $lowerWord;
    }

    if ($lowerWord < 0) {
        // The low word is larger than 0x7FFFFFFF: keep its lower 31 bits and
        // add the top bit back as a float.
        $lowerWord = ($lowerWord & 0x7FFFFFFF) + (float)0x80000000;
    }

    // With an empty high word the low word is the whole value; otherwise the
    // high word is scaled by 0x00000001 00000000 in float arithmetic.
    return ($upperWord == 0)
        ? $lowerWord
        : $upperWord*(float)0x100000000 + $lowerWord;
}
|
||||
|
||||
|
||||
/**
 * Writes a long value as two writeInt() words, high word first
 * (implementation for 32-bit platforms).
 *
 * Negative values are written with a high word of (int)0xFFFFFFFF and the
 * value itself as the low word. Non-negative values are split into
 * value / 2^32 (high word) and the remainder (low word) using float
 * arithmetic.
 *
 * @param integer|float $value
 * @throws Zend_Search_Lucene_Exception  if $value is lower than (int)0x80000000
 */
public function writeLong32Bit($value)
{
    if ($value < (int)0x80000000) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
    }

    if ($value < 0) {
        // Negative value: the high word is all ones, the low word is the value.
        $this->writeInt((int)0xFFFFFFFF);
        $this->writeInt((int)$value);

        return;
    }

    // Split around 0x00000001 00000000 using float arithmetic.
    $upperWord = (int)($value/(float)0x100000000);
    $lowerWord = $value - $upperWord*(float)0x100000000;

    if ($lowerWord > 0x7FFFFFFF) {
        // Highest bit of the low word is set. Translate it to the
        // corresponding negative integer value.
        $lowerWord -= 0x80000000;
        $lowerWord |= 0x80000000;
    }

    $this->writeInt($upperWord);
    $this->writeInt($lowerWord);
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a variable-length integer from the current
|
||||
@ -402,6 +446,7 @@ abstract class Zend_Search_Lucene_Storage_File
|
||||
}
|
||||
|
||||
if ($chars < 0) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
|
||||
}
|
||||
|
||||
|
@ -19,14 +19,9 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File */
|
||||
require_once 'Zend/Search/Lucene/Storage/File.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -56,6 +51,7 @@ class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Stor
|
||||
|
||||
if (strpos($mode, 'w') === false && !is_readable($filename)) {
|
||||
// opening for reading non-readable file
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('File \'' . $filename . '\' is not readable.');
|
||||
}
|
||||
|
||||
@ -66,6 +62,7 @@ class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Stor
|
||||
|
||||
if ($this->_fileHandle === false) {
|
||||
ini_set('track_errors', $trackErrors);
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception($php_errormsg);
|
||||
}
|
||||
|
||||
|
@ -19,14 +19,9 @@
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File */
|
||||
require_once 'Zend/Search/Lucene/Storage/File.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
@ -294,14 +289,14 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
|
||||
*/
|
||||
public function readLong()
|
||||
{
|
||||
$str = substr($this->_data, $this->_position, 8);
|
||||
$this->_position += 8;
|
||||
|
||||
/**
|
||||
* Check, that we work in 64-bit mode.
|
||||
* fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
|
||||
*/
|
||||
if (PHP_INT_SIZE > 4) {
|
||||
$str = substr($this->_data, $this->_position, 8);
|
||||
$this->_position += 8;
|
||||
|
||||
return ord($str[0]) << 56 |
|
||||
ord($str[1]) << 48 |
|
||||
ord($str[2]) << 40 |
|
||||
@ -311,18 +306,7 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
|
||||
ord($str[6]) << 8 |
|
||||
ord($str[7]);
|
||||
} else {
|
||||
if ((ord($str[0]) != 0) ||
|
||||
(ord($str[1]) != 0) ||
|
||||
(ord($str[2]) != 0) ||
|
||||
(ord($str[3]) != 0) ||
|
||||
((ord($str[0]) & 0x80) != 0)) {
|
||||
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
|
||||
}
|
||||
|
||||
return ord($str[4]) << 24 |
|
||||
ord($str[5]) << 16 |
|
||||
ord($str[6]) << 8 |
|
||||
ord($str[7]);
|
||||
return $this->readLong32Bit();
|
||||
}
|
||||
}
|
||||
|
||||
@ -352,21 +336,81 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
|
||||
chr($value>>8 & 0xFF) .
|
||||
chr($value & 0xFF);
|
||||
} else {
|
||||
if ($value > 0x7FFFFFFF) {
|
||||
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
|
||||
}
|
||||
|
||||
$this->_data .= chr(0) . chr(0) . chr(0) . chr(0) .
|
||||
chr($value>>24 & 0xFF) .
|
||||
chr($value>>16 & 0xFF) .
|
||||
chr($value>>8 & 0xFF) .
|
||||
chr($value & 0xFF);
|
||||
$this->writeLong32Bit($value);
|
||||
}
|
||||
|
||||
$this->_position = strlen($this->_data);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a long integer from the current position in the file,
|
||||
* advances the file pointer and returns it as a float where needed (for 32-bit platforms).
|
||||
*
|
||||
* @return integer|float
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function readLong32Bit()
{
    // The long is obtained from two consecutive readInt() calls:
    // the first yields the upper word, the second the lower word.
    $upperWord = $this->readInt();
    $lowerWord = $this->readInt();

    if ($upperWord & (int)0x80000000) {
        // Highest bit of the upper word is set, so the stored value is negative.
        // The only accepted negative form has an upper word equal to (int)0xFFFFFFFF
        // and a lower word that also has its highest bit set; anything else is rejected.
        if ($upperWord != (int)0xFFFFFFFF  ||  !($lowerWord & (int)0x80000000)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        // The lower word alone already is the negative result.
        return $lowerWord;
    }

    if ($lowerWord < 0) {
        // The lower word is larger than 0x7FFF FFFF when read as unsigned:
        // strip its top bit and add that bit back as a float.
        $lowerWord = ($lowerWord & 0x7FFFFFFF) + (float)0x80000000;
    }

    if ($upperWord == 0) {
        // No upper word contribution: hand back the lower word unchanged
        // (an integer unless it was converted to float just above).
        return $lowerWord;
    }

    // Combine both words as a float: upper * 0x00000001 00000000 + lower.
    return $upperWord*(float)0x100000000 + $lowerWord;
}
|
||||
|
||||
|
||||
/**
|
||||
* Writes long integer to the end of file (32-bit platforms implementation)
|
||||
*
|
||||
* @param integer|float $value
|
||||
* @throws Zend_Search_Lucene_Exception
|
||||
*/
|
||||
public function writeLong32Bit($value)
{
    // Reject anything below (int)0x80000000.
    // NOTE(review): on a 32-bit build this constant is presumably the most negative
    // integer, which is what the exception message describes - confirm.
    if ($value < (int)0x80000000) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
    }

    if ($value < 0) {
        // Negative value: the upper word is (int)0xFFFFFFFF and the
        // lower word is the value itself cast to integer.
        $this->writeInt((int)0xFFFFFFFF);
        $this->writeInt((int)$value);

        return;
    }

    // Non-negative value: split it around 0x00000001 00000000.
    $upperWord = (int)($value/(float)0x100000000);
    $lowerWord = $value - $upperWord*(float)0x100000000;

    if ($lowerWord > 0x7FFFFFFF) {
        // Highest bit of the lower word is set. Translate it to the corresponding
        // integer bit pattern: first remove the top bit arithmetically, then put it
        // back with a bitwise OR. The order of these two statements must be kept.
        $lowerWord -= 0x80000000;
        $lowerWord |= 0x80000000;
    }

    $this->writeInt($upperWord);
    $this->writeInt($lowerWord);
}
|
||||
|
||||
/**
|
||||
* Returns a variable-length integer from the current
|
||||
@ -523,6 +567,7 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
|
||||
}
|
||||
|
||||
if ($chars < 0) {
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
|
||||
}
|
||||
|
||||
|
175
libs/Zend/Search/Lucene/TermStreamsPriorityQueue.php
Normal file
175
libs/Zend/Search/Lucene/TermStreamsPriorityQueue.php
Normal file
@ -0,0 +1,175 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsStream_Interface */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermsPriorityQueue */
|
||||
require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_TermStreamsPriorityQueue implements Zend_Search_Lucene_Index_TermsStream_Interface
{
    /**
     * Array of term streams (Zend_Search_Lucene_Index_TermsStream_Interface objects)
     *
     * @var array
     */
    protected $_termStreams;

    /**
     * Terms stream queue.
     *
     * Null before the first reset and after closeTermsStream() has been called;
     * resetTermsStream() creates a fresh queue.
     *
     * @var Zend_Search_Lucene_Index_TermsPriorityQueue|null
     */
    protected $_termsStreamQueue = null;

    /**
     * Last Term in a terms stream (null when the stream is exhausted or closed)
     *
     * @var Zend_Search_Lucene_Index_Term|null
     */
    protected $_lastTerm = null;


    /**
     * Object constructor
     *
     * Stores the given streams and immediately positions this stream
     * on its first term by calling resetTermsStream().
     *
     * @param array $termStreams  array of term streams (Zend_Search_Lucene_Index_TermsStream_Interface objects)
     */
    public function __construct(array $termStreams)
    {
        $this->_termStreams = $termStreams;

        $this->resetTermsStream();
    }

    /**
     * Reset terms stream.
     *
     * Resets every underlying stream, rebuilds the queue from the streams
     * that have a current term and moves to the first term.
     */
    public function resetTermsStream()
    {
        $this->_termsStreamQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

        foreach ($this->_termStreams as $termStream) {
            $termStream->resetTermsStream();

            // Skip "empty" containers
            if ($termStream->currentTerm() !== null) {
                $this->_termsStreamQueue->put($termStream);
            }
        }

        $this->nextTerm();
    }

    /**
     * Skip terms stream up to specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * Does nothing except clearing the current term when the stream has been
     * closed (there is no queue left to reposition).
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        if ($this->_termsStreamQueue === null) {
            // Stream was closed: calling a method on the null queue would be a fatal error.
            $this->_lastTerm = null;
            return;
        }

        // Drain the queue first: every stream is repositioned, so all of them
        // have to be re-inserted afterwards.
        $termStreams = array();

        while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
            $termStreams[] = $termStream;
        }

        foreach ($termStreams as $termStream) {
            $termStream->skipTo($prefix);

            // Streams without a current term after skipping are dropped
            if ($termStream->currentTerm() !== null) {
                $this->_termsStreamQueue->put($termStream);
            }
        }

        $this->nextTerm();
    }

    /**
     * Scans term streams and returns next term
     *
     * When several queued streams are positioned on a term with the same key,
     * the term is reported only once. Returns null at the end of the stream
     * or when the stream has been closed.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_termsStreamQueue === null) {
            // Stream was closed: behave as "end of stream" instead of failing on a null queue.
            $this->_lastTerm = null;
            return null;
        }

        while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
            // Remember the term before the stream is advanced
            $term = $termStream->currentTerm();

            // It is a new term unless the next queued stream points to the same key
            $isNewTerm = $this->_termsStreamQueue->top() === null ||
                         $this->_termsStreamQueue->top()->currentTerm()->key() != $term->key();

            if ($termStream->nextTerm() !== null) {
                // Put segment back into the priority queue
                $this->_termsStreamQueue->put($termStream);
            }

            if ($isNewTerm) {
                $this->_lastTerm = $term;

                return $this->_lastTerm;
            }
        }

        // End of stream
        $this->_lastTerm = null;

        return null;
    }

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end.
     * Closes every stream still held in the queue. Safe to call more than once.
     */
    public function closeTermsStream()
    {
        if ($this->_termsStreamQueue !== null) {
            while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
                $termStream->closeTermsStream();
            }
        }

        $this->_termsStreamQueue = null;
        $this->_lastTerm         = null;
    }
}
|
Reference in New Issue
Block a user