import v1.1.0_beta1 | 2009-08-21

This commit is contained in:
2019-07-17 22:16:19 +02:00
parent 2c1152f0d3
commit 8dee6b1a10
2306 changed files with 251360 additions and 23428 deletions

View File

@ -18,16 +18,21 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Document */
require_once 'Zend/Search/Lucene/Document.php';
/** Zend_Search_Lucene_Document_Html */
require_once 'Zend/Search/Lucene/Document/Html.php';
/** Zend_Search_Lucene_Document_Docx */
require_once 'Zend/Search/Lucene/Document/Docx.php';
/** Zend_Search_Lucene_Document_Pptx */
require_once 'Zend/Search/Lucene/Document/Pptx.php';
/** Zend_Search_Lucene_Document_Xlsx */
require_once 'Zend/Search/Lucene/Document/Xlsx.php';
/** Zend_Search_Lucene_Storage_Directory_Filesystem */
require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
@ -58,8 +63,11 @@ require_once 'Zend/Search/Lucene/Search/QueryHit.php';
/** Zend_Search_Lucene_Search_Similarity */
require_once 'Zend/Search/Lucene/Search/Similarity.php';
/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
require_once 'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
/** Zend_Search_Lucene_Index_TermsPriorityQueue */
require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
/** Zend_Search_Lucene_TermStreamsPriorityQueue */
require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';
/** Zend_Search_Lucene_Index_DocsFilter */
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
@ -67,15 +75,12 @@ require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
/** Zend_Search_Lucene_LockManager */
require_once 'Zend/Search/Lucene/LockManager.php';
/** Zend_Search_Lucene_Interface */
require_once 'Zend/Search/Lucene/Interface.php';
/** Zend_Search_Lucene_Proxy */
require_once 'Zend/Search/Lucene/Proxy.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -102,6 +107,15 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
*/
private static $_resultSetLimit = 0;
/**
* Terms per query limit
*
* 0 means no limit
*
* @var integer
*/
private static $_termsPerQueryLimit = 1024;
/**
* File system adapter.
*
@ -170,6 +184,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
const FORMAT_2_1 = 1;
const FORMAT_2_3 = 2;
/**
* Index format version
*
@ -229,6 +244,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
* without performance problems
*/
require_once 'Zend/Search/Lucene/Exception.php';
try {
for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
// Try to get generation file
@ -311,6 +327,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
if ($formatVersion != self::FORMAT_PRE_2_1 &&
$formatVersion != self::FORMAT_2_1 &&
$formatVersion != self::FORMAT_2_3) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Unsupported index format');
}
@ -329,12 +346,12 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
$format = $segmentsFile->readInt();
if ($format != (int)0xFFFFFFFF) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong segments file format');
}
// read version
// $segmentsFile->readLong();
$segmentsFile->readInt(); $segmentsFile->readInt();
$segmentsFile->readLong();
// read segment name counter
$segmentsFile->readInt();
@ -375,12 +392,12 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
} else if ($format == (int)0xFFFFFFFD) {
$this->_formatVersion = self::FORMAT_2_1;
} else {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
}
// read version
// $segmentsFile->readLong();
$segmentsFile->readInt(); $segmentsFile->readInt();
$segmentsFile->readLong();
// read segment name counter
$segmentsFile->readInt();
@ -395,19 +412,12 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
$segSize = $segmentsFile->readInt();
// 2.1+ specific properties
//$delGen = $segmentsFile->readLong();
$delGenHigh = $segmentsFile->readInt();
$delGenLow = $segmentsFile->readInt();
if ($delGenHigh == (int)0xFFFFFFFF && $delGenLow == (int)0xFFFFFFFF) {
$delGen = -1; // There are no deletes
} else {
$delGen = ($delGenHigh << 32) | $delGenLow;
}
$delGen = $segmentsFile->readLong();
if ($this->_formatVersion == self::FORMAT_2_3) {
$docStoreOffset = $segmentsFile->readInt();
if ($docStoreOffset != -1) {
if ($docStoreOffset != (int)0xFFFFFFFF) {
$docStoreSegment = $segmentsFile->readString();
$docStoreIsCompoundFile = $segmentsFile->readByte();
@ -430,6 +440,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
$normGens[] = $segmentsFile->readLong();
}
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
}
@ -471,6 +482,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
public function __construct($directory = null, $create = false)
{
if ($directory === null) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Exception('No index directory specified');
}
@ -490,6 +502,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
$this->_generation = self::getActualGeneration($this->_directory);
if ($create) {
require_once 'Zend/Search/Lucene/Exception.php';
try {
Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
} catch (Zend_Search_Lucene_Exception $e) {
@ -521,6 +534,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
}
if ($this->_generation == -1) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
} else if ($this->_generation == 0) {
$this->_readPre21SegmentsFile();
@ -663,6 +677,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
public function isDeleted($id)
{
if ($id >= $this->_docCount) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
@ -717,7 +732,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
}
/**
* Set result set limit.
* Get result set limit.
*
* 0 means no limit
*
@ -728,6 +743,30 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
return self::$_resultSetLimit;
}
/**
* Set terms per query limit.
*
* The value is written to the class-level static property, so it applies
* to the class as a whole rather than to a single index object. It is
* stored exactly as passed; no validation or integer cast is done here.
*
* 0 means no limit
*
* @param integer $limit New limit value (0 disables the limit)
*/
public static function setTermsPerQueryLimit($limit)
{
self::$_termsPerQueryLimit = $limit;
}
/**
* Get terms per query limit.
*
* 0 means no limit. Note that 0 is not the default: the underlying static
* property is initialised to 1024.
*
* @return integer Current value of the class-level terms per query limit
*/
public static function getTermsPerQueryLimit()
{
return self::$_termsPerQueryLimit;
}
/**
* Retrieve index maxBufferedDocs option
*
@ -852,6 +891,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
}
if (!$query instanceof Zend_Search_Lucene_Search_Query) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
}
@ -911,6 +951,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
$fieldNames = $this->getFieldNames();
$sortArgs = array();
require_once 'Zend/Search/Lucene/Exception.php';
for ($count = 1; $count < count($argList); $count++) {
$fieldName = $argList[$count];
@ -997,6 +1038,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
*
* @param integer|Zend_Search_Lucene_Search_QueryHit $id
* @return Zend_Search_Lucene_Document
* @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
*/
public function getDocument($id)
{
@ -1006,6 +1048,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
}
if ($id >= $this->_docCount) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
@ -1277,6 +1320,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
}
if ($id >= $this->_docCount) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
@ -1363,10 +1407,10 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
{
$result = array();
$segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
$segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();
foreach ($this->_segmentInfos as $segmentInfo) {
$segmentInfo->reset();
$segmentInfo->resetTermsStream();
// Skip "empty" segments
if ($segmentInfo->currentTerm() !== null) {
@ -1393,36 +1437,22 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
/**
* Terms stream queue
* Terms stream priority queue object
*
* @var Zend_Search_Lucene_Index_SegmentInfoPriorityQueue
* @var Zend_Search_Lucene_TermStreamsPriorityQueue
*/
private $_termsStreamQueue = null;
/**
* Last Term in a terms stream
*
* @var Zend_Search_Lucene_Index_Term
*/
private $_lastTerm = null;
private $_termsStream = null;
/**
* Reset terms stream.
*/
public function resetTermsStream()
{
$this->_termsStreamQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
foreach ($this->_segmentInfos as $segmentInfo) {
$segmentInfo->reset();
// Skip "empty" segments
if ($segmentInfo->currentTerm() !== null) {
$this->_termsStreamQueue->put($segmentInfo);
}
}
$this->nextTerm();
if ($this->_termsStream === null) {
$this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_segmentInfos);
} else {
$this->_termsStream->resetTermsStream();
}
}
/**
@ -1434,21 +1464,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
*/
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
$segments = array();
while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
$segments[] = $segmentInfo;
}
foreach ($segments as $segmentInfo) {
$segmentInfo->skipTo($prefix);
if ($segmentInfo->currentTerm() !== null) {
$this->_termsStreamQueue->put($segmentInfo);
}
}
$this->nextTerm();
$this->_termsStream->skipTo($prefix);
}
/**
@ -1458,31 +1474,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
*/
public function nextTerm()
{
while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
if ($this->_termsStreamQueue->top() === null ||
$this->_termsStreamQueue->top()->currentTerm()->key() !=
$segmentInfo->currentTerm()->key()) {
// We got new term
$this->_lastTerm = $segmentInfo->currentTerm();
if ($segmentInfo->nextTerm() !== null) {
// Put segment back into the priority queue
$this->_termsStreamQueue->put($segmentInfo);
}
return $this->_lastTerm;
}
if ($segmentInfo->nextTerm() !== null) {
// Put segment back into the priority queue
$this->_termsStreamQueue->put($segmentInfo);
}
}
// End of stream
$this->_lastTerm = null;
return null;
return $this->_termsStream->nextTerm();
}
/**
@ -1492,7 +1484,7 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
*/
public function currentTerm()
{
return $this->_lastTerm;
return $this->_termsStream->currentTerm();
}
/**
@ -1502,12 +1494,8 @@ class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
*/
public function closeTermsStream()
{
while (($segmentInfo = $this->_termsStreamQueue->pop()) !== null) {
$segmentInfo->closeTermsStream();
}
$this->_termsStreamQueue = null;
$this->_lastTerm = null;
$this->_termsStream->closeTermsStream();
$this->_termsStream = null;
}

View File

@ -69,7 +69,7 @@ abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_L
$token = $filter->normalize($token);
// resulting token can be null if the filter removes it
if (is_null($token)) {
if ($token === null) {
return null;
}
}

View File

@ -19,11 +19,8 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Analysis_TokenFilter */
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
require_once 'Zend/Search/Exception.php';
/**
* Token filter that removes stop words. These words must be provided as array (set), example:
@ -80,11 +77,13 @@ class Zend_Search_Lucene_Analysis_TokenFilter_StopWords extends Zend_Search_Luce
*/
public function loadFromFile($filepath = null) {
if (! $filepath || ! file_exists($filepath)) {
throw new Zend_Search_Exception('You have to provide valid file path');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('You have to provide valid file path');
}
$fd = fopen($filepath, "r");
if (! $fd) {
throw new Zend_Search_Exception('Cannot open file ' . $filepath);
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Cannot open file ' . $filepath);
}
while (!feof ($fd)) {
$buffer = trim(fgets($fd));
@ -93,7 +92,8 @@ class Zend_Search_Lucene_Analysis_TokenFilter_StopWords extends Zend_Search_Luce
}
}
if (!fclose($fd)) {
throw new Zend_Search_Exception('Cannot close file ' . $filepath);
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Cannot close file ' . $filepath);
}
}
}

View File

@ -22,98 +22,122 @@
/** Zend_Search_Lucene_Document_OpenXml */
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
if (class_exists ( 'ZipArchive' )) {
if (class_exists('ZipArchive', false)) {
/**
* Docx document.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Document
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Document_Docx extends Zend_Search_Lucene_Document_OpenXml {
/**
* Docx document.
* Xml Schema - WordprocessingML
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Document
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @var string
*/
class Zend_Search_Lucene_Document_Docx extends Zend_Search_Lucene_Document_OpenXml {
/**
* Xml Schema - WordprocessingML
*
* @var string
*/
const SCHEMA_WORDPROCESSINGML = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';
/**
* Object constructor
*
* @param string $fileName
* @param boolean $storeContent
*/
private function __construct($fileName, $storeContent) {
// Document data holders
$documentBody = array ( );
$coreProperties = array ( );
// Open OpenXML package
$package = new ZipArchive ( );
$package->open ( $fileName );
// Read relations and search for officeDocument
$relations = simplexml_load_string ( $package->getFromName ( "_rels/.rels" ) );
foreach ( $relations->Relationship as $rel ) {
if ($rel ["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
// Found office document! Read in contents...
$contents = simplexml_load_string ( $package->getFromName ( $this->absoluteZipPath ( dirname ( $rel ["Target"] ) . "/" . basename ( $rel ["Target"] ) ) ) );
$contents->registerXPathNamespace ( "w", Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML );
$paragraphs = $contents->xpath ( '//w:body/w:p' );
foreach ( $paragraphs as $paragraph ) {
$runs = $paragraph->xpath ( '//w:r/w:t' );
foreach ( $runs as $run ) {
$documentBody [] = ( string ) $run;
}
const SCHEMA_WORDPROCESSINGML = 'http://schemas.openxmlformats.org/wordprocessingml/2006/main';
/**
* Object constructor
*
* @param string $fileName
* @param boolean $storeContent
*/
private function __construct($fileName, $storeContent) {
// Document data holders
$documentBody = array();
$coreProperties = array();
// Open OpenXML package
$package = new ZipArchive();
$package->open($fileName);
// Read relations and search for officeDocument
$relations = simplexml_load_string($package->getFromName('_rels/.rels'));
foreach($relations->Relationship as $rel) {
if ($rel ["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
// Found office document! Read in contents...
$contents = simplexml_load_string($package->getFromName(
$this->absoluteZipPath(dirname($rel['Target'])
. '/'
. basename($rel['Target']))
));
$contents->registerXPathNamespace('w', Zend_Search_Lucene_Document_Docx::SCHEMA_WORDPROCESSINGML);
$paragraphs = $contents->xpath('//w:body/w:p');
foreach ($paragraphs as $paragraph) {
$runs = $paragraph->xpath('.//w:r/*[name() = "w:t" or name() = "w:br"]');
if ($runs === false) {
// Paragraph doesn't contain any text or breaks
continue;
}
break;
foreach ($runs as $run) {
if ($run->getName() == 'br') {
// Break element
$documentBody[] = ' ';
} else {
$documentBody[] = (string)$run;
}
}
// Add space after each paragraph. So they are not bound together.
$documentBody[] = ' ';
}
}
// Read core properties
$coreProperties = $this->extractMetaData ( $package );
// Close file
$package->close ();
// Store filename
$this->addField ( Zend_Search_Lucene_Field::Text ( 'filename', $fileName ) );
// Store contents
if ($storeContent) {
$this->addField ( Zend_Search_Lucene_Field::Text ( 'body', implode ( ' ', $documentBody ) ) );
} else {
$this->addField ( Zend_Search_Lucene_Field::UnStored ( 'body', implode ( ' ', $documentBody ) ) );
}
// Store meta data properties
foreach ( $coreProperties as $key => $value ) {
$this->addField ( Zend_Search_Lucene_Field::Text ( $key, $value ) );
}
// Store title (if not present in meta data)
if (! isset ( $coreProperties ['title'] )) {
$this->addField ( Zend_Search_Lucene_Field::Text ( 'title', $fileName ) );
break;
}
}
/**
* Load Docx document from a file
*
* @param string $fileName
* @param boolean $storeContent
* @return Zend_Search_Lucene_Document_Docx
*/
public static function loadDocxFile($fileName, $storeContent = false) {
return new Zend_Search_Lucene_Document_Docx ( $fileName, $storeContent );
// Read core properties
$coreProperties = $this->extractMetaData($package);
// Close file
$package->close();
// Store filename
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
// Store contents
if ($storeContent) {
$this->addField(Zend_Search_Lucene_Field::Text('body', implode('', $documentBody), 'UTF-8'));
} else {
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode('', $documentBody), 'UTF-8'));
}
// Store meta data properties
foreach ($coreProperties as $key => $value) {
$this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
}
// Store title (if not present in meta data)
if (! isset($coreProperties['title'])) {
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
}
}
/**
 * Build a Docx document object from a file on disk.
 *
 * Readability of the file is verified up front; an unreadable (or missing)
 * file is reported with a document exception instead of being handed to the
 * constructor.
 *
 * @param string  $fileName     Path of the .docx file to load
 * @param boolean $storeContent Passed through to the constructor, which adds the
 *                              body as a Text field when true and as an UnStored
 *                              field otherwise
 * @return Zend_Search_Lucene_Document_Docx
 * @throws Zend_Search_Lucene_Document_Exception If the file is not readable
 */
public static function loadDocxFile($fileName, $storeContent = false) {
    if (is_readable($fileName)) {
        return new Zend_Search_Lucene_Document_Docx($fileName, $storeContent);
    }

    // Exception class is loaded lazily, only on the failure path
    require_once 'Zend/Search/Lucene/Document/Exception.php';
    throw new Zend_Search_Lucene_Document_Exception('Provided file \'' . $fileName . '\' is not readable.');
}
}
} // end if (class_exists('ZipArchive'))

View File

@ -0,0 +1,36 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Zend_Search_Lucene base exception
*/
require_once 'Zend/Search/Lucene/Exception.php';
/**
* Exception type for Zend_Search_Lucene document classes.
*
* Adds no members of its own; it only specialises Zend_Search_Lucene_Exception
* so that document-related failures (for example an unreadable file passed to
* Zend_Search_Lucene_Document_Docx::loadDocxFile()) can be caught separately.
*
* @category Zend
* @package Zend_Search_Lucene
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Document_Exception extends Zend_Search_Lucene_Exception
{}

View File

@ -69,11 +69,12 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
/**
* Object constructor
*
* @param string $data
* @param string $data HTML string (may be an HTML fragment)
* @param boolean $isFile
* @param boolean $storeContent
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
*/
private function __construct($data, $isFile, $storeContent)
private function __construct($data, $isFile, $storeContent, $defaultEncoding = '')
{
$this->_doc = new DOMDocument();
$this->_doc->substituteEntities = true;
@ -85,6 +86,37 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
}
@$this->_doc->loadHTML($htmlData);
if ($this->_doc->encoding === null) {
    // Document encoding is not recognized.
    // Re-parse the input after converting it from $defaultEncoding to UTF-8 and
    // injecting an explicit UTF-8 Content-type meta tag.

    /** @todo improve HTML vs HTML fragment recognition */
    if (preg_match('/<html>/i', $htmlData, $matches, PREG_OFFSET_CAPTURE)) {
        // It's an HTML document
        // Add additional HEAD section and recognize document.
        //
        // With PREG_OFFSET_CAPTURE, $matches[0][0] is the matched text and
        // $matches[0][1] is its byte offset. The insertion point is right after
        // the opening <html> tag, i.e. offset + length of the matched text
        // (not the length of the offset number itself).
        $htmlTagOffset = $matches[0][1] + strlen($matches[0][0]);

        @$this->_doc->loadHTML(iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, 0, $htmlTagOffset))
                             . '<head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head>'
                             . iconv($defaultEncoding, 'UTF-8//IGNORE', substr($htmlData, $htmlTagOffset)));

        // Remove additional HEAD section
        $xpath = new DOMXPath($this->_doc);
        $head  = $xpath->query('/html/head')->item(0);
        $head->parentNode->removeChild($head);
    } else {
        // It's an HTML fragment
        @$this->_doc->loadHTML('<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
                             . iconv($defaultEncoding, 'UTF-8//IGNORE', $htmlData)
                             . '</body></html>');
    }
}
/** @todo Add correction of wrong HTML encoding recognition processing
* The case is:
* Content-type HTTP-EQUIV meta tag is presented, but ISO-8859-5 encoding is actually used,
* even $this->_doc->encoding demonstrates another recognized encoding
*/
$xpath = new DOMXPath($this->_doc);
$docTitle = '';
@ -93,13 +125,13 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
// title should always have only one entry, but we process all nodeset entries
$docTitle .= $titleNode->nodeValue . ' ';
}
$this->addField(Zend_Search_Lucene_Field::Text('title', $docTitle, $this->_doc->actualEncoding));
$this->addField(Zend_Search_Lucene_Field::Text('title', $docTitle, 'UTF-8'));
$metaNodes = $xpath->query('/html/head/meta[@name]');
foreach ($metaNodes as $metaNode) {
$this->addField(Zend_Search_Lucene_Field::Text($metaNode->getAttribute('name'),
$metaNode->getAttribute('content'),
$this->_doc->actualEncoding));
'UTF-8'));
}
$docBody = '';
@ -109,9 +141,9 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
$this->_retrieveNodeText($bodyNode, $docBody);
}
if ($storeContent) {
$this->addField(Zend_Search_Lucene_Field::Text('body', $docBody, $this->_doc->actualEncoding));
$this->addField(Zend_Search_Lucene_Field::Text('body', $docBody, 'UTF-8'));
} else {
$this->addField(Zend_Search_Lucene_Field::UnStored('body', $docBody, $this->_doc->actualEncoding));
$this->addField(Zend_Search_Lucene_Field::UnStored('body', $docBody, 'UTF-8'));
}
$linkNodes = $this->_doc->getElementsByTagName('a');
@ -196,25 +228,27 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
/**
* Load HTML document from a string
*
* @param string $data
* @param string $data
* @param boolean $storeContent
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
* @return Zend_Search_Lucene_Document_Html
*/
public static function loadHTML($data, $storeContent = false)
public static function loadHTML($data, $storeContent = false, $defaultEncoding = '')
{
return new Zend_Search_Lucene_Document_Html($data, false, $storeContent);
return new Zend_Search_Lucene_Document_Html($data, false, $storeContent, $defaultEncoding);
}
/**
* Load HTML document from a file
*
* @param string $file
* @param string $file
* @param boolean $storeContent
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
* @return Zend_Search_Lucene_Document_Html
*/
public static function loadHTMLFile($file, $storeContent = false)
public static function loadHTMLFile($file, $storeContent = false, $defaultEncoding = '')
{
return new Zend_Search_Lucene_Document_Html($file, true, $storeContent);
return new Zend_Search_Lucene_Document_Html($file, true, $storeContent, $defaultEncoding);
}
@ -223,12 +257,14 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
*
* @param DOMText $node
* @param array $wordsToHighlight
* @param string $color
* @param callback $callback Callback method, used to transform (highlighting) text.
* @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform)
* @throws Zend_Search_Lucene_Exception
*/
public function _highlightTextNode(DOMText $node, $wordsToHighlight, $color)
protected function _highlightTextNode(DOMText $node, $wordsToHighlight, $callback, $params)
{
$analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
$analyzer->setInput($node->nodeValue, $this->_doc->encoding);
$analyzer->setInput($node->nodeValue, 'UTF-8');
$matchedTokens = array();
@ -251,10 +287,32 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
// Cut matched node
$matchedWordNode = $node->splitText($token->getStartOffset());
$highlightedNode = $this->_doc->createElement('b', $matchedWordNode->nodeValue);
$highlightedNode->setAttribute('style', 'color:black;background-color:' . $color);
// Retrieve HTML string representation for highlihted word
$fullCallbackparamsList = $params;
array_unshift($fullCallbackparamsList, $matchedWordNode->nodeValue);
$highlightedWordNodeSetHtml = call_user_func_array($callback, $fullCallbackparamsList);
$node->parentNode->replaceChild($highlightedNode, $matchedWordNode);
// Transform HTML string to a DOM representation and automatically transform retrieved string
// into valid XHTML (It's automatically done by loadHTML() method)
$highlightedWordNodeSetDomDocument = new DOMDocument('1.0', 'UTF-8');
$success = @$highlightedWordNodeSetDomDocument->
loadHTML('<html><head><meta http-equiv="Content-type" content="text/html; charset=UTF-8"/></head><body>'
. $highlightedWordNodeSetHtml
. '</body></html>');
if (!$success) {
    // The HTML returned by the highlighting callback could not be parsed.
    // Report the offending fragment; the message must interpolate the variable
    // that actually holds the callback output ($highlightedWordNodeSetHtml).
    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception("Error occured while loading highlighted text fragment: '$highlightedWordNodeSetHtml'.");
}
$highlightedWordNodeSetXpath = new DOMXPath($highlightedWordNodeSetDomDocument);
$highlightedWordNodeSet = $highlightedWordNodeSetXpath->query('/html/body')->item(0)->childNodes;
for ($count = 0; $count < $highlightedWordNodeSet->length; $count++) {
$nodeToImport = $highlightedWordNodeSet->item($count);
$node->parentNode->insertBefore($this->_doc->importNode($nodeToImport, true /* deep copy */),
$matchedWordNode);
}
$node->parentNode->removeChild($matchedWordNode);
}
}
@ -264,9 +322,10 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
*
* @param DOMNode $contextNode
* @param array $wordsToHighlight
* @param string $color
* @param callback $callback Callback method, used to transform (highlighting) text.
* @param array $params Array of additional callback parameters (first non-optional parameter is a text to transform)
*/
public function _highlightNode(DOMNode $contextNode, $wordsToHighlight, $color)
protected function _highlightNodeRecursive(DOMNode $contextNode, $wordsToHighlight, $callback, $params)
{
$textNodes = array();
@ -279,38 +338,66 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
// process node later to leave childNodes structure untouched
$textNodes[] = $childNode;
} else {
// Skip script nodes
// Process node if it's not a script node
if ($childNode->nodeName != 'script') {
$this->_highlightNode($childNode, $wordsToHighlight, $color);
$this->_highlightNodeRecursive($childNode, $wordsToHighlight, $callback, $params);
}
}
}
foreach ($textNodes as $textNode) {
$this->_highlightTextNode($textNode, $wordsToHighlight, $color);
$this->_highlightTextNode($textNode, $wordsToHighlight, $callback, $params);
}
}
/**
* Standard callback method used to highlight words.
*
* Wraps the given string in a <b> element styled with black text and the
* given background colour. Both arguments are concatenated into the markup
* as-is; no escaping is applied in this method.
*
* @param string $stringToHighlight Text to wrap
* @param string $colour Value placed after "background-color:" in the style attribute
* @return string HTML fragment containing the wrapped text
* @internal
*/
public function applyColour($stringToHighlight, $colour)
{
return '<b style="color:black;background-color:' . $colour . '">' . $stringToHighlight . '</b>';
}
/**
* Highlight text with specified color
*
* @param string|array $words
* @param string $color
* @param string $colour
* @return string
*/
public function highlight($words, $color = '#66ffff')
public function highlight($words, $colour = '#66ffff')
{
return $this->highlightExtended($words, array($this, 'applyColour'), array($colour));
}
/**
* Highlight text using specified View helper or callback function.
*
* @param string|array $words Words to highlight. Words could be organized using the array or string.
* @param callback $callback Callback method, used to transform (highlighting) text.
* @param array $params Array of additionall callback parameters passed through into it
* (first non-optional parameter is an HTML fragment for highlighting)
* @return string
* @throws Zend_Search_Lucene_Exception
*/
public function highlightExtended($words, $callback, $params = array())
{
if (!is_array($words)) {
$words = array($words);
}
$wordsToHighlight = array();
$wordsToHighlightList = array();
$analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();
foreach ($words as $wordString) {
$wordsToHighlight = array_merge($wordsToHighlight, $analyzer->tokenize($wordString));
$wordsToHighlightList[] = $analyzer->tokenize($wordString);
}
$wordsToHighlight = call_user_func_array('array_merge', $wordsToHighlightList);
if (count($wordsToHighlight) == 0) {
return $this->_doc->saveHTML();
@ -321,15 +408,20 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
$wordsToHighlightFlipped[$token->getTermText()] = $id;
}
if (!is_callable($callback)) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('$viewHelper parameter mast be a View Helper name, View Helper object or callback.');
}
$xpath = new DOMXPath($this->_doc);
$matchedNodes = $xpath->query("/html/body");
foreach ($matchedNodes as $matchedNode) {
$this->_highlightNode($matchedNode, $wordsToHighlightFlipped, $color);
$this->_highlightNodeRecursive($matchedNode, $wordsToHighlightFlipped, $callback, $params);
}
}
/**
* Get HTML
*
@ -339,5 +431,23 @@ class Zend_Search_Lucene_Document_Html extends Zend_Search_Lucene_Document
{
return $this->_doc->saveHTML();
}
/**
 * Get HTML body
 *
 * Serialises every child node of the first /html/body element with
 * DOMDocument::saveXML() and returns the pieces joined together, i.e. the
 * body content without the surrounding <body> tag.
 *
 * @return string
 */
public function getHtmlBody()
{
    $domXpath    = new DOMXPath($this->_doc);
    $bodyElement = $domXpath->query('/html/body')->item(0);

    $html = '';
    foreach ($bodyElement->childNodes as $childNode) {
        $html .= $this->_doc->saveXML($childNode);
    }

    return $html;
}
}

View File

@ -23,7 +23,7 @@
/** Zend_Search_Lucene_Document */
require_once 'Zend/Search/Lucene/Document.php';
if (class_exists('ZipArchive')) {
if (class_exists('ZipArchive', false)) {
/**
* OpenXML document.

View File

@ -23,7 +23,7 @@
/** Zend_Search_Lucene_Document_OpenXml */
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
if (class_exists('ZipArchive')) {
if (class_exists('ZipArchive', false)) {
/**
* Pptx document.
@ -42,14 +42,14 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
* @var string
*/
const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';
/**
* Xml Schema - DrawingML
*
* @var string
*/
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
/**
* Xml Schema - Slide relation
*
@ -63,7 +63,7 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
* @var string
*/
const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';
/**
* Object constructor
*
@ -94,7 +94,7 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
$slides[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string(
$package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . basename($slideRel["Target"])) )
);
// Search for slide notes
$slideNotesRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/_rels/" . basename($slideRel["Target"]) . ".rels")) );
foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
@ -103,27 +103,27 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
$slideNotes[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string(
$package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . dirname($slideNoteRel["Target"]) . "/" . basename($slideNoteRel["Target"])) )
);
break;
}
}
}
}
break;
}
}
// Sort slides
ksort($slides);
ksort($slideNotes);
// Extract contents from slides
foreach ($slides as $slideKey => $slide) {
// Register namespaces
$slide->registerXPathNamespace("p", Zend_Search_Lucene_Document_Pptx::SCHEMA_PRESENTATIONML);
$slide->registerXPathNamespace("a", Zend_Search_Lucene_Document_Pptx::SCHEMA_DRAWINGML);
// Fetch all text
$textElements = $slide->xpath('//a:t');
foreach ($textElements as $textElement) {
@ -138,15 +138,15 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
// Register namespaces
$slideNote->registerXPathNamespace("p", Zend_Search_Lucene_Document_Pptx::SCHEMA_PRESENTATIONML);
$slideNote->registerXPathNamespace("a", Zend_Search_Lucene_Document_Pptx::SCHEMA_DRAWINGML);
// Fetch all text
$textElements = $slideNote->xpath('//a:t');
foreach ($textElements as $textElement) {
$documentBody[] = (string)$textElement;
}
}
}
}
// Read core properties
$coreProperties = $this->extractMetaData($package);
@ -154,25 +154,25 @@ class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenX
$package->close();
// Store filename
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName));
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
// Store contents
if ($storeContent) {
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody)));
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
} else {
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody)));
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
}
// Store meta data properties
foreach ($coreProperties as $key => $value)
{
$this->addField(Zend_Search_Lucene_Field::Text($key, $value));
$this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
}
// Store title (if not present in meta data)
if (!isset($coreProperties['title']))
{
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName));
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
}
}

View File

@ -23,7 +23,7 @@
/** Zend_Search_Lucene_Document_OpenXml */
require_once 'Zend/Search/Lucene/Document/OpenXml.php';
if (class_exists('ZipArchive')) {
if (class_exists('ZipArchive', false)) {
/**
* Xlsx document.
@ -42,21 +42,21 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
* @var string
*/
const SCHEMA_SPREADSHEETML = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
/**
* Xml Schema - DrawingML
*
* @var string
*/
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';
/**
* Xml Schema - Shared Strings
*
* @var string
*/
const SCHEMA_SHAREDSTRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
/**
* Xml Schema - Worksheet relation
*
@ -70,7 +70,7 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
* @var string
*/
const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';
/**
* Object constructor
*
@ -84,7 +84,7 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
$worksheets = array();
$documentBody = array();
$coreProperties = array();
// Open OpenXML package
$package = new ZipArchive();
$package->open($fileName);
@ -96,11 +96,11 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
// Found office document! Read relations for workbook...
$workbookRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/_rels/" . basename($rel["Target"]) . ".rels")) );
$workbookRelations->registerXPathNamespace("rel", Zend_Search_Lucene_Document_OpenXml::SCHEMA_RELATIONSHIP);
// Read shared strings
$sharedStringsPath = $workbookRelations->xpath("rel:Relationship[@Type='" . Zend_Search_Lucene_Document_Xlsx::SCHEMA_SHAREDSTRINGS . "']");
$sharedStringsPath = (string)$sharedStringsPath[0]['Target'];
$xmlStrings = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . $sharedStringsPath)) );
$sharedStringsPath = (string)$sharedStringsPath[0]['Target'];
$xmlStrings = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . $sharedStringsPath)) );
if (isset($xmlStrings) && isset($xmlStrings->si)) {
foreach ($xmlStrings->si as $val) {
if (isset($val->t)) {
@ -119,14 +119,14 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
);
}
}
break;
}
}
// Sort worksheets
ksort($worksheets);
// Extract contents from worksheets
foreach ($worksheets as $sheetKey => $worksheet) {
foreach ($worksheet->sheetData->row as $row) {
@ -143,7 +143,7 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
}
break;
case "b":
// Value is boolean
$value = (string)$c->v;
@ -156,13 +156,13 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
}
break;
case "inlineStr":
// Value is rich text inline
$value = $this->_parseRichText($c->is);
break;
case "e":
// Value is an error message
if ((string)$c->v != '') {
@ -184,11 +184,11 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
elseif ($value == (double)$value) $value = (double)$value;
}
}
$documentBody[] = $value;
}
}
}
}
// Read core properties
$coreProperties = $this->extractMetaData($package);
@ -197,28 +197,28 @@ class Zend_Search_Lucene_Document_Xlsx extends Zend_Search_Lucene_Document_OpenX
$package->close();
// Store filename
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName));
$this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));
// Store contents
if ($storeContent) {
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody)));
$this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8'));
} else {
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody)));
$this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8'));
}
// Store meta data properties
foreach ($coreProperties as $key => $value)
{
$this->addField(Zend_Search_Lucene_Field::Text($key, $value));
$this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
}
// Store title (if not present in meta data)
if (!isset($coreProperties['title']))
{
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName));
$this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
}
}
/**
* Parse rich text XML
*

View File

@ -18,14 +18,9 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_FSMAction */
require_once 'Zend/Search/Lucene/FSMAction.php';
/** Zend_Search_Exception */
require_once 'Zend/Search/Exception.php';
/**
* Abstract Finite State Machine
*
@ -181,6 +176,7 @@ abstract class Zend_Search_Lucene_FSM
public function setState($state)
{
if (!isset($this->_states[$state])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('State \'' . $state . '\' is not on of the possible FSM states.');
}
@ -251,12 +247,15 @@ abstract class Zend_Search_Lucene_FSM
public function addRule($sourceState, $input, $targetState, $inputAction = null)
{
if (!isset($this->_states[$sourceState])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined source state (' . $sourceState . ').');
}
if (!isset($this->_states[$targetState])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined target state (' . $targetState . ').');
}
if (!isset($this->_inputAphabet[$input])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined input symbol (' . $input . ').');
}
@ -264,6 +263,7 @@ abstract class Zend_Search_Lucene_FSM
$this->_rules[$sourceState] = array();
}
if (isset($this->_rules[$sourceState][$input])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Rule for {state,input} pair (' . $sourceState . ', '. $input . ') is already defined.');
}
@ -287,6 +287,7 @@ abstract class Zend_Search_Lucene_FSM
public function addEntryAction($state, Zend_Search_Lucene_FSMAction $action)
{
if (!isset($this->_states[$state])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
}
@ -308,6 +309,7 @@ abstract class Zend_Search_Lucene_FSM
public function addExitAction($state, Zend_Search_Lucene_FSMAction $action)
{
if (!isset($this->_states[$state])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
}
@ -330,9 +332,11 @@ abstract class Zend_Search_Lucene_FSM
public function addInputAction($state, $inputSymbol, Zend_Search_Lucene_FSMAction $action)
{
if (!isset($this->_states[$state])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined state (' . $state. ').');
}
if (!isset($this->_inputAphabet[$inputSymbol])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined input symbol (' . $inputSymbol. ').');
}
@ -358,9 +362,11 @@ abstract class Zend_Search_Lucene_FSM
public function addTransitionAction($sourceState, $targetState, Zend_Search_Lucene_FSMAction $action)
{
if (!isset($this->_states[$sourceState])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined source state (' . $sourceState. ').');
}
if (!isset($this->_states[$targetState])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('Undefined source state (' . $targetState. ').');
}
@ -384,9 +390,11 @@ abstract class Zend_Search_Lucene_FSM
public function process($input)
{
if (!isset($this->_rules[$this->_currentState])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('There is no any rule for current state (' . $this->_currentState . ').');
}
if (!isset($this->_rules[$this->_currentState][$input])) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('There is no any rule for {current state, input} pair (' . $this->_currentState . ', ' . $input . ').');
}
@ -424,6 +432,7 @@ abstract class Zend_Search_Lucene_FSM
public function reset()
{
if (count($this->_states) == 0) {
require_once 'Zend/Search/Exception.php';
throw new Zend_Search_Exception('There is no any state defined for FSM.');
}

View File

@ -19,11 +19,6 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* Dictionary loader
*
@ -63,7 +58,8 @@ class Zend_Search_Lucene_Index_DictionaryLoader
$pos += 4;
if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
$tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
}
// $indexTermCount = $tiiFile->readLong();
@ -82,7 +78,8 @@ class Zend_Search_Lucene_Index_DictionaryLoader
(ord($data[$pos+2]) != 0) ||
(ord($data[$pos+3]) != 0) ||
((ord($data[$pos+4]) & 0x80) != 0)) {
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
}
$indexTermCount = ord($data[$pos+4]) << 24 |
@ -99,6 +96,7 @@ class Zend_Search_Lucene_Index_DictionaryLoader
$skipInterval = ord($data[$pos]) << 24 | ord($data[$pos+1]) << 16 | ord($data[$pos+2]) << 8 | ord($data[$pos+3]);
$pos += 4;
if ($indexTermCount < 1) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong number of terms in a term dictionary index');
}
@ -254,13 +252,16 @@ class Zend_Search_Lucene_Index_DictionaryLoader
// Check special index entry mark
if ($termDictionary[0][0] != (int)0xFFFFFFFF) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
} else if (PHP_INT_SIZE > 4){
}
if (PHP_INT_SIZE > 4) {
// Treat 64-bit 0xFFFFFFFF as -1
$termDictionary[0][0] = -1;
}
return array(&$termDictionary, &$termInfos);
return array($termDictionary, $termInfos);
}
}

View File

@ -22,16 +22,11 @@
/** Zend_Search_Lucene_Index_DictionaryLoader */
require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_LockManager */
require_once 'Zend/Search/Lucene/LockManager.php';
/** Zend_Search_Lucene_Index_DocsFilter */
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
/** Zend_Search_Lucene_Index_TermsStream_Interface */
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
/**
* @category Zend
@ -40,7 +35,7 @@ require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Index_SegmentInfo
class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
{
/**
* "Full scan vs fetch" boundary.
@ -261,6 +256,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
} else {
// It's a pre-2.1 segment or isCompound is set to 'unknown'
// Detect if segment uses compound file
require_once 'Zend/Search/Lucene/Exception.php';
try {
// Try to open compound file
$this->_directory->getFileObject($name . '.cfs');
@ -321,110 +317,165 @@ class Zend_Search_Lucene_Index_SegmentInfo
$this->_fieldsDicPositions = array_flip($fieldNums);
if ($this->_delGen == -2) {
$this->_detectLatestDelGen();
// SegmentInfo constructor is invoked from index writer
// Autodetect current delete file generation number
$this->_delGen = $this->_detectLatestDelGen();
}
// Load deletions
$this->_deleted = $this->_loadDelFile();
}
/**
 * Load deletions file
 *
 * Dispatches on the current delete file generation number ($this->_delGen):
 *  - -1:    there is no deletions file for this segment, null is returned;
 *  - 0:     the file is loaded by _loadPre21DelFile();
 *  - other: the file is loaded by _load21DelFile().
 *
 * Returns bitset or an array depending on bitset extension availability
 *
 * @return mixed
 * @throws Zend_Search_Lucene_Exception
 */
private function _loadDelFile()
{
    if ($this->_delGen == -1) {
        // There is no delete file for this segment
        return null;
    }

    if ($this->_delGen == 0) {
        // It's a segment with pre-2.1 format delete file.
        // The helper performs the whole load, including the "file is missing" case,
        // so the file must not be read a second time here.
        return $this->_loadPre21DelFile();
    }

    // It's 2.1+ format deletions file; the helper opens the file itself
    return $this->_load21DelFile();
}
$format = $delFile->readInt();
/**
* Load pre-2.1 deletions file
*
* Returns bitset or an array depending on bitset extension availability
*
* @return mixed
* @throws Zend_Search_Lucene_Exception
*/
private function _loadPre21DelFile()
{
require_once 'Zend/Search/Lucene/Exception.php';
try {
// '.del' files always stored in a separate file
// Segment compound is not used
$delFile = $this->_directory->getFileObject($this->_name . '.del');
if ($format == (int)0xFFFFFFFF) {
if (extension_loaded('bitset')) {
$this->_deleted = bitset_empty();
} else {
$this->_deleted = array();
}
$byteCount = $delFile->readInt();
$bitCount = $delFile->readInt();
$delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
$byteNum = 0;
do {
$dgap = $delFile->readVInt();
$nonZeroByte = $delFile->readByte();
$byteNum += $dgap;
for ($bit = 0; $bit < 8; $bit++) {
if ($nonZeroByte & (1<<$bit)) {
if (extension_loaded('bitset')) {
bitset_incl($this->_deleted, $byteNum*8 + $bit);
} else {
$this->_deleted[$byteNum*8 + $bit] = 1;
}
}
}
} while ($delFile->tell() < $delFileSize);
$byteCount = $delFile->readInt();
$byteCount = ceil($byteCount/8);
$bitCount = $delFile->readInt();
if ($bitCount == 0) {
$delBytes = '';
} else {
// $format is actually byte count
$byteCount = ceil($format/8);
$bitCount = $delFile->readInt();
$delBytes = $delFile->readBytes($byteCount);
}
if ($bitCount == 0) {
$delBytes = '';
} else {
$delBytes = $delFile->readBytes($byteCount);
}
if (extension_loaded('bitset')) {
$this->_deleted = $delBytes;
} else {
$this->_deleted = array();
for ($count = 0; $count < $byteCount; $count++) {
$byte = ord($delBytes[$count]);
for ($bit = 0; $bit < 8; $bit++) {
if ($byte & (1<<$bit)) {
$this->_deleted[$count*8 + $bit] = 1;
}
if (extension_loaded('bitset')) {
return $delBytes;
} else {
$deletions = array();
for ($count = 0; $count < $byteCount; $count++) {
$byte = ord($delBytes[$count]);
for ($bit = 0; $bit < 8; $bit++) {
if ($byte & (1<<$bit)) {
$deletions[$count*8 + $bit] = 1;
}
}
}
return $deletions;
}
} catch(Zend_Search_Lucene_Exception $e) {
if (strpos($e->getMessage(), 'is not readable') === false) {
throw $e;
}
// There is no deletion file
$this->_delGen = -1;
return null;
}
}
/**
 * Load 2.1+ format deletions file
 *
 * The file starts with an integer marker. When it equals 0xFFFFFFFF the rest of
 * the file is read as a sequence of (gap, non-zero byte) pairs, where each gap is
 * added to the running byte index. Otherwise the marker itself is used to derive
 * the byte count (ceil(marker / 8)) of a plain bit vector that follows.
 *
 * Returns bitset or an array depending on bitset extension availability.
 * In array mode null is returned when no bit is set.
 *
 * @return mixed
 */
private function _load21DelFile()
{
    $delFileName = $this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del';
    $delFile     = $this->_directory->getFileObject($delFileName);
    $useBitset   = extension_loaded('bitset');

    $format = $delFile->readInt();

    if ($format == (int)0xFFFFFFFF) {
        $deletions = $useBitset ? bitset_empty() : array();

        // Header values are not used below, but they have to be consumed
        // to position the stream at the first (gap, byte) pair
        $byteCount = $delFile->readInt();
        $bitCount  = $delFile->readInt();

        $delFileSize = $this->_directory->fileLength($delFileName);
        $byteNum = 0;

        do {
            $dgap        = $delFile->readVInt();
            $nonZeroByte = $delFile->readByte();

            $byteNum += $dgap;

            for ($bit = 0; $bit < 8; $bit++) {
                if ($nonZeroByte & (1<<$bit)) {
                    if ($useBitset) {
                        bitset_incl($deletions, $byteNum*8 + $bit);
                    } else {
                        $deletions[$byteNum*8 + $bit] = 1;
                    }
                }
            }
        } while ($delFile->tell() < $delFileSize);

        // Return only after the whole file has been consumed; returning from inside
        // the loop would drop every pair after the first one
        if ($useBitset) {
            return $deletions;
        }

        return (count($deletions) > 0) ? $deletions : null;
    }

    // $format is actually byte count
    $byteCount = ceil($format/8);
    $bitCount  = $delFile->readInt();

    if ($bitCount == 0) {
        $delBytes = '';
    } else {
        $delBytes = $delFile->readBytes($byteCount);
    }

    if ($useBitset) {
        return $delBytes;
    }

    $deletions = array();
    // Iterate over the bytes actually loaded: $delBytes is empty when $bitCount is 0,
    // and indexing it up to $byteCount would read undefined string offsets
    $loadedBytes = strlen($delBytes);
    for ($count = 0; $count < $loadedBytes; $count++) {
        $byte = ord($delBytes[$count]);
        for ($bit = 0; $bit < 8; $bit++) {
            if ($byte & (1<<$bit)) {
                $deletions[$count*8 + $bit] = 1;
            }
        }
    }

    return (count($deletions) > 0) ? $deletions : null;
}
@ -462,10 +513,12 @@ class Zend_Search_Lucene_Index_SegmentInfo
}
if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
. $fdxFName . ' file.' );
}
if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
. $fdtFName . ' file.' );
}
@ -500,6 +553,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
}
if( !isset($this->_segFiles[$filename]) ) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
. $filename . ' file.' );
}
@ -525,6 +579,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
}
if( !isset($this->_sharedDocStoreOptions['fileSizes'][$filename]) ) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
. $filename . ' file.' );
}
@ -541,6 +596,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
}
if( !isset($this->_segFileSizes[$filename]) ) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
. $filename . ' file.' );
}
@ -811,6 +867,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
$tiVersion = $tisFile->readInt();
if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
$tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
}
@ -890,6 +947,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
if ($docsFilter !== null) {
if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
}
@ -1012,6 +1070,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
if ($docsFilter !== null) {
if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
}
@ -1136,6 +1195,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
if ($docsFilter !== null) {
if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
}
@ -1304,6 +1364,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
$headerFormatVersion = $normfFile->readByte();
if ($header != 'NRM' || $headerFormatVersion != (int)0xFF) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong norms file format.');
}
@ -1439,13 +1500,14 @@ class Zend_Search_Lucene_Index_SegmentInfo
}
}
/**
* Detect latest delete generation
*
* Is actually used from writeChanges() method or from the constructor if it's invoked from
* Index writer. In both cases index write lock is already obtained, so we shouldn't care
* about it
*
* @return integer
*/
private function _detectLatestDelGen()
{
@ -1462,12 +1524,12 @@ class Zend_Search_Lucene_Index_SegmentInfo
if (count($delFileList) == 0) {
// There is no deletions file for current segment in the directory
// Set detetions file generation number to 1
$this->_delGen = -1;
// Report deletions file generation number -1 (no deletions file)
return -1;
} else {
// There are some deletions files for current segment in the directory
// Set deletions file generation number to the highest number
$this->_delGen = max($delFileList);
return max($delFileList);
}
}
@ -1478,11 +1540,43 @@ class Zend_Search_Lucene_Index_SegmentInfo
* so index Write lock has to be already obtained.
*
* @internal
* @throws Zend_Search_Lucene_Exception
*/
public function writeChanges()
{
// Get new generation number
$latestDelGen = $this->_detectLatestDelGen();
if (!$this->_deletedDirty) {
return;
// There was no deletions by current process
if ($latestDelGen == $this->_delGen) {
// Delete file hasn't been updated by any concurrent process
return;
} else if ($latestDelGen > $this->_delGen) {
// Delete file has been updated by some concurrent process
// Reload deletions file
$this->_delGen = $latestDelGen;
$this->_deleted = $this->_loadDelFile();
return;
} else {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
}
}
if ($latestDelGen > $this->_delGen) {
// Merge current deletions with latest deletions file
$this->_delGen = $latestDelGen;
$latestDelete = $this->_loadDelFile();
if (extension_loaded('bitset')) {
$this->_deleted = bitset_union($this->_deleted, $latestDelete);
} else {
$this->_deleted += $latestDelete;
}
}
if (extension_loaded('bitset')) {
@ -1503,10 +1597,6 @@ class Zend_Search_Lucene_Index_SegmentInfo
$bitCount = count($this->_deleted);
}
// Get new generation number
$this->_detectLatestDelGen();
if ($this->_delGen == -1) {
// Set delete file generation number to 1
$this->_delGen = 1;
@ -1524,7 +1614,6 @@ class Zend_Search_Lucene_Index_SegmentInfo
}
/**
* Term Dictionary File object for stream like terms reading
*
@ -1664,8 +1753,28 @@ class Zend_Search_Lucene_Index_SegmentInfo
* @throws Zend_Search_Lucene_Exception
* @return integer
*/
public function reset($startId = 0, $mode = self::SM_TERMS_ONLY)
public function resetTermsStream(/** $startId = 0, $mode = self::SM_TERMS_ONLY */)
{
/**
* SegmentInfo->resetTermsStream() method actually takes two optional parameters:
* $startId (default value is 0)
* $mode (default value is self::SM_TERMS_ONLY)
*/
$argList = func_get_args();
if (count($argList) > 2) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong number of arguments');
} else if (count($argList) == 2) {
$startId = $argList[0];
$mode = $argList[1];
} else if (count($argList) == 1) {
$startId = $argList[0];
$mode = self::SM_TERMS_ONLY;
} else {
$startId = 0;
$mode = self::SM_TERMS_ONLY;
}
if ($this->_tisFile !== null) {
$this->_tisFile = null;
}
@ -1676,6 +1785,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
$tiVersion = $this->_tisFile->readInt();
if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */ &&
$tiVersion != (int)0xFFFFFFFD /* 2.1+ format */) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
}
@ -1723,6 +1833,7 @@ class Zend_Search_Lucene_Index_SegmentInfo
break;
default:
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
break;
}

View File

@ -19,19 +19,14 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
/** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
/** Zend_Search_Lucene_Index_SegmentInfoPriorityQueue */
require_once 'Zend/Search/Lucene/Index/SegmentInfoPriorityQueue.php';
/** Zend_Search_Lucene_Index_TermsPriorityQueue */
require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
/**
* @category Zend
@ -117,10 +112,12 @@ class Zend_Search_Lucene_Index_SegmentMerger
public function merge()
{
if ($this->_mergeDone) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Merge is already done.');
}
if (count($this->_segmentInfos) < 1) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
. count($this->_segmentInfos)
. ').');
@ -228,11 +225,11 @@ class Zend_Search_Lucene_Index_SegmentMerger
*/
private function _mergeTerms()
{
$segmentInfoQueue = new Zend_Search_Lucene_Index_SegmentInfoPriorityQueue();
$segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();
$segmentStartId = 0;
foreach ($this->_segmentInfos as $segName => $segmentInfo) {
$segmentStartId = $segmentInfo->reset($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);
$segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);
// Skip "empty" segments
if ($segmentInfo->currentTerm() !== null) {

View File

@ -19,14 +19,9 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
/**
* @category Zend
* @package Zend_Search_Lucene

View File

@ -19,17 +19,12 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Analysis_Analyzer */
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
/** Zend_Search_Lucene_Index_SegmentWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -90,6 +85,7 @@ class Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter extends Zend_Search_
/**
* @todo term vector storing support
*/
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
}

View File

@ -19,17 +19,12 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Index_SegmentInfo */
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
/** Zend_Search_Lucene_Index_SegmentWriter */
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
/**
* @category Zend
* @package Zend_Search_Lucene

View File

@ -0,0 +1,48 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_PriorityQueue */
require_once 'Zend/Search/Lucene/PriorityQueue.php';
/**
 * Priority queue of terms streams, ordered by the key of each stream's current term.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_TermsPriorityQueue extends Zend_Search_Lucene_PriorityQueue
{
    /**
     * Compare elements
     *
     * Returns true if the current term key of $termsStream1 sorts (byte-wise,
     * via strcmp) strictly before the current term key of $termsStream2;
     * false otherwise.
     *
     * @param mixed $termsStream1
     * @param mixed $termsStream2
     * @return boolean
     */
    protected function _less($termsStream1, $termsStream2)
    {
        $firstKey  = $termsStream1->currentTerm()->key();
        $secondKey = $termsStream2->currentTerm()->key();

        return strcmp($firstKey, $secondKey) < 0;
    }
}

View File

@ -0,0 +1,65 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
 * Contract for objects that can be read as a stream of index terms:
 * reset, skip forward to a prefix, step to the next term, inspect the
 * current term, and close.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
interface Zend_Search_Lucene_Index_TermsStream_Interface
{
/**
 * Reset terms stream.
 */
public function resetTermsStream();
/**
 * Skip terms stream up to the specified term prefix.
 *
 * Prefix contains fully specified field info and a portion of the searched term.
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix);
/**
 * Scans the terms dictionary and returns the next term.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm();
/**
 * Returns the term at the current position.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm();
/**
 * Close terms stream.
 *
 * Should be used for resource clean-up if the stream is not read up to the end.
 */
public function closeTermsStream();
}

View File

@ -189,11 +189,8 @@ class Zend_Search_Lucene_Index_Writer
$segmentsFile = $directory->createFile('segments');
$segmentsFile->writeInt((int)0xFFFFFFFF);
// write version (is initialized by current time
// $segmentsFile->writeLong((int)microtime(true));
$version = microtime(true);
$segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
$segmentsFile->writeInt((int)($version & 0xFFFFFFFF));
// write version (initialized by current time)
$segmentsFile->writeLong(round(microtime(true)));
// write name counter
$segmentsFile->writeInt($nameCount);
@ -214,11 +211,8 @@ class Zend_Search_Lucene_Index_Writer
$segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
$segmentsFile->writeInt((int)0xFFFFFFFD);
// write version (is initialized by current time
// $segmentsFile->writeLong((int)microtime(true));
$version = microtime(true);
$segmentsFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
$segmentsFile->writeInt((int)($version & 0xFFFFFFFF));
// write version (initialized by current time)
$segmentsFile->writeLong(round(microtime(true)));
// write name counter
$segmentsFile->writeInt($nameCount);
@ -435,9 +429,9 @@ class Zend_Search_Lucene_Index_Writer
try {
// Write format marker
if ($this->_targetFormatVersion == Zend_Search_lucene::FORMAT_2_1) {
if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
$newSegmentFile->writeInt((int)0xFFFFFFFD);
} else if ($this->_targetFormatVersion == Zend_Search_lucene::FORMAT_2_3) {
} else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
$newSegmentFile->writeInt((int)0xFFFFFFFC);
}
@ -453,16 +447,9 @@ class Zend_Search_Lucene_Index_Writer
throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
}
// $version = $segmentsFile->readLong() + $this->_versionUpdate;
// Process version on 32-bit platforms
$versionHigh = $segmentsFile->readInt();
$versionLow = $segmentsFile->readInt();
$version = $versionHigh * ((double)0xFFFFFFFF + 1) +
(($versionLow < 0)? (double)0xFFFFFFFF - (-1 - $versionLow) : $versionLow);
$version += $this->_versionUpdate;
$version = $segmentsFile->readLong() + $this->_versionUpdate;
$this->_versionUpdate = 0;
$newSegmentFile->writeInt((int)($version/((double)0xFFFFFFFF + 1)));
$newSegmentFile->writeInt((int)($version & 0xFFFFFFFF));
$newSegmentFile->writeLong($version);
// Write segment name counter
$newSegmentFile->writeInt($segmentsFile->readInt());
@ -482,21 +469,18 @@ class Zend_Search_Lucene_Index_Writer
if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
// pre-2.1 index format
$delGenHigh = 0;
$delGenLow = 0;
$delGen = 0;
$hasSingleNormFile = false;
$numField = (int)0xFFFFFFFF;
$isCompoundByte = 0;
$docStoreOptions = null;
} else {
//$delGen = $segmentsFile->readLong();
$delGenHigh = $segmentsFile->readInt();
$delGenLow = $segmentsFile->readInt();
$delGen = $segmentsFile->readLong();
if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
$docStoreOffset = $segmentsFile->readInt();
if ($docStoreOffset != -1) {
if ($docStoreOffset != (int)0xFFFFFFFF) {
$docStoreSegment = $segmentsFile->readString();
$docStoreIsCompoundFile = $segmentsFile->readByte();
@ -525,8 +509,6 @@ class Zend_Search_Lucene_Index_Writer
if (!in_array($segName, $this->_segmentsToDelete)) {
// Load segment if necessary
if (!isset($this->_segmentInfos[$segName])) {
$delGen = $delGenHigh * ((double)0xFFFFFFFF + 1) +
(($delGenLow < 0)? (double)0xFFFFFFFF - (-1 - $delGenLow) : $delGenLow);
if ($isCompoundByte == 0xFF) {
// The segment is not a compound file
$isCompound = false;
@ -549,19 +531,11 @@ class Zend_Search_Lucene_Index_Writer
} else {
// Retrieve actual deletions file generation number
$delGen = $this->_segmentInfos[$segName]->getDelGen();
if ($delGen >= 0) {
$delGenHigh = (int)($delGen/((double)0xFFFFFFFF + 1));
$delGenLow =(int)($delGen & 0xFFFFFFFF);
} else {
$delGenHigh = $delGenLow = (int)0xFFFFFFFF;
}
}
$newSegmentFile->writeString($segName);
$newSegmentFile->writeInt($segSize);
$newSegmentFile->writeInt($delGenHigh);
$newSegmentFile->writeInt($delGenLow);
$newSegmentFile->writeLong($delGen);
if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
if ($docStoreOptions !== null) {
$newSegmentFile->writeInt($docStoreOffset);

View File

@ -18,6 +18,9 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_TermsStream_Interface */
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
/**
* @category Zend
@ -25,7 +28,7 @@
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
interface Zend_Search_Lucene_Interface
interface Zend_Search_Lucene_Interface extends Zend_Search_Lucene_Index_TermsStream_Interface
{
/**
* Get current generation number
@ -376,43 +379,6 @@ interface Zend_Search_Lucene_Interface
*/
public function terms();
/**
* Reset terms stream.
*/
public function resetTermsStream();
/**
* Skip terms stream up to specified term preffix.
*
* Prefix contains fully specified field info and portion of searched term
*
* @param Zend_Search_Lucene_Index_Term $prefix
*/
public function skipTo(Zend_Search_Lucene_Index_Term $prefix);
/**
* Scans terms dictionary and returns next term
*
* @return Zend_Search_Lucene_Index_Term|null
*/
public function nextTerm();
/**
* Returns term in current position
*
* @return Zend_Search_Lucene_Index_Term|null
*/
public function currentTerm();
/**
* Close terms stream
*
* Should be used for resources clean up if stream is not read up to the end
*/
public function closeTermsStream();
/**
* Undeletes all documents currently marked as deleted in this index.
*/

View File

@ -18,18 +18,12 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Storage_Directory */
require_once 'Zend/Search/Lucene/Storage/Directory.php';
/** Zend_Search_Lucene_Storage_File */
require_once 'Zend/Search/Lucene/Storage/File.php';
/**
* This is an utility class which provides index locks processing functionality
*
@ -59,6 +53,7 @@ class Zend_Search_Lucene_LockManager
{
$lock = $lockDirectory->createFile(self::WRITE_LOCK_FILE);
if (!$lock->lock(LOCK_EX)) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive index lock');
}
return $lock;
@ -99,11 +94,11 @@ class Zend_Search_Lucene_LockManager
* @return Zend_Search_Lucene_Storage_File
* @throws Zend_Search_Lucene_Exception
*/
private static function _startReadLockProcessing(Zend_Search_Lucene_Storage_Directory $lockDirectory)
{
$lock = $lockDirectory->createFile(self::READ_LOCK_PROCESSING_LOCK_FILE);
if (!$lock->lock(LOCK_EX)) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Can\'t obtain exclusive lock for the read lock processing file');
}
return $lock;
@ -137,7 +132,7 @@ class Zend_Search_Lucene_LockManager
{
$lock = $lockDirectory->createFile(self::READ_LOCK_FILE);
if (!$lock->lock(LOCK_SH)) {
self::_stopReadLockProcessing($lockDirectory);
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Can\'t obtain shared reading index lock');
}
return $lock;

View File

@ -0,0 +1,962 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_TermStreamsPriorityQueue */
require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';
/** Zend_Search_Lucene_Interface */
require_once 'Zend/Search/Lucene/Interface.php';
/**
* Multisearcher allows to search through several independent indexes.
*
* @category Zend
* @package Zend_Search_Lucene
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Interface_MultiSearcher implements Zend_Search_Lucene_Interface
{
/**
* List of indices for searching.
* Array of Zend_Search_Lucene_Interface objects
*
* @var array
*/
protected $_indices;
/**
 * Create a multi-searcher over the given list of sub-indices.
 *
 * The list is stored first and then every element is checked; the first
 * element that does not implement Zend_Search_Lucene_Interface aborts
 * construction with an exception.
 *
 * @param array $indices   Array of Zend_Search_Lucene_Interface objects
 * @throws Zend_Search_Lucene_Exception
 */
public function __construct($indices = array())
{
    $this->_indices = $indices;

    foreach ($this->_indices as $candidate) {
        if ($candidate instanceof Zend_Search_Lucene_Interface) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('sub-index objects have to implement Zend_Search_Lucene_Interface.');
    }
}
/**
 * Add index for searching.
 *
 * The index is appended to the end of the internal indices list.
 *
 * @param Zend_Search_Lucene_Interface $index
 */
public function addIndex(Zend_Search_Lucene_Interface $index)
{
$this->_indices[] = $index;
}
/**
 * Get current generation number.
 *
 * Not supported by the multi-searcher: this implementation ignores
 * $directory and always throws.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @return integer
 * @throws Zend_Search_Lucene_Exception  always
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception("Generation number can't be retrieved for multi-searcher");
}
/**
 * Get segments file name.
 *
 * Delegates to Zend_Search_Lucene::getSegmentFileName().
 *
 * @param integer $generation
 * @return string
 */
public static function getSegmentFileName($generation)
{
return Zend_Search_Lucene::getSegmentFileName($generation);
}
/**
 * Get index format version.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  always
 */
public function getFormatVersion()
{
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception("Format version can't be retrieved for multi-searcher");
}
/**
 * Set index format version on every sub-index.
 *
 * Each sub-index is converted to this format at its nearest update time.
 *
 * @param int $formatVersion
 */
public function setFormatVersion($formatVersion)
{
    foreach (array_keys($this->_indices) as $position) {
        $this->_indices[$position]->setFormatVersion($formatVersion);
    }
}
/**
 * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
 *
 * Not supported by the multi-searcher: this implementation always throws.
 *
 * @return Zend_Search_Lucene_Storage_Directory
 * @throws Zend_Search_Lucene_Exception  always
 */
public function getDirectory()
{
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception("Index directory can't be retrieved for multi-searcher");
}
/**
 * Returns the total number of documents in all sub-indices
 * (including deleted documents).
 *
 * @return integer  Sum of count() over every sub-index; 0 if there are none
 */
public function count()
{
    $count = 0;

    foreach ($this->_indices as $index) {
        // Ask the sub-index itself; $this->_indices is a plain array and has no count() method.
        $count += $index->count();
    }

    return $count;
}
/**
 * Returns one greater than the largest possible document number.
 * This may be used to, e.g., determine how big to allocate a structure which will have
 * an element for every document number in an index.
 *
 * This implementation simply returns count().
 *
 * @return integer
 */
public function maxDoc()
{
return $this->count();
}
/**
 * Returns the total number of non-deleted documents in all sub-indices.
 *
 * @return integer  Sum of numDocs() over every sub-index; 0 if there are none
 */
public function numDocs()
{
    $docs = 0;

    foreach ($this->_indices as $index) {
        // Ask the sub-index itself; $this->_indices is a plain array and has no numDocs() method.
        $docs += $index->numDocs();
    }

    return $docs;
}
/**
 * Checks whether a document is deleted.
 *
 * The global $id is translated into a sub-index local id by subtracting the
 * count() of every preceding sub-index; the owning sub-index answers the query.
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function isDeleted($id)
{
    $localId = $id;

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($localId < $size) {
            return $subIndex->isDeleted($localId);
        }

        $localId -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Set default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * Default value is null
 *
 * This method is static (as required by Zend_Search_Lucene_Interface), so
 * there is no instance and no indices list to iterate over. The setting is
 * forwarded to the static Zend_Search_Lucene implementation of the same
 * interface method.
 *
 * @param string $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    // $this is not available in static context; the previous per-index loop could never run.
    require_once 'Zend/Search/Lucene.php';
    Zend_Search_Lucene::setDefaultSearchField($fieldName);
}
/**
 * Get default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * This method is static (as required by Zend_Search_Lucene_Interface), so
 * there is no instance and no indices list to inspect. The value is read
 * from the static Zend_Search_Lucene implementation of the same interface
 * method.
 *
 * @return string
 */
public static function getDefaultSearchField()
{
    // $this is not available in static context; the previous per-index comparison could never run.
    require_once 'Zend/Search/Lucene.php';
    return Zend_Search_Lucene::getDefaultSearchField();
}
/**
 * Set result set limit.
 *
 * 0 (default) means no limit
 *
 * This method is static (as required by Zend_Search_Lucene_Interface), so
 * there is no instance and no indices list to iterate over. The setting is
 * forwarded to the static Zend_Search_Lucene implementation of the same
 * interface method.
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    // $this is not available in static context; the previous per-index loop could never run.
    require_once 'Zend/Search/Lucene.php';
    Zend_Search_Lucene::setResultSetLimit($limit);
}
/**
 * Get result set limit.
 *
 * 0 means no limit
 *
 * This method is static (as required by Zend_Search_Lucene_Interface), so
 * there is no instance and no indices list to inspect. The value is read
 * from the static Zend_Search_Lucene implementation of the same interface
 * method.
 *
 * @return integer
 */
public static function getResultSetLimit()
{
    // $this is not available in static context; the previous per-index comparison could never run.
    require_once 'Zend/Search/Lucene.php';
    return Zend_Search_Lucene::getResultSetLimit();
}
/**
 * Retrieve index maxBufferedDocs option
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * The value is only returned when every sub-index reports the identical
 * (===) value.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or their values differ
 */
public function getMaxBufferedDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxBufferedDocs = reset($this->_indices)->getMaxBufferedDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxBufferedDocs() !== $maxBufferedDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message previously claimed a "default search field" mismatch (copy-paste error).
            throw new Zend_Search_Lucene_Exception('Indices have different maxBufferedDocs option values.');
        }
    }

    return $maxBufferedDocs;
}
/**
 * Set index maxBufferedDocs option on every sub-index.
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    foreach (array_keys($this->_indices) as $position) {
        $this->_indices[$position]->setMaxBufferedDocs($maxBufferedDocs);
    }
}
/**
 * Retrieve index maxMergeDocs option
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * The value is only returned when every sub-index reports the identical
 * (===) value.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or their values differ
 */
public function getMaxMergeDocs()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $maxMergeDocs = reset($this->_indices)->getMaxMergeDocs();

    foreach ($this->_indices as $index) {
        if ($index->getMaxMergeDocs() !== $maxMergeDocs) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message previously claimed a "default search field" mismatch (copy-paste error).
            throw new Zend_Search_Lucene_Exception('Indices have different maxMergeDocs option values.');
        }
    }

    return $maxMergeDocs;
}
/**
 * Set index maxMergeDocs option on every sub-index.
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    foreach (array_keys($this->_indices) as $position) {
        $this->_indices[$position]->setMaxMergeDocs($maxMergeDocs);
    }
}
/**
 * Retrieve index mergeFactor option
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * The value is only returned when every sub-index reports the identical
 * (===) value.
 *
 * @return integer
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or their values differ
 */
public function getMergeFactor()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $mergeFactor = reset($this->_indices)->getMergeFactor();

    foreach ($this->_indices as $index) {
        if ($index->getMergeFactor() !== $mergeFactor) {
            require_once 'Zend/Search/Lucene/Exception.php';
            // Message previously claimed a "default search field" mismatch (copy-paste error).
            throw new Zend_Search_Lucene_Exception('Indices have different mergeFactor option values.');
        }
    }

    return $mergeFactor;
}
/**
 * Set index mergeFactor option on every sub-index.
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    foreach ($this->_indices as $index) {
        // Previously called setMaxMergeDocs() with an undefined $maxMergeDocs variable.
        $index->setMergeFactor($mergeFactor);
    }
}
/**
 * Performs a query against every sub-index and returns a single array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * Hits are concatenated in sub-index order. The id of each hit coming from
 * a sub-index other than the first is shifted by the summed count() of all
 * preceding sub-indices, so ids are unique across the multi-searcher.
 * An empty array is returned when there are no sub-indices.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception
 */
public function find($query)
{
    $allHits    = array();
    $indexShift = 0;

    foreach ($this->_indices as $index) {
        foreach ($index->find($query) as $hit) {
            if ($indexShift != 0) {
                $hit->id += $indexShift;
            }
            // Appending directly avoids calling array_merge() with an empty argument list.
            $allHits[] = $hit;
        }

        $indexShift += $index->count();
    }

    /** @todo Implement advanced sorting */

    return $allHits;
}
/**
 * Returns a list of all unique field names that exist in the sub-indices.
 *
 * An empty array is returned when there are no sub-indices.
 *
 * @param boolean $indexed
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $fieldNamesList = array();

    foreach ($this->_indices as $index) {
        $fieldNamesList[] = $index->getFieldNames($indexed);
    }

    // array_merge() must not be invoked with an empty argument list.
    if (count($fieldNamesList) == 0) {
        return array();
    }

    return array_unique(call_user_func_array('array_merge', $fieldNamesList));
}
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in this multi-searcher.
 *
 * A QueryHit argument is replaced by its id property. The global id is then
 * translated into a sub-index local id by subtracting the count() of every
 * preceding sub-index.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function getDocument($id)
{
    /* @var $id Zend_Search_Lucene_Search_QueryHit */
    $localId = ($id instanceof Zend_Search_Lucene_Search_QueryHit) ? $id->id : $id;

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($localId < $size) {
            return $subIndex->getDocument($localId);
        }

        $localId -= $size;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
 * Returns true if at least one sub-index contains documents with the specified term.
 *
 * Is used for query optimization.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    $found = false;

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->hasTerm($term)) {
            $found = true;
            break;
        }
    }

    return $found;
}
/**
 * Returns IDs of all the documents containing term.
 *
 * Ids are collected in sub-index order; ids from sub-indices other than the
 * first are shifted by the summed count() of all preceding sub-indices.
 * An empty array is returned when there are no sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  must be null; filters are not supported
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is supplied
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $allDocs    = array();
    $indexShift = 0;

    foreach ($this->_indices as $index) {
        foreach ($index->termDocs($term) as $docId) {
            // Appending directly avoids calling array_merge() with an empty argument list.
            $allDocs[] = $docId + $indexShift;
        }

        $indexShift += $index->count();
    }

    return $allDocs;
}
/**
 * Returns documents filter for all documents containing term.
 *
 * Not supported by the multi-searcher: this implementation ignores both
 * arguments and always throws.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return Zend_Search_Lucene_Index_DocsFilter
 * @throws Zend_Search_Lucene_Exception  always
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
}
/**
 * Returns an array of all term freqs.
 * Return array structure: array( docId => freq, ...)
 *
 * Document ids from sub-indices other than the first are shifted by the
 * summed count() of all preceding sub-indices. An empty array is returned
 * when there are no sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  must be null; filters are not supported
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is supplied
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $allFreqs   = array();
    $indexShift = 0;

    foreach ($this->_indices as $index) {
        foreach ($index->termFreqs($term) as $docId => $freq) {
            // Assign by key: array_merge() would renumber the integer docId keys from 0
            // and destroy the docId => freq mapping.
            $allFreqs[$docId + $indexShift] = $freq;
        }

        $indexShift += $index->count();
    }

    return $allFreqs;
}
/**
 * Returns an array of all term positions in the documents.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Document ids from sub-indices other than the first are shifted by the
 * summed count() of all preceding sub-indices. An empty array is returned
 * when there are no sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter  must be null; filters are not supported
 * @return array
 * @throws Zend_Search_Lucene_Exception  if a documents filter is supplied
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    if ($docsFilter != null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    $allPositions = array();
    $indexShift   = 0;

    foreach ($this->_indices as $index) {
        foreach ($index->termPositions($term) as $docId => $positions) {
            // Previously the shifted entries were written back into the source array and then
            // discarded, and the final array_merge() was applied to the last sub-index result
            // instead of the collected list. Assigning by key also keeps docIds intact, which
            // array_merge() would renumber.
            $allPositions[$docId + $indexShift] = $positions;
        }

        $indexShift += $index->count();
    }

    return $allPositions;
}
/**
 * Returns the number of documents containing the $term, summed over all sub-indices.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach (array_keys($this->_indices) as $position) {
        $total += $this->_indices[$position]->docFreq($term);
    }

    return $total;
}
/**
 * Retrieve similarity used by index reader.
 *
 * The similarity of the first sub-index is returned, provided every
 * sub-index reports the identical (===) similarity.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 * @throws Zend_Search_Lucene_Exception  if there are no sub-indices or their similarities differ
 */
public function getSimilarity()
{
    if (count($this->_indices) == 0) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices list is empty');
    }

    $reference = reset($this->_indices)->getSimilarity();

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->getSimilarity() === $reference) {
            continue;
        }

        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Indices have different similarity.');
    }

    return $reference;
}
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * The global $id is translated into a sub-index local id by subtracting the
 * count() of every preceding sub-index. If no sub-index contains the id,
 * null is returned (no exception is thrown here).
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|null
 */
public function norm($id, $fieldName)
{
    $localId = $id;

    foreach ($this->_indices as $subIndex) {
        $size = $subIndex->count();

        if ($localId < $size) {
            return $subIndex->norm($localId, $fieldName);
        }

        $localId -= $size;
    }

    return null;
}
/**
 * Returns true if any documents have been deleted from at least one sub-index.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $anyDeleted = false;

    foreach ($this->_indices as $subIndex) {
        if ($subIndex->hasDeletions()) {
            $anyDeleted = true;
            break;
        }
    }

    return $anyDeleted;
}
/**
 * Deletes a document from the index.
 * $id is an internal document id
 *
 * A QueryHit argument is replaced by its id property (as getDocument() does).
 * The global id is then translated into a sub-index local id by subtracting
 * the count() of every preceding sub-index.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception  if $id is out of the range
 */
public function delete($id)
{
    // The documented QueryHit form was previously compared against integers without unwrapping.
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    foreach ($this->_indices as $index) {
        $indexCount = $index->count();

        if ($indexCount > $id) {
            $index->delete($id);
            return;
        }

        $id -= $indexCount;
    }

    require_once 'Zend/Search/Lucene/Exception.php';
    throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
}
/**
* Callback used to choose target index for new documents
*
* Function/method signature:
* Zend_Search_Lucene_Interface callbackFunction(Zend_Search_Lucene_Document $document, array $indices);
*
* null means "default documents distributing algorithm"
*
* @var callback
*/
protected $_documentDistributorCallBack = null;
/**
 * Set callback for choosing target index.
 *
 * Passing null restores the default document distribution used by addDocument().
 *
 * @param callback $callback  null or a value accepted by is_callable()
 * @throws Zend_Search_Lucene_Exception  if $callback is neither null nor callable
 */
public function setDocumentDistributorCallback($callback)
{
    // The guard used to control the assignment itself, so only invalid callbacks were stored
    // and valid ones (or null) were silently ignored.
    if ($callback !== null && !is_callable($callback)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('$callback parameter must be null or checked as callable.');
    }

    $this->_documentDistributorCallBack = $callback;
}
/**
 * Get callback for choosing target index.
 *
 * @return callback  the stored callback, or null if none has been set
 */
public function getDocumentDistributorCallback()
{
return $this->_documentDistributorCallBack;
}
/**
 * Adds a document to one of the sub-indices.
 *
 * If a distributor callback is set, it is called with the document and the
 * indices list and its return value is used as the target index. Otherwise
 * a sub-index is picked at random with array_rand().
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception  if no callback is set and there are no sub-indices
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    if ($this->_documentDistributorCallBack !== null) {
        $index = call_user_func($this->_documentDistributorCallBack, $document, $this->_indices);
    } else {
        // array_rand() cannot pick from an empty array.
        if (count($this->_indices) == 0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Indices list is empty');
        }

        $index = $this->_indices[ array_rand($this->_indices) ];
    }

    $index->addDocument($document);
}
/**
 * Commit changes resulting from delete() or undeleteAll() operations
 * by calling commit() on every sub-index.
 */
public function commit()
{
    foreach (array_keys($this->_indices) as $position) {
        $this->_indices[$position]->commit();
    }
}
/**
 * Optimize every sub-index.
 *
 * Merges all segments into one
 */
public function optimize()
{
    foreach ($this->_indices as $index) {
        // Call the public optimize() of the sub-index; "_optimise" was a misspelled method name.
        $index->optimize();
    }
}
/**
 * Returns an array of all unique terms in the sub-indices.
 *
 * Terms are de-duplicated by their key(); the first occurrence (in
 * sub-index order) is kept. An empty array is returned when there are
 * no sub-indices.
 *
 * @return array
 */
public function terms()
{
    $uniqueTerms = array();

    foreach ($this->_indices as $index) {
        foreach ($index->terms() as $term) {
            // array_unique() compares elements as strings, which is not usable for term objects;
            // the term key is used as the identity instead.
            $termKey = $term->key();
            if (!isset($uniqueTerms[$termKey])) {
                $uniqueTerms[$termKey] = $term;
            }
        }
    }

    return array_values($uniqueTerms);
}
/**
* Terms stream priority queue object
*
* @var Zend_Search_Lucene_TermStreamsPriorityQueue
*/
private $_termsStream = null;
/**
 * Reset terms stream.
 *
 * On first use a Zend_Search_Lucene_TermStreamsPriorityQueue is created over
 * the indices list; afterwards the existing queue is reset instead.
 */
public function resetTermsStream()
{
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_indices);
}
/**
 * Skip terms stream up to the specified term prefix.
 *
 * Prefix contains fully specified field info and a portion of the searched term.
 *
 * NOTE(review): dereferences $this->_termsStream without a null check, so
 * resetTermsStream() presumably has to be called first - confirm with callers.
 *
 * @param Zend_Search_Lucene_Index_Term $prefix
 */
public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
{
$this->_termsStream->skipTo($prefix);
}
/**
 * Scans the terms dictionary and returns the next term.
 *
 * NOTE(review): dereferences $this->_termsStream without a null check, so
 * resetTermsStream() presumably has to be called first - confirm with callers.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
return $this->_termsStream->nextTerm();
}
/**
 * Returns the term at the current position.
 *
 * NOTE(review): dereferences $this->_termsStream without a null check, so
 * resetTermsStream() presumably has to be called first - confirm with callers.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
return $this->_termsStream->currentTerm();
}
/**
 * Close terms stream.
 *
 * Should be used for resource clean-up if the stream is not read up to the end.
 *
 * Closes the underlying stream queue and drops the reference to it, so the
 * next resetTermsStream() call builds a new queue.
 *
 * NOTE(review): dereferences $this->_termsStream without a null check, so
 * resetTermsStream() presumably has to be called first - confirm with callers.
 */
public function closeTermsStream()
{
$this->_termsStream->closeTermsStream();
$this->_termsStream = null;
}
/**
 * Undeletes all documents currently marked as deleted, in every sub-index.
 */
public function undeleteAll()
{
    foreach (array_keys($this->_indices) as $position) {
        $this->_indices[$position]->undeleteAll();
    }
}
/**
 * Add reference to the index object.
 *
 * Intentionally a no-op in this class.
 *
 * @internal
 */
public function addReference()
{
// Do nothing, since it's never referenced by indices
}
/**
 * Remove reference from the index object.
 *
 * Intentionally a no-op in this class: no reference count is kept here,
 * so nothing is closed or cleaned up by this call.
 *
 * @internal
 */
public function removeReference()
{
// Do nothing, since it's never referenced by indices
}
}

View File

@ -29,10 +29,6 @@ require_once 'Zend/Search/Lucene/Search/QueryToken.php';
/** Zend_Search_Lucene_Search_QueryParser */
require_once 'Zend/Search/Lucene/Search/QueryParser.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -203,6 +199,7 @@ class Zend_Search_Lucene_Search_BooleanExpressionRecognizer extends Zend_Search_
public function finishExpression()
{
if ($this->getState() != self::ST_LITERAL) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Literal expected.');
}

View File

@ -0,0 +1,93 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Highlighter_Interface */
require_once 'Zend/Search/Lucene/Search/Highlighter/Interface.php';
/**
 * Default highlighter.
 *
 * Every highlight() call marks the given words in the attached HTML document using
 * the next colour of a fixed palette; the palette is cycled when it is exhausted.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Highlighter_Default implements Zend_Search_Lucene_Search_Highlighter_Interface
{
    /**
     * List of colors for text highlighting
     *
     * @var array
     */
    protected $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
                                        '#ff8888', '#88ff88', '#8888ff',
                                        '#88dddd', '#dd88dd', '#dddd88',
                                        '#aaddff', '#aaffdd', '#ddaaff',
                                        '#ddffaa', '#ffaadd', '#ffddaa');

    /**
     * Index of current color for highlighting
     *
     * Index is increased at each highlight() call, so terms matching different queries
     * are highlighted using different colors.
     *
     * @var integer
     */
    protected $_currentColorIndex = 0;

    /**
     * HTML document for highlighting (null until setDocument() is called)
     *
     * @var Zend_Search_Lucene_Document_Html
     */
    protected $_doc;

    /**
     * Set document for highlighting.
     *
     * @param Zend_Search_Lucene_Document_Html $document
     */
    public function setDocument(Zend_Search_Lucene_Document_Html $document)
    {
        $this->_doc = $document;
    }

    /**
     * Get document for highlighting.
     *
     * @return Zend_Search_Lucene_Document_Html|null Null if no document has been set yet
     */
    public function getDocument()
    {
        return $this->_doc;
    }

    /**
     * Highlight specified words
     *
     * @param string|array $words  Words to highlight. They could be organized using the array or string.
     * @throws Zend_Search_Lucene_Exception If no document has been set with setDocument()
     */
    public function highlight($words)
    {
        // Fail with a descriptive, catchable exception instead of dereferencing null.
        if ($this->_doc === null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document for highlighting is not set. Use setDocument() first.');
        }

        // Take the current colour, then advance (and wrap) the palette index for the next call.
        $color = $this->_highlightColors[$this->_currentColorIndex];
        $this->_currentColorIndex = ($this->_currentColorIndex + 1) % count($this->_highlightColors);

        $this->_doc->highlight($words, $color);
    }
}

View File

@ -0,0 +1,52 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Contract for objects that highlight words in an HTML document.
*
* A highlighter holds the document to work on (setDocument()/getDocument())
* and marks words in it on request (highlight()).
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
interface Zend_Search_Lucene_Search_Highlighter_Interface
{
/**
* Set document for highlighting.
*
* @param Zend_Search_Lucene_Document_Html $document Document the following highlight() calls operate on
*/
public function setDocument(Zend_Search_Lucene_Document_Html $document);
/**
* Get document for highlighting.
*
* @return Zend_Search_Lucene_Document_Html Document previously passed to setDocument()
*/
public function getDocument();
/**
* Highlight specified words (method is invoked once per subquery)
*
* @param string|array $words Words to highlight. They could be organized using the array or string.
*/
public function highlight($words);
}

View File

@ -25,6 +25,9 @@ require_once 'Zend/Search/Lucene/Document/Html.php';
/** Zend_Search_Lucene_Index_DocsFilter */
require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
/** Zend_Search_Lucene_Search_Highlighter_Default */
require_once 'Zend/Search/Lucene/Search/Highlighter/Default.php';
/**
* @category Zend
@ -35,7 +38,6 @@ require_once 'Zend/Search/Lucene/Index/DocsFilter.php';
*/
abstract class Zend_Search_Lucene_Search_Query
{
/**
* query boost factor
*
@ -57,17 +59,6 @@ abstract class Zend_Search_Lucene_Search_Query
*/
private $_currentColorIndex = 0;
/**
* List of colors for text highlighting
*
* @var array
*/
private $_highlightColors = array('#66ffff', '#ff66ff', '#ffff66',
'#ff8888', '#88ff88', '#8888ff',
'#88dddd', '#dd88dd', '#dddd88',
'#aaddff', '#aaffdd', '#ddaaff', '#ddffaa', '#ffaadd', '#ffddaa');
/**
* Gets the boost for this clause. Documents matching
* this clause will (in addition to the normal weightings) have their score
@ -186,42 +177,57 @@ abstract class Zend_Search_Lucene_Search_Query
abstract public function getQueryTerms();
/**
* Get highlight color and shift to next
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @return string
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
protected function _getHighlightColor(&$colorIndex)
{
$color = $this->_highlightColors[$colorIndex++];
$colorIndex %= count($this->_highlightColors);
return $color;
}
/**
* Highlight query terms
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
*/
abstract public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex);
abstract protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter);
/**
* Highlight matches in $inputHTML
*
* @param string $inputHTML
* @param string $defaultEncoding HTML encoding, is used if it's not specified using Content-type HTTP-EQUIV meta tag.
* @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter
* @return string
*/
public function highlightMatches($inputHTML)
public function highlightMatches($inputHTML, $defaultEncoding = '', $highlighter = null)
{
$doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML);
if ($highlighter === null) {
$highlighter = new Zend_Search_Lucene_Search_Highlighter_Default();
}
$colorIndex = 0;
$this->highlightMatchesDOM($doc, $colorIndex);
$doc = Zend_Search_Lucene_Document_Html::loadHTML($inputHTML, false, $defaultEncoding);
$highlighter->setDocument($doc);
$this->_highlightMatches($highlighter);
return $doc->getHTML();
}
/**
 * Highlight matches in an HTML fragment and return the highlighted fragment
 * (without HTML header and body tag)
 *
 * The fragment is converted to UTF-8 and wrapped into a minimal HTML document
 * before it is handed to the highlighter; only the body content is returned.
 *
 * @param string $inputHtmlFragment
 * @param string $encoding  Input HTML string encoding
 * @param Zend_Search_Lucene_Search_Highlighter_Interface|null $highlighter  Default highlighter is used if null
 * @return string
 */
public function htmlFragmentHighlightMatches($inputHtmlFragment, $encoding = 'UTF-8', $highlighter = null)
{
    $activeHighlighter = ($highlighter !== null)
                       ? $highlighter
                       : new Zend_Search_Lucene_Search_Highlighter_Default();

    // Normalise the fragment to UTF-8 (unconvertible characters are dropped by //IGNORE)
    $utf8Fragment = iconv($encoding, 'UTF-8//IGNORE', $inputHtmlFragment);

    $wrappedHtml = '<html><head><META HTTP-EQUIV="Content-type" CONTENT="text/html; charset=UTF-8"/></head><body>'
                 . $utf8Fragment
                 . '</body></html>';

    $htmlDocument = Zend_Search_Lucene_Document_Html::loadHTML($wrappedHtml);
    $activeHighlighter->setDocument($htmlDocument);

    $this->_highlightMatches($activeHighlighter);

    return $htmlDocument->getHtmlBody();
}
}

View File

@ -757,16 +757,15 @@ class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
foreach ($this->_subqueries as $id => $subquery) {
if ($this->_signs === null || $this->_signs[$id] !== false) {
$subquery->highlightMatchesDOM($doc, $colorIndex);
$subquery->_highlightMatches($highlighter);
}
}
}
@ -794,10 +793,10 @@ class Zend_Search_Lucene_Search_Query_Boolean extends Zend_Search_Lucene_Search_
}
$query .= '(' . $subquery->__toString() . ')';
}
if ($subquery->getBoost() != 1) {
$query .= '^' . round($subquery->getBoost(), 4);
}
if ($this->getBoost() != 1) {
$query = '(' . $query . ')^' . round($this->getBoost(), 4);
}
return $query;

View File

@ -117,12 +117,11 @@ class Zend_Search_Lucene_Search_Query_Empty extends Zend_Search_Lucene_Search_Qu
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
// Do nothing
}

View File

@ -106,27 +106,59 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
*/
private $_termKeys = null;
/**
* Default non-fuzzy prefix length
*
* @var integer
*/
private static $_defaultPrefixLength = 3;
/**
* Zend_Search_Lucene_Search_Query_Fuzzy constructor.
*
* @param Zend_Search_Lucene_Index_Term $pattern
* @param Zend_Search_Lucene_Index_Term $term
* @param float $minimumSimilarity
* @param integer $prefixLength
* @throws Zend_Search_Lucene_Exception
*/
public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = 0)
public function __construct(Zend_Search_Lucene_Index_Term $term, $minimumSimilarity = self::DEFAULT_MIN_SIMILARITY, $prefixLength = null)
{
if ($minimumSimilarity < 0) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be less than 0');
}
if ($minimumSimilarity >= 1) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('minimumSimilarity cannot be greater than or equal to 1');
}
if ($prefixLength < 0) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('prefixLength cannot be less than 0');
}
$this->_term = $term;
$this->_minimumSimilarity = $minimumSimilarity;
$this->_prefixLength = $prefixLength;
$this->_prefixLength = ($prefixLength !== null)? $prefixLength : self::$_defaultPrefixLength;
}
/**
* Get default non-fuzzy prefix length
*
* This is the value the constructor falls back to when it is called with $prefixLength = null.
*
* @return integer
*/
public static function getDefaultPrefixLength()
{
return self::$_defaultPrefixLength;
}
/**
* Set default non-fuzzy prefix length
*
* The value is shared by the whole class (static) and is used by the constructor
* whenever it is called with $prefixLength = null.
*
* NOTE(review): the value is stored without validation, whereas the constructor rejects
* an explicitly passed negative $prefixLength - confirm whether negative defaults should be rejected too.
*
* @param integer $defaultPrefixLength
*/
public static function setDefaultPrefixLength($defaultPrefixLength)
{
self::$_defaultPrefixLength = $defaultPrefixLength;
}
/**
@ -148,6 +180,7 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
* @throws Zend_Search_Lucene_Exception
*/
public function rewrite(Zend_Search_Lucene_Interface $index)
{
@ -174,6 +207,7 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
$scaleFactor = 1/(1 - $this->_minimumSimilarity);
$maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
foreach ($fields as $field) {
$index->resetTermsStream();
@ -212,6 +246,11 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
$this->_matches[] = $index->currentTerm();
$this->_termKeys[] = $index->currentTerm()->key();
$this->_scores[] = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
}
}
$index->nextTerm();
@ -243,6 +282,11 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
$this->_matches[] = $index->currentTerm();
$this->_termKeys[] = $index->currentTerm()->key();
$this->_scores[] = ($similarity - $this->_minimumSimilarity)*$scaleFactor;
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
}
}
$index->nextTerm();
@ -288,7 +332,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
*/
public function optimize(Zend_Search_Lucene_Interface $index)
{
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
@ -300,7 +345,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
public function getQueryTerms()
{
if ($this->_matches === null) {
throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
}
return $this->_matches;
@ -315,7 +361,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
*/
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
@ -329,7 +376,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
*/
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
@ -342,7 +390,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
*/
public function matchedDocs()
{
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
@ -355,24 +404,70 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
*/
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
throw new Zend_Search_Lucene_Exception('Wildcard query should not be directly used for search. Use $query->rewrite($index)');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Fuzzy query should not be directly used for search. Use $query->rewrite($index)');
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
$words = array();
foreach ($this->_matches as $term) {
$words[] = $term->text;
$prefix = Zend_Search_Lucene_Index_Term::getPrefix($this->_term->text, $this->_prefixLength);
$prefixByteLength = strlen($prefix);
$prefixUtf8Length = Zend_Search_Lucene_Index_Term::getLength($prefix);
$termLength = Zend_Search_Lucene_Index_Term::getLength($this->_term->text);
$termRest = substr($this->_term->text, $prefixByteLength);
// we calculate length of the rest in bytes since levenshtein() is not UTF-8 compatible
$termRestLength = strlen($termRest);
$scaleFactor = 1/(1 - $this->_minimumSimilarity);
$docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
foreach ($tokens as $token) {
$termText = $token->getTermText();
if (substr($termText, 0, $prefixByteLength) == $prefix) {
// Calculate similarity
$target = substr($termText, $prefixByteLength);
$maxDistance = isset($this->_maxDistances[strlen($target)])?
$this->_maxDistances[strlen($target)] :
$this->_calculateMaxDistance($prefixUtf8Length, $termRestLength, strlen($target));
if ($termRestLength == 0) {
// we don't have anything to compare. That means if we just add
// the letters for current term we get the new word
$similarity = (($prefixUtf8Length == 0)? 0 : 1 - strlen($target)/$prefixUtf8Length);
} else if (strlen($target) == 0) {
$similarity = (($prefixUtf8Length == 0)? 0 : 1 - $termRestLength/$prefixUtf8Length);
} else if ($maxDistance < abs($termRestLength - strlen($target))){
//just adding the characters of term to target or vice-versa results in too many edits
//for example "pre" length is 3 and "prefixes" length is 8. We can see that
//given this optimal circumstance, the edit distance cannot be less than 5.
//which is 8-3 or more precisely abs(3-8).
//if our maximum edit distance is 4, then we can discard this word
//without looking at it.
$similarity = 0;
} else {
$similarity = 1 - levenshtein($termRest, $target)/($prefixUtf8Length + min($termRestLength, strlen($target)));
}
if ($similarity > $this->_minimumSimilarity) {
$words[] = $termText;
}
}
}
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
$highlighter->highlight($words);
}
/**
@ -385,7 +480,8 @@ class Zend_Search_Lucene_Search_Query_Fuzzy extends Zend_Search_Lucene_Search_Qu
// It's used only for query visualisation, so we don't care about characters escaping
return (($this->_term->field === null)? '' : $this->_term->field . ':')
. $this->_term->text . '~'
. (($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY)? round($this->_minimumSimilarity, 4) : '');
. (($this->_minimumSimilarity != self::DEFAULT_MIN_SIMILARITY)? round($this->_minimumSimilarity, 4) : '')
. (($this->getBoost() != 1)? '^' . round($this->getBoost(), 4) : '');
}
}

View File

@ -118,12 +118,11 @@ class Zend_Search_Lucene_Search_Query_Insignificant extends Zend_Search_Lucene_S
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
// Do nothing
}

View File

@ -103,10 +103,15 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
*
* @param array $terms Array of Zend_Search_Lucene_Index_Term objects
* @param array $signs Array of signs. Sign is boolean|null.
* @throws Zend_Search_Lucene_Exception
*/
public function __construct($terms = null, $signs = null)
{
if (is_array($terms)) {
if (count($terms) > Zend_Search_Lucene::getTermsPerQueryLimit()) {
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
}
$this->_terms = $terms;
$this->_signs = null;
@ -594,12 +599,11 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
$words = array();
@ -615,7 +619,7 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
}
}
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
$highlighter->highlight($words);
}
/**
@ -647,7 +651,7 @@ class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Searc
}
if ($this->getBoost() != 1) {
$query = '(' . $query . ')^' . $this->getBoost();
$query = '(' . $query . ')^' . round($this->getBoost(), 4);
}
return $query;

View File

@ -26,7 +26,7 @@
require_once 'Zend/Search/Lucene/Search/Query.php';
/**
* Zend_Search_Lucene_Search_Weight_MultiTerm
* Zend_Search_Lucene_Search_Weight_Phrase
*/
require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
@ -517,19 +517,18 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
$words = array();
foreach ($this->_terms as $term) {
$words[] = $term->text;
}
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
$highlighter->highlight($words);
}
/**
@ -540,11 +539,10 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
public function __toString()
{
// It's used only for query visualisation, so we don't care about characters escaping
$query = '';
if (isset($this->_terms[0]) && $this->_terms[0]->field !== null) {
$query .= $this->_terms[0]->field . ':';
$query = $this->_terms[0]->field . ':';
} else {
$query = '';
}
$query .= '"';
@ -562,6 +560,10 @@ class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Q
$query .= '~' . $this->_slop;
}
if ($this->getBoost() != 1) {
$query .= '^' . round($this->getBoost(), 4);
}
return $query;
}
}

View File

@ -0,0 +1,133 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/**
* Zend_Search_Lucene_Search_Query
*/
require_once 'Zend/Search/Lucene/Search/Query.php';
/**
* Zend_Search_Lucene_Search_Weight
*/
require_once 'Zend/Search/Lucene/Search/Weight.php';
/**
 * It's an internal abstract class intended to finalize query processing after query parsing.
 * This type of query is not actually involved into query execution: every execution-related
 * method below throws, and only the result of a rewrite operation is meant to be executed.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @internal
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Search_Query_Preprocessing extends Zend_Search_Lucene_Search_Query
{
    /**
     * Matched terms.
     *
     * Matched terms list.
     * It's filled during rewrite operation and may be used for search result highlighting
     *
     * Array of Zend_Search_Lucene_Index_Term objects
     *
     * @var array
     */
    protected $_matches = null;

    /**
     * Optimize query in the context of specified index
     *
     * Not supported for preprocessing queries; always throws.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception
     */
    public function optimize(Zend_Search_Lucene_Interface $index)
    {
        $this->_rejectCall('This query is not intended to be executed.');
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * Not supported for preprocessing queries; always throws.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @return Zend_Search_Lucene_Search_Weight
     * @throws Zend_Search_Lucene_Exception
     */
    public function createWeight(Zend_Search_Lucene_Interface $reader)
    {
        $this->_rejectCall('This query is not intended to be executed.');
    }

    /**
     * Execute query in context of index reader
     * It also initializes necessary internal structures
     *
     * Not supported for preprocessing queries; always throws.
     *
     * @param Zend_Search_Lucene_Interface $reader
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @throws Zend_Search_Lucene_Exception
     */
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
    {
        $this->_rejectCall('This query is not intended to be executed.');
    }

    /**
     * Get document ids likely matching the query
     *
     * It's an array with document ids as keys (performance considerations)
     *
     * Not supported for preprocessing queries; always throws.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception
     */
    public function matchedDocs()
    {
        $this->_rejectCall('This query is not intended to be executed.');
    }

    /**
     * Score specified document
     *
     * Not supported for preprocessing queries; always throws.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene_Interface $reader
     * @return float
     * @throws Zend_Search_Lucene_Exception
     */
    public function score($docId, Zend_Search_Lucene_Interface $reader)
    {
        $this->_rejectCall('This query is not intended to be executed.');
    }

    /**
     * Return query terms
     *
     * Not available on the preprocessing query itself; always throws.
     *
     * @return array
     * @throws Zend_Search_Lucene_Exception
     */
    public function getQueryTerms()
    {
        $this->_rejectCall('Rewrite operation has to be done before retrieving query terms.');
    }

    /**
     * Lazily load the exception class and throw it with the given message.
     *
     * @param string $message
     * @throws Zend_Search_Lucene_Exception
     */
    private function _rejectCall($message)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception($message);
    }
}

View File

@ -0,0 +1,286 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query_Preprocessing */
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
/** Zend_Search_Lucene_Search_Query_Insignificant */
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
/** Zend_Search_Lucene_Search_Query_Empty */
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
/** Zend_Search_Lucene_Search_Query_Term */
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/**
* It's an internal class intended to finalize processing of a fuzzy query after query parsing.
* This type of query is not actually involved into query execution.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @internal
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy extends Zend_Search_Lucene_Search_Query_Preprocessing
{
/**
* word (query parser lexeme) to find.
*
* @var string
*/
private $_word;
/**
* Word encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
*
* @var string
*/
private $_encoding;
/**
* Field name.
*
* @var string
*/
private $_field;
/**
* A value between 0 and 1 to set the required similarity
* between the query term and the matching terms. For example, for a
* _minimumSimilarity of 0.5 a term of the same length
* as the query term is considered similar to the query term if the edit distance
* between both terms is less than length(term)*0.5
*
* @var float
*/
private $_minimumSimilarity;
/**
* Class constructor. Create a new preprocessing object for fuzzy query.
*
* The arguments are only stored; the word is analysed later, during rewrite().
*
* @param string $word Non-tokenized word (query parser lexeme) to search.
* @param string $encoding Word encoding.
* @param string $fieldName Field name (rewrite() treats null as "no specific field").
* @param float $minimumSimilarity minimum similarity
*/
public function __construct($word, $encoding, $fieldName, $minimumSimilarity)
{
$this->_word = $word;
$this->_encoding = $encoding;
$this->_field = $fieldName;
$this->_minimumSimilarity = $minimumSimilarity;
}
/**
 * Re-write query into primitive queries in the context of specified index
 *
 * When no field is specified, the word is rewritten once per search field and the
 * significant results are combined. Otherwise the word is turned into a single fuzzy
 * query, either as an exact index term or as the one token produced by the default analyzer.
 * On every successful return $this->_matches holds the terms of the produced query.
 *
 * @param Zend_Search_Lucene_Interface $index
 * @return Zend_Search_Lucene_Search_Query
 * @throws Zend_Search_Lucene_Search_QueryParserException If the word contains wildcards or is split into several tokens
 */
public function rewrite(Zend_Search_Lucene_Interface $index)
{
    if ($this->_field === null) {
        // No field given: rewrite the word for each search field and combine the results
        $combinedQuery    = new Zend_Search_Lucene_Search_Query_Boolean();
        $sawInsignificant = false;

        $defaultField = Zend_Search_Lucene::getDefaultSearchField();
        $fieldNames   = ($defaultField === null) ? $index->getFieldNames(true) : array($defaultField);

        foreach ($fieldNames as $name) {
            $perFieldQuery = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy($this->_word,
                                                                                   $this->_encoding,
                                                                                   $name,
                                                                                   $this->_minimumSimilarity);
            $rewritten = $perFieldQuery->rewrite($index);

            if ($rewritten instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                // Remembered only to pick the right "nothing found" result below
                $sawInsignificant = true;
            } else if (!($rewritten instanceof Zend_Search_Lucene_Search_Query_Empty)) {
                $combinedQuery->addSubquery($rewritten);
            }
        }

        $subqueries    = $combinedQuery->getSubqueries();
        $subqueryCount = count($subqueries);

        if ($subqueryCount == 0) {
            $this->_matches = array();

            return $sawInsignificant
                 ? new Zend_Search_Lucene_Search_Query_Insignificant()
                 : new Zend_Search_Lucene_Search_Query_Empty();
        }

        // A single significant subquery is returned as is, without the boolean wrapper
        $result = ($subqueryCount == 1) ? reset($subqueries) : $combinedQuery;

        $result->setBoost($this->getBoost());
        $this->_matches = $result->getQueryTerms();

        return $result;
    }

    // -------------------------------------
    // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
    // encoding is not used since we expect binary matching
    $exactTerm = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
    if ($index->hasTerm($exactTerm)) {
        $fuzzyQuery = new Zend_Search_Lucene_Search_Query_Fuzzy($exactTerm, $this->_minimumSimilarity);
        $fuzzyQuery->setBoost($this->getBoost());

        // Get rewritten query. Important! It also fills terms matching container.
        $rewrittenQuery = $fuzzyQuery->rewrite($index);
        $this->_matches = $fuzzyQuery->getQueryTerms();

        return $rewrittenQuery;
    }

    // -------------------------------------
    // Recognize wildcard queries
    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    $pcreHasUnicode = (@preg_match('/\pL/u', 'a') == 1);
    $wordParts = $pcreHasUnicode
               ? preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word))
               : preg_split('/[*?]/', $this->_word);

    if (count($wordParts) > 1) {
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search doesn\'t support wildcards (except within Keyword fields).');
    }

    // -------------------------------------
    // Recognize one-term multi-term and "insignificant" queries
    $tokens     = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);
    $tokenCount = count($tokens);

    if ($tokenCount == 0) {
        $this->_matches = array();
        return new Zend_Search_Lucene_Search_Query_Insignificant();
    }

    if ($tokenCount != 1) {
        // Word is tokenized into several tokens
        require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
        throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
    }

    $tokenTerm  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
    $fuzzyQuery = new Zend_Search_Lucene_Search_Query_Fuzzy($tokenTerm, $this->_minimumSimilarity);
    $fuzzyQuery->setBoost($this->getBoost());

    // Get rewritten query. Important! It also fills terms matching container.
    $rewrittenQuery = $fuzzyQuery->rewrite($index);
    $this->_matches = $fuzzyQuery->getQueryTerms();

    return $rewrittenQuery;
}
/**
 * Query specific matches highlighting for a fuzzy term lexeme.
 *
 * Only a lexeme that contains no wildcard characters and is tokenized by the
 * default analyzer into exactly one token is highlighted; in that case the work
 * is delegated to a fuzzy query built for that token. Every other input leaves
 * the document untouched.
 *
 * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
 */
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
    // Fields detection is skipped: all fields are expected to be presented in the HTML body
    // and are not differentiated.
    // Exact term matching recognition is skipped: keyword fields highlighting is not supported.

    // -------------------------------------
    // Recognize wildcard queries

    /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
    $pcreUnicodeSupported = (@preg_match('/\pL/u', 'a') == 1);

    $wordParts = $pcreUnicodeSupported
               ? preg_split('/[*?]/u', iconv($this->_encoding, 'UTF-8', $this->_word))
               : preg_split('/[*?]/', $this->_word);

    if (count($wordParts) > 1) {
        // The lexeme contains wildcards: nothing is highlighted
        return;
    }

    // -------------------------------------
    // Recognize one-term, multi-term and "insignificant" queries

    $wordTokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

    if (count($wordTokens) != 1) {
        // No tokens at all (nothing to highlight) or several tokens
        // (fuzzy search is supported only for non-multiple word terms): do nothing
        return;
    }

    $fuzzyTerm  = new Zend_Search_Lucene_Index_Term($wordTokens[0]->getTermText(), $this->_field);
    $fuzzyQuery = new Zend_Search_Lucene_Search_Query_Fuzzy($fuzzyTerm, $this->_minimumSimilarity);
    $fuzzyQuery->_highlightMatches($highlighter);
}
/**
 * Print a query
 *
 * Produces "[field:]word[^boost]". The boost suffix (rounded to 4 digits) is
 * appended only when the boost differs from 1. The result is used only for query
 * visualisation, so no character escaping is performed.
 *
 * @return string
 */
public function __toString()
{
    $fieldPrefix = ($this->_field === null) ? '' : $this->_field . ':';

    $boost       = $this->getBoost();
    $boostSuffix = ($boost != 1) ? '^' . round($boost, 4) : '';

    return $fieldPrefix . $this->_word . $boostSuffix;
}
}

View File

@ -0,0 +1,273 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query_Preprocessing */
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
/** Zend_Search_Lucene_Search_Query_Insignificant */
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
/** Zend_Search_Lucene_Search_Query_Empty */
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
/** Zend_Search_Lucene_Search_Query_Term */
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/**
 * Preprocessing query for a phrase lexeme produced by the query parser.
 *
 * It's an internal class intended to finalize query processing after query parsing:
 * rewrite() converts it into primitive queries in the context of a specific index.
 * This type of query is not actually involved in query execution.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @internal
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_Preprocessing_Phrase extends Zend_Search_Lucene_Search_Query_Preprocessing
{
    /**
     * Phrase to find.
     *
     * @var string
     */
    private $_phrase;

    /**
     * Phrase encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
     *
     * @var string
     */
    private $_phraseEncoding;

    /**
     * Field name (null if the phrase is not bound to a particular field).
     *
     * @var string
     */
    private $_field;

    /**
     * Sets the number of other words permitted between words in query phrase.
     * If zero, then this is an exact phrase search.  For larger values this works
     * like a WITHIN or NEAR operator.
     *
     * The slop is in fact an edit-distance, where the units correspond to
     * moves of terms in the query phrase out of position.  For example, to switch
     * the order of two words requires two moves (the first move places the words
     * atop one another), so to permit re-orderings of phrases, the slop must be
     * at least two.
     * More exact matches are scored higher than sloppier matches, thus search
     * results are sorted by exactness.
     *
     * The slop is zero by default, requiring exact matches.
     *
     * @var integer
     */
    private $_slop = 0;

    /**
     * Class constructor.  Create a new preprocessing object for a phrase query.
     *
     * @param string $phrase          Phrase to search.
     * @param string $phraseEncoding  Phrase encoding.
     * @param string $fieldName       Field name.
     */
    public function __construct($phrase, $phraseEncoding, $fieldName)
    {
        $this->_phrase         = $phrase;
        $this->_phraseEncoding = $phraseEncoding;
        $this->_field          = $fieldName;
    }

    /**
     * Set slop
     *
     * @param integer $slop
     */
    public function setSlop($slop)
    {
        $this->_slop = $slop;
    }

    /**
     * Get slop
     *
     * @return integer
     */
    public function getSlop()
    {
        return $this->_slop;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Resolution order:
     *  1. no field name: a Boolean query holding one rewritten phrase subquery per search field;
     *  2. the raw phrase exists in the index as a term of the field: a Term query;
     *  3. otherwise the phrase is tokenized by the default analyzer and becomes an
     *     Insignificant query (no tokens), a Term query (one token) or a Phrase query.
     *
     * The matched terms container ($this->_matches) is filled as a side effect.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        // Wildcards are allowed within phrases: they are either removed by the text analyzer
        // or used as a part of the keyword for keyword fields, so no wildcard check is done here.

        // Split query into subqueries if field name is not specified
        if ($this->_field === null) {
            require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
            $query = new Zend_Search_Lucene_Search_Query_Boolean();
            $query->setBoost($this->getBoost());

            if (Zend_Search_Lucene::getDefaultSearchField() === null) {
                $searchFields = $index->getFieldNames(true);
            } else {
                $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
            }

            foreach ($searchFields as $fieldName) {
                $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
                                                                                     $this->_phraseEncoding,
                                                                                     $fieldName);
                $subquery->setSlop($this->getSlop());

                $query->addSubquery($subquery->rewrite($index));
            }

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
        // encoding is not used since we expect binary matching
        $term = new Zend_Search_Lucene_Index_Term($this->_phrase, $this->_field);
        if ($index->hasTerm($term)) {
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // tokenize phrase using current analyzer and process it as a phrase query
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

        if (count($tokens) == 0) {
            $this->_matches = array();
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if (count($tokens) == 1) {
            $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // It's non-trivial phrase query
        $query = new Zend_Search_Lucene_Search_Query_Phrase();
        // Slop and boost do not depend on the tokens, so they are applied once.
        // The boost has to be propagated here like in every other branch,
        // otherwise a boosted multi-word phrase silently loses its boost.
        $query->setSlop($this->getSlop());
        $query->setBoost($this->getBoost());

        // Positions are accumulated from position increments reported by the analyzer
        $position = -1;
        foreach ($tokens as $token) {
            $position += $token->getPositionIncrement();
            $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
            $query->addTerm($term, $position);
        }

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    /**
     * Query specific matches highlighting
     *
     * The phrase is tokenized by the default analyzer and the resulting words are
     * passed to the highlighter. Nothing is highlighted if there are no tokens.
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        // Fields detection is skipped: all fields are expected to be presented in the HTML body
        // and are not differentiated.
        // Exact term matching recognition is skipped: keyword fields highlighting is not supported.
        // Wildcard queries recognition is skipped: supported wildcards are removed by text analyzer.

        // tokenize phrase using current analyzer and process it as a phrase query
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $this->_phraseEncoding);

        if (count($tokens) == 0) {
            // Do nothing
            return;
        }

        if (count($tokens) == 1) {
            $highlighter->highlight($tokens[0]->getTermText());
            return;
        }

        // It's non-trivial phrase query
        $words = array();
        foreach ($tokens as $token) {
            $words[] = $token->getTermText();
        }
        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * Produces '[field:]"phrase"[~slop][^boost]'. It's used only for query
     * visualisation, so characters are not escaped.
     *
     * @return string
     */
    public function __toString()
    {
        if ($this->_field !== null) {
            $query = $this->_field . ':';
        } else {
            $query = '';
        }

        $query .= '"' . $this->_phrase . '"';

        if ($this->_slop != 0) {
            $query .= '~' . $this->_slop;
        }

        if ($this->getBoost() != 1) {
            $query .= '^' . round($this->getBoost(), 4);
        }

        return $query;
    }
}

View File

@ -0,0 +1,334 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Search
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Search_Query_Preprocessing */
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
/** Zend_Search_Lucene_Search_Query_Insignificant */
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
/** Zend_Search_Lucene_Search_Query_Empty */
require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
/** Zend_Search_Lucene_Search_Query_Term */
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/**
 * Preprocessing query for a single word lexeme produced by the query parser.
 *
 * It's an internal class intended to finalize query processing after query parsing:
 * rewrite() converts it into primitive queries in the context of a specific index.
 * This type of query is not actually involved in query execution.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Search
 * @internal
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Search_Query_Preprocessing_Term extends Zend_Search_Lucene_Search_Query_Preprocessing
{
    /**
     * Word (query parser lexeme) to find.
     *
     * @var string
     */
    private $_word;

    /**
     * Word encoding (field name is always provided using UTF-8 encoding since it may be retrieved from index).
     *
     * @var string
     */
    private $_encoding;

    /**
     * Field name (null if the word is not bound to a particular field).
     *
     * @var string
     */
    private $_field;

    /**
     * Class constructor.  Create a new preprocessing object for a term query.
     *
     * @param string $word       Non-tokenized word (query parser lexeme) to search.
     * @param string $encoding   Word encoding.
     * @param string $fieldName  Field name.
     */
    public function __construct($word, $encoding, $fieldName)
    {
        $this->_word     = $word;
        $this->_encoding = $encoding;
        $this->_field    = $fieldName;
    }

    /**
     * Re-write query into primitive queries in the context of specified index
     *
     * Resolution order:
     *  1. no field name: a MultiTerm query collecting the terms of the query rewritten
     *     for each search field (Insignificant or Empty query if no terms are collected);
     *  2. the raw word exists in the index as a term of the field: a Term query;
     *  3. the word contains '*' or '?': a rewritten Wildcard query;
     *  4. otherwise the word is tokenized by the default analyzer and becomes an
     *     Insignificant query (no tokens), a Term query (one token) or a MultiTerm
     *     query with all tokens required.
     *
     * The matched terms container ($this->_matches) is filled as a side effect.
     *
     * @param Zend_Search_Lucene_Interface $index
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Search_QueryParserException  if a part of a wildcard pattern
     *                                                        is tokenized into several words
     */
    public function rewrite(Zend_Search_Lucene_Interface $index)
    {
        if ($this->_field === null) {
            require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
            $query = new Zend_Search_Lucene_Search_Query_MultiTerm();
            $query->setBoost($this->getBoost());

            $hasInsignificantSubqueries = false;

            if (Zend_Search_Lucene::getDefaultSearchField() === null) {
                $searchFields = $index->getFieldNames(true);
            } else {
                $searchFields = array(Zend_Search_Lucene::getDefaultSearchField());
            }

            foreach ($searchFields as $fieldName) {
                $subquery = new Zend_Search_Lucene_Search_Query_Preprocessing_Term($this->_word,
                                                                                   $this->_encoding,
                                                                                   $fieldName);
                $rewrittenSubquery = $subquery->rewrite($index);
                foreach ($rewrittenSubquery->getQueryTerms() as $term) {
                    $query->addTerm($term);
                }

                if ($rewrittenSubquery instanceof Zend_Search_Lucene_Search_Query_Insignificant) {
                    $hasInsignificantSubqueries = true;
                }
            }

            if (count($query->getTerms()) == 0) {
                $this->_matches = array();
                if ($hasInsignificantSubqueries) {
                    return new Zend_Search_Lucene_Search_Query_Insignificant();
                } else {
                    return new Zend_Search_Lucene_Search_Query_Empty();
                }
            }

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // -------------------------------------
        // Recognize exact term matching (it corresponds to Keyword fields stored in the index)
        // encoding is not used since we expect binary matching
        $term = new Zend_Search_Lucene_Index_Term($this->_word, $this->_field);
        if ($index->hasTerm($term)) {
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // -------------------------------------
        // Recognize wildcard queries
        list($word, $subPatterns, $subPatternsEncoding) = $this->_splitByWildcards();

        if (count($subPatterns) > 1) {
            // Wildcard query is recognized
            $pattern = $this->_buildWildcardPattern($word, $subPatterns, $subPatternsEncoding);

            if ($pattern === null) {
                require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
                throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');
            }

            $term  = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
            $query = new Zend_Search_Lucene_Search_Query_Wildcard($term);
            $query->setBoost($this->getBoost());

            // Get rewritten query. Important! It also fills terms matching container.
            $rewrittenQuery = $query->rewrite($index);
            $this->_matches = $query->getQueryTerms();

            return $rewrittenQuery;
        }

        // -------------------------------------
        // Recognize one-term, multi-term and "insignificant" queries
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

        if (count($tokens) == 0) {
            $this->_matches = array();
            return new Zend_Search_Lucene_Search_Query_Insignificant();
        }

        if (count($tokens) == 1) {
            $term  = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
            $query = new Zend_Search_Lucene_Search_Query_Term($term);
            $query->setBoost($this->getBoost());

            $this->_matches = $query->getQueryTerms();
            return $query;
        }

        // It's not insignificant or one term query
        require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
        $query = new Zend_Search_Lucene_Search_Query_MultiTerm();

        /**
         * @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
         * analyzer design features
         */
        foreach ($tokens as $token) {
            $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
            $query->addTerm($term, true); // all subterms are required
        }

        $query->setBoost($this->getBoost());

        $this->_matches = $query->getQueryTerms();
        return $query;
    }

    /**
     * Query specific matches highlighting
     *
     * A wildcard lexeme is delegated to a Wildcard query built from the analyzed
     * pattern; nothing is highlighted if a part of the pattern is tokenized into
     * several words. A non-wildcard lexeme is tokenized and its words are passed
     * to the highlighter; nothing is highlighted if there are no tokens.
     *
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
     */
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
    {
        // Fields detection is skipped: all fields are expected to be presented in the HTML body
        // and are not differentiated.
        // Exact term matching recognition is skipped: keyword fields highlighting is not supported.

        // -------------------------------------
        // Recognize wildcard queries
        list($word, $subPatterns, $subPatternsEncoding) = $this->_splitByWildcards();

        if (count($subPatterns) > 1) {
            // Wildcard query is recognized
            $pattern = $this->_buildWildcardPattern($word, $subPatterns, $subPatternsEncoding);

            if ($pattern === null) {
                // Do nothing (nothing is highlighted)
                return;
            }

            $term  = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
            require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
            $query = new Zend_Search_Lucene_Search_Query_Wildcard($term);

            $query->_highlightMatches($highlighter);
            return;
        }

        // -------------------------------------
        // Recognize one-term, multi-term and "insignificant" queries
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_word, $this->_encoding);

        if (count($tokens) == 0) {
            // Do nothing
            return;
        }

        if (count($tokens) == 1) {
            $highlighter->highlight($tokens[0]->getTermText());
            return;
        }

        // It's not insignificant or one term query
        $words = array();
        foreach ($tokens as $token) {
            $words[] = $token->getTermText();
        }
        $highlighter->highlight($words);
    }

    /**
     * Print a query
     *
     * Produces "[field:]word[^boost]". It's used only for query visualisation,
     * so characters are not escaped.
     *
     * @return string
     */
    public function __toString()
    {
        if ($this->_field !== null) {
            $query = $this->_field . ':';
        } else {
            $query = '';
        }

        $query .= $this->_word;

        if ($this->getBoost() != 1) {
            $query .= '^' . round($this->getBoost(), 4);
        }

        return $query;
    }

    /**
     * Split the word at the wildcard characters ('*' and '?').
     *
     * If PCRE unicode support is available, the word is converted to UTF-8 and split
     * in unicode mode; otherwise it is split as is, in its own encoding.
     *
     * @return array  array($word, $subPatterns, $subPatternsEncoding): the word that was actually
     *                split, the preg_split() result (each entry is array(text, offset)) and the
     *                encoding of the sub-pattern texts.
     */
    private function _splitByWildcards()
    {
        /** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
        if (@preg_match('/\pL/u', 'a') == 1) {
            $word                = iconv($this->_encoding, 'UTF-8', $this->_word);
            $wildcardsPattern    = '/[*?]/u';
            $subPatternsEncoding = 'UTF-8';
        } else {
            $word                = $this->_word;
            $wildcardsPattern    = '/[*?]/';
            $subPatternsEncoding = $this->_encoding;
        }

        $subPatterns = preg_split($wildcardsPattern, $word, -1, PREG_SPLIT_OFFSET_CAPTURE);

        return array($word, $subPatterns, $subPatternsEncoding);
    }

    /**
     * Assemble the analyzed wildcard pattern from the parts returned by _splitByWildcards().
     *
     * @param string $word                 Word that was split.
     * @param array  $subPatterns          Sub-patterns with offsets (PREG_SPLIT_OFFSET_CAPTURE format).
     * @param string $subPatternsEncoding  Encoding of the sub-pattern texts.
     * @return string|null  The pattern, or null if some sub-pattern is tokenized into several words.
     */
    private function _buildWildcardPattern($word, $subPatterns, $subPatternsEncoding)
    {
        require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
        $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault();

        $pattern = '';
        foreach ($subPatterns as $id => $subPattern) {
            // Append corresponding wildcard character to the pattern before each sub-pattern (except first).
            // The wildcard is the single byte located right before the sub-pattern offset.
            if ($id != 0) {
                $pattern .= $word[ $subPattern[1] - 1 ];
            }

            // Check if each sub-pattern is a single word in terms of current analyzer
            $tokens = $analyzer->tokenize($subPattern[0], $subPatternsEncoding);
            if (count($tokens) > 1) {
                return null;
            }
            foreach ($tokens as $token) {
                $pattern .= $token->getTermText();
            }
        }

        return $pattern;
    }
}

View File

@ -76,7 +76,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
*
* @var array
*/
private $_matches;
private $_matches = null;
/**
@ -90,9 +90,11 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
public function __construct($lowerTerm, $upperTerm, $inclusive)
{
if ($lowerTerm === null && $upperTerm === null) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('At least one term must be non-null');
}
if ($lowerTerm !== null && $upperTerm !== null && $lowerTerm->field != $upperTerm->field) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Both terms must be for the same field');
}
@ -159,6 +161,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
$fields = array($this->_field);
}
$maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
foreach ($fields as $field) {
$index->resetTermsStream();
@ -185,6 +188,12 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
$index->currentTerm()->field == $field &&
$index->currentTerm()->text < $upperTerm->text) {
$this->_matches[] = $index->currentTerm();
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
}
$index->nextTerm();
}
@ -196,6 +205,12 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
// Walk up to the end of field data
while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
$this->_matches[] = $index->currentTerm();
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
}
$index->nextTerm();
}
}
@ -226,6 +241,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
*/
public function optimize(Zend_Search_Lucene_Interface $index)
{
// Unconditionally rejects the call: as the message below states, a range query is not meant
// to be used for search directly and has to be converted with rewrite($index) first.
// The exception class is loaded right before it is thrown.
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
}
@ -238,7 +254,8 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
public function getQueryTerms()
{
if ($this->_matches === null) {
throw new Zend_Search_Lucene_Exception('Search has to be performed first to get matched terms');
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Search or rewrite operations have to be performed before.');
}
return $this->_matches;
@ -253,6 +270,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
*/
public function createWeight(Zend_Search_Lucene_Interface $reader)
{
// Unconditionally rejects the call: as the message below states, a range query is not meant
// to be used for search directly and has to be converted with rewrite($index) first.
// The exception class is loaded right before it is thrown.
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
}
@ -267,6 +285,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
*/
public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
{
// Unconditionally rejects the call (both arguments are ignored): as the message below states,
// a range query is not meant to be used for search directly and has to be converted with
// rewrite($index) first. The exception class is loaded right before it is thrown.
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
}
@ -280,6 +299,7 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
*/
public function matchedDocs()
{
// Unconditionally rejects the call: as the message below states, a range query is not meant
// to be used for search directly and has to be converted with rewrite($index) first.
// The exception class is loaded right before it is thrown.
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
}
@ -293,24 +313,44 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
*/
public function score($docId, Zend_Search_Lucene_Interface $reader)
{
// Unconditionally rejects the call (both arguments are ignored): as the message below states,
// a range query is not meant to be used for search directly and has to be converted with
// rewrite($index) first. The exception class is loaded right before it is thrown.
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Range query should not be directly used for search. Use $query->rewrite($index)');
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
$words = array();
foreach ($this->_matches as $term) {
$words[] = $term->text;
$docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
$lowerTermText = ($this->_lowerTerm !== null)? $this->_lowerTerm->text : null;
$upperTermText = ($this->_upperTerm !== null)? $this->_upperTerm->text : null;
if ($this->_inclusive) {
foreach ($tokens as $token) {
$termText = $token->getTermText();
if (($lowerTermText == null || $lowerTermText <= $termText) &&
($upperTermText == null || $termText <= $upperTermText)) {
$words[] = $termText;
}
}
} else {
foreach ($tokens as $token) {
$termText = $token->getTermText();
if (($lowerTermText == null || $lowerTermText < $termText) &&
($upperTermText == null || $termText < $upperTermText)) {
$words[] = $termText;
}
}
}
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
$highlighter->highlight($words);
}
/**
@ -326,7 +366,8 @@ class Zend_Search_Lucene_Search_Query_Range extends Zend_Search_Lucene_Search_Qu
. (($this->_lowerTerm !== null)? $this->_lowerTerm->text : 'null')
. ' TO '
. (($this->_upperTerm !== null)? $this->_upperTerm->text : 'null')
. (($this->_inclusive)? ']' : '}');
. (($this->_inclusive)? ']' : '}')
. (($this->getBoost() != 1)? '^' . round($this->getBoost(), 4) : '');
}
}

View File

@ -191,24 +191,13 @@ class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Que
}
/**
* Returns query term
* Query specific matches highlighting
*
* @return array
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function getTerms()
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
return $this->_terms;
}
/**
* Highlight query terms
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
{
$doc->highlight($this->_term->text, $this->_getHighlightColor($colorIndex));
$highlighter->highlight($this->_term->text);
}
/**
@ -219,7 +208,19 @@ class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Que
public function __toString()
{
// It's used only for query visualisation, so we don't care about characters escaping
return (($this->_term->field === null)? '':$this->_term->field . ':') . $this->_term->text;
if ($this->_term->field !== null) {
$query = $this->_term->field . ':';
} else {
$query = '';
}
$query .= $this->_term->text;
if ($this->getBoost() != 1) {
$query = $query . '^' . round($this->getBoost(), 4);
}
return $query;
}
}

View File

@ -59,6 +59,13 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
*/
private $_matches = null;
/**
* Minimum term prefix length (number of minimum non-wildcard characters)
*
* @var integer
*/
private static $_minPrefixLength = 3;
/**
* Zend_Search_Lucene_Search_Query_Wildcard constructor.
*
@ -69,6 +76,26 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
$this->_pattern = $pattern;
}
/**
 * Get minimum prefix length
 *
 * Returns the static (class-wide) setting, i.e. the minimum number of non-wildcard
 * characters a pattern has to start with. rewrite() throws an exception for patterns
 * whose prefix is shorter than this value.
 *
 * @return integer
 */
public static function getMinPrefixLength()
{
return self::$_minPrefixLength;
}
/**
 * Set minimum prefix length
 *
 * Changes the static (class-wide) setting, so it affects every wildcard query
 * rewritten afterwards. The value is stored as is, without validation.
 *
 * @param integer $minPrefixLength  Minimum number of non-wildcard characters a pattern has to start with.
 */
public static function setMinPrefixLength($minPrefixLength)
{
self::$_minPrefixLength = $minPrefixLength;
}
/**
* Get terms prefix
*
@ -98,6 +125,7 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
*
* @param Zend_Search_Lucene_Interface $index
* @return Zend_Search_Lucene_Search_Query
* @throws Zend_Search_Lucene_Exception
*/
public function rewrite(Zend_Search_Lucene_Interface $index)
{
@ -114,6 +142,10 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
$prefixLength = strlen($prefix);
$matchExpression = '/^' . str_replace(array('\\?', '\\*'), array('.', '.*') , preg_quote($this->_pattern->text, '/')) . '$/';
if ($prefixLength < self::$_minPrefixLength) {
throw new Zend_Search_Lucene_Exception('At least ' . self::$_minPrefixLength . ' non-wildcard terms are required.');
}
/** @todo check for PCRE unicode support may be performed through Zend_Environment in some future */
if (@preg_match('/\pL/u', 'a') == 1) {
// PCRE unicode support is turned on
@ -121,7 +153,7 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
$matchExpression .= 'u';
}
$maxTerms = Zend_Search_Lucene::getTermsPerQueryLimit();
foreach ($fields as $field) {
$index->resetTermsStream();
@ -133,6 +165,10 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
substr($index->currentTerm()->text, 0, $prefixLength) == $prefix) {
if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
$this->_matches[] = $index->currentTerm();
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
}
}
$index->nextTerm();
@ -143,6 +179,10 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
while ($index->currentTerm() !== null && $index->currentTerm()->field == $field) {
if (preg_match($matchExpression, $index->currentTerm()->text) === 1) {
$this->_matches[] = $index->currentTerm();
if ($maxTerms != 0 && count($this->_matches) > $maxTerms) {
throw new Zend_Search_Lucene_Exception('Terms per query limit is reached.');
}
}
$index->nextTerm();
@ -258,12 +298,11 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
}
/**
* Highlight query terms
* Query specific matches highlighting
*
* @param integer &$colorIndex
* @param Zend_Search_Lucene_Document_Html $doc
* @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter Highlighter object (also contains doc for highlighting)
*/
public function highlightMatchesDOM(Zend_Search_Lucene_Document_Html $doc, &$colorIndex)
protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
{
$words = array();
@ -274,14 +313,15 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
$matchExpression .= 'u';
}
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($doc->getFieldUtf8Value('body'), 'UTF-8');
$docBody = $highlighter->getDocument()->getFieldUtf8Value('body');
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($docBody, 'UTF-8');
foreach ($tokens as $token) {
if (preg_match($matchExpression, $token->getTermText()) === 1) {
$words[] = $token->getTermText();
}
}
$doc->highlight($words, $this->_getHighlightColor($colorIndex));
$highlighter->highlight($words);
}
/**
@ -292,7 +332,19 @@ class Zend_Search_Lucene_Search_Query_Wildcard extends Zend_Search_Lucene_Search
public function __toString()
{
// It's used only for query visualisation, so we don't care about characters escaping
return (($this->_pattern->field === null)? '' : $this->_pattern->field . ':') . $this->_pattern->text;
if ($this->_pattern->field !== null) {
$query = $this->_pattern->field . ':';
} else {
$query = '';
}
$query .= $this->_pattern->text;
if ($this->getBoost() != 1) {
$query = $query . '^' . round($this->getBoost(), 4);
}
return $query;
}
}

View File

@ -19,13 +19,9 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry_Term */
require_once 'Zend/Search/Lucene/Search/QueryEntry/Term.php';
@ -35,11 +31,6 @@ require_once 'Zend/Search/Lucene/Search/QueryEntry/Phrase.php';
/** Zend_Search_Lucene_Search_QueryEntry_Subquery */
require_once 'Zend/Search/Lucene/Search/QueryEntry/Subquery.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene

View File

@ -19,24 +19,15 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/** Zend_Search_Lucene_Analysis_Analyzer */
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -111,32 +102,11 @@ class Zend_Search_Lucene_Search_QueryEntry_Phrase extends Zend_Search_Lucene_Sea
*/
public function getQuery($encoding)
{
if (strpos($this->_phrase, '?') !== false || strpos($this->_phrase, '*') !== false) {
throw new Zend_Search_Lucene_Search_QueryParserException('Wildcards are only allowed in a single terms.');
}
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_phrase, $encoding);
if (count($tokens) == 0) {
return new Zend_Search_Lucene_Search_Query_Insignificant();
}
if (count($tokens) == 1) {
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
$query = new Zend_Search_Lucene_Search_Query_Term($term);
$query->setBoost($this->_boost);
return $query;
}
//It's not empty or one term query
$position = -1;
$query = new Zend_Search_Lucene_Search_Query_Phrase();
foreach ($tokens as $token) {
$position += $token->getPositionIncrement();
$term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
$query->addTerm($term, $position);
}
$query = new Zend_Search_Lucene_Search_Query_Preprocessing_Phrase($this->_phrase,
$encoding,
($this->_field !== null)?
iconv($encoding, 'UTF-8', $this->_field) :
null);
if ($this->_proximityQuery) {
$query->setSlop($this->_wordsDistance);

View File

@ -19,20 +19,12 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -67,6 +59,7 @@ class Zend_Search_Lucene_Search_QueryEntry_Subquery extends Zend_Search_Lucene_S
*/
public function processFuzzyProximityModifier($parameter = null)
{
    // This entry type never accepts the '~' modifier: the handler always
    // reports a query syntax error. The exception class is loaded on demand.
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    $message = '\'~\' sign must follow term or phrase';

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}

View File

@ -19,24 +19,15 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/** Zend_Search_Lucene_Analysis_Analyzer */
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -113,91 +104,26 @@ class Zend_Search_Lucene_Search_QueryEntry_Term extends Zend_Search_Lucene_Searc
*/
public function getQuery($encoding)
{
if (strpos($this->_term, '?') !== false || strpos($this->_term, '*') !== false) {
if ($this->_fuzzyQuery) {
throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is not supported for terms with wildcards.');
}
$pattern = '';
$subPatterns = explode('*', $this->_term);
$astericFirstPass = true;
foreach ($subPatterns as $subPattern) {
if (!$astericFirstPass) {
$pattern .= '*';
} else {
$astericFirstPass = false;
}
$subPatternsL2 = explode('?', $subPattern);
$qMarkFirstPass = true;
foreach ($subPatternsL2 as $subPatternL2) {
if (!$qMarkFirstPass) {
$pattern .= '?';
} else {
$qMarkFirstPass = false;
}
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($subPatternL2, $encoding);
if (count($tokens) > 1) {
throw new Zend_Search_Lucene_Search_QueryParserException('Wildcard search is supported only for non-multiple word terms');
}
foreach ($tokens as $token) {
$pattern .= $token->getTermText();
}
}
}
$term = new Zend_Search_Lucene_Index_Term($pattern, $this->_field);
$query = new Zend_Search_Lucene_Search_Query_Wildcard($term);
if ($this->_fuzzyQuery) {
$query = new Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy($this->_term,
$encoding,
($this->_field !== null)?
iconv($encoding, 'UTF-8', $this->_field) :
null,
$this->_similarity
);
$query->setBoost($this->_boost);
return $query;
}
}
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_term, $encoding);
if (count($tokens) == 0) {
return new Zend_Search_Lucene_Search_Query_Insignificant();
}
if (count($tokens) == 1 && !$this->_fuzzyQuery) {
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
$query = new Zend_Search_Lucene_Search_Query_Term($term);
$query->setBoost($this->_boost);
return $query;
}
if (count($tokens) == 1 && $this->_fuzzyQuery) {
$term = new Zend_Search_Lucene_Index_Term($tokens[0]->getTermText(), $this->_field);
$query = new Zend_Search_Lucene_Search_Query_Fuzzy($term, $this->_similarity);
$query->setBoost($this->_boost);
return $query;
}
if ($this->_fuzzyQuery) {
throw new Zend_Search_Lucene_Search_QueryParserException('Fuzzy search is supported only for non-multiple word terms');
}
//It's not empty or one term query
$query = new Zend_Search_Lucene_Search_Query_MultiTerm();
/**
* @todo Process $token->getPositionIncrement() to support stemming, synonyms and other
* analizer design features
*/
foreach ($tokens as $token) {
$term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $this->_field);
$query->addTerm($term, true); // all subterms are required
}
$query->setBoost($this->_boost);
$query = new Zend_Search_Lucene_Search_Query_Preprocessing_Term($this->_term,
$encoding,
($this->_field !== null)?
iconv($encoding, 'UTF-8', $this->_field) :
null
);
$query->setBoost($this->_boost);
return $query;
}
}

View File

@ -19,20 +19,12 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_FSM */
require_once 'Zend/Search/Lucene/FSM.php';
/** Zend_Search_Lucene_Search_QueryParser */
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -364,6 +356,7 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
$this->process(self::IN_WHITE_SPACE);
if ($this->getState() != self::ST_WHITE_SPACE) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Unexpected end of query');
}
@ -397,6 +390,7 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
// check,
if ($this->_queryStringPosition == count($this->_queryString) ||
$this->_queryString[$this->_queryStringPosition] != $lexeme) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Two chars lexeme expected. ' . $this->_positionMsg());
}
@ -413,6 +407,7 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
if ($token->type == Zend_Search_Lucene_Search_QueryToken::TT_FIELD_INDICATOR) {
$token = array_pop($this->_lexemes);
if ($token === null || $token->type != Zend_Search_Lucene_Search_QueryToken::TT_WORD) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Field mark \':\' must follow field name. ' . $this->_positionMsg());
}
@ -497,14 +492,17 @@ class Zend_Search_Lucene_Search_QueryLexer extends Zend_Search_Lucene_FSM
*********************************************************************/
/**
 * Lexer error action: a modifier character is followed by an unexpected input.
 * Always throws; the message is suffixed with the current position text.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function lexModifierErrException()
{
    // The exception class is loaded lazily, only when the error is raised.
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    $message = 'Lexeme modifier character can be followed only by number, white space or query syntax element. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
/**
 * Lexer error action: an unescaped quote was met inside a lexeme.
 * Always throws; the message is suffixed with the current position text.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function quoteWithinLexemeErrException()
{
    // The exception class is loaded lazily, only when the error is raised.
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    $message = 'Quote within lexeme must be escaped by \'\\\' char. '
             . $this->_positionMsg();

    throw new Zend_Search_Lucene_Search_QueryParserException($message);
}
/**
 * Lexer error action: a number lexeme has invalid syntax.
 * Always throws; the message is suffixed with the current position text.
 *
 * @throws Zend_Search_Lucene_Search_QueryParserException
 */
public function wrongNumberErrException()
{
    // The exception class is loaded lazily, only when the error is raised.
    require_once 'Zend/Search/Lucene/Search/QueryParserException.php';

    // The trailing space separates the sentence from the position text, as in
    // the other lexer error messages; without it the two parts run together.
    throw new Zend_Search_Lucene_Search_QueryParserException('Wrong number syntax. ' . $this->_positionMsg());
}
}

View File

@ -19,7 +19,6 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
@ -32,8 +31,14 @@ require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
/** Zend_Search_Lucene_Search_Query_Boolean */
require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
/** Zend_Search_Lucene_Search_Query_Preprocessing_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Phrase.php';
/** Zend_Search_Lucene_Search_Query_Preprocessing_Term */
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Term.php';
/** Zend_Search_Lucene_Search_Query_Preprocessing_Fuzzy */
require_once 'Zend/Search/Lucene/Search/Query/Preprocessing/Fuzzy.php';
/** Zend_Search_Lucene_Search_Query_Wildcard */
require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
@ -50,24 +55,15 @@ require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
/** Zend_Search_Lucene_Search_Query_Insignificant */
require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
/** Zend_Search_Lucene_Search_QueryLexer */
require_once 'Zend/Search/Lucene/Search/QueryLexer.php';
/** Zend_Search_Lucene_Search_QueryParserContext */
require_once 'Zend/Search/Lucene/Search/QueryParserContext.php';
/** Zend_Search_Lucene_FSM */
require_once 'Zend/Search/Lucene/FSM.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -153,21 +149,21 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
/**
* Defines query parsing mode.
*
*
* If this option is turned on, the query parser suppresses query parser exceptions
* and constructs a multi-term query using all words from the query.
*
*
* That helps to avoid exceptions caused by queries which don't conform to the query language,
* but limits the ability to check whether a query entered by the user has inconsistencies.
*
*
*
*
* Default is true.
*
*
* Use {@link Zend_Search_Lucene::suppressQueryParsingExceptions()},
* {@link Zend_Search_Lucene::dontSuppressQueryParsingExceptions()} and
* {@link Zend_Search_Lucene::checkQueryParsingExceptionsSuppressMode()} to operate
* with this setting.
*
*
* @var boolean
*/
private $_suppressQueryParsingExceptions = true;
@ -290,7 +286,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
/**
* Get query parser instance
*
*
* @return Zend_Search_Lucene_Search_QueryParser
*/
private static function _getInstance()
@ -363,9 +359,9 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
{
return self::_getInstance()->_suppressQueryParsingExceptions;
}
/**
* Parses a query string
*
@ -377,42 +373,43 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
public static function parse($strQuery, $encoding = null)
{
self::_getInstance();
// Reset FSM if previous parse operation didn't return it into a correct state
// Reset FSM if previous parse operation didn't return it into a correct state
self::$_instance->reset();
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
try {
self::$_instance->_encoding = ($encoding !== null) ? $encoding : self::$_instance->_defaultEncoding;
self::$_instance->_lastToken = null;
self::$_instance->_context = new Zend_Search_Lucene_Search_QueryParserContext(self::$_instance->_encoding);
self::$_instance->_contextStack = array();
self::$_instance->_tokens = self::$_instance->_lexer->tokenize($strQuery, self::$_instance->_encoding);
// Empty query
if (count(self::$_instance->_tokens) == 0) {
return new Zend_Search_Lucene_Search_Query_Insignificant();
}
foreach (self::$_instance->_tokens as $token) {
try {
self::$_instance->_currentToken = $token;
self::$_instance->process($token->type);
self::$_instance->_lastToken = $token;
} catch (Exception $e) {
if (strpos($e->getMessage(), 'There is no any rule for') !== false) {
throw new Zend_Search_Lucene_Search_QueryParserException( 'Syntax error at char position ' . $token->position . '.' );
}
throw $e;
}
}
if (count(self::$_instance->_contextStack) != 0) {
throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing.' );
}
return self::$_instance->_context->getQuery();
} catch (Zend_Search_Lucene_Search_QueryParserException $e) {
if (self::$_instance->_suppressQueryParsingExceptions) {
@ -421,12 +418,12 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
$query = new Zend_Search_Lucene_Search_Query_MultiTerm();
$termsSign = (self::$_instance->_defaultOperator == self::B_AND) ? true /* required term */ :
null /* optional term */;
foreach ($queryTokens as $token) {
$query->addTerm(new Zend_Search_Lucene_Index_Term($token->getTermText()), $termsSign);
}
return $query;
} else {
throw $e;
@ -434,7 +431,6 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
}
}
/*********************************************************************
* Actions implementation
*
@ -492,6 +488,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
public function processModifierParameter()
{
if ($this->_lastToken === null) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
}
@ -506,6 +503,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
default:
// It's not a user input exception
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Lexeme modifier parameter must follow lexeme modifier. Char position 0.' );
}
}
@ -526,6 +524,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
public function subqueryEnd()
{
if (count($this->_contextStack) == 0) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Syntax Error: mismatched parentheses, every opening must have closing. Char position ' . $this->_currentToken->position . '.' );
}
@ -560,6 +559,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
{
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
if (count($tokens) > 1) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
} else if (count($tokens) == 1) {
$from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
@ -569,6 +569,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
if (count($tokens) > 1) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
} else if (count($tokens) == 1) {
$to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
@ -577,6 +578,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
}
if ($from === null && $to === null) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
}
@ -602,6 +604,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
{
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_rqFirstTerm, $this->_encoding);
if (count($tokens) > 1) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
} else if (count($tokens) == 1) {
$from = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
@ -611,6 +614,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
$tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($this->_currentToken->text, $this->_encoding);
if (count($tokens) > 1) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Range query boundary terms must be non-multiple word terms');
} else if (count($tokens) == 1) {
$to = new Zend_Search_Lucene_Index_Term(reset($tokens)->getTermText(), $this->_context->getField());
@ -619,6 +623,7 @@ class Zend_Search_Lucene_Search_QueryParser extends Zend_Search_Lucene_FSM
}
if ($from === null && $to === null) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('At least one range query boundary term must be non-empty term');
}

View File

@ -22,7 +22,6 @@
/** Zend_Search_Lucene_FSM */
require_once 'Zend/Search/Lucene/FSM.php';
/** Zend_Search_Lucene_Index_Term */
require_once 'Zend/Search/Lucene/Index/Term.php';
@ -41,19 +40,12 @@ require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
/** Zend_Search_Lucene_Search_Query_Phrase */
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/** Zend_Search_Lucene_Search_QueryParserException */
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
/** Zend_Search_Lucene_Search_BooleanExpressionRecognizer */
require_once 'Zend/Search/Lucene/Search/BooleanExpressionRecognizer.php';
/** Zend_Search_Lucene_Search_QueryEntry */
require_once 'Zend/Search/Lucene/Search/QueryEntry.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -171,6 +163,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
public function setNextEntrySign($sign)
{
if ($this->_mode === self::GM_BOOLEAN) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
}
@ -181,6 +174,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
} else if ($sign == Zend_Search_Lucene_Search_QueryToken::TT_PROHIBITED) {
$this->_nextEntrySign = false;
} else {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Unrecognized sign type.');
}
}
@ -213,6 +207,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
{
// Check, that modifier has came just after word or phrase
if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
}
@ -220,6 +215,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
// there are no entries or last entry is boolean operator
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('\'~\' modifier must follow word or phrase.');
}
@ -237,6 +233,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
{
// Check, that modifier has came just after word or phrase
if ($this->_nextEntryField !== null || $this->_nextEntrySign !== null) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
}
@ -244,6 +241,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
if (!$lastEntry instanceof Zend_Search_Lucene_Search_QueryEntry) {
// there are no entries or last entry is boolean operator
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('\'^\' modifier must follow word, phrase or subquery.');
}
@ -260,6 +258,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
public function addLogicalOperator($operator)
{
if ($this->_mode === self::GM_SIGNS) {
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('It\'s not allowed to mix boolean and signs styles in the same subquery.');
}
@ -316,6 +315,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
$expressionRecognizer = new Zend_Search_Lucene_Search_BooleanExpressionRecognizer();
require_once 'Zend/Search/Lucene/Exception.php';
try {
foreach ($this->_entries as $entry) {
if ($entry instanceof Zend_Search_Lucene_Search_QueryEntry) {
@ -345,6 +345,7 @@ class Zend_Search_Lucene_Search_QueryParserContext
// throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error. Error message: \'' .
// $e->getMessage() . '\'.' );
// It's query syntax error message and it should be user friendly. So FSM message is omitted
require_once 'Zend/Search/Lucene/Search/QueryParserException.php';
throw new Zend_Search_Lucene_Search_QueryParserException('Boolean expression error.');
}

View File

@ -19,11 +19,6 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -213,6 +208,7 @@ class Zend_Search_Lucene_Search_QueryToken
break;
default:
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Unrecognized query syntax lexeme: \'' . $tokenText . '\'');
}
break;
@ -221,8 +217,8 @@ class Zend_Search_Lucene_Search_QueryToken
$this->type = self::TT_NUMBER;
default:
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Unrecognized lexeme type: \'' . $tokenCategory . '\'');
}
}
}

View File

@ -94,7 +94,7 @@ class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene
public static function mkdirs($dir, $mode = 0777, $recursive = true)
{
if (is_null($dir) || $dir === '') {
if (($dir === null) || $dir === '') {
return false;
}
if (is_dir($dir) || $dir === '/') {

View File

@ -19,12 +19,6 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -184,18 +178,18 @@ abstract class Zend_Search_Lucene_Storage_File
* Returns a long integer from the current position in the file
* and advances the file pointer.
*
* @return integer
* @return integer|float
* @throws Zend_Search_Lucene_Exception
*/
public function readLong()
{
$str = $this->_fread(8);
/**
* Check, that we work in 64-bit mode.
* fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
*/
if (PHP_INT_SIZE > 4) {
$str = $this->_fread(8);
return ord($str[0]) << 56 |
ord($str[1]) << 48 |
ord($str[2]) << 40 |
@ -205,18 +199,7 @@ abstract class Zend_Search_Lucene_Storage_File
ord($str[6]) << 8 |
ord($str[7]);
} else {
if ((ord($str[0]) != 0) ||
(ord($str[1]) != 0) ||
(ord($str[2]) != 0) ||
(ord($str[3]) != 0) ||
((ord($str[0]) & 0x80) != 0)) {
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
}
return ord($str[4]) << 24 |
ord($str[5]) << 16 |
ord($str[6]) << 8 |
ord($str[7]);
return $this->readLong32Bit();
}
}
@ -243,19 +226,80 @@ abstract class Zend_Search_Lucene_Storage_File
chr($value>>8 & 0xFF) .
chr($value & 0xFF), 8 );
} else {
if ($value > 0x7FFFFFFF) {
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
}
$this->_fwrite( "\x00\x00\x00\x00" .
chr($value>>24 & 0xFF) .
chr($value>>16 & 0xFF) .
chr($value>>8 & 0xFF) .
chr($value & 0xFF), 8 );
$this->writeLong32Bit($value);
}
}
/**
 * Reads a long integer from the current position in the file as two
 * consecutive readInt() words (high word first) and advances the file
 * pointer. This is the implementation used on 32-bit platforms.
 *
 * The value is returned as an integer when the high word is 0 and the low
 * word is non-negative, or when it is an accepted negative value; otherwise
 * it is returned as a float.
 *
 * @return integer|float
 * @throws Zend_Search_Lucene_Exception if the high word has its top bit set
 *         but the pair is not a negative value accepted by the check below
 */
public function readLong32Bit()
{
$wordHigh = $this->readInt();
$wordLow = $this->readInt();
// NOTE(review): (int)0x80000000 and (int)0xFFFFFFFF presumably rely on 32-bit
// integer conversion to produce the sign-bit mask and -1 - confirm on the target build.
if ($wordHigh & (int)0x80000000) {
// It's a negative value since the highest bit is set
if ($wordHigh == (int)0xFFFFFFFF && ($wordLow & (int)0x80000000)) {
// High word is all ones and the low word has its top bit set:
// the low word alone is returned as the value.
return $wordLow;
} else {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
}
}
if ($wordLow < 0) {
// Value is larger than 0x7FFF FFFF. Represent low word as float:
// clear the top bit, then add it back as a float.
$wordLow &= 0x7FFFFFFF;
$wordLow += (float)0x80000000;
}
if ($wordHigh == 0) {
// Return value as integer if possible
// ($wordLow is already a float here if the branch above was taken)
return $wordLow;
}
// Combine both words in float arithmetic: high * 2^32 + low.
return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
}
/**
 * Writes long integer to the end of file (32-bit platforms implementation).
 *
 * The value is split into a high and a low word, which are written with two
 * consecutive writeInt() calls (high word first).
 *
 * @param integer|float $value
 * @throws Zend_Search_Lucene_Exception if $value is lower than (int)0x80000000
 */
public function writeLong32Bit($value)
{
// NOTE(review): (int)0x80000000 is presumably meant to evaluate to -2147483648
// on 32-bit builds (see the exception message) - confirm on the target build.
if ($value < (int)0x80000000) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
}
if ($value < 0) {
// Negative value: the high word is the (int)0xFFFFFFFF constant and
// the low word is the value itself cast to integer.
$wordHigh = (int)0xFFFFFFFF;
$wordLow = (int)$value;
} else {
// Non-negative value: split around 2^32 using float arithmetic.
$wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
$wordLow = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;
if ($wordLow > 0x7FFFFFFF) {
// Highest bit of low word is set. Translate it to the corresponding negative integer value
$wordLow -= 0x80000000;
$wordLow |= 0x80000000;
}
}
// High word first, then low word.
$this->writeInt($wordHigh);
$this->writeInt($wordLow);
}
/**
* Returns a variable-length integer from the current
@ -402,6 +446,7 @@ abstract class Zend_Search_Lucene_Storage_File
}
if ($chars < 0) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
}

View File

@ -19,14 +19,9 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Storage_File */
require_once 'Zend/Search/Lucene/Storage/File.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -56,6 +51,7 @@ class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Stor
if (strpos($mode, 'w') === false && !is_readable($filename)) {
// opening for reading non-readable file
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('File \'' . $filename . '\' is not readable.');
}
@ -66,6 +62,7 @@ class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Stor
if ($this->_fileHandle === false) {
ini_set('track_errors', $trackErrors);
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception($php_errormsg);
}

View File

@ -19,14 +19,9 @@
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Storage_File */
require_once 'Zend/Search/Lucene/Storage/File.php';
/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';
/**
* @category Zend
* @package Zend_Search_Lucene
@ -294,14 +289,14 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
*/
public function readLong()
{
$str = substr($this->_data, $this->_position, 8);
$this->_position += 8;
/**
* Check, that we work in 64-bit mode.
* fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
*/
if (PHP_INT_SIZE > 4) {
$str = substr($this->_data, $this->_position, 8);
$this->_position += 8;
return ord($str[0]) << 56 |
ord($str[1]) << 48 |
ord($str[2]) << 40 |
@ -311,18 +306,7 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
ord($str[6]) << 8 |
ord($str[7]);
} else {
if ((ord($str[0]) != 0) ||
(ord($str[1]) != 0) ||
(ord($str[2]) != 0) ||
(ord($str[3]) != 0) ||
((ord($str[0]) & 0x80) != 0)) {
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
}
return ord($str[4]) << 24 |
ord($str[5]) << 16 |
ord($str[6]) << 8 |
ord($str[7]);
return $this->readLong32Bit();
}
}
@ -352,21 +336,81 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
chr($value>>8 & 0xFF) .
chr($value & 0xFF);
} else {
if ($value > 0x7FFFFFFF) {
throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
}
$this->_data .= chr(0) . chr(0) . chr(0) . chr(0) .
chr($value>>24 & 0xFF) .
chr($value>>16 & 0xFF) .
chr($value>>8 & 0xFF) .
chr($value & 0xFF);
$this->writeLong32Bit($value);
}
$this->_position = strlen($this->_data);
}
/**
 * Reads a long integer from the current position in the file as two
 * consecutive readInt() words (high word first) and advances the file
 * pointer. This is the implementation used on 32-bit platforms.
 *
 * The value is returned as an integer when the high word is 0 and the low
 * word is non-negative, or when it is an accepted negative value; otherwise
 * it is returned as a float.
 *
 * @return integer|float
 * @throws Zend_Search_Lucene_Exception if the high word has its top bit set
 *         but the pair is not a negative value accepted by the check below
 */
public function readLong32Bit()
{
$wordHigh = $this->readInt();
$wordLow = $this->readInt();
// NOTE(review): (int)0x80000000 and (int)0xFFFFFFFF presumably rely on 32-bit
// integer conversion to produce the sign-bit mask and -1 - confirm on the target build.
if ($wordHigh & (int)0x80000000) {
// It's a negative value since the highest bit is set
if ($wordHigh == (int)0xFFFFFFFF && ($wordLow & (int)0x80000000)) {
// High word is all ones and the low word has its top bit set:
// the low word alone is returned as the value.
return $wordLow;
} else {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
}
}
if ($wordLow < 0) {
// Value is larger than 0x7FFF FFFF. Represent low word as float:
// clear the top bit, then add it back as a float.
$wordLow &= 0x7FFFFFFF;
$wordLow += (float)0x80000000;
}
if ($wordHigh == 0) {
// Return value as integer if possible
// ($wordLow is already a float here if the branch above was taken)
return $wordLow;
}
// Combine both words in float arithmetic: high * 2^32 + low.
return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
}
/**
* Writes long integer to the end of file (32-bit platforms implementation)
*
* @param integer|float $value
* @throws Zend_Search_Lucene_Exception
*/
public function writeLong32Bit($value)
{
    $belowSupportedRange = $value < (int)0x80000000;
    if ($belowSupportedRange) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
    }

    if ($value < 0) {
        // Negative value: high word is all ones, low word carries the value itself.
        $highWord = (int)0xFFFFFFFF;
        $lowWord  = (int)$value;
    } else {
        // Split the value into two words around 0x00000001 00000000.
        $wordBase = (float)0x100000000;
        $highWord = (int)($value/$wordBase);
        $lowWord  = $value - $highWord*$wordBase;

        if ($lowWord > 0x7FFFFFFF) {
            // Top bit of the low word is set: drop it arithmetically, then
            // set it again with a bitwise OR so the result is an integer.
            $lowWord = ($lowWord - 0x80000000) | 0x80000000;
        }
    }

    // High word goes first, then the low word.
    $this->writeInt($highWord);
    $this->writeInt($lowWord);
}
/**
* Returns a variable-length integer from the current
@ -523,6 +567,7 @@ class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_
}
if ($chars < 0) {
require_once 'Zend/Search/Lucene/Exception.php';
throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
}

View File

@ -0,0 +1,175 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
/** Zend_Search_Lucene_Index_TermsStream_Interface */
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
/** Zend_Search_Lucene_Index_TermsPriorityQueue */
require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
/**
* @category Zend
* @package Zend_Search_Lucene
* @subpackage Index
* @copyright Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
*/
class Zend_Search_Lucene_TermStreamsPriorityQueue implements Zend_Search_Lucene_Index_TermsStream_Interface
{
    /**
     * Array of term streams (Zend_Search_Lucene_Index_TermsStream_Interface objects)
     *
     * @var array
     */
    protected $_termStreams;

    /**
     * Terms stream queue.
     *
     * Holds the streams which still have a current term. It is null after
     * closeTermsStream() until resetTermsStream() is called again.
     *
     * @var Zend_Search_Lucene_Index_TermsPriorityQueue|null
     */
    protected $_termsStreamQueue = null;

    /**
     * Last term returned by nextTerm() (null at the end of the stream)
     *
     * @var Zend_Search_Lucene_Index_Term|null
     */
    protected $_lastTerm = null;

    /**
     * Object constructor
     *
     * Stores the streams and positions the merged stream on its first term.
     *
     * @param array $termStreams  array of term streams (Zend_Search_Lucene_Index_TermsStream_Interface objects)
     */
    public function __construct(array $termStreams)
    {
        $this->_termStreams = $termStreams;

        $this->resetTermsStream();
    }

    /**
     * Reset terms stream.
     *
     * Resets every underlying stream, rebuilds the queue from the non-empty
     * ones and moves to the first term. May also be used to reopen the
     * stream after closeTermsStream().
     */
    public function resetTermsStream()
    {
        $this->_termsStreamQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

        foreach ($this->_termStreams as $termStream) {
            $termStream->resetTermsStream();

            // Skip "empty" containers
            if ($termStream->currentTerm() !== null) {
                $this->_termsStreamQueue->put($termStream);
            }
        }

        $this->nextTerm();
    }

    /**
     * Skip terms stream up to specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * Does nothing if the stream has been closed.
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        if ($this->_termsStreamQueue === null) {
            // Stream is closed, there is nothing to skip through
            return;
        }

        // Drain the queue first: skipping changes the current term of each
        // stream, so every stream has to be re-inserted afterwards.
        $termStreams = array();
        while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
            $termStreams[] = $termStream;
        }

        foreach ($termStreams as $termStream) {
            $termStream->skipTo($prefix);

            // Streams without remaining terms are dropped from the queue
            if ($termStream->currentTerm() !== null) {
                $this->_termsStreamQueue->put($termStream);
            }
        }

        $this->nextTerm();
    }

    /**
     * Scans term streams and returns next term
     *
     * A term which is the current term of several streams is returned only
     * once. This relies on the queue handing out streams with equal term
     * keys one after another (expected from Zend_Search_Lucene_Index_TermsPriorityQueue).
     *
     * Returns null if the stream is exhausted or has been closed.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        if ($this->_termsStreamQueue === null) {
            // Stream is closed: behave like an exhausted stream instead of
            // calling a method on null
            $this->_lastTerm = null;
            return null;
        }

        while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
            $nextStream = $this->_termsStreamQueue->top();

            if ($nextStream === null ||
                $nextStream->currentTerm()->key() != $termStream->currentTerm()->key()) {
                // We got new term
                $this->_lastTerm = $termStream->currentTerm();
                $this->_advanceAndRequeue($termStream);

                return $this->_lastTerm;
            }

            // Same term is also at the head of the queue: move this stream
            // forward and let the term be reported by the next stream
            $this->_advanceAndRequeue($termStream);
        }

        // End of stream
        $this->_lastTerm = null;

        return null;
    }

    /**
     * Returns term in current position
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_lastTerm;
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end
     *
     * Only streams still held by the queue are closed here. Calling this
     * method more than once is safe.
     */
    public function closeTermsStream()
    {
        if ($this->_termsStreamQueue !== null) {
            while (($termStream = $this->_termsStreamQueue->pop()) !== null) {
                $termStream->closeTermsStream();
            }
        }

        $this->_termsStreamQueue = null;
        $this->_lastTerm         = null;
    }

    /**
     * Moves the given stream to its next term and puts it back into the
     * queue unless it has no more terms.
     *
     * @param Zend_Search_Lucene_Index_TermsStream_Interface $termStream
     */
    private function _advanceAndRequeue($termStream)
    {
        if ($termStream->nextTerm() !== null) {
            // Put segment back into the priority queue
            $this->_termsStreamQueue->put($termStream);
        }
    }
}