<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */



/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Storage
 * @copyright  Copyright (c) 2005-2008 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);


    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Flush output.
     *
     * Returns true on success or false on failure.
     *
     * @return boolean
     */
    abstract public function flush();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * @param integer $lockType
     * @return boolean
     */
    abstract public function lock($lockType, $nonBlockinLock = false);

    /**
     * Unlock file
     */
    abstract public function unlock();

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the string.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }


    /**
     * Reads an integer from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer to the end of file.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   4  );
    }


    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        $str = $this->_fread(8);

        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            if ((ord($str[0])          != 0) ||
                (ord($str[1])          != 0) ||
                (ord($str[2])          != 0) ||
                (ord($str[3])          != 0) ||
                ((ord($str[0]) & 0x80) != 0)) {
                     throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
                 }

            return  ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        }
    }

    /**
     * Writes long integer to the end of file
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_fwrite( chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF),   8  );
        } else {
            if ($value > 0x7FFFFFFF) {
                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
            }

            $this->_fwrite( "\x00\x00\x00\x00"     .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF),   8  );
        }
    }



    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        } else {
            /**
             * This implementation supports only Basic Multilingual Plane
             * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
             * "supplementary characters" (characters whose code points are
             * greater than 0xFFFF)
             * Java 2 represents these characters as a pair of char (16-bit)
             * values, the first from the high-surrogates range (0xD800-0xDBFF),
             * the second from the low-surrogates range (0xDC00-0xDFFF). Then
             * they are encoded as usual UTF-8 characters in six bytes.
             * Standard UTF-8 representation uses four bytes for supplementary
             * characters.
             */

            $str_val = $this->_fread($strlen);

            for ($count = 0; $count < $strlen; $count++ ) {
                if (( ord($str_val[$count]) & 0xC0 ) == 0xC0) {
                    $addBytes = 1;
                    if (ord($str_val[$count]) & 0x20 ) {
                        $addBytes++;

                        // Never used. Java2 doesn't encode strings in four bytes
                        if (ord($str_val[$count]) & 0x10 ) {
                            $addBytes++;
                        }
                    }
                    $str_val .= $this->_fread($addBytes);
                    $strlen += $addBytes;

                    // Check for null character. Java2 encodes null character
                    // in two bytes.
                    if (ord($str_val[$count])   == 0xC0 &&
                        ord($str_val[$count+1]) == 0x80   ) {
                        $str_val[$count] = 0;
                        $str_val = substr($str_val,0,$count+1)
                                 . substr($str_val,$count+2);
                    }
                    $count += $addBytes;
                }
            }

            return $str_val;
        }
    }

    /**
     * Writes a string to the end of file.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);
        $containNullChars = false;

        for ($count = 0; $count < $strlen; $count++ ) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            if ((ord($str[$count]) & 0xC0) == 0xC0) {
                $addBytes = 1;
                if (ord($str[$count]) & 0x20 ) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we dont't support non-BMP characters
                    if (ord($str[$count]) & 0x10 ) {
                        $addBytes++;
                    }
                }
                $chars -= $addBytes;

                if (ord($str[$count]) == 0 ) {
                    $containNullChars = true;
                }
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);
        if ($containNullChars) {
            $this->_fwrite(str_replace($str, "\x00", "\xC0\x80"));
        } else {
            $this->_fwrite($str);
        }
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}