<?php
/*
   v3_highlighter.inc: syntax highlighting w/ manual-links.
   Copyright (C) 2006, 2007 Kalle Räisänen

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

   (!defined('T_ML_COMMENT'))
      ? define('T_ML_COMMENT', T_COMMENT)
      : define('T_DOC_COMMENT', T_ML_COMMENT);
   defined('T_HALT_COMPILER') or
      define('T_HALT_COMPILER', T_CASE);
   defined('T_OLD_FUNCTION') or
      define('T_OLD_FUNCTION', T_HALT_COMPILER);

   require_once(V3_ROOT . '/includes/v3.inc');

   # CSS class names attached to the generated markup.
   # VH_ALL is the class of the wrapping <code> element; the others are used
   # as the class of the <span>/<a> around individual tokens.
   define('VH_ALL'      , 'xh-all');
   define('VH_COMMENT'  , 'xh-comment');
   define('VH_DEFAULT'  , 'xh-default');
   define('VH_FUNCTION' , 'xh-function');
   define('VH_HTML'     , 'xh-html');
   define('VH_KEYWORD'  , 'xh-keyword');
   define('VH_LINK'     , 'xh-link');
   define('VH_NUMBER'   , 'xh-number');
   define('VH_STRING'   , 'xh-string');
   # NOTE(review): VH_STRVAR is not referenced anywhere else in this file.
   define('VH_STRVAR'   , 'xh-strvar');
   define('VH_VARIABLE' , 'xh-variable');

   # sprintf() templates.
   # VH_SPAN_FMT takes (class, content).
   # VH_MAN_URL / VH_W3C_URL take (path appended to the base URL, class, link text);
   # callers must therefore pass a relative manual path, not a full URL.
   define('VH_SPAN_FMT' , '<span class="%s">%s</span>');
   define('VH_MAN_URL'  , '<a href="http://www.php.net/%s" class="%s">%s</a>');
   define('VH_W3C_URL'  , '<a href="http://www.w3schools.com/tags/%s" class="%s">%s</a>');

   function _x($str, $times)
   {
      return $times ? $str . _x($str, $times - 1) : '';
   }


   # Load the 'template' module through the project factory.
   # NOTE(review): the TPL_* constants used when highlighting inline HTML are
   # presumably defined by that module -- confirm against the module source.
   v3_factory::load_module('template');

   /**
    * Stub token handler: the body is empty, so calling it does nothing and
    * yields null.
    *
    * NOTE(review): presumably meant to be registered as a per-token callback
    * with v3_highlighter; nothing in this file registers it -- confirm before
    * removing or implementing.
    *
    * @param mixed $token  token, taken by reference (currently unused)
    */
   function _handle_tpl(&$token)
   {
   }

   class v3_highlighter
   {
      protected static $FUNCTIONS = null;
      protected $_filename = '';
      protected $_string   = '';
      protected $_return   = false;
      protected $_callback = false;
      protected $_override_handler = false;

      function __construct($f_or_s=false, $return=false, $callback=false, $override_handler=false)
      {
         $this->_filename         = file_exists($f_or_s) ? $f_or_s : false;
         $this->_string           = $this->_filename === false ? $f_or_s : file_get_contents($f_or_s);
         $this->_return           = $return;
         $this->_callback         = $callback;
         $this->_override_handler = $override_handler;
         if(self::$FUNCTIONS === null) {
            self::$FUNCTIONS = array_map('strtolower', _vbk(get_defined_functions(), 'internal'));
         }
      }

      public function add_handler($token_id, $handler)
      {
         $this->_callback[$token_id] = $handler;
      }

      public function add_handlers($handlers)
      {
         foreach($handlers as $token_id => $handler)
            $this->add_handler($token_id, $handler);
      }

      public function set_file($filename)
      {
         $this->_filename = $filename;
         $this->_string   = file_get_contents($filename);
      }

      public function set_string($string)
      {
         $this->_string   = $string;
      }

      protected function _ws2html($text)
      {
         $out = preg_replace('/^ /m', '&nbsp;', $text);
         $out = str_replace(array("\n",       "\t",            '  '),
                            array("<br />\n", _x('&nbsp;', 3), _x('&nbsp;', 2)),
                            $out);
         return $out;
      }

      protected function _text2html($text, $return=true)
      {
         $out = '';
         if(is_array($text)) {
            foreach($text as $t) {
               $out .= $this->_text2html($t, true);
            }
         } else {
            $out = $this->_ws2html(htmlentities($text));
         }

         return $return ? $out : print $out;
      }

      protected function _htmlspan($text, $style)
      {
         return ($style != ''
                     ? sprintf(VH_SPAN_FMT, $style, $text)
                     : $text);
      }

      protected function _mklink($href, $title, $fmt=VH_MAN_URL)
      {
         return sprintf($fmt, $href, VH_LINK, $title);
      }

      protected function _linkspan($href, $title, $style=VH_DEFAULT, $fmt=VH_MAN_URL)
      {
         return $this->_htmlspan(
            $this->_mklink($href, $title, $fmt),
            $style
         );
      }

      protected function _php_handletoken($id, $text)
      {
         static $in_hd = false;

         switch($id) {
            case T_OPEN_TAG:
            case T_CLOSE_TAG:
            case T_OPEN_TAG_WITH_ECHO:
               return $this->_linkspan('language.basic-syntax#language.basic-syntax.phpmode', $text, VH_KEYWORD);
            case T_COMMENT:
            case T_ML_COMMENT:
            case T_DOC_COMMENT:
               $lnk  = $this->_mklink('language.basic-syntax.comments', '$1');
               $text = preg_replace('#(/\*|\*/|\#|//)#', $lnk, $text);
               return $this->_htmlspan($text, VH_COMMENT);
            case T_DNUMBER:
            case T_LNUMBER:
               return $this->_htmlspan($text, VH_NUMBER);
            case T_ARRAY:
            case T_ECHO:
            case T_EMPTY:
            case T_EVAL:
            case T_EXIT:
            case T_ISSET:
            case T_LIST:
            case T_PRINT:
            case T_INCLUDE:
            case T_INCLUDE_ONCE:
            case T_REQUIRE:
            case T_REQUIRE_ONCE:
            case T_RETURN:
            case T_UNSET:
               return $this->_linkspan('function.' . $text, $text, VH_FUNCTION);
            case T_IF:  # stupid PHP.net...
               return $this->_linkspan('control-structures#control-structures.if', $text, VH_KEYWORD);
            case T_DO:
            case T_ELSE:
            case T_ELSEIF:
            case T_FOR:
            case T_FOREACH:
            case T_WHILE:
            case T_CONTINUE:
            case T_DECLARE:
            case T_BREAK:
            case T_AS:
               return $this->_linkspan(
                     'control-structures.'. ($id == T_DO ? 'do.while' : ($id == T_AS ? 'foreach' : $text)),
                     $text,
                     VH_KEYWORD
               );
            case T_SWITCH:
            case T_CASE:
            case T_DEFAULT:
               return $this->_linkspan('http://www.php.net/control-structures.switch', $text, VH_KEYWORD);
            case T_ENDFOR:
            case T_ENDFOREACH:
            case T_ENDIF:
            case T_ENDSWITCH:
            case T_ENDWHILE:
               return $this->_linkspan('control-structures.alternative-syntax', $text, VH_KEYWORD);
            case T_STRING:
               if($in_hd)
                  return $this->_php_handletoken(T_CONSTANT_ENCAPSED_STRING, $text);

               return in_array(strtolower($text), self::$FUNCTIONS)
                        ? $this->_linkspan('function.' . $text, $text, VH_FUNCTION)
                        : $this->_htmlspan($text, VH_DEFAULT);
            case T_ARRAY_CAST:
            case T_BOOL_CAST:
            case T_DOUBLE_CAST:
            case T_INT_CAST:
            case T_OBJECT_CAST:
            case T_STRING_CAST:
            case T_UNSET_CAST:
               $man = 'language.types.%s#language.types.%s.casting';
               preg_match('|\((\w+)\)|', $text, $matches);
               switch($type = strtolower($matches[1])) {
                  case 'int':
                     $type = 'integer'; break;
                  case 'bool':
                     $type = 'boolean'; break;
                  case 'double': case 'real':
                     $type = 'float';   break;
                  default:
                     # do nothing
                     break;
               }
               $link = (($type == 'unset') # (unset) is undocumented
                           ? 'function.unset'
                           : sprintf($man, $type, $type));
               return $this->_linkspan($link, $text, VH_KEYWORD);
            case T_START_HEREDOC:
            case T_END_HEREDOC:
               $in_hd = $id == T_END_HEREDOC ? false : true;

               return $this->_linkspan(
                  'language.types.string#language.types.string.syntax.heredoc',
                  $text, VH_KEYWORD
               );
            case T_CONSTANT_ENCAPSED_STRING:
            case T_ENCAPSED_AND_WHITESPACE:
               return $this->_htmlspan($text, VH_STRING);
            case T_VARIABLE:
               return $this->_htmlspan($text, VH_VARIABLE);
            case T_FILE:
            case T_LINE:
            case T_FUNC_C:
            case T_CLASS_C:
            case T_METHOD_C: # that __METHOD__ is T_METHOD_C is undocumented. Shocking, I know.
               return $this->_linkspan('language.constants.predefined', $text, VH_VARIABLE);
            case T_BOOLEAN_AND:
            case T_BOOLEAN_OR:
            case T_LOGICAL_OR:
            case T_LOGICAL_XOR:
            case T_LOGICAL_AND:
               return $this->_linkspan('language.operators.logical', $text, VH_KEYWORD);
            case T_IS_EQUAL:
            case T_IS_GREATER_OR_EQUAL:
            case T_IS_IDENTICAL:
            case T_IS_NOT_EQUAL:
            case T_IS_NOT_IDENTICAL:
            case T_IS_SMALLER_OR_EQUAL:
               return $this->_linkspan('language.operators.comparison', $text, VH_KEYWORD);
            case T_DEC:
            case T_INC:
               return $this->_linkspan('language.operators.increment', $text, VH_KEYWORD);
            case T_MINUS_EQUAL:
            case T_AND_EQUAL:
            case T_CONCAT_EQUAL:
            case T_DIV_EQUAL:
            case T_MOD_EQUAL:
            case T_MUL_EQUAL:
            case T_OR_EQUAL:
            case T_PLUS_EQUAL:
            case T_SL_EQUAL:
            case T_SR_EQUAL:
            case T_XOR_EQUAL:
               return $this->_linkspan('language.operators.assignment', $text, VH_KEYWORD);
            case T_SL:
            case T_SR:
               return $this->_linkspan('language.operators.bitwise', $text, VH_KEYWORD);
            case T_DOUBLE_ARROW:
               return $this->_linkspan('language.types.array', $text, VH_KEYWORD);
            case T_ABSTRACT:
            case T_CONST:
            case T_CLASS:
            case T_CLONE:
            case T_CONST:
            case T_EXTENDS:
            case T_FINAL:
            case T_IMPLEMENTS:
            case T_INSTANCEOF:
            case T_INTERFACE:
            case T_NEW:
            case T_OBJECT_OPERATOR:
            case T_PAAMAYIM_NEKUDOTAYIM: case T_DOUBLE_COLON:
            case T_PRIVATE:
            case T_PUBLIC:
            case T_PROTECTED:
            case T_STATIC:
            case T_VAR:
               return $this->_linkspan(
                  'language.oop' . ($id == T_VAR ? '' : '5'), $text, VH_KEYWORD
               );
            case T_THROW:
            case T_TRY:
            case T_CATCH:
               return $this->_linkspan('language.exceptions', $text, VH_KEYWORD);
            case T_GLOBAL:
               return $this->_linkspan('language.variables.scope', $text, VH_KEYWORD);
            case T_CURLY_OPEN:
            case T_ENDDECLARE:
            case T_HALT_COMPILER:
            case T_OLD_FUNCTION:
            case T_USE:
            case T_DOLLAR_OPEN_CURLY_BRACES:
               return $this->_htmlspan($text, VH_KEYWORD);
            case T_FUNCTION:
               return $this->_linkspan('language.functions', $text, VH_KEYWORD);
            case T_INLINE_HTML:
               # the following will not work for '<script>',
               # which is split into '<s' and 'cript>' by the tokeniser.
               $text = preg_replace('/(&lt;[!\/]?)(\w+)\b/',
                                    '$1<a href="http://www.w3schools.com/tags/tag_$2.asp" '.
                                     'class="'. VH_LINK . '">$2</a>',
                                    $text);
               $text = preg_replace('|(http://www.w3schools.com/tags/tag_h)[1-6](.asp)|',
                                    '$1n$2',
                                    $text);
               $regexes = array(
                  '(' . _pq(TPL_COMMENT_START) . ')(.*?)(' . _pq(TPL_COMMEND_END) . ')',
                  '(' . _pq(TPL_VAR_PREFIX) . ')([^\n]*?)(' . _pq(TPL_VAR_SUFFIX) . ')',
                  '(\/?' . _pq(TPL_SUB_PREFIX) . ')([^\n]*?)(' . _pq(TPL_SUB_SUFFIX) . ')'
               );
               $repl = $this->_htmlspan('$1' . $this->_htmlspan('$2', VH_VARIABLE) . '$3', VH_FUNCTION);

               foreach($regexes as $re)
                  $text = preg_replace("/$re/s", $repl, $text);

               return $this->_htmlspan($text, VH_HTML);
            case T_WHITESPACE:
               return ($text == '&nbsp;' ? ' ' : $text);
            default:
               return $this->_htmlspan($text, VH_DEFAULT);
         } # switch

         throw new Exception(
            sprintf(
               '%s: Can\'t happen error: token: %02x [%s]; text: "%s"',
               __METHOD__, $id, token_name($id), $text
            )
         );
      } # _php_handletoken()


      public function highlight_string(&$string, $return=false, $callbacks=false, $override_handler=false)
      {
         $out       = sprintf('<code class="%s">', VH_ALL);
         $in_string = 0;
         $tokens    = token_get_all($string);
         unset($string);

         $operators = array(
            'arithmetic'   => array('-', '+', '/', '*', '%'),
            'assignment'   => array('='),
            'bitwise'      => array('~', '^', '|', '&'),
            'comparison'   => array('>', '<', '?', ':'),
            'errorcontrol' => array('@'),
            'execution'    => array('`'),
            'logical'      => array('!'),
            'string'       => array('.')
         );

         $all_operators = array();

         foreach($operators as $type => $arr) {
            foreach($arr as $v)
               $all_operators[$v] = $type;
         }

         for($tk_cnt = 0; $tk_cnt < count($tokens); $tk_cnt++) {
            $token = $tokens[$tk_cnt];

            if(is_string($token)) { # I wish the tokeniser'd just give me IDs for everything...
               $next_token = isset($tokens[$tk_cnt + 1]) ? $tokens[$tk_cnt + 1] : false;
               $text = $this->_text2html($token);

               if($token == '"') {
                  $out .= $this->_php_handletoken(T_CONSTANT_ENCAPSED_STRING, $text);
                  $in_string ^= 1;
               } else if($token == '&' && $next_token !== false &&
                         is_array($next_token) && $next_token[0] == T_VARIABLE) {
                  # & == variable-reference, rather than bitwise and.
                  $out .= $this->_htmlspan(
                     $this->_mklink('language.references', $text),
                     VH_KEYWORD
                  );
               } else if(in_array($token, array_keys($all_operators))) {
                  $out .= $this->_linkspan(
                     'language.operators.' . $all_operators[$token],
                     $text,
                     VH_KEYWORD
                  );
               } else {
                  $out .= $this->_php_handletoken(-1, $text);
               }
            } else {
               $token[1] = $this->_text2html($token[1]);

               if(is_array($callbacks) && isset($callbacks[$token[0]])) {
                  $ret = $callbacks[$token[0]]($token);
                  if($override_handler && $ret !== false) {
                     $out .= $token[1];
                     continue;
                  }
               }
               $out .= $this->_php_handletoken(
                  $in_string ? T_CONSTANT_ENCAPSED_STRING : $token[0],
                  $token[1]
               );
            }

            if(!$return) {
               echo $out;
               $out = '';
            }
         } # for

         $out = str_replace(
            '<a href="http://www.w3schools.com/tags/tag_s.asp" class="xh-link">s</a></span><span class="'. VH_HTML .'">cript',
            $this->_mklink('tag_script.asp', 'script', VH_W3C_URL),
            $out
         );
         $out .= '</code>';

         return $return ? $out : print $out;
      } # highlight_string


      public function highlight_file($path, $return=false, $callbacks=false, $override_handler=false)
      {
         $scont = file_get_contents($path);
         return $this->highlight_string($scont, $return, $callbacks, $override_handler);
      } # highlight_file


      public function highlight()
      {
         $string = strlen($this->_string) ? $this->_string : file_get_contents($this->_filename);

         return $this->highlight_string($string, $this->_return, $this->_callback, $this->_override_handler);
      } # highlight
   }
?>