<?php

/**
 * Lexer adapted from Simple Test: http://sourceforge.net/projects/simpletest/
 * For an intro to the Lexer see:
 * https://web.archive.org/web/20120125041816/http://www.phppatterns.com/docs/develop/simple_test_lexer_notes
 *
 * @author Marcus Baker http://www.lastcraft.com
 */

namespace dokuwiki\Parsing\Lexer;

/**
 * Compounded regular expression.
 *
 * Several patterns are combined into one alternation ("(p1)|(p2)|...") so
 * that a subject can be tested against all of them with a single PCRE call.
 * Whichever alternative matches determines the label that is returned.
 */
class ParallelRegex
{
    /** @var string[] patterns to match, exactly as passed to addPattern() */
    protected $patterns;
    /** @var array labels for above patterns (same indexes) */
    protected $labels;
    /** @var string|null the compound regex matching all patterns, null until built */
    protected $regex;
    /** @var bool case sensitive matching? */
    protected $case;

    /**
     * Constructor. Starts with no patterns.
     *
     * @param boolean $case    True for case sensitive, false
     *                         for insensitive.
     */
    public function __construct($case)
    {
        $this->case = $case;
        $this->patterns = array();
        $this->labels = array();
        $this->regex = null;
    }

    /**
     * Adds a pattern with an optional label.
     *
     * Invalidates the cached compound regex; it is rebuilt on the next
     * match() or split() call.
     *
     * @param mixed       $pattern Perl style regex. Must be UTF-8
     *                             encoded. If it is a string, the (, )
     *                             lose their meaning unless they
     *                             form part of a lookahead or
     *                             lookbehind assertion.
     * @param bool|string $label   Label of regex to be returned
     *                             on a match. Label must be ASCII
     */
    public function addPattern($pattern, $label = true)
    {
        $count = count($this->patterns);
        $this->patterns[$count] = $pattern;
        $this->labels[$count] = $label;
        $this->regex = null;
    }

    /**
     * Attempts to match all patterns at once against a string.
     *
     * @param string $subject      String to match against.
     * @param string $match        First matched portion of
     *                             subject. Set to "" when nothing matched.
     * @return bool|string         False if no match found, label if label exists, true if not
     */
    public function match($subject, &$match)
    {
        $match = "";
        if (count($this->patterns) == 0) {
            return false;
        }
        if (! preg_match($this->getCompoundedRegex(), $subject, $matches)) {
            return false;
        }

        $match = $matches[0];
        $size = count($matches);
        // FIXME this could be made faster by storing the labels as keys in a hashmap
        for ($i = 1; $i < $size; $i++) {
            // Capture group $i belongs to pattern $i - 1. Compare against ''
            // instead of testing truthiness: a matched text of "0" is falsy
            // in PHP and would otherwise lose its label.
            if ($matches[$i] !== '' && isset($this->labels[$i - 1])) {
                return $this->labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Attempts to split the string against all patterns at once
     *
     * @param string $subject      String to match against.
     * @param array $split         The split result: array containing, pre-match, match & post-match strings.
     *                             On failure it is array($subject, "", "").
     * @return bool|string         False if nothing matched, otherwise the label of the
     *                             matching pattern (true if it has none).
     *
     * @author Christopher Smith <chris@jalakai.co.uk>
     */
    public function split($subject, &$split)
    {
        if (count($this->patterns) == 0) {
            $split = array($subject, "", "");
            return false;
        }

        if (! preg_match($this->getCompoundedRegex(), $subject, $matches, PREG_OFFSET_CAPTURE)) {
            // preg_match() yields 0 for "no match" and false for an engine
            // error; only the latter leaves an error code behind.
            $messages = array(
                PREG_BACKTRACK_LIMIT_ERROR =>
                    'A PCRE backtrack error occured. Try to increase the pcre.backtrack_limit in php.ini',
                PREG_RECURSION_LIMIT_ERROR =>
                    'A PCRE recursion error occured. Try to increase the pcre.recursion_limit in php.ini',
                PREG_BAD_UTF8_ERROR =>
                    'A PCRE UTF-8 error occured. This might be caused by a faulty plugin',
                PREG_INTERNAL_ERROR =>
                    'A PCRE internal error occured. This might be caused by a faulty plugin',
            );
            $err = preg_last_error();
            if (isset($messages[$err])) {
                msg($messages[$err], -1);
            }

            $split = array($subject, "", "");
            return false;
        }

        // PCRE drops trailing unmatched groups, so the last entry of $matches
        // is the group of the alternative that matched: entry 0 is the whole
        // match, entry k + 1 belongs to pattern k.
        $idx = count($matches) - 2;

        // Cut the subject around the match using the byte offset reported by
        // the match itself. Running the single pattern a second time is not
        // needed and cannot fail independently of the first match.
        list($matched, $offset) = $matches[0];
        $split = array(
            (string) substr($subject, 0, $offset),
            $matched,
            // cast: substr() returns false instead of "" on PHP < 8 when the
            // match ends exactly at the end of the subject
            (string) substr($subject, $offset + strlen($matched)),
        );

        return isset($this->labels[$idx]) ? $this->labels[$idx] : true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     *
     * The stored patterns are left untouched, so the regex can be rebuilt
     * any number of times after further addPattern() calls.
     *
     * @return null|string
     */
    protected function getCompoundedRegex()
    {
        if ($this->regex === null) {
            $groups = array();
            foreach ($this->patterns as $pattern) {
                // one capture group per pattern; its index identifies the label
                $groups[] = '(' . $this->escapePattern($pattern) . ')';
            }
            $this->regex = "/" . implode("|", $groups) . "/" . $this->getPerlMatchingFlags();
        }
        return $this->regex;
    }

    /**
     * Prepares a single pattern for use inside the "/"-delimited compound regex.
     *
     * Plain "(" and ")" are escaped so they cannot create capture groups,
     * "(?...)" constructs are kept functional and unescaped "/" is escaped.
     *
     * NOTE: a plain "(" does not take part in the nesting count, so a literal
     * "(...)" pair written inside a "(?...)" group has its ")" treated as the
     * end of that group.
     *
     * @param string $pattern pattern as given to addPattern()
     * @return string escaped pattern, without surrounding group or delimiters
     */
    protected function escapePattern($pattern)
    {
        /*
         * decompose the input pattern into "(", "(?", ")",
         * "[...]", "[]..]", "[^]..]", "[...[:...:]..]", "\x"...
         * elements.
         */
        preg_match_all('/\\\\.|' .
                       '\(\?|' .
                       '[()]|' .
                       '\[\^?\]?(?:\\\\.|\[:[^]]*:\]|[^]\\\\])*\]|' .
                       '[^[()\\\\]+/', $pattern, $elts);

        $escaped = "";
        $level = 0; // number of currently open "(?" groups

        foreach ($elts[0] as $elt) {
            if ($elt === '(') {
                $escaped .= '\(';
            } elseif ($elt === ')') {
                if ($level > 0) {
                    $level--; /* closing (? */
                    $escaped .= ')';
                } else {
                    $escaped .= '\)';
                }
            } elseif ($elt === '(?') {
                $level++;
                $escaped .= '(?';
            } elseif (substr($elt, 0, 1) == '\\') {
                // already an escape sequence, keep as is
                $escaped .= $elt;
            } else {
                // "/" is the delimiter of the compound regex
                $escaped .= str_replace('/', '\/', $elt);
            }
        }

        return $escaped;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Perl regex flags.
     */
    protected function getPerlMatchingFlags()
    {
        return ($this->case ? "msS" : "msSi");
    }
}