Overview

Namespaces

  • Yep
    • Tokenizer

Classes

  • Yep\Tokenizer\PhpTokenizer
  • Yep\Tokenizer\TokenIterator
  • Yep\Tokenizer\Tokenizer

Interfaces

  • Yep\Tokenizer\ITokenizer

Exceptions

  • Yep\Tokenizer\UnexpectedTokenException

Navigation

  • Overview
  • Namespace
  • Class
 1: <?php
 2: namespace Yep\Tokenizer;
 3: 
 4: class Tokenizer implements ITokenizer {
 5:     protected $regexp;
 6:     protected $keys;
 7:     protected $count = 0;
 8: 
 9:     /**
10:      * Tokenizer constructor
11:      *
12:      * @param array  $patterns
13:      * @param string $flags
14:      */
15:     public function __construct(array $patterns, $flags = '') {
16:         $tmp = [];
17: 
18:         foreach ($patterns as $pattern_key => $pattern_value) {
19:             $tmp[] = "?<$pattern_key>$pattern_value";
20:         }
21: 
22:         $this->regexp = '~(' . implode(')|(', $tmp) . ')~Au' . $flags;
23:         $this->keys = array_keys($patterns);
24:         $this->count = count($patterns);
25:     }
26: 
27:     /**
28:      * Tokenize input
29:      *
30:      * @param string $input
31:      * @return array
32:      * @throws UnexpectedTokenException
33:      */
34:     public function tokenize($input) {
35:         preg_match_all($this->regexp, $input, $tokens, PREG_SET_ORDER);
36:         $length = 0;
37: 
38:         foreach ($tokens as $token_i => &$token) {
39:             $type = null;
40: 
41:             for ($i = 0; $i < $this->count; $i++) {
42:                 if (isset($this->keys[$i], $token[$this->keys[$i]]) && $token[$this->keys[$i]] != null) {
43:                     $type = $this->keys[$i];
44:                     break;
45:                 }
46:             }
47: 
48:             $token = [
49:                 self::TYPE     => $type,
50:                 self::VALUE    => $token[0],
51:                 self::POSITION => $token_i,
52:                 self::OFFSET   => $length,
53:             ];
54: 
55:             $length += $token[self::LENGTH] = mb_strlen($token[self::VALUE]);
56:         }
57: 
58:         if ($length !== mb_strlen($input)) {
59:             $text = mb_substr($input, 0, $length);
60:             $line = mb_substr_count($text, "\n") + 1;
61:             $col = $length - mb_strrpos("\n$text", "\n") + 1;
62:             $token = str_replace("\n", '\n', mb_substr($input, $length, 10));
63: 
64:             throw new UnexpectedTokenException(sprintf('Unexpected "%s" on line %d, column %d.', $token, $line, $col));
65:         }
66: 
67:         return $tokens;
68:     }
69: 
70: }
71: 
API documentation generated by ApiGen