1: <?php
2: namespace Yep\Tokenizer;
3:
// Fallback ids for tokenizer constants that the running PHP build does not
// define. Each constant is only created when it is missing, so ids that the
// engine already provides are never overridden.
if (!defined('T_POW')) {
    define('T_POW', 1001);
}
if (!defined('T_ELLIPSIS')) {
    define('T_ELLIPSIS', 1002);
}
if (!defined('T_POW_EQUAL')) {
    define('T_POW_EQUAL', 1003);
}
if (!defined('T_CHARACTER')) {
    define('T_CHARACTER', 1004);
}
if (!defined('T_BAD_CHARACTER')) {
    define('T_BAD_CHARACTER', 1005);
}
9:
/**
 * Tokenizer for PHP source code built on top of token_get_all().
 *
 * The TOKEN_* constants mirror the T_* tokenizer constants one-to-one so that
 * callers can compare token types without referring to the global names.
 */
class PhpTokenizer implements ITokenizer {
    // Type given to tokens that token_get_all() returns as a plain string
    // instead of an [id, text, line] array.
    const TOKEN_UNKNOWN = null;

    // Aliases of the T_* tokenizer constants, kept in their original
    // declaration order.
    const TOKEN_POW = T_POW;
    const TOKEN_ELLIPSIS = T_ELLIPSIS;
    const TOKEN_POW_EQUAL = T_POW_EQUAL;
    const TOKEN_REQUIRE_ONCE = T_REQUIRE_ONCE;
    const TOKEN_REQUIRE = T_REQUIRE;
    const TOKEN_EVAL = T_EVAL;
    const TOKEN_INCLUDE_ONCE = T_INCLUDE_ONCE;
    const TOKEN_INCLUDE = T_INCLUDE;
    const TOKEN_LOGICAL_OR = T_LOGICAL_OR;
    const TOKEN_LOGICAL_XOR = T_LOGICAL_XOR;
    const TOKEN_LOGICAL_AND = T_LOGICAL_AND;
    const TOKEN_PRINT = T_PRINT;
    const TOKEN_SR_EQUAL = T_SR_EQUAL;
    const TOKEN_SL_EQUAL = T_SL_EQUAL;
    const TOKEN_XOR_EQUAL = T_XOR_EQUAL;
    const TOKEN_OR_EQUAL = T_OR_EQUAL;
    const TOKEN_AND_EQUAL = T_AND_EQUAL;
    const TOKEN_MOD_EQUAL = T_MOD_EQUAL;
    const TOKEN_CONCAT_EQUAL = T_CONCAT_EQUAL;
    const TOKEN_DIV_EQUAL = T_DIV_EQUAL;
    const TOKEN_MUL_EQUAL = T_MUL_EQUAL;
    const TOKEN_MINUS_EQUAL = T_MINUS_EQUAL;
    const TOKEN_PLUS_EQUAL = T_PLUS_EQUAL;
    const TOKEN_BOOLEAN_OR = T_BOOLEAN_OR;
    const TOKEN_BOOLEAN_AND = T_BOOLEAN_AND;
    const TOKEN_IS_NOT_IDENTICAL = T_IS_NOT_IDENTICAL;
    const TOKEN_IS_IDENTICAL = T_IS_IDENTICAL;
    const TOKEN_IS_NOT_EQUAL = T_IS_NOT_EQUAL;
    const TOKEN_IS_EQUAL = T_IS_EQUAL;
    const TOKEN_IS_GREATER_OR_EQUAL = T_IS_GREATER_OR_EQUAL;
    const TOKEN_IS_SMALLER_OR_EQUAL = T_IS_SMALLER_OR_EQUAL;
    const TOKEN_SR = T_SR;
    const TOKEN_SL = T_SL;
    const TOKEN_INSTANCEOF = T_INSTANCEOF;
    const TOKEN_UNSET_CAST = T_UNSET_CAST;
    const TOKEN_BOOL_CAST = T_BOOL_CAST;
    const TOKEN_OBJECT_CAST = T_OBJECT_CAST;
    const TOKEN_ARRAY_CAST = T_ARRAY_CAST;
    const TOKEN_STRING_CAST = T_STRING_CAST;
    const TOKEN_DOUBLE_CAST = T_DOUBLE_CAST;
    const TOKEN_INT_CAST = T_INT_CAST;
    const TOKEN_DEC = T_DEC;
    const TOKEN_INC = T_INC;
    const TOKEN_CLONE = T_CLONE;
    const TOKEN_NEW = T_NEW;
    const TOKEN_EXIT = T_EXIT;
    const TOKEN_IF = T_IF;
    const TOKEN_ELSEIF = T_ELSEIF;
    const TOKEN_ELSE = T_ELSE;
    const TOKEN_ENDIF = T_ENDIF;
    const TOKEN_LNUMBER = T_LNUMBER;
    const TOKEN_DNUMBER = T_DNUMBER;
    const TOKEN_STRING = T_STRING;
    const TOKEN_STRING_VARNAME = T_STRING_VARNAME;
    const TOKEN_VARIABLE = T_VARIABLE;
    const TOKEN_NUM_STRING = T_NUM_STRING;
    const TOKEN_INLINE_HTML = T_INLINE_HTML;
    const TOKEN_CHARACTER = T_CHARACTER;
    const TOKEN_BAD_CHARACTER = T_BAD_CHARACTER;
    const TOKEN_ENCAPSED_AND_WHITESPACE = T_ENCAPSED_AND_WHITESPACE;
    const TOKEN_CONSTANT_ENCAPSED_STRING = T_CONSTANT_ENCAPSED_STRING;
    const TOKEN_ECHO = T_ECHO;
    const TOKEN_DO = T_DO;
    const TOKEN_WHILE = T_WHILE;
    const TOKEN_ENDWHILE = T_ENDWHILE;
    const TOKEN_FOR = T_FOR;
    const TOKEN_ENDFOR = T_ENDFOR;
    const TOKEN_FOREACH = T_FOREACH;
    const TOKEN_ENDFOREACH = T_ENDFOREACH;
    const TOKEN_DECLARE = T_DECLARE;
    const TOKEN_ENDDECLARE = T_ENDDECLARE;
    const TOKEN_AS = T_AS;
    const TOKEN_SWITCH = T_SWITCH;
    const TOKEN_ENDSWITCH = T_ENDSWITCH;
    const TOKEN_CASE = T_CASE;
    const TOKEN_DEFAULT = T_DEFAULT;
    const TOKEN_BREAK = T_BREAK;
    const TOKEN_CONTINUE = T_CONTINUE;
    const TOKEN_GOTO = T_GOTO;
    const TOKEN_FUNCTION = T_FUNCTION;
    const TOKEN_CONST = T_CONST;
    const TOKEN_RETURN = T_RETURN;
    const TOKEN_YIELD = T_YIELD;
    const TOKEN_TRY = T_TRY;
    const TOKEN_CATCH = T_CATCH;
    const TOKEN_FINALLY = T_FINALLY;
    const TOKEN_THROW = T_THROW;
    const TOKEN_USE = T_USE;
    const TOKEN_INSTEADOF = T_INSTEADOF;
    const TOKEN_GLOBAL = T_GLOBAL;
    const TOKEN_PUBLIC = T_PUBLIC;
    const TOKEN_PROTECTED = T_PROTECTED;
    const TOKEN_PRIVATE = T_PRIVATE;
    const TOKEN_FINAL = T_FINAL;
    const TOKEN_ABSTRACT = T_ABSTRACT;
    const TOKEN_STATIC = T_STATIC;
    const TOKEN_VAR = T_VAR;
    const TOKEN_UNSET = T_UNSET;
    const TOKEN_ISSET = T_ISSET;
    const TOKEN_EMPTY = T_EMPTY;
    const TOKEN_HALT_COMPILER = T_HALT_COMPILER;
    const TOKEN_CLASS = T_CLASS;
    const TOKEN_TRAIT = T_TRAIT;
    const TOKEN_INTERFACE = T_INTERFACE;
    const TOKEN_EXTENDS = T_EXTENDS;
    const TOKEN_IMPLEMENTS = T_IMPLEMENTS;
    const TOKEN_OBJECT_OPERATOR = T_OBJECT_OPERATOR;
    const TOKEN_DOUBLE_ARROW = T_DOUBLE_ARROW;
    const TOKEN_LIST = T_LIST;
    const TOKEN_ARRAY = T_ARRAY;
    const TOKEN_CALLABLE = T_CALLABLE;
    const TOKEN_CLASS_C = T_CLASS_C;
    const TOKEN_TRAIT_C = T_TRAIT_C;
    const TOKEN_METHOD_C = T_METHOD_C;
    const TOKEN_FUNC_C = T_FUNC_C;
    const TOKEN_LINE = T_LINE;
    const TOKEN_FILE = T_FILE;
    const TOKEN_COMMENT = T_COMMENT;
    const TOKEN_DOC_COMMENT = T_DOC_COMMENT;
    const TOKEN_OPEN_TAG = T_OPEN_TAG;
    const TOKEN_OPEN_TAG_WITH_ECHO = T_OPEN_TAG_WITH_ECHO;
    const TOKEN_CLOSE_TAG = T_CLOSE_TAG;
    const TOKEN_WHITESPACE = T_WHITESPACE;
    const TOKEN_START_HEREDOC = T_START_HEREDOC;
    const TOKEN_END_HEREDOC = T_END_HEREDOC;
    const TOKEN_DOLLAR_OPEN_CURLY_BRACES = T_DOLLAR_OPEN_CURLY_BRACES;
    const TOKEN_CURLY_OPEN = T_CURLY_OPEN;
    const TOKEN_PAAMAYIM_NEKUDOTAYIM = T_PAAMAYIM_NEKUDOTAYIM;
    const TOKEN_NAMESPACE = T_NAMESPACE;
    const TOKEN_NS_C = T_NS_C;
    const TOKEN_DIR = T_DIR;
    const TOKEN_NS_SEPARATOR = T_NS_SEPARATOR;
    const TOKEN_DOUBLE_COLON = T_DOUBLE_COLON;

    /**
     * Converts PHP source code into a flat list of token records.
     *
     * Every element returned by token_get_all() yields one record with the
     * keys self::POSITION, self::TYPE, self::VALUE, self::OFFSET and
     * self::LENGTH, in that order:
     *  - POSITION: index of the element in the token_get_all() result
     *  - TYPE:     the token id, or TOKEN_UNKNOWN (null) when the element is
     *              a plain string rather than an array
     *  - VALUE:    the token text
     *  - OFFSET:   sum of the LENGTH values of all preceding records
     *  - LENGTH:   mb_strlen() of VALUE
     *
     * mb_strlen() is called without an explicit encoding, so OFFSET and
     * LENGTH are measured with mbstring's internal encoding.
     *
     * NOTE(review): the record keys are not declared in this class;
     * presumably they are constants of ITokenizer - confirm.
     *
     * @param string $code PHP source to tokenize
     * @return array[] list of token records
     */
    public function tokenize($code) {
        $records = [];
        $offset = 0;

        foreach (token_get_all($code) as $index => $raw) {
            if (is_array($raw)) {
                // Array form: element 0 is the token id, element 1 its text.
                $type = $raw[0];
                $text = $raw[1];
            } else {
                // String form carries only the text and has no token id.
                $type = self::TOKEN_UNKNOWN;
                $text = $raw;
            }

            $size = mb_strlen($text);

            $records[] = [
                self::POSITION => $index,
                self::TYPE => $type,
                self::VALUE => $text,
                self::OFFSET => $offset,
                self::LENGTH => $size,
            ];

            // The next record starts where this one ends.
            $offset += $size;
        }

        return $records;
    }
}
63: