EmailLexer.php 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. <?php
  2. namespace Egulias\EmailValidator;
  3. use Doctrine\Common\Lexer\AbstractLexer;
  4. class EmailLexer extends AbstractLexer
  5. {
  6. //ASCII values
  7. const C_DEL = 127;
  8. const C_NUL = 0;
  9. const S_AT = 64;
  10. const S_BACKSLASH = 92;
  11. const S_DOT = 46;
  12. const S_DQUOTE = 34;
  13. const S_OPENPARENTHESIS = 49;
  14. const S_CLOSEPARENTHESIS = 261;
  15. const S_OPENBRACKET = 262;
  16. const S_CLOSEBRACKET = 263;
  17. const S_HYPHEN = 264;
  18. const S_COLON = 265;
  19. const S_DOUBLECOLON = 266;
  20. const S_SP = 267;
  21. const S_HTAB = 268;
  22. const S_CR = 269;
  23. const S_LF = 270;
  24. const S_IPV6TAG = 271;
  25. const S_LOWERTHAN = 272;
  26. const S_GREATERTHAN = 273;
  27. const S_COMMA = 274;
  28. const S_SEMICOLON = 275;
  29. const S_OPENQBRACKET = 276;
  30. const S_CLOSEQBRACKET = 277;
  31. const S_SLASH = 278;
  32. const S_EMPTY = null;
  33. const GENERIC = 300;
  34. const CRLF = 301;
  35. const INVALID = 302;
  36. const ASCII_INVALID_FROM = 127;
  37. const ASCII_INVALID_TO = 199;
  38. /**
  39. * US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
  40. *
  41. * @var array
  42. */
  43. protected $charValue = array(
  44. '(' => self::S_OPENPARENTHESIS,
  45. ')' => self::S_CLOSEPARENTHESIS,
  46. '<' => self::S_LOWERTHAN,
  47. '>' => self::S_GREATERTHAN,
  48. '[' => self::S_OPENBRACKET,
  49. ']' => self::S_CLOSEBRACKET,
  50. ':' => self::S_COLON,
  51. ';' => self::S_SEMICOLON,
  52. '@' => self::S_AT,
  53. '\\' => self::S_BACKSLASH,
  54. '/' => self::S_SLASH,
  55. ',' => self::S_COMMA,
  56. '.' => self::S_DOT,
  57. '"' => self::S_DQUOTE,
  58. '-' => self::S_HYPHEN,
  59. '::' => self::S_DOUBLECOLON,
  60. ' ' => self::S_SP,
  61. "\t" => self::S_HTAB,
  62. "\r" => self::S_CR,
  63. "\n" => self::S_LF,
  64. "\r\n" => self::CRLF,
  65. 'IPv6' => self::S_IPV6TAG,
  66. '{' => self::S_OPENQBRACKET,
  67. '}' => self::S_CLOSEQBRACKET,
  68. '' => self::S_EMPTY,
  69. '\0' => self::C_NUL,
  70. );
  71. protected $hasInvalidTokens = false;
  72. protected $previous;
  73. private static $nullToken = [
  74. 'value' => '',
  75. 'type' => null,
  76. 'position' => 0,
  77. ];
  78. public function __construct()
  79. {
  80. $this->previous = $this->token = self::$nullToken;
  81. }
  82. /**
  83. * @return void
  84. */
  85. public function reset()
  86. {
  87. $this->hasInvalidTokens = false;
  88. parent::reset();
  89. $this->previous = $this->token = self::$nullToken;
  90. }
  91. public function hasInvalidTokens()
  92. {
  93. return $this->hasInvalidTokens;
  94. }
  95. /**
  96. * @param string $type
  97. * @throws \UnexpectedValueException
  98. * @return boolean
  99. */
  100. public function find($type)
  101. {
  102. $search = clone $this;
  103. $search->skipUntil($type);
  104. if (!$search->lookahead) {
  105. throw new \UnexpectedValueException($type . ' not found');
  106. }
  107. return true;
  108. }
  109. /**
  110. * getPrevious
  111. *
  112. * @return array token
  113. */
  114. public function getPrevious()
  115. {
  116. return $this->previous;
  117. }
  118. /**
  119. * moveNext
  120. *
  121. * @return boolean
  122. */
  123. public function moveNext()
  124. {
  125. $this->previous = $this->token;
  126. $hasNext = parent::moveNext();
  127. $this->token = $this->token ?: self::$nullToken;
  128. return $hasNext;
  129. }
  130. /**
  131. * Lexical catchable patterns.
  132. *
  133. * @return string[]
  134. */
  135. protected function getCatchablePatterns()
  136. {
  137. return array(
  138. '[a-zA-Z_]+[46]?', //ASCII and domain literal
  139. '[^\x00-\x7F]', //UTF-8
  140. '[0-9]+',
  141. '\r\n',
  142. '::',
  143. '\s+?',
  144. '.',
  145. );
  146. }
  147. /**
  148. * Lexical non-catchable patterns.
  149. *
  150. * @return string[]
  151. */
  152. protected function getNonCatchablePatterns()
  153. {
  154. return array('[\xA0-\xff]+');
  155. }
  156. /**
  157. * Retrieve token type. Also processes the token value if necessary.
  158. *
  159. * @param string $value
  160. * @throws \InvalidArgumentException
  161. * @return integer
  162. */
  163. protected function getType(&$value)
  164. {
  165. if ($this->isNullType($value)) {
  166. return self::C_NUL;
  167. }
  168. if ($this->isValid($value)) {
  169. return $this->charValue[$value];
  170. }
  171. if ($this->isUTF8Invalid($value)) {
  172. $this->hasInvalidTokens = true;
  173. return self::INVALID;
  174. }
  175. return self::GENERIC;
  176. }
  177. protected function isValid($value)
  178. {
  179. if (isset($this->charValue[$value])) {
  180. return true;
  181. }
  182. return false;
  183. }
  184. /**
  185. * @param string $value
  186. * @return bool
  187. */
  188. protected function isNullType($value)
  189. {
  190. if ($value === "\0") {
  191. return true;
  192. }
  193. return false;
  194. }
  195. /**
  196. * @param string $value
  197. * @return bool
  198. */
  199. protected function isUTF8Invalid($value)
  200. {
  201. if (preg_match('/\p{Cc}+/u', $value)) {
  202. return true;
  203. }
  204. return false;
  205. }
  206. /**
  207. * @return string
  208. */
  209. protected function getModifiers()
  210. {
  211. return 'iu';
  212. }
  213. }