PseudoLocalizationTranslator.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Translation;
  11. use Symfony\Contracts\Translation\TranslatorInterface;
  /**
   * Decorates a translator and pseudo-localizes every message it returns.
   *
   * Depending on the options, the translated message gets its ASCII characters
   * replaced with look-alike characters, is padded to simulate longer
   * translations and is wrapped in brackets.
   *
   * This translator should only be used in a development environment.
   */
  final class PseudoLocalizationTranslator implements TranslatorInterface
  {
      private const EXPANSION_CHARACTER = '~';

      /**
       * HTML void elements: they never have content nor an end tag, so no
       * closing tag must be generated for them when rebuilding the markup.
       */
      private const VOID_ELEMENTS = [
          'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
          'link', 'meta', 'param', 'source', 'track', 'wbr',
      ];

      /** @var TranslatorInterface the decorated translator */
      private $translator;

      /** @var bool */
      private $accents;

      /** @var float always >= 1.0 once the constructor has run */
      private $expansionFactor;

      /** @var bool */
      private $brackets;

      /** @var bool */
      private $parseHTML;

      /** @var string[] */
      private $localizableHTMLAttributes;

      /**
       * Available options:
       *  * accents:
       *      type: boolean
       *      default: true
       *      description: replace ASCII characters of the translated string with accented versions or similar characters
       *      example: if true, "foo" => "ƒöö".
       *
       *  * expansion_factor:
       *      type: float
       *      default: 1
       *      validation: it must be greater than or equal to 1
       *      description: expand the translated string by the given factor with spaces and tildes
       *      example: if 2, "foo" => "foo ~~" (with the "accents" and "brackets" options disabled)
       *
       *  * brackets:
       *      type: boolean
       *      default: true
       *      description: wrap the translated string with brackets
       *      example: if true, "foo" => "[foo]"
       *
       *  * parse_html:
       *      type: boolean
       *      default: false
       *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
       *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
       *
       *  * localizable_html_attributes:
       *      type: string[]
       *      default: []
       *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
       *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
       *
       * @throws \InvalidArgumentException when the expansion factor is lower than 1
       */
      public function __construct(TranslatorInterface $translator, array $options = [])
      {
          $this->translator = $translator;
          $this->accents = $options['accents'] ?? true;

          // Normalize to float: an integer such as 1 would otherwise never
          // satisfy the strict "1.0 ===" comparison used below.
          $this->expansionFactor = (float) ($options['expansion_factor'] ?? 1.0);
          if (1.0 > $this->expansionFactor) {
              throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
          }

          $this->brackets = $options['brackets'] ?? true;

          $this->parseHTML = $options['parse_html'] ?? false;
          if ($this->parseHTML && !$this->accents && 1.0 === $this->expansionFactor) {
              // Neither accents nor expansion would use the parsed structure,
              // so skip the HTML parsing altogether.
              $this->parseHTML = false;
          }

          $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
      }

      /**
       * {@inheritdoc}
       *
       * Translates the message with the decorated translator, then applies
       * accents, expansion and brackets according to the configured options.
       */
      public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
      {
          $trans = '';
          $visibleText = '';

          foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
              if ($visible) {
                  // Only visible text counts when computing the expansion.
                  $visibleText .= $text;
              }

              if (!$localizable) {
                  $trans .= $text;

                  continue;
              }

              $this->addAccents($trans, $text);
          }

          $this->expand($trans, $visibleText);

          $this->addBrackets($trans);

          return $trans;
      }

      /**
       * Splits a translated message into parts.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}> list of [visible, localizable, text] tuples;
       *                                                        a single visible and localizable part when
       *                                                        HTML parsing is disabled
       */
      private function getParts(string $originalTrans): array
      {
          if (!$this->parseHTML) {
              return [[true, true, $originalTrans]];
          }

          // Turn every non-ASCII character into a numeric entity so that the
          // DOM parser does not depend on the document encoding. This replaces
          // mb_convert_encoding(..., 'HTML-ENTITIES'), deprecated since PHP 8.2.
          $encoding = mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8';
          $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], $encoding);

          // Silence libxml warnings while parsing, then restore the previous setting.
          $useInternalErrors = libxml_use_internal_errors(true);

          $dom = new \DOMDocument();
          $dom->loadHTML('<trans>'.$html.'</trans>');

          libxml_clear_errors();
          libxml_use_internal_errors($useInternalErrors);

          // The chain below walks down to the <trans> wrapper added above.
          return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
      }

      /**
       * Recursively flattens the children of a DOM node into parts.
       *
       * Markup (tags, attribute names, quotes) is neither visible nor
       * localizable; values of the attributes listed in the
       * "localizable_html_attributes" option are localizable but not visible;
       * every non-element child is both visible and localizable.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}> list of [visible, localizable, text] tuples
       */
      private function parseNode(\DOMNode $node): array
      {
          $parts = [];

          foreach ($node->childNodes as $childNode) {
              if (!$childNode instanceof \DOMElement) {
                  $parts[] = [true, true, $childNode->nodeValue];

                  continue;
              }

              $parts[] = [false, false, '<'.$childNode->tagName];

              /** @var \DOMAttr $attribute */
              foreach ($childNode->attributes as $attribute) {
                  $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                  $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);
                  // With PREG_SPLIT_DELIM_CAPTURE the captured entities sit at odd
                  // indexes: they are never localizable so that the escaping
                  // survives the accents substitution.
                  foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                      if ('' === $match) {
                          continue;
                      }

                      $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                  }

                  $parts[] = [false, false, '"'];
              }

              $parts[] = [false, false, '>'];

              // Void elements have no end tag: emitting e.g. "</br>" would be
              // read by browsers as a second line break. The child check keeps
              // any content the parser may have attached to the element.
              if (!$childNode->hasChildNodes() && \in_array(strtolower($childNode->tagName), self::VOID_ELEMENTS, true)) {
                  continue;
              }

              $parts = array_merge($parts, $this->parseNode($childNode));

              $parts[] = [false, false, '</'.$childNode->tagName.'>'];
          }

          return $parts;
      }

      /**
       * Appends $text to $trans, substituting printable ASCII characters with
       * look-alike characters when the "accents" option is enabled.
       */
      private function addAccents(string &$trans, string $text): void
      {
          $trans .= $this->accents ? strtr($text, [
              // NOTE(review): every other entry maps to a non-ASCII look-alike; confirm that
              // this replacement is the intended space variant and was not normalized to U+0020.
              ' ' => ' ',
              '!' => '¡',
              '"' => '″',
              '#' => '♯',
              '$' => '€',
              '%' => '‰',
              '&' => '⅋',
              '\'' => '´',
              '(' => '{',
              ')' => '}',
              '*' => '⁎',
              '+' => '⁺',
              ',' => '،',
              '-' => '‐',
              '.' => '·',
              '/' => '⁄',
              '0' => '⓪',
              '1' => '①',
              '2' => '②',
              '3' => '③',
              '4' => '④',
              '5' => '⑤',
              '6' => '⑥',
              '7' => '⑦',
              '8' => '⑧',
              '9' => '⑨',
              ':' => '∶',
              ';' => '⁏',
              '<' => '≤',
              '=' => '≂',
              '>' => '≥',
              '?' => '¿',
              '@' => '՞',
              'A' => 'Å',
              'B' => 'Ɓ',
              'C' => 'Ç',
              'D' => 'Ð',
              'E' => 'É',
              'F' => 'Ƒ',
              'G' => 'Ĝ',
              'H' => 'Ĥ',
              'I' => 'Î',
              'J' => 'Ĵ',
              'K' => 'Ķ',
              'L' => 'Ļ',
              'M' => 'Ṁ',
              'N' => 'Ñ',
              'O' => 'Ö',
              'P' => 'Þ',
              'Q' => 'Ǫ',
              'R' => 'Ŕ',
              'S' => 'Š',
              'T' => 'Ţ',
              'U' => 'Û',
              'V' => 'Ṽ',
              'W' => 'Ŵ',
              'X' => 'Ẋ',
              'Y' => 'Ý',
              'Z' => 'Ž',
              '[' => '⁅',
              '\\' => '∖',
              ']' => '⁆',
              '^' => '˄',
              '_' => '‿',
              '`' => '‵',
              'a' => 'å',
              'b' => 'ƀ',
              'c' => 'ç',
              'd' => 'ð',
              'e' => 'é',
              'f' => 'ƒ',
              'g' => 'ĝ',
              'h' => 'ĥ',
              'i' => 'î',
              'j' => 'ĵ',
              'k' => 'ķ',
              'l' => 'ļ',
              'm' => 'ɱ',
              'n' => 'ñ',
              'o' => 'ö',
              'p' => 'þ',
              'q' => 'ǫ',
              'r' => 'ŕ',
              's' => 'š',
              't' => 'ţ',
              'u' => 'û',
              'v' => 'ṽ',
              'w' => 'ŵ',
              'x' => 'ẋ',
              'y' => 'ý',
              'z' => 'ž',
              '{' => '(',
              '|' => '¦',
              '}' => ')',
              '~' => '˞',
          ]) : $text;
      }

      /**
       * Pads $trans with space-separated runs of the expansion character until
       * the visible length reaches ceil(visible length * expansion factor).
       *
       * The runs reuse the lengths of the words found in $visibleText, picked
       * at random (weighted by how often each length occurs). The two
       * characters added by the brackets, when enabled, count towards the
       * target length.
       */
      private function expand(string &$trans, string $visibleText): void
      {
          if (1.0 >= $this->expansionFactor) {
              return;
          }

          $visibleLength = $this->strlen($visibleText);
          $missingLength = (int) (ceil($visibleLength * $this->expansionFactor)) - $visibleLength;
          if ($this->brackets) {
              $missingLength -= 2;
          }

          if (0 >= $missingLength) {
              return;
          }

          // Map of word length => number of words of that length, restricted to
          // words short enough to fit in the missing space with their separator.
          $words = [];
          $wordsCount = 0;
          foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
              $wordLength = $this->strlen($word);

              if ($wordLength >= $missingLength) {
                  continue;
              }

              if (!isset($words[$wordLength])) {
                  $words[$wordLength] = 0;
              }

              ++$words[$wordLength];
              ++$wordsCount;
          }

          if (!$words) {
              // No word fits: fill the whole gap with a single run.
              $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

              return;
          }

          arsort($words, \SORT_NUMERIC);

          $longestWordLength = max(array_keys($words));

          while (true) {
              // Weighted random pick: $r always lands on one of the lengths
              // because the counts add up to $wordsCount.
              $r = mt_rand(1, $wordsCount);

              foreach ($words as $length => $count) {
                  $r -= $count;
                  if ($r <= 0) {
                      break;
                  }
              }

              $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

              // The run plus its leading space.
              $missingLength -= $length + 1;

              if (0 === $missingLength) {
                  return;
              }

              // Drop the lengths that no longer fit in the remaining space.
              while ($longestWordLength >= $missingLength) {
                  $wordsCount -= $words[$longestWordLength];
                  unset($words[$longestWordLength]);

                  if (!$words) {
                      $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

                      return;
                  }

                  $longestWordLength = max(array_keys($words));
              }
          }
      }

      /**
       * Wraps $trans in square brackets when the "brackets" option is enabled.
       */
      private function addBrackets(string &$trans): void
      {
          if (!$this->brackets) {
              return;
          }

          $trans = '['.$trans.']';
      }

      /**
       * Returns the length of $s in characters, falling back to the byte
       * length when the encoding of $s cannot be detected.
       */
      private function strlen(string $s): int
      {
          return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
      }
  }