AsciiSlugger.php 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\String\Slugger;
  11. use Symfony\Component\String\AbstractUnicodeString;
  12. use Symfony\Component\String\UnicodeString;
  13. use Symfony\Contracts\Translation\LocaleAwareInterface;
  // Fail fast when this file is loaded: the class declared below implements
  // LocaleAwareInterface, which is provided by the "symfony/translation-contracts" package.
  if (false === interface_exists(LocaleAwareInterface::class)) {
      throw new \LogicException('You cannot use the "Symfony\Component\String\Slugger\AsciiSlugger" as the "symfony/translation-contracts" package is not installed. Try running "composer require symfony/translation-contracts".');
  }
  /**
   * Turns arbitrary strings into ASCII slugs.
   *
   * The input is converted to ASCII (with a locale-specific transliterator when
   * transliterator_transliterate() is available), configured symbols are replaced
   * by words, and every run of characters outside [A-Za-z0-9] is collapsed into
   * the separator.
   *
   * @author Titouan Galopin <galopintitouan@gmail.com>
   */
  class AsciiSlugger implements SluggerInterface, LocaleAwareInterface
  {
      /**
       * Transliterator ID to use for each supported locale.
       */
      private const LOCALE_TO_TRANSLITERATOR_ID = [
          'am' => 'Amharic-Latin',
          'ar' => 'Arabic-Latin',
          'az' => 'Azerbaijani-Latin',
          'be' => 'Belarusian-Latin',
          'bg' => 'Bulgarian-Latin',
          'bn' => 'Bengali-Latin',
          'de' => 'de-ASCII',
          'el' => 'Greek-Latin',
          'fa' => 'Persian-Latin',
          'he' => 'Hebrew-Latin',
          'hy' => 'Armenian-Latin',
          'ka' => 'Georgian-Latin',
          'kk' => 'Kazakh-Latin',
          'ky' => 'Kirghiz-Latin',
          'ko' => 'Korean-Latin',
          'mk' => 'Macedonian-Latin',
          'mn' => 'Mongolian-Latin',
          'or' => 'Oriya-Latin',
          'ps' => 'Pashto-Latin',
          'ru' => 'Russian-Latin',
          'sr' => 'Serbian-Latin',
          'sr_Cyrl' => 'Serbian-Latin',
          'th' => 'Thai-Latin',
          'tk' => 'Turkmen-Latin',
          'uk' => 'Ukrainian-Latin',
          'uz' => 'Uzbek-Latin',
          'zh' => 'Han-Latin',
      ];

      /**
       * Locale used by slug() when the caller does not pass one.
       */
      private $defaultLocale;

      /**
       * Either a map of locale => [symbol => replacement word], or a Closure
       * called as ($string, $locale) before transliteration.
       */
      private $symbolsMap = [
          'en' => ['@' => 'at', '&' => 'and'],
      ];

      /**
       * Cache of transliterators per locale.
       *
       * A null entry records that no transliterator is available for that locale.
       *
       * @var \Transliterator[]
       */
      private $transliterators = [];

      /**
       * @param string|null         $defaultLocale Locale used when slug() receives none
       * @param array|\Closure|null $symbolsMap    Replaces the built-in symbols map when not null
       *
       * @throws \TypeError When $symbolsMap is neither an array, a Closure nor null
       */
      public function __construct(?string $defaultLocale = null, $symbolsMap = null)
      {
          if (null !== $symbolsMap && !\is_array($symbolsMap) && !$symbolsMap instanceof \Closure) {
              throw new \TypeError(sprintf('Argument 2 passed to "%s()" must be array, Closure or null, "%s" given.', __METHOD__, \gettype($symbolsMap)));
          }

          $this->defaultLocale = $defaultLocale;
          $this->symbolsMap = $symbolsMap ?? $this->symbolsMap;
      }

      /**
       * {@inheritdoc}
       */
      public function setLocale($locale)
      {
          $this->defaultLocale = $locale;
      }

      /**
       * {@inheritdoc}
       */
      public function getLocale()
      {
          return $this->defaultLocale;
      }

      /**
       * {@inheritdoc}
       */
      public function slug(string $string, string $separator = '-', ?string $locale = null): AbstractUnicodeString
      {
          $locale = $locale ?? $this->defaultLocale;

          $transliterator = [];
          // The null check keeps strpos() from receiving null when no locale is configured at all.
          if (null !== $locale && ('de' === $locale || 0 === strpos($locale, 'de_'))) {
              // Use the shortcut for German in UnicodeString::ascii() if possible (faster and no requirement on intl)
              $transliterator = ['de-ASCII'];
          } elseif (\function_exists('transliterator_transliterate') && $locale) {
              // The cast turns null into [] and an object into an array of its public properties;
              // presumably UnicodeString::ascii() consumes the resulting transliterator ID as a rule (confirm there).
              $transliterator = (array) $this->createTransliterator($locale);
          }

          if ($this->symbolsMap instanceof \Closure) {
              // A closure map runs first, ahead of any transliteration rule.
              $symbolsMap = $this->symbolsMap;
              array_unshift($transliterator, static function ($s) use ($symbolsMap, $locale) {
                  return $symbolsMap($s, $locale);
              });
          }

          $unicodeString = (new UnicodeString($string))->ascii($transliterator);

          if (\is_array($this->symbolsMap)) {
              // Exact locale first (null addresses the same array key as ''), then its parent,
              // so that e.g. "en_US" picks up the symbols registered for "en".
              $symbols = $this->symbolsMap[$locale ?? ''] ?? null;
              if (null === $symbols && null !== $parent = self::getParentLocale($locale)) {
                  $symbols = $this->symbolsMap[$parent] ?? null;
              }

              foreach ($symbols ?? [] as $char => $replace) {
                  // Surrounding spaces keep the replacement word separated from its neighbours.
                  $unicodeString = $unicodeString->replace($char, ' '.$replace.' ');
              }
          }

          return $unicodeString
              ->replaceMatches('/[^A-Za-z0-9]++/', $separator)
              ->trim($separator)
          ;
      }

      /**
       * Returns the transliterator for a locale, or null when neither the locale
       * nor its parent has an entry in LOCALE_TO_TRANSLITERATOR_ID.
       *
       * Results, including null, are cached per locale.
       */
      private function createTransliterator(string $locale): ?\Transliterator
      {
          if (\array_key_exists($locale, $this->transliterators)) {
              return $this->transliterators[$locale];
          }

          // Exact locale supported, cache and return
          if ($id = self::LOCALE_TO_TRANSLITERATOR_ID[$locale] ?? null) {
              return $this->transliterators[$locale] = \Transliterator::create($id.'/BGN') ?? \Transliterator::create($id);
          }

          // Locale not supported and no parent: no dedicated transliterator
          $parent = self::getParentLocale($locale);
          if (null === $parent) {
              return $this->transliterators[$locale] = null;
          }

          // Try to use the parent locale (ie. try "ru" for "ru_RU")
          $transliterator = null;
          if ($id = self::LOCALE_TO_TRANSLITERATOR_ID[$parent] ?? null) {
              $transliterator = $this->transliterators[$parent] ?? (\Transliterator::create($id.'/BGN') ?? \Transliterator::create($id));
              // Cache the parent only when it was actually resolved. Storing null for an
              // unresolved parent would make a later direct lookup of that parent skip
              // its own parent fallback.
              $this->transliterators[$parent] = $transliterator;
          }

          return $this->transliterators[$locale] = $transliterator;
      }

      /**
       * Strips the last "_"-separated segment of a locale ("sr_Cyrl_RS" => "sr_Cyrl").
       *
       * Returns null when the locale is null or contains no "_".
       */
      private static function getParentLocale(?string $locale): ?string
      {
          if (null === $locale || false === $suffix = strrchr($locale, '_')) {
              return null;
          }

          return substr($locale, 0, -\strlen($suffix));
      }
  }