#!/usr/bin/env php
<?php

/*
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

declare(strict_types=1);

require __DIR__.'/vendor/autoload.php';

use Symfony\Component\Filesystem\Filesystem;
use Symfony\Component\Finder\Finder;
use Symfony\Component\VarExporter\VarExporter;

Builder::cleanTarget();
$emojisCodePoints = Builder::getEmojisCodePoints();
Builder::saveRules(Builder::buildRules($emojisCodePoints));

/**
 * Generates one C++ source file per locale, mapping emojis to their localized
 * names and to an EmojiModel category, from the CLDR data found under vendor/.
 */
final class Builder
{
    /** Directory that receives the generated "<locale>.cpp" files. */
    private const TARGET_DIR = __DIR__.'/../src/data/emoji/';

    /** Emoji list that provides the code point sequences and their groups. */
    private const EMOJI_TEST_FILE = __DIR__.'/vendor/unicode-org/cldr/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/emoji/emoji-test.txt';

    /** Group names as written in emoji-test.txt => identifier emitted after "EmojiModel::". */
    private const GROUPS = [
        'Activities' => 'Activities',
        'Animals & Nature' => 'Nature',
        'Component' => 'Component',
        'Flags' => 'Flags',
        'Food & Drink' => 'Food',
        'Objects' => 'Objects',
        'People & Body' => 'People',
        'Smileys & Emotion' => 'Smileys',
        'Symbols' => 'Symbols',
        'Travel & Places' => 'Travel',
    ];

    /**
     * Parses emoji-test.txt into a lookup table keyed by code point sequence.
     *
     * Each emoji is registered twice: under its lowercase, space-separated code
     * point sequence, and under the same sequence with the "200d" (Zero Width
     * Joiner) entries removed.
     *
     * @return array<string, array{0: string, 1: string}> code points => [emoji, group]
     *
     * @throws \RuntimeException when the emoji list cannot be read
     * @throws \DomainException  when a data line cannot be parsed or precedes any "# group:" header
     */
    public static function getEmojisCodePoints(): array
    {
        $lines = file(self::EMOJI_TEST_FILE);
        if (false === $lines) {
            throw new \RuntimeException(sprintf('Could not read "%s".', self::EMOJI_TEST_FILE));
        }

        $emojisCodePoints = [];
        $group = null;

        foreach ($lines as $line) {
            $line = trim($line);

            if ('' === $line) {
                continue;
            }

            if (str_starts_with($line, '#')) {
                // Only "# group: <name>" comments carry information; any other comment is skipped.
                if (str_starts_with($line, '# group:')) {
                    $rawGroup = ltrim(explode(':', $line)[1]);
                    if (!isset(self::GROUPS[$rawGroup])) {
                        fwrite(\STDERR, sprintf("Group not found: \"%s\"\n", $rawGroup));
                    }
                    // An unknown group keeps its raw name, so the problem stays visible in the output.
                    $group = self::GROUPS[$rawGroup] ?? $rawGroup;
                }

                continue;
            }

            // 263A FE0F ; fully-qualified # ☺️ E0.6 smiling face
            if (1 !== preg_match('{^(?<codePoints>[\w ]+) +; [\w-]+ +# (?<emoji>.+) E\d+\.\d+ ?(?<name>.+)$}Uu', $line, $matches)) {
                throw new \DomainException("Could not parse line: \"$line\".");
            }

            if (null === $group) {
                throw new \DomainException("No group declared before line: \"$line\".");
            }

            $codePoints = strtolower(trim($matches['codePoints']));
            $emojisCodePoints[$codePoints] = [$matches['emoji'], $group];

            // We also add a version without the "Zero Width Joiner"
            $codePoints = str_replace('200d ', '', $codePoints);
            $emojisCodePoints[$codePoints] = [$matches['emoji'], $group];
        }

        return $emojisCodePoints;
    }

    /**
     * Reads the CLDR annotation files and yields, per locale, the emoji map to export.
     *
     * Only annotations of type "tts" whose code point sequence is a key of
     * $emojisCodePoints are kept; all others are skipped. Each locale then
     * inherits the entries of its parent locales ("a_b_c" falls back to "a_b",
     * then "a") for every emoji it does not define itself.
     *
     * @param array<string, array{0: string, 1: string}> $emojisCodePoints result of getEmojisCodePoints()
     *
     * @return \Generator<string, array<string, array{0: string, 1: string}>> lowercase locale => [emoji => [name, group]]
     *
     * @throws \RuntimeException when an annotation file cannot be read or parsed,
     *                           or when an emoji is rejected by testEmoji()
     */
    public static function buildRules(array $emojisCodePoints): Generator
    {
        $files = (new Finder())
            ->files()
            ->in([
                // __DIR__.'/vendor/unicode-org/cldr/common/annotationsDerived',
                __DIR__.'/vendor/unicode-org/cldr/common/annotations',
            ])
            ->name('*.xml')
        ;

        $mapsByLocale = [];

        foreach ($files as $file) {
            $locale = $file->getBasename('.xml');
            $path = $file->getPathname();

            $xml = file_get_contents($path);
            if (false === $xml) {
                throw new \RuntimeException(sprintf('Could not read "%s".', $path));
            }

            $document = new DOMDocument();
            if (!$document->loadXML($xml)) {
                throw new \RuntimeException(sprintf('Could not parse "%s" as XML.', $path));
            }

            $xpath = new DOMXPath($document);

            foreach ($xpath->query('.//annotation[@type="tts"]') as $annotation) {
                $emoji = $annotation->getAttribute('cp');
                $name = $annotation->textContent;

                // NOTE(review): dechex() does not zero-pad, while the sample line quoted in
                // getEmojisCodePoints() shows 4-digit code points; sequences with a code point
                // below 0x1000 would therefore never match. Confirm whether that is intended.
                $emojiCodePoints = implode(' ', array_map(
                    static fn (string $char): string => dechex(mb_ord($char)),
                    mb_str_split($emoji),
                ));

                if (!array_key_exists($emojiCodePoints, $emojisCodePoints)) {
                    continue;
                }

                self::testEmoji($emoji, $locale);

                // Bucket by number of code points; createRules() orders the buckets.
                $codePointsCount = mb_strlen($emoji);
                $mapsByLocale[$locale][$codePointsCount][$emoji] = [$name, $emojisCodePoints[$emojiCodePoints][1]];
            }
        }

        ksort($mapsByLocale);

        foreach ($mapsByLocale as $locale => $maps) {
            $parentLocale = (string) $locale;

            while (false !== $i = strrpos($parentLocale, '_')) {
                $parentLocale = substr($parentLocale, 0, $i);

                // Merge bucket by bucket: a plain "+=" on the outer array would discard a whole
                // parent bucket as soon as the child defines a single emoji of the same length.
                foreach ($mapsByLocale[$parentLocale] ?? [] as $count => $parentMap) {
                    $maps[$count] = ($maps[$count] ?? []) + $parentMap;
                }
            }

            yield strtolower((string) $locale) => self::createRules($maps);
        }
    }

    /**
     * Removes the target directory and recreates it empty.
     */
    public static function cleanTarget(): void
    {
        $fs = new Filesystem();
        $fs->remove(self::TARGET_DIR);
        $fs->mkdir(self::TARGET_DIR);
    }

    /**
     * Writes one "<locale>.cpp" file into the target directory per locale.
     *
     * @param iterable<string, array<string, array{0: string, 1: string}>> $rulesByLocale locale => [emoji => [name, group]]
     *
     * @throws \RuntimeException when a file cannot be written
     */
    public static function saveRules(iterable $rulesByLocale): void
    {
        foreach ($rulesByLocale as $locale => $rules) {
            $rulesOutput = '';

            foreach ($rules as $rule => [$text, $group]) {
                // Spell the emoji as \UXXXXXXXX escapes so the generated source stays ASCII.
                $emojiSequence = '';
                foreach (mb_str_split((string) $rule) as $char) {
                    $emojiSequence .= sprintf('\U%08X', mb_ord($char));
                }

                $rulesOutput .= ' _emojis[EmojiModel::'.$group.'].append(Emoji{QString::fromUtf8("'.$emojiSequence.'"), QStringLiteral("'.self::escapeCppString($text)."\")});\n";
            }

            // NOTE(review): the two bare "#include" lines and the "QHash>" types below look like
            // they lost their angle-bracket arguments, and the emitted text never closes
            // langEmojiMap() nor returns _emojis. The template is reproduced unchanged here;
            // confirm the intended C++ before relying on the generated files.
            $content = "// SPDX-FileCopyrightText: None\n"
                ."// SPDX-License-Identifier: LGPL-2.0-or-later\n"
                ."// This file is auto-generated. All changes will be lost. See tools/README.md\n"
                ."// clang-format off\n\n"
                ."#include \n"
                ."#include \n"
                ."#include \"../../emojimap.h\"\n\n"
                ."class {$locale}EmojiMap: public EmojiMap {\n\n"
                ."public:\n"
                ." QHash> langEmojiMap()\n"
                ." {\n"
                ." QHash> _emojis;\n"
                .$rulesOutput
                ."};\n";

            $target = self::TARGET_DIR.$locale.'.cpp';
            if (false === file_put_contents($target, $content)) {
                throw new \RuntimeException(sprintf('Could not write "%s".', $target));
            }
        }
    }

    /**
     * Escapes a text so it can sit between the double quotes of a C++ string literal.
     */
    private static function escapeCppString(string $text): string
    {
        return strtr($text, [
            '\\' => '\\\\',
            '"' => '\\"',
            "\n" => '\\n',
            "\r" => '\\r',
        ]);
    }

    /**
     * Checks that a transliterator rule can be created for the given emoji.
     *
     * @throws \RuntimeException when \Transliterator::createFromRules() rejects the rule
     */
    private static function testEmoji(string $emoji, string $locale): void
    {
        if (null === \Transliterator::createFromRules("\\$emoji > test ;")) {
            throw new \RuntimeException(sprintf('Could not create transliterator for "%s" in "%s" locale. Error: "%s".', $emoji, $locale, intl_get_error_message()));
        }
    }

    /**
     * Flattens the per-length buckets into a single map, longest sequences first.
     *
     * @param array<int, array<string, array{0: string, 1: string}>> $maps code point count => [emoji => [name, group]]
     *
     * @return array<string, array{0: string, 1: string}>
     */
    private static function createRules(array $maps): array
    {
        // We must sort the maps by the number of code points, because the order really matters:
        // 🫶🏼 must be before 🫶
        krsort($maps);

        return array_merge(...$maps);
    }
}