Files
neochat/tools/update-emojis.py
Volker Krause 9f64457521 Optimize compile time of emoji data maps
Compiling the two files including the emoji data now doesn't take
significantly longer anymore than before. Size of the .text section
also drops by more than 1Mb, while runtime or memory cost remains
unchanged.

There's more that could be done here, but not having to wait minutes
for those files to build is worth it on its own already.
2026-02-16 14:38:14 -05:00

102 lines
3.6 KiB
Python
Executable File

#!/bin/python
# SPDX-FileCopyrightText: 2022 Tobias Fella <tobias.fella@kde.org>
# SPDX-FileCopyrightText: 2022 Gary Wang <wzc782970009@gmail.com>
# SPDX-License-Identifier: BSD-2-Clause
import requests
import re
def escape_sequence(unicode_str: str, codepoint_spliter: str) -> str:
codepoints = unicode_str.split(codepoint_spliter)
escape_sequence = ""
for codepoint in codepoints:
escape_sequence += "\\U" + codepoint.rjust(8, "0")
return escape_sequence
# GitLab uses the emoji shortnames from Gemojione
# See also: https://docs.gitlab.com/ee/development/fe_guide/emojis.html
gemojione = requests.get('https://raw.githubusercontent.com/bonusly/gemojione/master/config/index.json')
emoji_unicode_shortname_map = {}
gemojione_json = gemojione.json()
for (shortcode, props) in gemojione_json.items():
escaped_sequence = escape_sequence(props['unicode'], "-")
emoji_unicode_shortname_map[escaped_sequence] = shortcode
response = requests.get('https://unicode.org/Public/emoji/14.0/emoji-test.txt')
group = ""
file = open("../src/libneochat/emojis.h", "w")
# REUSE-IgnoreStart
file.write("// SPDX-FileCopyrightText: None\n")
file.write("// SPDX-License-Identifier: LGPL-2.0-or-later\n")
# REUSE-IgnoreEnd
file.write("// This file is auto-generated. All changes will be lost. See tools/update-emojis.py\n")
file.write("// clang-format off\n")
tones_file = open("../src/libneochat/emojitones_data.h", "w")
# REUSE-IgnoreStart
tones_file.write("// SPDX-FileCopyrightText: None\n")
tones_file.write("// SPDX-License-Identifier: LGPL-2.0-or-later\n")
# REUSE-IgnoreEnd
tones_file.write("// This file is auto-generated. All changes will be lost. See tools/update-emojis.py\n")
tones_file.write("// clang-format off\n")
for line in response.text.split("\n"):
if line.startswith("# group"):
raw_group = line.split(": ")[1]
if raw_group == "Activities":
group = "Activities"
elif raw_group == "Animals & Nature":
group = "Nature"
elif raw_group == "Component":
group = "Component"
elif raw_group == "Flags":
group = "Flags"
elif raw_group == "Food & Drink":
group = "Food"
elif raw_group == "Objects":
group = "Objects"
elif raw_group == "People & Body":
group = "People"
elif raw_group == "Smileys & Emotion":
group = "Smileys"
elif raw_group == "Symbols":
group = "Symbols"
elif raw_group == "Travel & Places":
group = "Travel"
else:
print("Unknown group:" + group)
group = ""
elif line.startswith("#") or line == "":
pass
else:
parts = line.split(";")
first = parts[0].strip()
escaped_sequence = escape_sequence(first, " ")
x = re.search(".*E[0-9]+.[0-9] ", parts[1])
description = parts[1].removeprefix(x.group())
shortcode = description
if "flag:" in description:
description = "Flag of " + description.split(": ")[1]
if "unqualified" in line or "minimally-qualified" in line:
continue
is_skin_tone = "skin tone" in description
if escaped_sequence in emoji_unicode_shortname_map:
shortcode = emoji_unicode_shortname_map[escaped_sequence]
emoji_args = 'u8"{0}", u8"{1}", u8"{2}"'.format(escaped_sequence, shortcode, description)
if is_skin_tone:
tones_file.write("{u8\"" + description.split(":")[0] + "\", " + emoji_args + "},\n")
continue
file.write("{EmojiModel::" + group + ", " + emoji_args + "},\n")
file.close()
tones_file.close()