Files
stepanalyser/.venv/lib/python3.12/site-packages/ezdxf/lldxf/encoding.py
Christian Anetzberger a197de9456 initial
2026-01-22 20:23:51 +01:00

86 lines
2.6 KiB
Python

# Copyright (c) 2016-2023, Manfred Moitzi
# License: MIT License
import re
import codecs
import binascii
surrogate_escape = codecs.lookup_error("surrogateescape")
BACKSLASH_UNICODE = re.compile(r"(\\U\+[A-F0-9]{4})")
MIF_ENCODED = re.compile(r"(\\M\+[1-5][A-F0-9]{4})")
def dxf_backslash_replace(exc: Exception):
if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
s = ""
# mypy does not recognize properties: exc.start, exc.end, exc.object
for c in exc.object[exc.start : exc.end]:
x = ord(c)
if x <= 0xFF:
s += "\\x%02x" % x
elif 0xDC80 <= x <= 0xDCFF:
# Delegate surrogate handling:
return surrogate_escape(exc)
elif x <= 0xFFFF:
s += "\\U+%04x" % x
else:
s += "\\U+%08x" % x
return s, exc.end
else:
raise TypeError(f"Can't handle {exc.__class__.__name__}")
def encode(s: str, encoding="utf8") -> bytes:
"""Shortcut to use the correct error handler"""
return s.encode(encoding, errors="dxfreplace")
def _decode(s: str) -> str:
if s.startswith(r"\U+"):
return chr(int(s[3:], 16))
else:
return s
def has_dxf_unicode(s: str) -> bool:
"""Returns ``True`` if string `s` contains ``\\U+xxxx`` encoded characters."""
return bool(re.search(BACKSLASH_UNICODE, s))
def decode_dxf_unicode(s: str) -> str:
"""Decode ``\\U+xxxx`` encoded characters."""
return "".join(_decode(part) for part in re.split(BACKSLASH_UNICODE, s))
def has_mif_encoding(s: str) -> bool:
"""Returns ``True`` if string `s` contains MIF encoded (``\\M+cxxxx``) characters.
"""
return bool(re.search(MIF_ENCODED, s))
def decode_mif_to_unicode(s: str) -> str:
"""Decode MIF encoded characters ``\\M+cxxxx``."""
return "".join(_decode_mif(part) for part in re.split(MIF_ENCODED, s))
MIF_CODE_PAGE = {
# See https://docs.intellicad.org/files/oda/2021_11/oda_drawings_docs/frames.html?frmname=topic&frmfile=FontHandling.html
"1": "cp932", # Japanese (Shift-JIS)
"2": "cp950", # Traditional Chinese (Big 5)
"3": "cp949", # Wansung (KS C-5601-1987)
"4": "cp1391", # Johab (KS C-5601-1992)
"5": "cp936", # Simplified Chinese (GB 2312-80)
}
def _decode_mif(s: str) -> str:
if s.startswith(r"\M+"):
try:
code_page = MIF_CODE_PAGE[s[3]]
codec = codecs.lookup(code_page)
byte_data = binascii.unhexlify(s[4:])
return codec.decode(byte_data)[0]
except Exception:
pass
return s