unicode_utils.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657
  1. import unicodedata
  2. import sys
  3. import re
  4. from setuptools.extern import six
  5. # HFS Plus uses decomposed UTF-8
  6. def decompose(path):
  7. if isinstance(path, six.text_type):
  8. return unicodedata.normalize('NFD', path)
  9. try:
  10. path = path.decode('utf-8')
  11. path = unicodedata.normalize('NFD', path)
  12. path = path.encode('utf-8')
  13. except UnicodeError:
  14. pass # Not UTF-8
  15. return path
  16. def filesys_decode(path):
  17. """
  18. Ensure that the given path is decoded,
  19. NONE when no expected encoding works
  20. """
  21. if isinstance(path, six.text_type):
  22. return path
  23. fs_enc = sys.getfilesystemencoding() or 'utf-8'
  24. candidates = fs_enc, 'utf-8'
  25. for enc in candidates:
  26. try:
  27. return path.decode(enc)
  28. except UnicodeDecodeError:
  29. continue
  30. def try_encode(string, enc):
  31. "turn unicode encoding into a functional routine"
  32. try:
  33. return string.encode(enc)
  34. except UnicodeEncodeError:
  35. return None
  36. CODING_RE = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')
  37. def detect_encoding(fp):
  38. first_line = fp.readline()
  39. fp.seek(0)
  40. m = CODING_RE.match(first_line)
  41. if m is None:
  42. return None
  43. return m.group(1).decode('ascii')