-# Ensure that these are not unicode, which
-# can cause odd problems elsewhere. Note that
-# in python3, encode and decode do not return
-# strings, so we have to force the type.
-VERSION = V.encode ('ascii', 'ignore').decode ("utf-8")
-PROGRAM_VERSION = MAJOR.encode ('ascii', 'ignore').decode ("utf-8")
+
+def sanitize(s):
+ # Strip from s every character that ASCII cannot encode and return the
+ # result as the interpreter's native str type (see the notes below).
+ s = s.encode ('ascii', 'ignore').decode ("utf-8")
+ # Python 3: encode() turns text into bytes (here silently discarding what
+ # ASCII cannot represent) and decode() turns those bytes back into str,
+ # so s is already a str at this point and the check below is a no-op.
+ #
+ # Python 2: the same round-trip yields a unicode object, not a str.
+ # Python 2's str is a byte string (the counterpart of Python 3 bytes)
+ # and unicode is its text type (the counterpart of Python 3 str). Much
+ # Python 2 code was written with str in mind and does not handle
+ # unicode objects correctly, so to avoid strange problems later the
+ # value is converted back to str there. Only ASCII characters remain
+ # by then, so encoding as UTF-8 produces plain ASCII bytes and cannot
+ # fail.
+ if not isinstance(s, str):
+ s = s.encode("utf-8")
+ return s
+VERSION = sanitize(V)
+PROGRAM_VERSION = sanitize(MAJOR)
+del sanitize  # one-shot helper: unbind it so the name does not linger in this namespace