From c9e32b8be95671340643c2e77a19af544af73266 Mon Sep 17 00:00:00 2001 From: Ryan Lovelett Date: Tue, 12 Jan 2016 17:07:27 -0500 Subject: [PATCH] [gyb] Force UTF-8 encoding when parsing templates on Linux Python 3 on Linux reads the system locale information to determine what it should use as the default encoding for strings read from files (this is different from OS X, which is always UTF-8 by default [1]). Since all the Swift gyb templates are UTF-8 encoded, there is effectively no reason to parse them as anything else. This patch forces the gyb template parser to read the template using UTF-8 encoding. It accounts for both reading from and writing to a file, as well as reading from stdin and writing to stdout. Two changes are of note. First, it now includes a __future__ import (unicode_literals) that should make Python 2 behave a little closer to Python 3 in terms of Unicode support. Second, Python 2 can no longer use cStringIO because it does not support Unicode [2]. To test this patch, I ran these commands before and after applying it. Note that before the patch, if the locale was set to something other than UTF-8 (ASCII, for instance), the Python 3 runs would fail. See [3] for an example failure message. 
Without stdin/stdout: $ python2 utils/gyb -o Arrays.2.7.swift stdlib/public/core/Arrays.swift.gyb $ python3 utils/gyb -o Arrays.3.5.swift stdlib/public/core/Arrays.swift.gyb $ diff -u Arrays.2.7.swift Arrays.3.5.swift With stdin/stdout: $ cat stdlib/public/core/Arrays.swift.gyb | python2 utils/gyb > Arrays.2.7.stdin.stdout.swift $ cat stdlib/public/core/Arrays.swift.gyb | python3 utils/gyb > Arrays.3.5.stdin.stdout.swift $ diff -u Arrays.2.7.stdin.stdout.swift Arrays.3.5.stdin.stdout.swift [1] https://docs.python.org/3/howto/unicode.html#unicode-filenames [2] https://docs.python.org/2/library/stringio.html#cStringIO.StringIO [3] https://lists.swift.org/pipermail/swift-dev/Week-of-Mon-20160111/000780.html --- utils/gyb.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/utils/gyb.py b/utils/gyb.py index 1f3e6e7..b96c5a9 100755 --- a/utils/gyb.py +++ b/utils/gyb.py @@ -3,16 +3,18 @@ # this one's short). See -h output for instructions from __future__ import print_function +from __future__ import unicode_literals import re try: - from cStringIO import StringIO + from StringIO import StringIO except ImportError: from io import StringIO import tokenize import textwrap from bisect import bisect import os +from io import open def getLineStarts(s): """Return a list containing the start index of each line in s. 
@@ -371,7 +373,7 @@ class ParseContext: def __init__(self, filename, template=None): self.filename = os.path.abspath(filename) if template is None: - with open(filename) as f: + with open(filename, 'r', encoding='utf-8') as f: self.template = f.read() else: self.template = template @@ -1045,8 +1047,8 @@ def main(): help='''Bindings to be set in the template's execution context''' ) - parser.add_argument('file', type=argparse.FileType(), help='Path to GYB template file (defaults to stdin)', nargs='?', default=sys.stdin) - parser.add_argument('-o', dest='target', type=argparse.FileType('w'), help='Output file (defaults to stdout)', default=sys.stdout) + parser.add_argument('file', help='Path to GYB template file (defaults to stdin)', nargs='?', default=sys.stdin.fileno()) + parser.add_argument('-o', dest='target', help='Output file (defaults to stdout)', default=sys.stdout.fileno()) parser.add_argument('--test', action='store_true', default=False, help='Run a self-test') parser.add_argument('--verbose-test', action='store_true', default=False, help='Run a verbose self-test') parser.add_argument('--dump', action='store_true', default=False, help='Dump the parsed template to stdout') @@ -1061,14 +1063,14 @@ def main(): sys.exit(1) bindings = dict( x.split('=', 1) for x in args.defines ) - ast = parseTemplate(args.file.name, args.file.read()) + ast = parseTemplate(str(args.file), open(args.file, 'r', encoding='utf-8').read()) if args.dump: print(ast) # Allow the template to import .py files from its own directory - sys.path = [os.path.split(args.file.name)[0] or '.'] + sys.path - - args.target.write(executeTemplate(ast, args.line_directive, **bindings)) + sys.path = [os.path.split(str(args.file))[0] or '.'] + sys.path + + open(args.target, 'w+', encoding='utf-8').write(executeTemplate(ast, args.line_directive, **bindings)) if __name__ == '__main__': main() -- 2.7.0