From e1a9154555486ec574b10d0d23e043f748f61683 Mon Sep 17 00:00:00 2001
From: Carsten Varming <varming@amazon.com>
Date: Fri, 4 Jan 2019 17:02:56 -0500
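Subject: [PATCH] Specify UTF-8 encoding in python build scripts

Replace the built-in open(path, 'r') calls in mk_genfile_common.py and
mk_util.py with io.open(path, encoding='utf-8', mode='r'), so that the
files these scripts scan are always decoded as UTF-8 instead of with
whatever default encoding the build environment provides.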
---
 scripts/mk_genfile_common.py | 7 ++++---
 scripts/mk_util.py           | 3 ++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/scripts/mk_genfile_common.py b/scripts/mk_genfile_common.py
index a65b41026..6354613fe 100644
--- a/scripts/mk_genfile_common.py
+++ b/scripts/mk_genfile_common.py
@@ -8,6 +8,7 @@
 # You should **not** import ``mk_util`` here
 # to avoid having this code depend on the
 # of the Python build system.
+import io
 import os
 import pprint
 import logging
@@ -622,7 +623,7 @@ def mk_gparams_register_modules_internal(h_files_full_path, path):
     reg_mod_descr_pat = re.compile('[ \t]*REG_MODULE_DESCRIPTION\(\'([^\']*)\', *\'([^\']*)\'\)')
     for h_file in sorted_headers_by_component(h_files_full_path):
         added_include = False
-        with open(h_file, 'r') as fin:
+        with io.open(h_file, encoding='utf-8', mode='r') as fin:
             for line in fin:
                 m = reg_pat.match(line)
                 if m:
@@ -696,7 +697,7 @@ def mk_install_tactic_cpp_internal(h_files_full_path, path):
     for h_file in sorted_headers_by_component(h_files_full_path):
         added_include = False
         try:
-            with open(h_file, 'r') as fin:
+            with io.open(h_file, encoding='utf-8', mode='r') as fin:
                 for line in fin:
                     if tactic_pat.match(line):
                         if not added_include:
@@ -764,7 +765,7 @@ def mk_mem_initializer_cpp_internal(h_files_full_path, path):
     finalizer_pat        = re.compile('[ \t]*ADD_FINALIZER\(\'([^\']*)\'\)')
     for h_file in sorted_headers_by_component(h_files_full_path):
         added_include = False
-        with open(h_file, 'r') as fin:
+        with io.open(h_file, encoding='utf-8', mode='r') as fin:
             for line in fin:
                 m = initializer_pat.match(line)
                 if m:
diff --git a/scripts/mk_util.py b/scripts/mk_util.py
index 9076b582f..6552a942f 100644
--- a/scripts/mk_util.py
+++ b/scripts/mk_util.py
@@ -6,6 +6,7 @@
 #
 # Author: Leonardo de Moura (leonardo)
 ############################################
+import io
 import sys
 import os
 import re
@@ -806,7 +807,7 @@ def extract_c_includes(fname):
     # We should generate and error for any occurrence of #include that does not match the previous pattern.
     non_std_inc_pat = re.compile(".*#include.*")
 
-    f = open(fname, 'r')
+    f = io.open(fname, encoding='utf-8', mode='r')
     linenum = 1
     for line in f:
         m1 = std_inc_pat.match(line)