File build-scripts-migration-to-python3.patch of Package mozc
From 38cd4a0fda5b963611a212b4debc0c9aa1c61538 Mon Sep 17 00:00:00 2001
From: Nobuhiro Iwamatsu <iwamatsu@debian.org>
Date: Sat, 23 May 2020 20:41:49 +0900
Subject: [PATCH] Change from python2 code to python3
The original patch was taken from:
https://github.com/google/mozc/issues/462
https://github.com/google/mozc/files/4048079/mozc-2.23.2815.102-python-3.patch.txt
Signed-off-by: Nobuhiro Iwamatsu <iwamatsu@debian.org>
---
src/android/gen_emoji_data.py | 224 +++++++++---------
src/android/gen_emoticon_data.py | 23 +-
src/android/gen_mozc_drawable.py | 10 +-
src/android/gen_touch_event_stats.py | 8 +-
src/android/run_android_test.py | 17 +-
src/base/gen_character_set.py | 13 +-
src/base/gen_config_file_stream_data.py | 6 +-
src/build_mozc.py | 20 +-
src/build_tools/android_util.py | 6 +-
src/build_tools/binary_size_checker.py | 8 +-
src/build_tools/build_and_sign_pkg_mac.py | 4 +-
src/build_tools/build_breakpad.py | 4 +-
src/build_tools/build_diskimage_mac.py | 2 +-
src/build_tools/change_reference_mac.py | 4 +-
src/build_tools/code_generator_util.py | 49 ++--
src/build_tools/codesign_mac.py | 26 +-
src/build_tools/copy_dll_and_symbol.py | 4 +-
src/build_tools/copy_file.py | 2 +-
src/build_tools/copy_qt_frameworks_mac.py | 6 +-
src/build_tools/embed_file.py | 38 +--
src/build_tools/embed_pathname.py | 6 +-
src/build_tools/ensure_gyp_module_path.py | 12 +-
src/build_tools/gen_win32_resource_header.py | 2 +-
src/build_tools/mozc_version.py | 17 +-
src/build_tools/redirect.py | 7 +-
src/build_tools/run_after_chdir.py | 2 +-
.../serialized_string_array_builder.py | 6 +-
src/build_tools/test_tools/gtest_report.py | 10 +-
src/build_tools/test_tools/test_launcher.py | 5 +-
src/build_tools/tweak_data.py | 2 +-
src/build_tools/tweak_info_plist.py | 6 +-
src/build_tools/tweak_info_plist_strings.py | 4 +-
src/build_tools/tweak_macinstaller_script.py | 2 +-
src/build_tools/tweak_pkgproj.py | 4 +-
src/build_tools/util.py | 6 +-
src/build_tools/zlib_util.py | 4 +-
src/chrome/nacl/nacl_net_test_server.py | 49 ++--
src/chrome/nacl/pnacl_translate.py | 24 +-
src/chrome/nacl/run_nacl_test.py | 2 +-
src/client/gen_client_quality_test_data.py | 44 ++--
src/composer/internal/gen_typing_model.py | 19 +-
src/converter/gen_boundary_data.py | 16 +-
.../gen_quality_regression_test_data.py | 22 +-
src/converter/gen_segmenter_code.py | 25 +-
src/data/test/calculator/gen_test.py | 64 +++--
src/data_manager/gen_connection_data.py | 33 ++-
src/dictionary/gen_pos_map.py | 18 +-
src/dictionary/gen_pos_rewrite_rule.py | 19 +-
src/dictionary/gen_suffix_data.py | 8 +-
src/dictionary/gen_user_pos_data.py | 4 +-
src/dictionary/gen_zip_code_seed.py | 22 +-
src/dictionary/zip_code_util.py | 10 +-
src/gui/character_pad/data/gen_cp932_map.py | 26 +-
.../data/gen_local_character_map.py | 27 ++-
.../character_pad/data/gen_unicode_blocks.py | 15 +-
.../character_pad/data/gen_unicode_data.py | 21 +-
src/gui/character_pad/data/gen_unihan_data.py | 43 ++--
src/mac/generate_mapping.py | 36 +--
src/prediction/gen_zero_query_data.py | 50 ++--
src/prediction/gen_zero_query_number_data.py | 10 +-
src/prediction/gen_zero_query_util.py | 2 +-
src/rewriter/gen_counter_suffix_array.py | 2 +-
src/rewriter/gen_emoji_rewriter_data.py | 40 ++--
src/rewriter/gen_reading_correction_data.py | 4 +-
.../gen_single_kanji_rewriter_data.py | 8 +-
src/session/gen_session_stress_test_data.py | 18 +-
src/unix/ibus/gen_mozc_xml.py | 42 ++--
src/usage_stats/gen_stats_list.py | 21 +-
src/win32/installer/postbuilds_win.py | 16 +-
69 files changed, 685 insertions(+), 644 deletions(-)
diff --git a/src/android/gen_emoji_data.py b/src/android/gen_emoji_data.py
index 8a351272..385a030a 100644
--- a/src/android/gen_emoji_data.py
+++ b/src/android/gen_emoji_data.py
@@ -48,23 +48,23 @@ from build_tools import code_generator_util
# We assign 100,000 and greater values for carrier emoji, so "offset" should be
# less than 100,000.
_CATEGORY_MAP = {
- 'SMILEY_PEOPLE': {'category': 'FACE', 'offset': 0},
- 'ANIMALS_NATURE': {'category': 'FOOD', 'offset': 0},
- 'FOOD_DRINK': {'category': 'FOOD', 'offset': 10000},
- 'TRAVEL_PLACES': {'category': 'CITY', 'offset': 0},
- 'ACTIVITY': {'category': 'ACTIVITY', 'offset': 0},
- 'OBJECTS': {'category': 'ACTIVITY', 'offset': 10000},
- 'SYMBOLS': {'category': 'NATURE', 'offset': 0},
- 'FLAGS': {'category': 'NATURE', 'offset': 10000},
+ b'SMILEY_PEOPLE': {'category': b'FACE', 'offset': 0},
+ b'ANIMALS_NATURE': {'category': b'FOOD', 'offset': 0},
+ b'FOOD_DRINK': {'category': b'FOOD', 'offset': 10000},
+ b'TRAVEL_PLACES': {'category': b'CITY', 'offset': 0},
+ b'ACTIVITY': {'category': b'ACTIVITY', 'offset': 0},
+ b'OBJECTS': {'category': b'ACTIVITY', 'offset': 10000},
+ b'SYMBOLS': {'category': b'NATURE', 'offset': 0},
+ b'FLAGS': {'category': b'NATURE', 'offset': 10000},
}
-_CATEGORY_LIST = list(set(
- [entry['category'] for entry in _CATEGORY_MAP.itervalues()]))
+_CATEGORY_LIST = sorted(set(
+ [entry['category'] for entry in _CATEGORY_MAP.values()]))
def ReadData(stream):
category_map = defaultdict(list)
stream = code_generator_util.SkipLineComment(stream)
- stream = code_generator_util.ParseColumnStream(stream, delimiter='\t')
+ stream = code_generator_util.ParseColumnStream(stream, delimiter=b'\t')
stream = code_generator_util.SelectColumn(stream, [0, 2, 8, 9, 10, 11, 12])
for (code, pua_code, japanese_name, docomo_name, softbank_name, kddi_name,
category_index) in stream:
@@ -76,19 +76,19 @@ def ReadData(stream):
sys.exit(-1)
if not code:
# Use dummy code point
- code = '0'
+ code = b'0'
if not pua_code:
# Use dummy code point
- pua_code = '0'
- if pua_code[0] == '>':
+ pua_code = b'0'
+ if pua_code[0:1] == b'>':
# Don't skip entires which has non-primary PUA codepoint since they also
# has unique Unicode codepoint.
# e.g. "BLACK SQUARE BUTTON" and "LARGE BLUE CIRCLE"
pua_code = pua_code[1:]
- code_values = [int(c, 16) for c in re.split(r' +', code.strip())]
+ code_values = [int(c, 16) for c in re.split(br' +', code.strip())]
pua_code_value = int(pua_code, 16)
- (category, index) = category_index.split('-')
+ (category, index) = category_index.split(b'-')
index = int(index) + _CATEGORY_MAP[category]['offset']
category = _CATEGORY_MAP[category]['category']
category_map[category].append(
@@ -98,92 +98,92 @@ def ReadData(stream):
_CHARACTER_NORMALIZE_MAP = {
- u'A': 'A',
- u'B': 'B',
- u'C': 'C',
- u'D': 'D',
- u'E': 'E',
- u'F': 'F',
- u'G': 'G',
- u'H': 'H',
- u'I': 'I',
- u'J': 'J',
- u'K': 'K',
- u'L': 'L',
- u'M': 'M',
- u'N': 'N',
- u'O': 'O',
- u'P': 'P',
- u'Q': 'Q',
- u'R': 'R',
- u'S': 'S',
- u'T': 'T',
- u'U': 'U',
- u'V': 'V',
- u'W': 'W',
- u'X': 'X',
- u'Y': 'Y',
- u'Z': 'Z',
-
- u'a': 'a',
- u'b': 'b',
- u'c': 'c',
- u'd': 'd',
- u'e': 'e',
- u'f': 'f',
- u'g': 'g',
- u'h': 'h',
- u'i': 'i',
- u'j': 'j',
- u'k': 'k',
- u'l': 'l',
- u'm': 'm',
- u'n': 'n',
- u'o': 'o',
- u'p': 'p',
- u'q': 'q',
- u'r': 'r',
- u's': 's',
- u't': 't',
- u'u': 'u',
- u'v': 'v',
- u'w': 'w',
- u'x': 'x',
- u'y': 'y',
- u'z': 'z',
-
- u'0': '0',
- u'1': '1',
- u'2': '2',
- u'3': '3',
- u'4': '4',
- u'5': '5',
- u'6': '6',
- u'7': '7',
- u'8': '8',
- u'9': '9',
-
- u'(': '(',
- u')': ')',
+ 'A': 'A',
+ 'B': 'B',
+ 'C': 'C',
+ 'D': 'D',
+ 'E': 'E',
+ 'F': 'F',
+ 'G': 'G',
+ 'H': 'H',
+ 'I': 'I',
+ 'J': 'J',
+ 'K': 'K',
+ 'L': 'L',
+ 'M': 'M',
+ 'N': 'N',
+ 'O': 'O',
+ 'P': 'P',
+ 'Q': 'Q',
+ 'R': 'R',
+ 'S': 'S',
+ 'T': 'T',
+ 'U': 'U',
+ 'V': 'V',
+ 'W': 'W',
+ 'X': 'X',
+ 'Y': 'Y',
+ 'Z': 'Z',
+
+ 'a': 'a',
+ 'b': 'b',
+ 'c': 'c',
+ 'd': 'd',
+ 'e': 'e',
+ 'f': 'f',
+ 'g': 'g',
+ 'h': 'h',
+ 'i': 'i',
+ 'j': 'j',
+ 'k': 'k',
+ 'l': 'l',
+ 'm': 'm',
+ 'n': 'n',
+ 'o': 'o',
+ 'p': 'p',
+ 'q': 'q',
+ 'r': 'r',
+ 's': 's',
+ 't': 't',
+ 'u': 'u',
+ 'v': 'v',
+ 'w': 'w',
+ 'x': 'x',
+ 'y': 'y',
+ 'z': 'z',
+
+ '0': '0',
+ '1': '1',
+ '2': '2',
+ '3': '3',
+ '4': '4',
+ '5': '5',
+ '6': '6',
+ '7': '7',
+ '8': '8',
+ '9': '9',
+
+ '(': '(',
+ ')': ')',
}
def PreprocessName(name):
if not name:
- return 'null'
- name = unicode(name, 'utf-8')
- name = u''.join(_CHARACTER_NORMALIZE_MAP.get(c, c) for c in name)
+ return b'null'
+ name = str(name, 'utf-8')
+ name = ''.join(_CHARACTER_NORMALIZE_MAP.get(c, c) for c in name)
name = name.encode('utf-8')
- name = name.replace('(', '\\n(')
- return '"%s"' % name
+ name = name.replace(b'(', b'\\n(')
+ return b'"%b"' % name
def OutputData(category_map, stream):
- for data_list in category_map.itervalues():
+ for data_list in category_map.values():
data_list.sort()
- stream.write('package org.mozc.android.inputmethod.japanese.emoji;\n'
- 'public class EmojiData {\n')
+ stream.write(b'package org.mozc.android.inputmethod.japanese.emoji;\n'
+ b'public class EmojiData {\n')
for category in _CATEGORY_LIST:
# The content of data list is
@@ -197,45 +197,45 @@ def OutputData(category_map, stream):
data_list = [c for c in category_map[category]
if c[3] or c[4] or c[5] or c[6]]
stream.write(
- ' public static final String[] %s_VALUES = new String[]{\n' %
+ b' public static final String[] %b_VALUES = new String[]{\n' %
category)
for _, codes, pua_code, japanese, docomo, softbank, kddi in data_list:
- stream.write(' %s,\n' % code_generator_util.ToJavaStringLiteral(codes))
- stream.write(' };\n')
+ stream.write(b' %b,\n' % code_generator_util.ToJavaStringLiteral(codes))
+ stream.write(b' };\n')
stream.write(
- ' public static final String[] %s_PUA_VALUES = new String[]{\n' %
+ b' public static final String[] %b_PUA_VALUES = new String[]{\n' %
category)
for _, codes, pua_code, japanese, docomo, softbank, kddi in data_list:
stream.write(
- ' %s,\n' % code_generator_util.ToJavaStringLiteral(pua_code))
- stream.write(' };\n')
+ b' %b,\n' % code_generator_util.ToJavaStringLiteral(pua_code))
+ stream.write(b' };\n')
stream.write(
- ' public static final String[] UNICODE_%s_NAME = {\n' % category)
+ b' public static final String[] UNICODE_%b_NAME = {\n' % category)
for _, codes, pua_code, japanese, docomo, softbank, kddi in data_list:
- stream.write(' %s, \n' % PreprocessName(japanese))
- stream.write(' };\n')
+ stream.write(b' %b, \n' % PreprocessName(japanese))
+ stream.write(b' };\n')
stream.write(
- ' public static final String[] DOCOMO_%s_NAME = {\n' % category)
+ b' public static final String[] DOCOMO_%b_NAME = {\n' % category)
for _, codes, pua_code, japanese, docomo, softbank, kddi in data_list:
- stream.write(' %s, \n' % PreprocessName(docomo))
- stream.write(' };\n')
+ stream.write(b' %b, \n' % PreprocessName(docomo))
+ stream.write(b' };\n')
stream.write(
- ' public static final String[] SOFTBANK_%s_NAME = {\n' % category)
+ b' public static final String[] SOFTBANK_%b_NAME = {\n' % category)
for _, codes, pua_code, japanese, docomo, softbank, kddi in data_list:
- stream.write(' %s, \n' % PreprocessName(softbank))
- stream.write(' };\n')
+ stream.write(b' %b, \n' % PreprocessName(softbank))
+ stream.write(b' };\n')
stream.write(
- ' public static final String[] KDDI_%s_NAME = {\n' % category)
+ b' public static final String[] KDDI_%b_NAME = {\n' % category)
for _, codes, pua_code, japanese, docomo, softbank, kddi in data_list:
- stream.write(' %s, \n' % PreprocessName(kddi))
- stream.write(' };\n')
+ stream.write(b' %b, \n' % PreprocessName(kddi))
+ stream.write(b' };\n')
- stream.write('}\n')
+ stream.write(b'}\n')
def ParseOptions():
@@ -248,10 +248,10 @@ def ParseOptions():
def main():
options = ParseOptions()
- with open(options.emoji_data) as stream:
+ with open(options.emoji_data, 'rb') as stream:
emoji_data = ReadData(stream)
- with open(options.output, 'w') as stream:
+ with open(options.output, 'wb') as stream:
OutputData(emoji_data, stream)
diff --git a/src/android/gen_emoticon_data.py b/src/android/gen_emoticon_data.py
index 5e6878b6..310e5e8e 100644
--- a/src/android/gen_emoticon_data.py
+++ b/src/android/gen_emoticon_data.py
@@ -38,7 +38,6 @@ The first line of .tsv file is ignored because it is used as label.
__author__ = "matsuzakit"
-import io
import optparse
import sys
import unicodedata
@@ -58,7 +57,8 @@ def ParseSymbolFile(file_name, value_column, category_column,
"""Parses symbol file and returns tag->symbols dictionary."""
tag2symbol = {}
is_first_line = True
- for line in io.open(file_name, encoding='utf-8'):
+ file = open(file_name, encoding='utf-8')
+ for line in file:
line_parts = line.rstrip().split('\t')
if is_first_line:
# Skip the first line, which is used as label.
@@ -77,6 +77,7 @@ def ParseSymbolFile(file_name, value_column, category_column,
tag2symbol.setdefault(tag, []).append(symbol)
if tag in expand_variant_tags and symbol != normalized:
tag2symbol[tag].append(normalized)
+ file.close()
return tag2symbol
@@ -110,7 +111,7 @@ def GetStringArrayOfSymbols(tag_name, original_symbols, ordering_rule_list):
else:
symbols = original_symbols
- _ESCAPE = (u'"', u'\\')
+ _ESCAPE = ('"', '\\')
for symbol in symbols:
# Escape characters (defined above) have to be escaped by backslashes.
# e.g.
@@ -125,7 +126,7 @@ def GetStringArrayOfSymbols(tag_name, original_symbols, ordering_rule_list):
# which include '\u0022' will terminate here.
# They are not what we want so before such characters we place '\'
# in order to escape them.
- line = ['%s\\u%04x' % ('' if c not in _ESCAPE else '\u005c', ord(c))
+ line = ['%s\\u%04x' % ('' if c not in _ESCAPE else '\\u005c', ord(c))
for c in symbol]
# The white space is quick fix for the backslash at the tail of symbol.
lines.append(' "%s", // %s ' % (''.join(line), symbol))
@@ -135,8 +136,8 @@ def GetStringArrayOfSymbols(tag_name, original_symbols, ordering_rule_list):
def WriteOut(output, tag2symbol, class_name, ordering_rule_list):
body = [GetStringArrayOfSymbols(tag, symbols, ordering_rule_list)
- for tag, symbols in tag2symbol.iteritems()]
- with io.open(output, 'w', encoding='utf-8') as out_file:
+ for tag, symbols in sorted(tag2symbol.items())]
+ with open(output, 'w', encoding='utf-8') as out_file:
out_file.write(TEMPLATE_CLASS % (PACKAGE_NAME, class_name, '\n'.join(body)))
@@ -162,12 +163,14 @@ def ParseOption():
def CreateOrderingRuleList(file_name):
ordering_rule_list = []
- for line in io.open(file_name, encoding='utf-8'):
+ file = open(file_name, encoding='utf-8')
+ for line in file:
# Do not forget such line of which content is ' '.
# Such line has to be appended into the list.
- if not line.startswith(u'# ') and not line.startswith(u'\n'):
- value = line.rstrip(u'\r\n')
+ if not line.startswith('# ') and not line.startswith('\n'):
+ value = line.rstrip('\r\n')
ordering_rule_list.append(value)
+ file.close()
return ordering_rule_list
@@ -176,7 +179,7 @@ def main():
if not (options.input and options.output and options.class_name and
options.value_column is not None and
options.category_column is not None):
- print 'Some options cannot be omitted. See --help.'
+ print('Some options cannot be omitted. See --help.')
sys.exit(1)
tag2symbol = ParseSymbolFile(options.input,
options.value_column,
diff --git a/src/android/gen_mozc_drawable.py b/src/android/gen_mozc_drawable.py
index f3e4cbb4..0a738f95 100644
--- a/src/android/gen_mozc_drawable.py
+++ b/src/android/gen_mozc_drawable.py
@@ -41,14 +41,14 @@ and it'd cause an crash error, unfortunately.
__author__ = "hidehiko"
-import cStringIO as StringIO
+import io
import logging
import optparse
import os
import re
import struct
import sys
-from xml.etree import cElementTree as ElementTree
+from xml.etree import ElementTree
from build_tools import util
@@ -289,7 +289,7 @@ class MozcDrawableConverter(object):
def _ConsumeFloatList(self, s, num):
"""Parses num floating values from s."""
result = []
- for _ in xrange(num):
+ for _ in range(num):
value, s = self._ConsumeFloat(s)
result.append(value)
return result, s
@@ -1035,12 +1035,12 @@ class MozcDrawableConverter(object):
# Interface for drawable conversion.
def ConvertPictureDrawable(self, path):
- output = _OutputStream(StringIO.StringIO())
+ output = _OutputStream(io.BytesIO())
self._ConvertPictureDrawableInternal(ElementTree.parse(path), output)
return output.output.getvalue()
def ConvertStateListDrawable(self, drawable_source_list):
- output = _OutputStream(StringIO.StringIO())
+ output = _OutputStream(io.BytesIO())
output.WriteByte(DRAWABLE_STATE_LIST)
output.WriteByte(len(drawable_source_list))
for (state_list, path) in drawable_source_list:
diff --git a/src/android/gen_touch_event_stats.py b/src/android/gen_touch_event_stats.py
index 4916666f..9edfc31b 100644
--- a/src/android/gen_touch_event_stats.py
+++ b/src/android/gen_touch_event_stats.py
@@ -43,7 +43,7 @@ import csv
import optparse
import os
import struct
-import urllib
+import urllib.parse
def ReadCollectedKeyboards(stream):
@@ -112,7 +112,7 @@ def WriteKeyboardData(keyboard_value, stream):
# c.f. usage_stats/usage_stats_uploader.cc
keys = ('sxa', 'sya', 'sxv', 'syv', 'dxa', 'dya', 'dxv', 'dyv')
stream.write(struct.pack('>i', len(keyboard_value)))
- for source_id, source_value in keyboard_value.iteritems():
+ for source_id, source_value in sorted(keyboard_value.items()):
stream.write(struct.pack('>i', source_id))
# Note that we are calculating
# "Average of average" and "Average of variance".
@@ -124,10 +124,10 @@ def WriteKeyboardData(keyboard_value, stream):
def WriteData(stats, output_dir):
- for base_name_orientation in stats.iterkeys():
+ for base_name_orientation in stats.keys():
with open(os.path.join(output_dir,
'%s_%s.touch_stats' % (
- urllib.unquote(base_name_orientation[0]),
+ urllib.parse.unquote(base_name_orientation[0]),
base_name_orientation[1])),
'wb') as stream:
WriteKeyboardData(stats[base_name_orientation], stream)
diff --git a/src/android/run_android_test.py b/src/android/run_android_test.py
index ebb98638..79865a51 100644
--- a/src/android/run_android_test.py
+++ b/src/android/run_android_test.py
@@ -44,8 +44,9 @@ import multiprocessing
import optparse
import os
import subprocess
+import sys
import time
-from xml.etree import cElementTree as ElementTree
+from xml.etree import ElementTree
from build_tools import android_util
from build_tools.test_tools import gtest_report
@@ -142,9 +143,9 @@ def AppendPrefixToSuiteName(in_file_name, out_file_name, prefix):
AppendPrefix(root)
for elem in root.findall('testsuite'):
AppendPrefix(elem)
- with open(out_file_name, 'w') as f:
+ with open(out_file_name, 'wb') as f:
# Note that ElementTree of 2.6 doesn't write XML declaration.
- f.write('<?xml version="1.0" encoding="utf-8"?>\n')
+ f.write(b'<?xml version="1.0" encoding="utf-8"?>\n')
f.write(ElementTree.tostring(root, 'utf-8'))
@@ -156,7 +157,7 @@ class AndroidDevice(android_util.AndroidDevice):
"""Wait until SD card is mounted."""
retry = 10
sleep = 30
- for _ in xrange(retry):
+ for _ in range(retry):
if self._RunCommand('mount').find('/sdcard') != -1:
self.GetLogger().info('SD card has been mounted.')
return
@@ -458,7 +459,7 @@ def main():
if not options.android_home:
logging.error('--android_home option must be specified.')
- os.exit(1)
+ sys.exit(1)
if options.run_native_test:
binaries = FindTestBinaries(options.test_bin_dir)
@@ -501,11 +502,11 @@ def main():
# result.get() blocks until the test terminates.
error_messages = [result.get() for result in results if result.get()]
if error_messages:
- print '[FAIL] Native tests result : Test failures are found;'
+ print('[FAIL] Native tests result : Test failures are found;')
for message in error_messages:
- print message
+ print(message)
else:
- print '[ OK ] Native tests result : Tests scceeded.'
+    print('[ OK ] Native tests result : Tests succeeded.')
if __name__ == '__main__':
diff --git a/src/base/gen_character_set.py b/src/base/gen_character_set.py
index bb39c472..7ac5b522 100644
--- a/src/base/gen_character_set.py
+++ b/src/base/gen_character_set.py
@@ -33,7 +33,6 @@ __author__ = "taku"
import itertools
import optparse
import re
-import string
import sys
@@ -89,7 +88,8 @@ class CodePointCategorizer(object):
@staticmethod
def _LoadTable(filename, column_index, pattern, validater):
result = set()
- for line in open(filename):
+ fh = open(filename)
+ for line in fh:
if line.startswith('#'):
# Skip a comment line.
continue
@@ -100,6 +100,7 @@ class CodePointCategorizer(object):
ucs = int(match.group(1), 16)
if validater(ucs):
result.add(ucs)
+ fh.close()
return result
@@ -250,7 +251,7 @@ def GenerateCategoryBitmap(category_list, name):
# (at most) four code points.
bit_list = []
for _, group in itertools.groupby(enumerate(category_list),
- lambda (codepoint, _): codepoint / 4):
+ lambda x: x[0] // 4):
# Fill bits from LSB to MSB for each group.
bits = 0
for index, (_, category) in enumerate(group):
@@ -263,7 +264,7 @@ def GenerateCategoryBitmap(category_list, name):
# Output the content. Each line would have (at most) 16 bytes.
for _, group in itertools.groupby(enumerate(bit_list),
- lambda (index, _): index / 16):
+ lambda x: x[0] // 16):
line = [' \"']
for _, bits in group:
line.append('\\x%02X' % bits)
@@ -386,7 +387,7 @@ def GenerateGetCharacterSet(category_list, bitmap_name, bitmap_size):
# Bitmap lookup.
# TODO(hidehiko): the bitmap has two huge 0-bits ranges. Reduce them.
category_map = [
- (bits, category) for category, bits in CATEGORY_BITMAP.iteritems()]
+ (bits, category) for category, bits in CATEGORY_BITMAP.items()]
category_map.sort()
lines.extend([
@@ -451,7 +452,7 @@ def main():
options.jisx0213file)
category_list = [
categorizer.GetCategory(codepoint)
- for codepoint in xrange(categorizer.MaxCodePoint() + 1)]
+ for codepoint in range(categorizer.MaxCodePoint() + 1)]
generated_character_set_header = GenerateCharacterSetHeader(category_list)
# Write the result.
diff --git a/src/base/gen_config_file_stream_data.py b/src/base/gen_config_file_stream_data.py
index 2a7af1a2..53832d18 100644
--- a/src/base/gen_config_file_stream_data.py
+++ b/src/base/gen_config_file_stream_data.py
@@ -58,7 +58,7 @@ def GenerateFileData(path):
result = []
result.append(' { "%s", "' % os.path.basename(path))
with open(path, 'rb') as stream:
- result.extend(r'\x%02X' % ord(byte) for byte in stream.read())
+ result.extend(r'\x%02X' % byte for byte in stream.read())
result.append('", %d }' % os.path.getsize(path))
return ''.join(result)
@@ -93,8 +93,8 @@ def OutputConfigFileStreamData(path_list, output):
def main():
(options, args) = ParseOptions()
if not options.output:
- print >>sys.stderr, (
- 'usage: gen_config_file_stream_data.py --output=filepath input ...')
+ print('usage: gen_config_file_stream_data.py --output=filepath input ...',
+ file=sys.stderr)
sys.exit(2)
with open(options.output, 'w') as output:
diff --git a/src/build_mozc.py b/src/build_mozc.py
index 46fa0827..89d9c939 100644
--- a/src/build_mozc.py
+++ b/src/build_mozc.py
@@ -943,7 +943,7 @@ def RunTests(target_platform, configuration, parallel_num):
logging.info('running %s...', binary)
try:
test_function(binary, gtest_report_dir, options)
- except RunOrDieError, e:
+ except RunOrDieError as e:
logging.error(e)
failed_tests.append(binary)
else:
@@ -1082,7 +1082,7 @@ def RunTestsMain(options, args):
# and '-c' and 'Release' are build options.
targets = []
build_options = []
- for i in xrange(len(args)):
+ for i in range(len(args)):
if args[i].startswith('-'):
# starting with build options
build_options = args[i:]
@@ -1190,14 +1190,14 @@ def CleanMain(options, unused_args):
def ShowHelpAndExit():
"""Shows the help message."""
- print 'Usage: build_mozc.py COMMAND [ARGS]'
- print 'Commands: '
- print ' gyp Generate project files.'
- print ' build Build the specified target.'
- print ' runtests Build all tests and run them.'
- print ' clean Clean all the build files and directories.'
- print ''
- print 'See also the comment in the script for typical usage.'
+ print('Usage: build_mozc.py COMMAND [ARGS]')
+ print('Commands: ')
+ print(' gyp Generate project files.')
+ print(' build Build the specified target.')
+ print(' runtests Build all tests and run them.')
+ print(' clean Clean all the build files and directories.')
+ print('')
+ print('See also the comment in the script for typical usage.')
sys.exit(1)
diff --git a/src/build_tools/android_util.py b/src/build_tools/android_util.py
index 5ea3da31..447ee781 100644
--- a/src/build_tools/android_util.py
+++ b/src/build_tools/android_util.py
@@ -548,7 +548,7 @@ def GetAvailableEmulatorPorts(android_home):
(devices_result, _) = process.communicate()
used_ports = set(int(port) for port
in re.findall(r'emulator-(\d+)', devices_result))
- return [port for port in xrange(5554, 5586, 2) if port not in used_ports]
+ return [port for port in range(5554, 5586, 2) if port not in used_ports]
def SetUpTestingSdkHomeDirectory(dest_android_sdk_home,
@@ -575,7 +575,7 @@ def SetUpTestingSdkHomeDirectory(dest_android_sdk_home,
'create', 'avd',
'--force',
'--sdcard', '512M',]
- for key, value in options.iteritems():
+ for key, value in options.items():
args.extend([key, value])
env = {'ANDROID_SDK_HOME': os.path.abspath(dest_android_sdk_home)}
logging.info('Creating AVD: %s', args)
@@ -615,7 +615,7 @@ def GetAvdProperties(android_sdk_home, avd_name, my_open=open):
def main():
for arg in sys.argv[1:]:
for item in sorted(GetApkProperties(arg).items()):
- print '%s: %s' % item
+ print('%s: %s' % item)
if __name__ == '__main__':
diff --git a/src/build_tools/binary_size_checker.py b/src/build_tools/binary_size_checker.py
index e53dc177..0f5e8b25 100644
--- a/src/build_tools/binary_size_checker.py
+++ b/src/build_tools/binary_size_checker.py
@@ -70,12 +70,12 @@ def CheckFileSize(filename):
actual_size = os.stat(filename).st_size
expected_size = EXPECTED_MAXIMUM_SIZES[basename]
if actual_size < expected_size * 1024 * 1024:
- print 'Pass: %s (size: %d) is smaller than expected (%d MB)' % (
- filename, actual_size, expected_size)
+ print('Pass: %s (size: %d) is smaller than expected (%d MB)' % (
+ filename, actual_size, expected_size))
return True
else:
- print 'WARNING: %s (size: %d) is larger than expected (%d MB)' % (
- filename, actual_size, expected_size)
+ print('WARNING: %s (size: %d) is larger than expected (%d MB)' % (
+ filename, actual_size, expected_size))
return False
diff --git a/src/build_tools/build_and_sign_pkg_mac.py b/src/build_tools/build_and_sign_pkg_mac.py
index 59a1f70f..1cced535 100644
--- a/src/build_tools/build_and_sign_pkg_mac.py
+++ b/src/build_tools/build_and_sign_pkg_mac.py
@@ -44,8 +44,8 @@ import os
import shutil
import sys
-from util import PrintErrorAndExit
-from util import RunOrDie
+from .util import PrintErrorAndExit
+from .util import RunOrDie
def ParseOption():
diff --git a/src/build_tools/build_breakpad.py b/src/build_tools/build_breakpad.py
index 291c8ae0..92fee9d7 100644
--- a/src/build_tools/build_breakpad.py
+++ b/src/build_tools/build_breakpad.py
@@ -54,9 +54,9 @@ def ProcessCall(command):
try:
subprocess.check_output(command)
except subprocess.CalledProcessError as e:
- print e.output
+ print(e.output)
sys.exit(e.returncode)
- print 'Done: %s' % ' '.join(command)
+ print('Done: %s' % ' '.join(command))
def Xcodebuild(projdir, target, arch, sdk, outdir):
diff --git a/src/build_tools/build_diskimage_mac.py b/src/build_tools/build_diskimage_mac.py
index f31c490b..8fa56aea 100644
--- a/src/build_tools/build_diskimage_mac.py
+++ b/src/build_tools/build_diskimage_mac.py
@@ -90,7 +90,7 @@ def main():
# setup volume directory
temp_dir = tempfile.mkdtemp()
CopyFile(path.join(build_dir, ".keystone_install"), temp_dir)
- os.chmod(path.join(temp_dir, ".keystone_install"), 0755) # rwxr-xr-x
+ os.chmod(path.join(temp_dir, ".keystone_install"), 0o755) # rwxr-xr-x
for a in args:
CopyFile(path.join(build_dir, a), temp_dir)
diff --git a/src/build_tools/change_reference_mac.py b/src/build_tools/change_reference_mac.py
index de11b7bd..6e8b1735 100644
--- a/src/build_tools/change_reference_mac.py
+++ b/src/build_tools/change_reference_mac.py
@@ -41,8 +41,8 @@ __author__ = "horo"
import optparse
import os
-from util import PrintErrorAndExit
-from util import RunOrDie
+from .util import PrintErrorAndExit
+from .util import RunOrDie
def ParseOption():
diff --git a/src/build_tools/code_generator_util.py b/src/build_tools/code_generator_util.py
index d489a7c7..80254e74 100644
--- a/src/build_tools/code_generator_util.py
+++ b/src/build_tools/code_generator_util.py
@@ -33,27 +33,26 @@
__author__ = "hidehiko"
import struct
-import types
def ToCppStringLiteral(s):
"""Returns C-style string literal, or NULL if given s is None."""
if s is None:
- return 'NULL'
+ return b'NULL'
- if all(0x20 <= ord(c) <= 0x7E for c in s):
+ if all(0x20 <= c <= 0x7E for c in s):
# All characters are in ascii code.
- return '"%s"' % s.replace('\\', r'\\').replace('"', r'\"')
+ return b'"%b"' % s.replace(b'\\', br'\\').replace(b'"', br'\"')
else:
# One or more characters are non-ascii.
- return '"%s"' % ''.join(r'\x%02X' % ord(c) for c in s)
+ return b'"%b"' % b''.join(br'\x%02X' % c for c in s)
def FormatWithCppEscape(format_text, *args):
"""Returns a string filling format with args."""
literal_list = []
for arg in args:
- if isinstance(arg, (types.StringType, types.NoneType)):
+ if isinstance(arg, (bytes, type(None))):
arg = ToCppStringLiteral(arg)
literal_list.append(arg)
@@ -95,7 +94,7 @@ def WriteCppDataArray(data, variable_name, target_compiler, stream):
if target_compiler and target_compiler.startswith('msvs'):
stream.write('const uint64 k%s_data_wordtype[] = {\n' % variable_name)
- for word_index in xrange(0, len(data), 8):
+ for word_index in range(0, len(data), 8):
word_chunk = data[word_index:word_index + 8].ljust(8, '\x00')
stream.write('0x%016X, ' % struct.unpack('<Q', word_chunk))
if (word_index / 8) % 4 == 3:
@@ -111,7 +110,7 @@ def WriteCppDataArray(data, variable_name, target_compiler, stream):
stream.write('const char k%s_data[] =\n' % variable_name)
# Output 16bytes per line.
chunk_size = 16
- for index in xrange(0, len(data), chunk_size):
+ for index in range(0, len(data), chunk_size):
chunk = data[index:index + chunk_size]
stream.write('"')
stream.writelines(r'\x%02X' % ord(c) for c in chunk)
@@ -126,36 +125,50 @@ def ToJavaStringLiteral(codepoint_list):
if type(codepoint_list) is int:
codepoint_list = (codepoint_list,)
if codepoint_list is None or len(codepoint_list) == 0:
- return 'null'
- result = r'"'
+ return b'null'
+ result = b'"'
for codepoint in codepoint_list:
- utf16_string = unichr(codepoint).encode('utf-16be')
+ utf16_string = chr(codepoint).encode('utf-16be')
if len(utf16_string) == 2:
(u0, l0) = utf16_string
- result += r'\u%02X%02X' % (ord(u0), ord(l0))
+ result += br'\u%02X%02X' % (u0, l0)
else:
(u0, l0, u1, l1) = utf16_string
- result += r'\u%02X%02X\u%02X%02X' % (ord(u0), ord(l0), ord(u1), ord(l1))
- result += r'"'
+ result += br'\u%02X%02X\u%02X%02X' % (u0, l0, u1, l1)
+ result += b'"'
return result
def SkipLineComment(stream, comment_prefix='#'):
"""Skips line comments from stream."""
for line in stream:
+ if isinstance(line, bytes):
+ if isinstance(comment_prefix, str):
+ comment_prefix = comment_prefix.encode('utf-8')
+ line_ending = b'\n'
+ else:
+ line_ending = '\n'
stripped_line = line.strip()
if stripped_line and not stripped_line.startswith(comment_prefix):
- yield line.rstrip('\n')
+ yield line.rstrip(line_ending)
def ParseColumnStream(stream, num_column=None, delimiter=None):
"""Returns parsed columns read from stream."""
if num_column is None:
for line in stream:
- yield line.rstrip('\n').split(delimiter)
+ if isinstance(line, bytes):
+ line_ending = b'\n'
+ else:
+ line_ending = '\n'
+ yield line.rstrip(line_ending).split(delimiter)
else:
for line in stream:
- yield line.rstrip('\n').split(delimiter)[:num_column]
+ if isinstance(line, bytes):
+ line_ending = b'\n'
+ else:
+ line_ending = '\n'
+ yield line.rstrip(line_ending).split(delimiter)[:num_column]
def SelectColumn(stream, column_index):
@@ -172,5 +185,5 @@ def SplitChunk(iterable, n):
grouper extends the last chunk to make it an n-element chunk by adding
appropriate value, but this returns truncated chunk.
"""
- for index in xrange(0, len(iterable), n):
+ for index in range(0, len(iterable), n):
yield iterable[index:index + n]
diff --git a/src/build_tools/codesign_mac.py b/src/build_tools/codesign_mac.py
index 9c1ba137..a558e048 100644
--- a/src/build_tools/codesign_mac.py
+++ b/src/build_tools/codesign_mac.py
@@ -46,17 +46,17 @@ import sys
def RunOrDie(command):
"""Run the command, or die if it failed."""
- print "Running: " + command
+ print("Running: " + command)
try:
output = subprocess.check_output(command, shell=True)
- print >> sys.stderr, "=========="
- print >> sys.stderr, "COMMAND: " + command
- print >> sys.stderr, output
+ print("==========", file=sys.stderr)
+ print("COMMAND: " + command, file=sys.stderr)
+ print(output, file=sys.stderr)
except subprocess.CalledProcessError as e:
- print >> sys.stderr, "=========="
- print >> sys.stderr, "ERROR: " + command
- print >> sys.stderr, e.output
- print >> sys.stderr, "=========="
+ print("==========", file=sys.stderr)
+ print("ERROR: " + command, file=sys.stderr)
+ print(e.output, file=sys.stderr)
+ print("==========", file=sys.stderr)
sys.exit(1)
@@ -119,18 +119,18 @@ def ParseOption():
(options, unused_args) = parser.parse_args()
if not options.target:
- print "Error: --target should be specified."
- print parser.print_help()
+ print("Error: --target should be specified.")
+ print(parser.print_help())
sys.exit(1)
return options
def DumpEnviron():
- print "=== os.environ ==="
+ print("=== os.environ ===")
for key in sorted(os.environ):
- print "%s = %s" % (key, os.getenv(key))
- print "=================="
+ print("%s = %s" % (key, os.getenv(key)))
+ print("==================")
def main():
diff --git a/src/build_tools/copy_dll_and_symbol.py b/src/build_tools/copy_dll_and_symbol.py
index 5bc0fe23..72f2788b 100644
--- a/src/build_tools/copy_dll_and_symbol.py
+++ b/src/build_tools/copy_dll_and_symbol.py
@@ -38,7 +38,7 @@ import optparse
import os
import shutil
-from util import PrintErrorAndExit
+from .util import PrintErrorAndExit
def ParseOption():
"""Parse command line options."""
@@ -98,7 +98,7 @@ def DeployMain(full_filename, src_paths, target_absdir):
if _GetLastModifiedTime(src) <= target_file_mtime:
# Older file found. Ignore.
continue
- print 'Copying %s to %s' % (src, target_file_abspath)
+ print('Copying %s to %s' % (src, target_file_abspath))
shutil.copy2(src, target_file_abspath)
break
diff --git a/src/build_tools/copy_file.py b/src/build_tools/copy_file.py
index 92b5ef3e..7268fd7b 100644
--- a/src/build_tools/copy_file.py
+++ b/src/build_tools/copy_file.py
@@ -52,7 +52,7 @@ def _ErrorExit(message):
Args:
message: The error message to be printed to stderr.
"""
- print >>sys.stderr, message
+ print(message, file=sys.stderr)
sys.exit(1)
diff --git a/src/build_tools/copy_qt_frameworks_mac.py b/src/build_tools/copy_qt_frameworks_mac.py
index 2f7be942..8360e81e 100644
--- a/src/build_tools/copy_qt_frameworks_mac.py
+++ b/src/build_tools/copy_qt_frameworks_mac.py
@@ -41,9 +41,9 @@ __author__ = "horo"
import optparse
import os
-from copy_file import CopyFiles
-from util import PrintErrorAndExit
-from util import RunOrDie
+from .copy_file import CopyFiles
+from .util import PrintErrorAndExit
+from .util import RunOrDie
def ParseOption():
diff --git a/src/build_tools/embed_file.py b/src/build_tools/embed_file.py
index 743f4f0d..9461d74b 100644
--- a/src/build_tools/embed_file.py
+++ b/src/build_tools/embed_file.py
@@ -46,10 +46,10 @@ def _ParseOption():
def _FormatAsUint64LittleEndian(s):
"""Formats a string as uint64 value in little endian order."""
- for _ in xrange(len(s), 8):
- s += '\0'
+ for _ in range(len(s), 8):
+ s += b'\0'
s = s[::-1] # Reverse the string
- return '0x%s' % binascii.b2a_hex(s)
+ return b'0x%b' % binascii.b2a_hex(s)
def main():
@@ -57,30 +57,30 @@ def main():
with open(opts.input, 'rb') as infile:
with open(opts.output, 'wb') as outfile:
outfile.write(
- '#ifdef MOZC_EMBEDDED_FILE_%(name)s\n'
- '#error "%(name)s was already included or defined elsewhere"\n'
- '#else\n'
- '#define MOZC_EMBEDDED_FILE_%(name)s\n'
- 'const uint64 %(name)s_data[] = {\n'
- % {'name': opts.name})
+ b'#ifdef MOZC_EMBEDDED_FILE_%(name)b\n'
+ b'#error "%(name)b was already included or defined elsewhere"\n'
+ b'#else\n'
+ b'#define MOZC_EMBEDDED_FILE_%(name)b\n'
+ b'const uint64 %(name)b_data[] = {\n'
+ % {b'name': opts.name.encode('utf-8')})
while True:
chunk = infile.read(8)
if not chunk:
break
- outfile.write(' ')
+ outfile.write(b' ')
outfile.write(_FormatAsUint64LittleEndian(chunk))
- outfile.write(',\n')
+ outfile.write(b',\n')
outfile.write(
- '};\n'
- 'const EmbeddedFile %(name)s = {\n'
- ' %(name)s_data,\n'
- ' %(size)d,\n'
- '};\n'
- '#endif // MOZC_EMBEDDED_FILE_%(name)s\n'
- % {'name': opts.name,
- 'size': os.stat(opts.input).st_size})
+ b'};\n'
+ b'const EmbeddedFile %(name)b = {\n'
+ b' %(name)b_data,\n'
+ b' %(size)d,\n'
+ b'};\n'
+ b'#endif // MOZC_EMBEDDED_FILE_%(name)b\n'
+ % {b'name': opts.name.encode('utf-8'),
+ b'size': os.stat(opts.input).st_size})
if __name__ == '__main__':
diff --git a/src/build_tools/embed_pathname.py b/src/build_tools/embed_pathname.py
index d7424f38..888e704f 100644
--- a/src/build_tools/embed_pathname.py
+++ b/src/build_tools/embed_pathname.py
@@ -28,7 +28,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""A script to embed the given (relative) path name to C/C++ characters array.
+r"""A script to embed the given (relative) path name to C/C++ characters array.
Example:
./embed_pathname.py --path_to_be_embedded=d:\data\mozc
@@ -53,7 +53,7 @@ def ParseOption():
(options, unused_args) = parser.parse_args()
if not all(vars(options).values()):
- print parser.print_help()
+ print(parser.print_help())
sys.exit(1)
return options
@@ -63,7 +63,7 @@ def main():
opt = ParseOption()
path = os.path.abspath(opt.path_to_be_embedded)
# TODO(yukawa): Consider the case of non-ASCII characters.
- escaped_path = path.encode('string-escape')
+ escaped_path = path.replace('\\', '\\\\')
with open(opt.output, 'w') as output_file:
output_file.write(
'const char %s[] = "%s";\n' % (opt.constant_name, escaped_path))
diff --git a/src/build_tools/ensure_gyp_module_path.py b/src/build_tools/ensure_gyp_module_path.py
index 2ba74c9e..13344427 100644
--- a/src/build_tools/ensure_gyp_module_path.py
+++ b/src/build_tools/ensure_gyp_module_path.py
@@ -48,7 +48,7 @@ def ParseOption():
(options, _) = parser.parse_args()
if not options.expected:
- print parser.print_help()
+ print(parser.print_help())
sys.exit(1)
return options
@@ -59,20 +59,20 @@ def main():
opt = ParseOption()
expected_path = os.path.abspath(opt.expected)
if not os.path.exists(expected_path):
- print '%s does not exist.' % expected_path
+ print('%s does not exist.' % expected_path)
sys.exit(1)
try:
import gyp # NOLINT
except ImportError as e:
- print 'import gyp failed: %s' % e
+ print('import gyp failed: %s' % e)
sys.exit(1)
actual_path = os.path.abspath(gyp.__path__[0])
if expected_path != actual_path:
- print 'Unexpected gyp module is loaded on this environment.'
- print ' expected: %s' % expected_path
- print ' actual : %s' % actual_path
+ print('Unexpected gyp module is loaded on this environment.')
+ print(' expected: %s' % expected_path)
+ print(' actual : %s' % actual_path)
sys.exit(1)
if __name__ == '__main__':
diff --git a/src/build_tools/gen_win32_resource_header.py b/src/build_tools/gen_win32_resource_header.py
index 313c5492..b87599f3 100644
--- a/src/build_tools/gen_win32_resource_header.py
+++ b/src/build_tools/gen_win32_resource_header.py
@@ -39,7 +39,7 @@ See mozc_version.py for the detailed information for version.txt.
__author__ = "yukawa"
import logging
-import mozc_version
+from . import mozc_version
import optparse
import os
import sys
diff --git a/src/build_tools/mozc_version.py b/src/build_tools/mozc_version.py
index a2e93ec9..694ae4e1 100644
--- a/src/build_tools/mozc_version.py
+++ b/src/build_tools/mozc_version.py
@@ -94,7 +94,7 @@ def _GetRevisionForPlatform(revision, target_platform):
last_digit = TARGET_PLATFORM_TO_DIGIT.get(target_platform, None)
if last_digit is None:
logging.critical('target_platform %s is invalid. Accetable ones are %s',
- target_platform, TARGET_PLATFORM_TO_DIGIT.keys())
+ target_platform, list(TARGET_PLATFORM_TO_DIGIT.keys()))
sys.exit(1)
if not revision:
@@ -314,13 +314,14 @@ class MozcVersion(object):
self._properties = {}
if not os.path.isfile(path):
return
- for line in open(path):
- matchobj = re.match(r'(\w+)=(.*)', line.strip())
- if matchobj:
- var = matchobj.group(1)
- val = matchobj.group(2)
- if var not in self._properties:
- self._properties[var] = val
+ with open(path) as file:
+ for line in file:
+ matchobj = re.match(r'(\w+)=(.*)', line.strip())
+ if matchobj:
+ var = matchobj.group(1)
+ val = matchobj.group(2)
+ if var not in self._properties:
+ self._properties[var] = val
# Check mandatory properties.
for key in VERSION_PROPERTIES:
diff --git a/src/build_tools/redirect.py b/src/build_tools/redirect.py
index fc78ef60..68f4eacc 100644
--- a/src/build_tools/redirect.py
+++ b/src/build_tools/redirect.py
@@ -58,14 +58,15 @@ def main():
process = subprocess.Popen(sys.argv, stdout=subprocess.PIPE,
universal_newlines=True)
except:
- print '=========='
- print ' ERROR: %s' % ' '.join(sys.argv)
- print '=========='
+ print('==========')
+ print(' ERROR: %s' % ' '.join(sys.argv))
+ print('==========')
raise
(stdout_content, _) = process.communicate()
# Write the stdout content to the output file.
output_file = open(output_file_name, 'w')
output_file.write(stdout_content)
+ output_file.close()
return process.wait()
if __name__ == '__main__':
diff --git a/src/build_tools/run_after_chdir.py b/src/build_tools/run_after_chdir.py
index 9e063109..f7519c13 100644
--- a/src/build_tools/run_after_chdir.py
+++ b/src/build_tools/run_after_chdir.py
@@ -57,7 +57,7 @@ def main():
sys.argv.insert(0, sys.executable) # Inject the python interpreter path.
# We don't capture stdout and stderr from Popen. The output will just
# be emitted to a terminal or console.
- print sys.argv
+ print(sys.argv)
sys.exit(subprocess.call(sys.argv))
if __name__ == '__main__':
diff --git a/src/build_tools/serialized_string_array_builder.py b/src/build_tools/serialized_string_array_builder.py
index f045a22c..7a2c29c6 100644
--- a/src/build_tools/serialized_string_array_builder.py
+++ b/src/build_tools/serialized_string_array_builder.py
@@ -58,11 +58,11 @@ def SerializeToFile(strings, filename):
f.write(struct.pack('<I', array_size))
# Offset and length array of (4 + 4) * array_size bytes.
- for i in xrange(array_size):
+ for i in range(array_size):
f.write(struct.pack('<I', offsets[i]))
f.write(struct.pack('<I', lengths[i]))
# Strings chunk.
- for i in xrange(array_size):
+ for i in range(array_size):
f.write(strings[i])
- f.write('\0')
+ f.write(b'\0')
diff --git a/src/build_tools/test_tools/gtest_report.py b/src/build_tools/test_tools/gtest_report.py
index 3a161e1d..e01d28e7 100644
--- a/src/build_tools/test_tools/gtest_report.py
+++ b/src/build_tools/test_tools/gtest_report.py
@@ -36,9 +36,9 @@ store as python object.
__author__ = "nona"
-import cStringIO as StringIO
+import io
import logging
-from xml.etree import cElementTree as ElementTree
+from xml.etree import ElementTree
class Failure(object):
@@ -87,13 +87,13 @@ class TestSuite(object):
"""Returns summarized error report text."""
if self.fail_num == 0:
return ''
- output = StringIO.StringIO()
+ output = io.StringIO()
for testcase in self.testcases:
if not testcase.failures:
continue
- print >>output, '%s.%s:' % (self.name, testcase.name)
+ print('%s.%s:' % (self.name, testcase.name), file=output)
for failure in testcase.failures:
- print >>output, failure.contents.encode('utf-8')
+ print(failure.contents, file=output)
return output.getvalue()
@classmethod
diff --git a/src/build_tools/test_tools/test_launcher.py b/src/build_tools/test_tools/test_launcher.py
index 47c0a2ec..1b5cecfc 100644
--- a/src/build_tools/test_tools/test_launcher.py
+++ b/src/build_tools/test_tools/test_launcher.py
@@ -101,11 +101,11 @@ class PathDeleter(object):
time.sleep(1)
try:
shutil.rmtree(self._path)
- except OSError, e:
+ except OSError as e:
logging.error('Failed to remove %s. error: %s', self._path, e)
-def _ExecuteTest((command, gtest_report_dir)):
+def _ExecuteTest(args):
"""Executes tests with specified Test command.
Args:
@@ -122,6 +122,7 @@ def _ExecuteTest((command, gtest_report_dir)):
module, which is used in multiprocessing module.
(http://docs.python.org/library/pickle.html)
"""
+ (command, gtest_report_dir) = args
binary = command[0]
binary_filename = os.path.basename(binary)
tmp_dir = tempfile.mkdtemp()
diff --git a/src/build_tools/tweak_data.py b/src/build_tools/tweak_data.py
index 64d57e14..79b70c49 100644
--- a/src/build_tools/tweak_data.py
+++ b/src/build_tools/tweak_data.py
@@ -55,7 +55,7 @@ def ReplaceVariables(data, environment):
The value for the variable if the variable is defined in the
environment. Otherwise original string is returned.
"""
- if environment.has_key(matchobj.group(1)):
+ if matchobj.group(1) in environment:
return environment[matchobj.group(1)]
return matchobj.group(0)
diff --git a/src/build_tools/tweak_info_plist.py b/src/build_tools/tweak_info_plist.py
index efa3b268..1ff91e2c 100644
--- a/src/build_tools/tweak_info_plist.py
+++ b/src/build_tools/tweak_info_plist.py
@@ -42,8 +42,8 @@ import datetime
import logging
import optparse
import sys
-import mozc_version
-import tweak_data
+from . import mozc_version
+from . import tweak_data
_COPYRIGHT_YEAR = datetime.date.today().year
@@ -81,7 +81,7 @@ def main():
version = mozc_version.MozcVersion(options.version_file)
- copyright_message = (u'© %d Google Inc.' % _COPYRIGHT_YEAR).encode('utf-8')
+ copyright_message = '© %d Google Inc.' % _COPYRIGHT_YEAR
long_version = version.GetVersionString()
short_version = version.GetVersionInFormat('@MAJOR@.@MINOR@.@BUILD@')
diff --git a/src/build_tools/tweak_info_plist_strings.py b/src/build_tools/tweak_info_plist_strings.py
index e34dd59a..bdd00168 100644
--- a/src/build_tools/tweak_info_plist_strings.py
+++ b/src/build_tools/tweak_info_plist_strings.py
@@ -40,7 +40,7 @@ import datetime
import logging
import optparse
import sys
-import tweak_data
+from . import tweak_data
_COPYRIGHT_YEAR = datetime.date.today().year
@@ -77,7 +77,7 @@ def main():
if options.branding == 'GoogleJapaneseInput':
variables = {
'CF_BUNDLE_NAME_EN': 'Google Japanese Input',
- 'CF_BUNDLE_NAME_JA': u'Google 日本語入力'.encode('utf-8'),
+ 'CF_BUNDLE_NAME_JA': 'Google 日本語入力',
'NS_HUMAN_READABLE_COPYRIGHT': copyright_message,
'INPUT_MODE_ANNOTATION': 'Google',
}
diff --git a/src/build_tools/tweak_macinstaller_script.py b/src/build_tools/tweak_macinstaller_script.py
index 54542cbe..daed224c 100644
--- a/src/build_tools/tweak_macinstaller_script.py
+++ b/src/build_tools/tweak_macinstaller_script.py
@@ -39,7 +39,7 @@ __author__ = "mukai"
import logging
import optparse
-import mozc_version
+from . import mozc_version
def _ReplaceVariables(data, environment):
diff --git a/src/build_tools/tweak_pkgproj.py b/src/build_tools/tweak_pkgproj.py
index f5ff7287..84f24ffe 100644
--- a/src/build_tools/tweak_pkgproj.py
+++ b/src/build_tools/tweak_pkgproj.py
@@ -45,7 +45,7 @@ import optparse
import os
import plistlib
import re
-import mozc_version
+from . import mozc_version
from os import path
@@ -71,7 +71,7 @@ def _ReplaceVariables(data, environment):
The value for the variable if the variable is defined in the
environment. Otherwise original string is returned.
"""
- if environment.has_key(matchobj.group(1)):
+ if matchobj.group(1) in environment:
return environment[matchobj.group(1)]
return matchobj.group(0)
diff --git a/src/build_tools/util.py b/src/build_tools/util.py
index 85961187..f0ef68d9 100644
--- a/src/build_tools/util.py
+++ b/src/build_tools/util.py
@@ -73,11 +73,11 @@ def GetNumberOfProcessors():
return 1
-class RunOrDieError(StandardError):
+class RunOrDieError(Exception):
"""The exception class for RunOrDie."""
def __init__(self, message):
- StandardError.__init__(self, message)
+ Exception.__init__(self, message)
def RunOrDie(argv):
@@ -105,7 +105,7 @@ def RemoveFile(file_name):
return # Do nothing if not exist.
if IsWindows():
# Read-only files cannot be deleted on Windows.
- os.chmod(file_name, 0700)
+ os.chmod(file_name, 0o700)
logging.debug('Removing file: %s', file_name)
os.unlink(file_name)
diff --git a/src/build_tools/zlib_util.py b/src/build_tools/zlib_util.py
index 47491480..cc6630b0 100644
--- a/src/build_tools/zlib_util.py
+++ b/src/build_tools/zlib_util.py
@@ -58,7 +58,7 @@ def Decompress(input_filename, output_filename):
def main():
if len(sys.argv) != 4:
- print >>sys.stderr, 'Invalid arguments'
+ print('Invalid arguments', file=sys.stderr)
return
if sys.argv[1] == 'compress':
Compress(sys.argv[2], sys.argv[3])
@@ -66,7 +66,7 @@ def main():
if sys.argv[1] == 'decompress':
Decompress(sys.argv[2], sys.argv[3])
return
- print >>sys.stderr, 'Unknown command:', sys.argv[1]
+ print('Unknown command:', sys.argv[1], file=sys.stderr)
if __name__ == '__main__':
diff --git a/src/chrome/nacl/nacl_net_test_server.py b/src/chrome/nacl/nacl_net_test_server.py
index 257fa79e..e88e10dc 100644
--- a/src/chrome/nacl/nacl_net_test_server.py
+++ b/src/chrome/nacl/nacl_net_test_server.py
@@ -45,27 +45,27 @@ Example usage:
--timeout=20
"""
-import BaseHTTPServer
+import _thread
+import http.server
import optparse
import os
import os.path
import shutil
-import SocketServer
+import socketserver
import subprocess
import sys
import tempfile
-import thread
import time
-import urlparse
+import urllib.parse
-class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+class RequestHandler(http.server.BaseHTTPRequestHandler):
"""Handle the HTTP requests that arrive at the server."""
def do_GET(self):
# pylint: disable=g-bad-name
"""Handles GET request."""
- parsed_path = urlparse.urlparse(self.path)
+ parsed_path = urllib.parse.urlparse(self.path)
options = {'response': 200,
'result': '',
'before_response_sleep': 0.0,
@@ -77,7 +77,7 @@ class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
'data': 'DEFAULT_DATA',
'times': 1,
'redirect_location': ''}
- query = urlparse.parse_qsl(parsed_path.query)
+ query = urllib.parse.parse_qsl(parsed_path.query)
for params in query:
options[params[0]] = params[1]
@@ -113,13 +113,14 @@ class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
full_path = os.path.join(extra_dir, os.path.basename(parsed_path.path))
if os.path.isfile(full_path):
try:
- data = open(full_path).read()
+ with open(full_path, 'rb') as f:
+ data = f.read()
self.send_response(200)
self.send_header('Content-Length', len(data))
self.end_headers()
self.wfile.write(data)
- except IOError, (errno, strerror):
- print 'I/O error(%s): %s' % (errno, strerror)
+ except IOError as e:
+ print('I/O error(%s): %s' % (e.errno, e.strerror))
return
try:
@@ -143,14 +144,14 @@ class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
time.sleep(float(options['before_data_sleep']))
self.wfile.write(options['data'])
time.sleep(float(options['after_data_sleep']))
- except IOError, (errno, strerror):
- print 'I/O error(%s): %s' % (errno, strerror)
+ except IOError as e:
+ print('I/O error(%s): %s' % (e.errno, e.strerror))
return
def do_POST(self):
# pylint: disable=g-bad-name
"""Handles POST request."""
- parsed_path = urlparse.urlparse(self.path)
+ parsed_path = urllib.parse.urlparse(self.path)
options = {'response': 200,
'result': '',
'before_response_sleep': 0.0,
@@ -159,7 +160,7 @@ class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
'after_data_sleep': 0.0,
'content_length': '',
'redirect_location': ''}
- query = urlparse.parse_qsl(parsed_path.query)
+ query = urllib.parse.parse_qsl(parsed_path.query)
for params in query:
options[params[0]] = params[1]
@@ -185,18 +186,18 @@ class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
self.wfile.write(post_data)
time.sleep(float(options['after_data_sleep']))
return
- except IOError, (errno, strerror):
- print 'I/O error(%s): %s' % (errno, strerror)
+ except IOError as e:
+ print('I/O error(%s): %s' % (e.errno, e.strerror))
return
def do_HEAD(self):
# pylint: disable=g-bad-name
"""Handles HEAD request."""
- parsed_path = urlparse.urlparse(self.path)
+ parsed_path = urllib.parse.urlparse(self.path)
options = {'response': 200,
'before_response_sleep': 0.0,
'before_head_sleep': 0.0}
- query = urlparse.parse_qsl(parsed_path.query)
+ query = urllib.parse.parse_qsl(parsed_path.query)
for params in query:
options[params[0]] = params[1]
@@ -210,12 +211,12 @@ class RequestHandler(BaseHTTPServer.BaseHTTPRequestHandler):
for name, value in sorted(self.headers.items()):
self.send_header('CLIENT_HEADER_%s' % name, '%s' % value)
self.end_headers()
- except IOError, (errno, strerror):
- print 'I/O error(%s): %s' % (errno, strerror)
+ except IOError as e:
+ print('I/O error(%s): %s' % (e.errno, e.strerror))
return
-class TestServer(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer):
+class TestServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
def Configure(self, serving_dirs):
self.serving_dirs = serving_dirs
self.finished = False
@@ -243,12 +244,12 @@ def main():
server = TestServer(('localhost', 9999), RequestHandler)
server.Configure(options.serving_dirs)
host, port = server.socket.getsockname()
- print 'Starting server %s:%s' % (host, port)
+ print('Starting server %s:%s' % (host, port))
def Serve():
while not server.finished:
server.handle_request()
- thread.start_new_thread(Serve, ())
+ _thread.start_new_thread(Serve, ())
temp_dir = tempfile.mkdtemp()
if options.browser_path:
@@ -257,7 +258,7 @@ def main():
cmd.append('--load-extension=%s' % options.load_extension)
if options.url:
cmd.append('http://%s:%s/%s' % (host, port, options.url))
- print cmd
+ print(cmd)
browser_handle = subprocess.Popen(cmd)
time_started = time.time()
diff --git a/src/chrome/nacl/pnacl_translate.py b/src/chrome/nacl/pnacl_translate.py
index ae10b3c1..02b037d0 100644
--- a/src/chrome/nacl/pnacl_translate.py
+++ b/src/chrome/nacl/pnacl_translate.py
@@ -54,11 +54,11 @@ def Translate(toolchain_root, input_file, output_base):
for target in targets:
cmd = (translate_command, '--allow-llvm-bitcode-input', '-arch', target[0],
input_file, '-o', '%s_%s.nexe' % (output_base, target[1]))
- print 'Running: ' + ' '.join(cmd)
+ print('Running: ' + ' '.join(cmd))
if subprocess.Popen(cmd).wait() != 0:
- print >> sys.stderr, 'ERROR: ' + ' '.join(cmd)
+ print('ERROR: ' + ' '.join(cmd), file=sys.stderr)
raise RuntimeError('Translate Error')
- print 'Done: ' + ' '.join(cmd)
+ print('Done: ' + ' '.join(cmd))
def StripAndTranslate(toolchain_root, input_file, output_base):
@@ -68,21 +68,21 @@ def StripAndTranslate(toolchain_root, input_file, output_base):
temp_dir = tempfile.mkdtemp()
temp_file_base = os.path.join(temp_dir, 'stripped')
cmd = (strip_command, input_file, '-o', temp_file_base)
- print 'Running: ' + ' '.join(cmd)
+ print('Running: ' + ' '.join(cmd))
if subprocess.Popen(cmd).wait() != 0:
- print >> sys.stderr, 'ERROR: ' + ' '.join(cmd)
+ print('ERROR: ' + ' '.join(cmd), file=sys.stderr)
raise RuntimeError('Strip Error')
- print 'Done: ' + ' '.join(cmd)
+ print('Done: ' + ' '.join(cmd))
Translate(toolchain_root, temp_file_base, temp_file_base)
targets = ('arm', 'x86_32', 'x86_64')
for target in targets:
cmd = (strip_command, '%s_%s.nexe' % (temp_file_base, target),
'-o', '%s_%s.nexe' % (output_base, target))
- print 'Running: ' + ' '.join(cmd)
+ print('Running: ' + ' '.join(cmd))
if subprocess.Popen(cmd).wait() != 0:
- print >> sys.stderr, 'ERROR: ' + ' '.join(cmd)
+ print('ERROR: ' + ' '.join(cmd), file=sys.stderr)
raise RuntimeError('Strip Error')
- print 'Done: ' + ' '.join(cmd)
+ print('Done: ' + ' '.join(cmd))
finally:
shutil.rmtree(temp_dir)
@@ -101,15 +101,15 @@ def main():
(options, _) = parser.parse_args()
if not options.toolchain_root:
- print >> sys.stderr, 'Error: toolchain_root is not set.'
+ print('Error: toolchain_root is not set.', file=sys.stderr)
sys.exit(1)
if not options.input:
- print >> sys.stderr, 'Error: input is not set.'
+ print('Error: input is not set.', file=sys.stderr)
sys.exit(1)
if not options.output_base:
- print >> sys.stderr, 'Error: output_base is not set.'
+ print('Error: output_base is not set.', file=sys.stderr)
sys.exit(1)
if options.configuration == 'Release':
diff --git a/src/chrome/nacl/run_nacl_test.py b/src/chrome/nacl/run_nacl_test.py
index 6a7fcf2f..8a1bea19 100644
--- a/src/chrome/nacl/run_nacl_test.py
+++ b/src/chrome/nacl/run_nacl_test.py
@@ -73,7 +73,7 @@ def RunCommand(options, command):
output, _ = process.communicate()
logging.info(output)
if process.returncode != 0:
- raise StandardError('Failed to run the command: ' + ' '.join(args))
+ raise Exception('Failed to run the command: ' + ' '.join(args))
return output
diff --git a/src/client/gen_client_quality_test_data.py b/src/client/gen_client_quality_test_data.py
index b9e13759..6c8af404 100644
--- a/src/client/gen_client_quality_test_data.py
+++ b/src/client/gen_client_quality_test_data.py
@@ -28,17 +28,16 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import codecs
import logging
import sys
def escape(string):
- return ''.join('\\x%02x' % ord(char) for char in string.encode('utf-8'))
+ return ''.join('\\x%02x' % char for char in string.encode('utf-8'))
def convert_tsv(filename):
- tsv = codecs.open(filename, 'rb', 'utf-8')
+ tsv = open(filename, 'r', encoding='utf-8')
for line in tsv:
line = line.rstrip()
if not line or line.startswith('#'):
@@ -57,38 +56,35 @@ def convert_tsv(filename):
expected = fields[4]
query = fields[5]
- print ' // {"%s", "%s", "%s"},' % (label, expected, query)
- print (' {"%s", "%s", "%s"},' %
+ print(' // {"%s", "%s", "%s"},' % (label, expected, query))
+ print(' {"%s", "%s", "%s"},' %
(escape(label), escape(expected), escape(query)))
tsv.close()
def main():
- sys.stdin = codecs.getreader('utf-8')(sys.stdin)
- sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
- sys.stderr = codecs.getwriter('utf-8')(sys.stderr)
logging.basicConfig(level = logging.INFO)
- print '// Automatically generated by mozc'
- print '#ifndef MOZC_SESSION_QUALITY_MAIN_DATA_H_'
- print '#define MOZC_SESSION_QUALITY_MAIN_DATA_H_'
- print ''
- print 'namespace mozc {'
- print 'struct TestCase {'
- print ' const char* source;'
- print ' const char* expected_result;'
- print ' const char* hiragana_sentence;'
- print '};'
- print ''
- print 'static TestCase test_cases[] = {'
+ print('// Automatically generated by mozc')
+ print('#ifndef MOZC_SESSION_QUALITY_MAIN_DATA_H_')
+ print('#define MOZC_SESSION_QUALITY_MAIN_DATA_H_')
+ print('')
+ print('namespace mozc {')
+ print('struct TestCase {')
+ print(' const char* source;')
+ print(' const char* expected_result;')
+ print(' const char* hiragana_sentence;')
+ print('};')
+ print('')
+ print('static TestCase test_cases[] = {')
for filename in sys.argv[1:]:
convert_tsv(filename)
- print ' {NULL, NULL, NULL}'
- print '};'
- print '} // namespace mozc'
- print '#endif // MOZC_SESSION_QUALITY_MAIN_DATA_H_'
+ print(' {NULL, NULL, NULL}')
+ print('};')
+ print('} // namespace mozc')
+ print('#endif // MOZC_SESSION_QUALITY_MAIN_DATA_H_')
if __name__ == '__main__':
diff --git a/src/composer/internal/gen_typing_model.py b/src/composer/internal/gen_typing_model.py
index 71ba7699..8c867759 100644
--- a/src/composer/internal/gen_typing_model.py
+++ b/src/composer/internal/gen_typing_model.py
@@ -54,14 +54,13 @@ Output file format:
__author__ = "noriyukit"
import bisect
-import codecs
import collections
import optparse
import struct
UNDEFINED_COST = -1
-MAX_UINT16 = struct.unpack('H', '\xFF\xFF')[0]
-MAX_UINT8 = struct.unpack('B', '\xFF')[0]
+MAX_UINT16 = struct.unpack('H', b'\xFF\xFF')[0]
+MAX_UINT8 = struct.unpack('B', b'\xFF')[0]
def ParseArgs():
@@ -113,7 +112,7 @@ def GetMappingTable(values, mapping_table_size):
sorted_values = list(sorted(set(values)))
mapping_table = sorted_values[0]
mapping_table_size_without_special_value = mapping_table_size - 1
- span = len(sorted_values) / (mapping_table_size_without_special_value - 1)
+ span = len(sorted_values) // (mapping_table_size_without_special_value - 1)
mapping_table = [sorted_values[i * span]
for i
in range(0, mapping_table_size_without_special_value - 1)]
@@ -150,7 +149,7 @@ def GetNearestMappingTableIndex(mapping_table, value):
def GetValueTable(unique_characters, mapping_table, dictionary):
result = []
- for key, value in dictionary.iteritems():
+ for key, value in dictionary.items():
index = GetIndexFromKey(unique_characters, key)
while len(result) <= index:
result.append(len(mapping_table) - 1)
@@ -167,13 +166,13 @@ def WriteResult(romaji_transition_cost, output_path):
romaji_transition_cost)
with open(output_path, 'wb') as f:
f.write(struct.pack('<I', len(unique_characters)))
- f.write(''.join(unique_characters))
+ f.write(''.join(unique_characters).encode('utf-8'))
offset = 4 + len(unique_characters)
# Add padding to place value list size at 4-byte boundary.
if offset % 4:
padding_size = 4 - offset % 4
- f.write('\x00' * padding_size)
+ f.write(b'\x00' * padding_size)
offset += padding_size
f.write(struct.pack('<I', len(value_list)))
@@ -184,7 +183,7 @@ def WriteResult(romaji_transition_cost, output_path):
# Add padding to place mapping_table at 4-byte boundary.
if offset % 4:
padding_size = 4 - offset % 4
- f.write('\x00' * padding_size)
+ f.write(b'\x00' * padding_size)
offset += padding_size
for v in mapping_table:
@@ -198,7 +197,8 @@ def main():
# - trigram['vw']['x'] = -500 * log(P(x | 'vw'))
unigram = {}
trigram = collections.defaultdict(dict)
- for line in codecs.open(options.input_path, 'r', encoding='utf-8'):
+ input_file = open(options.input_path, 'r', encoding='utf-8')
+ for line in input_file:
line = line.rstrip()
ngram, cost = line.split('\t')
cost = int(cost)
@@ -206,6 +206,7 @@ def main():
unigram[ngram] = cost
else:
trigram[ngram[:-1]][ngram[-1]] = cost
+ input_file.close()
# Calculate ngram-related cost for each 'vw' and 'x':
# -500 * log( P('x' | 'vw') / P('x') )
diff --git a/src/converter/gen_boundary_data.py b/src/converter/gen_boundary_data.py
index 2a12ee57..35396695 100644
--- a/src/converter/gen_boundary_data.py
+++ b/src/converter/gen_boundary_data.py
@@ -70,7 +70,8 @@ def PatternToRegexp(pattern):
def LoadPatterns(file):
prefix = []
suffix = []
- for line in open(file, 'r'):
+ fh = open(file, 'r')
+ for line in fh:
if len(line) <= 1 or line[0] == '#':
continue
fields = line.split()
@@ -84,8 +85,9 @@ def LoadPatterns(file):
elif label == 'SUFFIX':
suffix.append([re.compile(PatternToRegexp(feature)), cost])
else:
- print 'format error %s' % (line)
+ print('format error %s' % (line))
sys.exit(0)
+ fh.close()
return (prefix, suffix)
@@ -100,19 +102,23 @@ def GetCost(patterns, feature):
def LoadFeatures(filename):
features = []
- for line in open(filename, 'r'):
+ fh = open(filename, 'r')
+ for line in fh:
fields = line.split()
features.append(fields[1])
+ fh.close()
return features
def CountSpecialPos(filename):
count = 0
- for line in open(filename, 'r'):
+ fh = open(filename, 'r')
+ for line in fh:
line = line.rstrip()
if not line or line[0] == '#':
continue
count += 1
+ fh.close()
return count
@@ -141,7 +147,7 @@ def main():
f.write(struct.pack('<H', GetCost(prefix, feature)))
f.write(struct.pack('<H', GetCost(suffix, feature)))
- for _ in xrange(num_special_pos):
+ for _ in range(num_special_pos):
f.write(struct.pack('<H', 0))
f.write(struct.pack('<H', 0))
diff --git a/src/converter/gen_quality_regression_test_data.py b/src/converter/gen_quality_regression_test_data.py
index 642d9310..68fa533a 100644
--- a/src/converter/gen_quality_regression_test_data.py
+++ b/src/converter/gen_quality_regression_test_data.py
@@ -84,7 +84,7 @@ def ParseXML(file):
else _ENABLED)
id = issue.attributes['id'].value
target = GetText(issue.getElementsByTagName('target'))
- for detail in issue.getElementsByTagName(u'detail'):
+ for detail in issue.getElementsByTagName('detail'):
fields = []
fields.append('mozcsu_%s' % id)
for key in ('reading', 'output', 'actionStatus', 'rank', 'accuracy'):
@@ -104,19 +104,19 @@ def ParseFile(file):
def GenerateHeader(files):
try:
- print 'namespace mozc{'
- print 'struct TestCase {'
- print ' const bool enabled;'
- print ' const char *tsv;'
- print '} kTestData[] = {'
+ print('namespace mozc{')
+ print('struct TestCase {')
+ print(' const bool enabled;')
+ print(' const char *tsv;')
+ print('} kTestData[] = {')
for file in files:
for enabled, line in ParseFile(file):
- print ' {%s, "%s"},' % (enabled, EscapeString(line))
- print ' {false, nullptr},'
- print '};'
- print '} // namespace mozc'
+ print(' {%s, "%s"},' % (enabled, EscapeString(line)))
+ print(' {false, nullptr},')
+ print('};')
+ print('} // namespace mozc')
except:
- print 'cannot open %s' % (file)
+ print('cannot open %s' % (file))
sys.exit(1)
diff --git a/src/converter/gen_segmenter_code.py b/src/converter/gen_segmenter_code.py
index 9d1e8edf..67f48f91 100644
--- a/src/converter/gen_segmenter_code.py
+++ b/src/converter/gen_segmenter_code.py
@@ -54,18 +54,22 @@ def ReadPOSID(id_file, special_pos_file):
pos = {}
max_id = 0
- for line in open(id_file, "r"):
+ fh = open(id_file, "r")
+ for line in fh:
fields = line.split()
pos[fields[1]] = fields[0]
max_id = max(int(fields[0]), max_id)
+ fh.close()
max_id = max_id + 1
- for line in open(special_pos_file, "r"):
+ fh = open(special_pos_file, "r")
+ for line in fh:
if len(line) <= 1 or line[0] == '#':
continue
fields = line.split()
pos[fields[0]] = ("%d" % max_id)
max_id = max_id + 1
+ fh.close()
return pos
@@ -79,8 +83,7 @@ def GetRange(pos, pattern, name):
pat = re.compile(PatternToRegexp(pattern))
min = -1;
max = -1;
- keys = pos.keys()
- keys.sort()
+ keys = sorted(pos.keys())
range = []
@@ -107,7 +110,7 @@ def GetRange(pos, pattern, name):
tmp.append("(%s >= %s && %s <= %s)" % (name, r[0], name, r[1]))
if len(tmp) == 0:
- print "FATAL: No rule fiind %s" % (pattern)
+ print("FATAL: No rule fiind %s" % (pattern))
sys.exit(-1)
return " || ".join(tmp)
@@ -115,19 +118,21 @@ def GetRange(pos, pattern, name):
def main():
pos = ReadPOSID(sys.argv[1], sys.argv[2])
- print HEADER % (len(pos.keys()), len(pos.keys()))
+ print(HEADER % (len(pos.keys()), len(pos.keys())))
- for line in open(sys.argv[3], "r"):
+ fh = open(sys.argv[3], "r")
+ for line in fh:
if len(line) <= 1 or line[0] == '#':
continue
(l, r, result) = line.split()
result = result.lower()
lcond = GetRange(pos, l, "rid") or "true";
rcond = GetRange(pos, r, "lid") or "true";
- print " // %s %s %s" % (l, r, result)
- print " if ((%s) && (%s)) { return %s; }" % (lcond, rcond, result)
+ print(" // %s %s %s" % (l, r, result))
+ print(" if ((%s) && (%s)) { return %s; }" % (lcond, rcond, result))
+ fh.close()
- print FOOTER
+ print(FOOTER)
if __name__ == "__main__":
main()
diff --git a/src/data/test/calculator/gen_test.py b/src/data/test/calculator/gen_test.py
index 863630e2..ce8e729a 100644
--- a/src/data/test/calculator/gen_test.py
+++ b/src/data/test/calculator/gen_test.py
@@ -105,7 +105,6 @@
# current script puts parentheses as carefully and safely as possible. If
# such a case happens, it's really tough to track bugs...
-import codecs
import logging
import math
import optparse
@@ -636,24 +635,24 @@ class TestCaseGenerator(object):
"""
# Character map used to generate test expression including Japanese.
- _EQUIVALENT_CHARS = {'+': ['+', u'+'],
- '-': ['-', u'−', u'ー'],
- '*': ['*', u'*'],
- '/': ['/', u'/', u'・'],
+ _EQUIVALENT_CHARS = {'+': ['+', '+'],
+ '-': ['-', '−', 'ー'],
+ '*': ['*', '*'],
+ '/': ['/', '/', '・'],
'^': ['^'],
- '%': ['%', u'%'],
- '(': ['(', u'('],
- ')': [')', u')'],
- '0': ['0', u'0'],
- '1': ['1', u'1'],
- '2': ['2', u'2'],
- '3': ['3', u'3'],
- '4': ['4', u'4'],
- '5': ['5', u'5'],
- '6': ['6', u'6'],
- '7': ['7', u'7'],
- '8': ['8', u'8'],
- '9': ['9', u'9']}
+ '%': ['%', '%'],
+ '(': ['(', '('],
+ ')': [')', ')'],
+ '0': ['0', '0'],
+ '1': ['1', '1'],
+ '2': ['2', '2'],
+ '3': ['3', '3'],
+ '4': ['4', '4'],
+ '5': ['5', '5'],
+ '6': ['6', '6'],
+ '7': ['7', '7'],
+ '8': ['8', '8'],
+ '9': ['9', '9']}
def __init__(self, test_filename, py_filename = '', cc_filename = ''):
"""
@@ -666,7 +665,7 @@ class TestCaseGenerator(object):
# Initialize output file
self._test_filename = test_filename
if test_filename:
- self._test_file = codecs.getwriter('utf-8')(open(test_filename, 'wb'))
+ self._test_file = open(test_filename, 'w', encoding='utf-8')
else:
# Replace the generating function by a dummy
self.add_test_case_for = lambda expr: None
@@ -674,7 +673,7 @@ class TestCaseGenerator(object):
# Initialize python code
if py_filename:
- self._py_file = codecs.getwriter('utf-8')(open(py_filename, 'wb'))
+ self._py_file = open(py_filename, 'w', encoding='utf-8')
self._py_file.write('import math\n\n')
else:
self._add_py_code_for = lambda py_expr, expected: None
@@ -682,7 +681,7 @@ class TestCaseGenerator(object):
# Initialize cc code
if cc_filename:
- self._cc_file = codecs.getwriter('utf-8')(open(cc_filename, 'wb'))
+ self._cc_file = open(cc_filename, 'w', encoding='utf-8')
self._cc_file.write('// Automatically generated by '
'mozc/src/data/test/calculator/gen_test.py\n\n'
'#include <cmath>\n'
@@ -716,7 +715,7 @@ class TestCaseGenerator(object):
@staticmethod
def _mix_japanese_string(string):
"""Randomly transforms half-width characters to full-width."""
- result = u''
+ result = ''
for char in string:
if char in TestCaseGenerator._EQUIVALENT_CHARS:
equiv_chars = TestCaseGenerator._EQUIVALENT_CHARS[char]
@@ -727,7 +726,7 @@ class TestCaseGenerator(object):
def add_test_case_for(self, expr):
"""Appends the code that checks whether the evaluation result of given
- expr coincides with the epxected result.
+ expr coincides with the expected result.
Args:
expr: Expr object
@@ -749,24 +748,24 @@ class TestCaseGenerator(object):
raise FatalError('Expression tree evaluation error')
self._num_computable_cases += 1
- self._test_file.write(u'%s=%.8g\n' % (test_expr, value))
+ self._test_file.write('%s=%.8g\n' % (test_expr, value))
self._add_py_code_for(py_expr, value)
self._add_cc_code_for(expr.build_cc_expr(), value)
except EvalError:
- self._test_file.write(u'%s=\n' % test_expr)
+ self._test_file.write('%s=\n' % test_expr)
self._add_cc_code_for(expr.build_cc_expr(), None)
self._num_total_cases += 1
def _add_py_code_for(self, py_expr, expected):
"""Appends python code that checks whether the evaluation result of given
- expr coincides with the epxected result.
+ expr coincides with the expected result.
If expected is None, it indicates that the evaluation of expr results in
error (like overflow and division-by-zero). Currently, just generates
comments for such cases.
- In generated scrpt, the accuracy is verified either in absolute error or
+ In generated script, the accuracy is verified either in absolute error or
relative error, because there's a possibility that different machines
generate different values due to precision. For example, if the expected
value is very large, we cannot expect that error is less than a certain
@@ -777,21 +776,21 @@ class TestCaseGenerator(object):
expected: expected value of the expression (float)
"""
if expected:
- self._py_file.write('expr = u"%s"\n' % py_expr)
+ self._py_file.write('expr = "%s"\n' % py_expr)
self._py_file.write('expected = %s\n' % repr(expected))
self._py_file.write('val = eval(expr)\n')
self._py_file.write('err = abs(val - expected)\n')
self._py_file.write('if (err > 1e-8 and\n')
self._py_file.write(' err > 1e-2 * abs(expected)):\n')
- self._py_file.write(' print repr(val), "!=", repr(expected)\n')
- self._py_file.write(' print "expr =", expr\n\n')
+ self._py_file.write(' print("%r != %r" % (val, expected))\n')
+ self._py_file.write(' print("expr = %s" % expr)\n\n')
else:
self._py_file.write('# Incomputable\n'
'# %s\n\n' % py_expr)
def _add_cc_code_for(self, cc_expr, expected):
"""Appends the code that checks whether the evaluation result of given
- expr coincides with the epxected result.
+ expr coincides with the expected result.
If expected is None, it indicates that the evaluation of expr results in
error (like overflow and division-by-zero). Currently, just generates
@@ -851,9 +850,6 @@ def parse_options():
def main():
- sys.stdin = codecs.getreader('utf-8')(sys.stdin)
- sys.stdout = codecs.getwriter('utf-8')(sys.stdout)
- sys.stderr = codecs.getwriter('utf-8')(sys.stderr)
random.seed()
logging.basicConfig(level = logging.INFO,
format = '%(levelname)s: %(message)s')
diff --git a/src/data_manager/gen_connection_data.py b/src/data_manager/gen_connection_data.py
index c3fb0952..4f9282b2 100644
--- a/src/data_manager/gen_connection_data.py
+++ b/src/data_manager/gen_connection_data.py
@@ -32,8 +32,7 @@
__author__ = "hidehiko"
-import cStringIO as StringIO
-import itertools
+import io
import logging
import optparse
import os
@@ -45,7 +44,7 @@ from build_tools import code_generator_util
INVALID_COST = 30000
INVALID_1BYTE_COST = 255
RESOLUTION_FOR_1BYTE = 64
-FILE_MAGIC = '\xAB\xCD'
+FILE_MAGIC = b'\xAB\xCD'
FALSE_VALUES = ['f', 'false', '0']
TRUE_VALUES = ['t', 'true', '1']
@@ -79,28 +78,28 @@ def ParseConnectionFile(text_connection_file, pos_size, special_pos_size):
# The result is a square matrix.
mat_size = pos_size + special_pos_size
- matrix = [[0] * mat_size for _ in xrange(mat_size)]
+ matrix = [[0] * mat_size for _ in range(mat_size)]
with open(text_connection_file) as stream:
stream = code_generator_util.SkipLineComment(stream)
# The first line contains the matrix column/row size.
- size = stream.next().rstrip()
+ size = next(stream).rstrip()
assert (int(size) == pos_size), '%s != %d' % (size, pos_size)
for array_index, cost in enumerate(stream):
cost = int(cost.rstrip())
- rid = array_index / pos_size
+ rid = array_index // pos_size
lid = array_index % pos_size
if rid == 0 and lid == 0:
cost = 0
matrix[rid][lid] = cost
# Fill INVALID_COST in matrix elements for special POS.
- for rid in xrange(pos_size, mat_size):
- for lid in xrange(1, mat_size): # Skip EOS
+ for rid in range(pos_size, mat_size):
+ for lid in range(1, mat_size): # Skip EOS
matrix[rid][lid] = INVALID_COST
- for lid in xrange(pos_size, mat_size):
- for rid in xrange(1, mat_size): # Skip BOS
+ for lid in range(pos_size, mat_size):
+ for rid in range(1, mat_size): # Skip BOS
matrix[rid][lid] = INVALID_COST
return matrix
@@ -116,7 +115,7 @@ def CreateModeValueList(matrix):
# Heuristically, we do not compress INVALID_COST.
continue
m[cost] = m.get(cost, 0) + 1
- mode_value = max(m.iteritems(), key=lambda (_, count): count)[0]
+ mode_value = max(m.items(), key=lambda x: x[1])[0]
result.append(mode_value)
return result
@@ -126,8 +125,8 @@ def CompressMatrixByModeValue(matrix, mode_value_list):
# list, and fill None into the matrix if it equals to the corresponding
# mode value.
assert len(matrix) == len(mode_value_list)
- for row, mode_value in itertools.izip(matrix, mode_value_list):
- for index in xrange(len(row)):
+ for row, mode_value in zip(matrix, mode_value_list):
+ for index in range(len(row)):
if row[index] == mode_value:
row[index] = None
@@ -179,7 +178,7 @@ def BuildBinaryData(matrix, mode_value_list, use_1byte_cost):
resolution = RESOLUTION_FOR_1BYTE
else:
resolution = 1
- stream = StringIO.StringIO()
+ stream = io.BytesIO()
# Output header.
stream.write(FILE_MAGIC)
@@ -194,7 +193,7 @@ def BuildBinaryData(matrix, mode_value_list, use_1byte_cost):
# 4 bytes alignment.
if len(mode_value_list) % 2:
- stream.write('\x00\x00')
+ stream.write(b'\x00\x00')
# Process each row:
for row in matrix:
@@ -218,7 +217,7 @@ def BuildBinaryData(matrix, mode_value_list, use_1byte_cost):
if cost == INVALID_COST:
cost = INVALID_1BYTE_COST
else:
- cost /= resolution
+ cost //= resolution
assert cost != INVALID_1BYTE_COST
values.append(cost)
@@ -237,7 +236,7 @@ def BuildBinaryData(matrix, mode_value_list, use_1byte_cost):
values_size = len(values) * 2
# Output the bits for a row.
- stream.write(struct.pack('<HH', len(compact_bits) / 8, values_size))
+ stream.write(struct.pack('<HH', len(compact_bits) // 8, values_size))
OutputBitList(chunk_bits, stream)
OutputBitList(compact_bits, stream)
if use_1byte_cost:
diff --git a/src/dictionary/gen_pos_map.py b/src/dictionary/gen_pos_map.py
index 443e006e..88bee592 100644
--- a/src/dictionary/gen_pos_map.py
+++ b/src/dictionary/gen_pos_map.py
@@ -39,7 +39,7 @@ import sys
from build_tools import code_generator_util
-HEADER = """// Copyright 2009 Google Inc. All Rights Reserved.
+HEADER = b"""// Copyright 2009 Google Inc. All Rights Reserved.
// Author: keni
#ifndef MOZC_DICTIONARY_POS_MAP_H_
@@ -48,13 +48,13 @@ HEADER = """// Copyright 2009 Google Inc. All Rights Reserved.
// POS conversion rules
const POSMap kPOSMap[] = {
"""
-FOOTER = """};
+FOOTER = b"""};
#endif // MOZC_DICTIONARY_POS_MAP_H_
"""
def ParseUserPos(user_pos_file):
- with open(user_pos_file, 'r') as stream:
+ with open(user_pos_file, 'rb') as stream:
stream = code_generator_util.SkipLineComment(stream)
stream = code_generator_util.ParseColumnStream(stream, num_column=2)
return dict((key, enum_value) for key, enum_value in stream)
@@ -64,7 +64,7 @@ def GeneratePosMap(third_party_pos_map_file, user_pos_file):
user_pos_map = ParseUserPos(user_pos_file)
result = {}
- with open(third_party_pos_map_file, 'r') as stream:
+ with open(third_party_pos_map_file, 'rb') as stream:
stream = code_generator_util.SkipLineComment(stream)
for columns in code_generator_util.ParseColumnStream(stream, num_column=2):
third_party_pos_name, mozc_pos = (columns + [None])[:2]
@@ -78,7 +78,7 @@ def GeneratePosMap(third_party_pos_map_file, user_pos_file):
result[third_party_pos_name] = mozc_pos
# Create mozc_pos to mozc_pos map.
- for key, value in user_pos_map.iteritems():
+ for key, value in user_pos_map.items():
if key in result:
assert (result[key] == value)
continue
@@ -94,10 +94,10 @@ def OutputPosMap(pos_map, output):
if value is None:
# Invalid PosType.
value = (
- 'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
+ b'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
else:
- value = '::mozc::user_dictionary::UserDictionary::' + value
- output.write(' { %s, %s },\n' % (key, value))
+ value = b'::mozc::user_dictionary::UserDictionary::' + value
+ output.write(b' { %s, %s },\n' % (key, value))
output.write(FOOTER)
@@ -121,7 +121,7 @@ def main():
pos_map = GeneratePosMap(options.third_party_pos_map_file,
options.user_pos_file)
- with open(options.output, 'w') as stream:
+ with open(options.output, 'wb') as stream:
OutputPosMap(pos_map, stream)
diff --git a/src/dictionary/gen_pos_rewrite_rule.py b/src/dictionary/gen_pos_rewrite_rule.py
index f8d00198..49b71caa 100644
--- a/src/dictionary/gen_pos_rewrite_rule.py
+++ b/src/dictionary/gen_pos_rewrite_rule.py
@@ -46,29 +46,34 @@ def IsPrefix(str, key):
def LoadRewriteMapRule(filename):
- fh = open(filename)
+ fh = open(filename, 'rb')
rule = []
for line in fh:
- line = line.rstrip('\n')
- if not line or line.startswith('#'):
+ line = line.rstrip(b'\n')
+ if not line or line.startswith(b'#'):
continue
fields = line.split()
rule.append([fields[0], fields[1]])
+ fh.close()
return rule
def ReadPOSID(id_file, special_pos_file):
pos_list = []
- for line in open(id_file, 'r'):
+ fh = open(id_file, 'rb')
+ for line in fh:
fields = line.split()
pos_list.append(fields[1])
+ fh.close()
- for line in open(special_pos_file, 'r'):
- if len(line) <= 1 or line[0] == '#':
+ fh = open(special_pos_file, 'rb')
+ for line in fh:
+ if len(line) <= 1 or line[0:1] == b'#':
continue
fields = line.split()
pos_list.append(fields[0])
+ fh.close()
return pos_list
@@ -112,7 +117,7 @@ def main():
ids.append(id)
with open(opts.output, 'wb') as f:
- f.write(''.join(chr(id) for id in ids))
+ f.write(bytes(ids))
if __name__ == '__main__':
diff --git a/src/dictionary/gen_suffix_data.py b/src/dictionary/gen_suffix_data.py
index 0dc0b011..4facb615 100644
--- a/src/dictionary/gen_suffix_data.py
+++ b/src/dictionary/gen_suffix_data.py
@@ -52,10 +52,10 @@ def main():
opts = _ParseOptions()
result = []
- with open(opts.input, 'r') as stream:
+ with open(opts.input, 'rb') as stream:
for line in stream:
- line = line.rstrip('\r\n')
- fields = line.split('\t')
+ line = line.rstrip(b'\r\n')
+ fields = line.split(b'\t')
key = fields[0]
lid = int(fields[1])
rid = int(fields[2])
@@ -63,7 +63,7 @@ def main():
value = fields[4]
if key == value:
- value = ''
+ value = b''
result.append((key, value, lid, rid, cost))
diff --git a/src/dictionary/gen_user_pos_data.py b/src/dictionary/gen_user_pos_data.py
index 58c83c42..0c1d9c3d 100644
--- a/src/dictionary/gen_user_pos_data.py
+++ b/src/dictionary/gen_user_pos_data.py
@@ -64,7 +64,7 @@ def OutputUserPosData(user_pos_data, output_token_array, output_string_array):
f.write(struct.pack('<H', conjugation_id))
serialized_string_array_builder.SerializeToFile(
- sorted(string_index.iterkeys()), output_string_array)
+ sorted(x.encode('utf-8') for x in string_index.keys()), output_string_array)
def ParseOptions():
@@ -100,7 +100,7 @@ def main():
if options.output_pos_list:
serialized_string_array_builder.SerializeToFile(
- [pos for (pos, _) in user_pos.data], options.output_pos_list)
+ [pos.encode('utf-8') for (pos, _) in user_pos.data], options.output_pos_list)
if __name__ == '__main__':
diff --git a/src/dictionary/gen_zip_code_seed.py b/src/dictionary/gen_zip_code_seed.py
index 0265d318..e5818e38 100644
--- a/src/dictionary/gen_zip_code_seed.py
+++ b/src/dictionary/gen_zip_code_seed.py
@@ -83,7 +83,7 @@ class ZipEntry(object):
address = unicodedata.normalize('NFKC', self.address)
line = '\t'.join([zip_code, '0', '0', str(ZIP_CODE_COST),
address, ZIP_CODE_LABEL])
- print line.encode('utf-8')
+ print(line)
def ProcessZipCodeCSV(file_name):
@@ -105,26 +105,26 @@ def ProcessJigyosyoCSV(file_name):
def ReadZipCodeEntries(zip_code, level1, level2, level3):
"""Read zip code entries."""
- return [ZipEntry(zip_code, u''.join([level1, level2, town]))
+ return [ZipEntry(zip_code, ''.join([level1, level2, town]))
for town in ParseTownName(level3)]
def ReadJigyosyoEntry(zip_code, level1, level2, level3, name):
"""Read jigyosyo entry."""
return ZipEntry(zip_code,
- u''.join([level1, level2, level3, u' ', name]))
+ ''.join([level1, level2, level3, ' ', name]))
def ParseTownName(level3):
"""Parse town name."""
- if level3.find(u'以下に掲載がない場合') != -1:
+ if level3.find('以下に掲載がない場合') != -1:
return ['']
assert CanParseAddress(level3), ('failed to be merged %s'
% level3.encode('utf-8'))
# We ignore additional information here.
- level3 = re.sub(u'(.*)', u'', level3, re.U)
+ level3 = re.sub('(.*)', '', level3, re.U)
# For 地割, we have these cases.
# XX1地割
@@ -134,7 +134,7 @@ def ParseTownName(level3):
# XX第1地割、XX第2地割、
# XX第1地割〜XX第2地割、
# We simply use XX for them.
- chiwari_match = re.match(u'(\D*?)第?\d+地割.*', level3, re.U)
+ chiwari_match = re.match(r'(\D*?)第?\d+地割.*', level3, re.U)
if chiwari_match:
town = chiwari_match.group(1)
return [town]
@@ -144,21 +144,21 @@ def ParseTownName(level3):
# -> XX町YY and (XX町)ZZ
# YY、ZZ
# -> YY and ZZ
- chou_match = re.match(u'(.*町)?(.*)', level3, re.U)
+ chou_match = re.match('(.*町)?(.*)', level3, re.U)
if chou_match:
- chou = u''
+ chou = ''
if chou_match.group(1):
chou = chou_match.group(1)
rests = chou_match.group(2)
- return [chou + rest for rest in rests.split(u'、')]
+ return [chou + rest for rest in rests.split('、')]
return [level3]
def CanParseAddress(address):
"""Return true for valid address."""
- return (address.find(u'(') == -1 or
- address.find(u')') != -1)
+ return (address.find('(') == -1 or
+ address.find(')') != -1)
def ParseOptions():
diff --git a/src/dictionary/zip_code_util.py b/src/dictionary/zip_code_util.py
index de6e084f..32ae7526 100644
--- a/src/dictionary/zip_code_util.py
+++ b/src/dictionary/zip_code_util.py
@@ -86,11 +86,11 @@ class SpecialMergeZip(object):
_SPECIAL_CASES = [
- SpecialMergeZip(u'5900111', u'大阪府', u'堺市中区', [u'三原台']),
- SpecialMergeZip(u'8710046', u'大分県', u'中津市',
- [u'金谷', u'西堀端', u'東堀端', u'古金谷']),
- SpecialMergeZip(u'9218046', u'石川県', u'金沢市',
- [u'大桑町', u'三小牛町']),
+ SpecialMergeZip('5900111', '大阪府', '堺市中区', ['三原台']),
+ SpecialMergeZip('8710046', '大分県', '中津市',
+ ['金谷', '西堀端', '東堀端', '古金谷']),
+ SpecialMergeZip('9218046', '石川県', '金沢市',
+ ['大桑町', '三小牛町']),
]
diff --git a/src/gui/character_pad/data/gen_cp932_map.py b/src/gui/character_pad/data/gen_cp932_map.py
index 8f8b05a5..b7365d25 100644
--- a/src/gui/character_pad/data/gen_cp932_map.py
+++ b/src/gui/character_pad/data/gen_cp932_map.py
@@ -32,7 +32,6 @@ __author__ = "taku"
import re
import sys
-import string
kUnicodePat = re.compile(r'0x[0-9A-Fa-f]{2,4}')
def IsValidUnicode(n):
@@ -42,28 +41,29 @@ def main():
fh = open(sys.argv[1])
result = {}
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
- array = string.split(line)
+ array = line.split()
sjis = array[0]
ucs2 = array[1]
if eval(sjis) < 32 or not IsValidUnicode(ucs2):
continue
result.setdefault(ucs2, sjis)
+ fh.close()
keys = sorted(result.keys())
- print "struct CP932MapData {"
- print " unsigned int ucs4;"
- print " unsigned short int sjis;"
- print "};"
- print ""
- print "static const size_t kCP932MapDataSize = %d;" % (len(keys))
- print "static const CP932MapData kCP932MapData[] = {"
+ print("struct CP932MapData {")
+ print(" unsigned int ucs4;")
+ print(" unsigned short int sjis;")
+ print("};")
+ print("")
+ print("static const size_t kCP932MapDataSize = %d;" % (len(keys)))
+ print("static const CP932MapData kCP932MapData[] = {")
for n in keys:
- print " { %s, %s }," % (n ,result[n])
- print " { 0, 0 }";
- print "};"
+ print(" { %s, %s }," % (n ,result[n]))
+ print(" { 0, 0 }");
+ print("};")
if __name__ == "__main__":
main()
diff --git a/src/gui/character_pad/data/gen_local_character_map.py b/src/gui/character_pad/data/gen_local_character_map.py
index 3e550575..3e30bd99 100644
--- a/src/gui/character_pad/data/gen_local_character_map.py
+++ b/src/gui/character_pad/data/gen_local_character_map.py
@@ -30,7 +30,6 @@
__author__ = "taku"
-import string
import re
import sys
@@ -43,9 +42,9 @@ def LoadJISX0201(filename):
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
- array = string.split(line)
+ array = line.split()
jis = array[0].replace('0x', '')
ucs2 = array[1].replace('0x', '')
if len(jis) == 2:
@@ -53,6 +52,7 @@ def LoadJISX0201(filename):
if IsValidUnicode(ucs2):
result.append([jis, ucs2])
+ fh.close()
return ["JISX0201", result]
@@ -60,13 +60,14 @@ def LoadJISX0208(filename):
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
array = line.split()
jis = array[1].replace('0x', '')
ucs2 = array[2].replace('0x', '')
if IsValidUnicode(ucs2):
result.append([jis, ucs2])
+ fh.close()
return ["JISX0208", result]
@@ -74,13 +75,14 @@ def LoadJISX0212(filename):
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
array = line.split()
jis = array[0].replace('0x', '')
ucs2 = array[1].replace('0x', '')
if IsValidUnicode(ucs2):
result.append([jis, ucs2])
+ fh.close()
return ["JISX0212", result]
@@ -88,7 +90,7 @@ def LoadCP932(filename):
fh = open(filename)
result = []
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
array = line.split()
sjis = array[0].replace('0x', '')
@@ -100,19 +102,20 @@ def LoadCP932(filename):
if IsValidUnicode(ucs2):
result.append([sjis, ucs2])
+ fh.close()
return ["CP932", result]
def Output(arg):
name = arg[0]
result = arg[1]
- print "static const size_t k%sMapSize = %d;" % (name, len(result))
- print "static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name)
+ print("static const size_t k%sMapSize = %d;" % (name, len(result)))
+ print("static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name))
for n in result:
- print " { 0x%s, 0x%s }," % (n[0] ,n[1])
- print " { 0, 0 }";
- print "};"
- print ""
+ print(" { 0x%s, 0x%s }," % (n[0] ,n[1]))
+ print(" { 0, 0 }");
+ print("};")
+ print("")
if __name__ == "__main__":
Output(LoadJISX0201(sys.argv[1]))
diff --git a/src/gui/character_pad/data/gen_unicode_blocks.py b/src/gui/character_pad/data/gen_unicode_blocks.py
index 32cd633a..d9a17995 100644
--- a/src/gui/character_pad/data/gen_unicode_blocks.py
+++ b/src/gui/character_pad/data/gen_unicode_blocks.py
@@ -33,13 +33,13 @@ __author__ = "taku"
import sys
import re
-re = re.compile('^(.....?)\.\.(.....?); (.+)')
+re = re.compile(r'^(.....?)\.\.(.....?); (.+)')
def main():
- print "static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {"
+ print("static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {")
fh = open(sys.argv[1])
for line in fh.readlines():
- if line[0] is '#':
+ if line[0] == '#':
continue
m = re.match(line)
if m is not None:
@@ -47,11 +47,12 @@ def main():
end = int(m.group(2), 16)
name = m.group(3)
if start <= 0x2FFFF and end <= 0x2FFFF:
- print " { \"%s\", { %d, %d } }," % (name, start, end)
+ print(" { \"%s\", { %d, %d } }," % (name, start, end))
+ fh.close()
- print " { NULL, { 0, 0 } }"
- print "};"
- print ""
+ print(" { NULL, { 0, 0 } }")
+ print("};")
+ print("")
if __name__ == "__main__":
main()
diff --git a/src/gui/character_pad/data/gen_unicode_data.py b/src/gui/character_pad/data/gen_unicode_data.py
index c5bdbf46..431fd951 100644
--- a/src/gui/character_pad/data/gen_unicode_data.py
+++ b/src/gui/character_pad/data/gen_unicode_data.py
@@ -46,18 +46,19 @@ def main():
code = int(code, 16)
if code < 0x2FFFF:
results.append(" { %d, \"%s\" }," % (code, desc))
+ fh.close()
- print "struct UnicodeData {";
- print " char32 ucs4;";
- print " const char *description;";
- print "};";
- print ""
- print "static const size_t kUnicodeDataSize = %d;" % (len(results))
- print "static const UnicodeData kUnicodeData[] = {";
+ print("struct UnicodeData {");
+ print(" char32 ucs4;");
+ print(" const char *description;");
+ print("};");
+ print("")
+ print("static const size_t kUnicodeDataSize = %d;" % (len(results)))
+ print("static const UnicodeData kUnicodeData[] = {");
for line in results:
- print line;
- print " { 0, NULL }";
- print "};";
+ print(line);
+ print(" { 0, NULL }");
+ print("};");
if __name__ == "__main__":
main()
diff --git a/src/gui/character_pad/data/gen_unihan_data.py b/src/gui/character_pad/data/gen_unihan_data.py
index 411ba69a..b59e147b 100644
--- a/src/gui/character_pad/data/gen_unihan_data.py
+++ b/src/gui/character_pad/data/gen_unihan_data.py
@@ -31,35 +31,34 @@
__author__ = "taku"
import re
-import string
import sys
rs = {}
def Escape(n):
- if n is not "NULL":
+ if n != "NULL":
return "\"%s\"" % (n)
else:
return "NULL"
def GetCode(n):
- if n is not "NULL":
- n = string.replace(n, '0-', 'JIS X 0208: 0x')
- n = string.replace(n, '1-', 'JIS X 0212: 0x')
- n = string.replace(n, '3-', 'JIS X 0213: 0x')
- n = string.replace(n, '4-', 'JIS X 0213: 0x')
- n = string.replace(n, 'A-', 'Vendors Ideographs: 0x')
- n = string.replace(n, '3A', 'JIS X 0213 2000: 0x')
+ if n != "NULL":
+ n = n.replace('0-', 'JIS X 0208: 0x')
+ n = n.replace('1-', 'JIS X 0212: 0x')
+ n = n.replace('3-', 'JIS X 0213: 0x')
+ n = n.replace('4-', 'JIS X 0213: 0x')
+ n = n.replace('A-', 'Vendors Ideographs: 0x')
+ n = n.replace('3A', 'JIS X 0213 2000: 0x')
return "\"%s\"" % n
else:
return "NULL"
def GetRadical(n):
pat = re.compile(r'^(\d+)\.')
- if n is not "NULL":
+ if n != "NULL":
m = pat.match(n)
if m:
result = rs[m.group(1)]
- return "\"%s\"" % (result.encode('string_escape'))
+ return "\"%s\"" % result
else:
return "NULL"
else:
@@ -73,6 +72,7 @@ def main():
id = array[1]
radical = array[2]
rs[id] = radical
+ fh.close()
dic = {}
pat = re.compile(r'^U\+(\S+)\s+(kTotalStrokes|kJapaneseKun|kJapaneseOn|kRSUnicode|kIRG_JSource)\t(.+)')
@@ -86,23 +86,24 @@ def main():
n = int(m.group(1), 16)
if n <= 65536:
dic.setdefault(key, {}).setdefault(field, value)
+ fh.close()
keys = sorted(dic.keys())
- print "struct UnihanData {";
- print " unsigned int ucs4;";
+ print("struct UnihanData {");
+ print(" unsigned int ucs4;");
# Since the total strokes defined in Unihan data is Chinese-based
# number, we can't use it.
# print " unsigned char total_strokes;";
- print " const char *japanese_kun;";
- print " const char *japanese_on;";
+ print(" const char *japanese_kun;");
+ print(" const char *japanese_on;");
# Since the radical information defined in Unihan data is Chinese-based
# number, we can't use it.
# print " const char *radical;";
- print " const char *IRG_jsource;";
- print "};"
- print "static const size_t kUnihanDataSize = %d;" % (len(keys))
- print "static const UnihanData kUnihanData[] = {"
+ print(" const char *IRG_jsource;");
+ print("};")
+ print("static const size_t kUnihanDataSize = %d;" % (len(keys)))
+ print("static const UnihanData kUnihanData[] = {")
for key in keys:
total_strokes = dic[key].get("kTotalStrokes", "0")
@@ -111,9 +112,9 @@ def main():
rad = GetRadical(dic[key].get("kRSUnicode", "NULL"))
code = GetCode(dic[key].get("kIRG_JSource", "NULL"))
# print " { 0x%s, %s, %s, %s, %s, %s }," % (key, total_strokes, kun, on, rad, code)
- print " { 0x%s, %s, %s, %s }," % (key, kun, on, code)
+ print(" { 0x%s, %s, %s, %s }," % (key, kun, on, code))
- print "};"
+ print("};")
if __name__ == "__main__":
main()
diff --git a/src/mac/generate_mapping.py b/src/mac/generate_mapping.py
index 75d4888b..733f94ed 100644
--- a/src/mac/generate_mapping.py
+++ b/src/mac/generate_mapping.py
@@ -31,6 +31,7 @@
__author__ = "mukai"
import optparse
+import sys
class Mapping(object):
def __init__(self, options):
@@ -40,26 +41,26 @@ class Mapping(object):
self._filename = options.filename
def PrintLine(self, line):
- columns = line.strip().split('\t')
+ columns = line.strip().split(b'\t')
if len(columns) != 2:
return
(key, value) = columns
mapname = self._mapname
- if key.startswith('Shift '):
+ if key.startswith(b'Shift '):
mapname += 'Shift'
- key = key[len('Shift '):len(key)]
+ key = key[len(b'Shift '):len(key)]
- if self._key_type == 'unsigned short' and not key.startswith('kVK_'):
- key = 'kVK_ANSI_' + key
+ if self._key_type == 'unsigned short' and not key.startswith(b'kVK_'):
+ key = b'kVK_ANSI_' + key
if self._result_type == 'const char *':
- value = '"%s"' % ''.join(['\\x%x' % ord(c) for c in value])
+ value = b'"%b"' % b''.join([b'\\x%x' % c for c in value])
elif self._result_type == 'KeyEvent::SpecialKey':
- value = 'KeyEvent::' + value
- print ' (*k%s)[%s] = %s;' % (mapname, key, value)
+ value = b'KeyEvent::' + value
+ print(' (*k%s)[%s] = %s;' % (mapname, key.decode('utf-8'), value.decode('utf-8')))
def PrintHeader(self):
- print """// Copyright 2009 Google Inc. All Rights Reserved.
+ print("""// Copyright 2009 Google Inc. All Rights Reserved.
// Author: mukai
//
// This file is automatically generated by
@@ -87,17 +88,18 @@ void Init%(mapname)s() {
}
""" % {'key_type': self._key_type,
'mapname': self._mapname,
- 'result_type': self._result_type }
+ 'result_type': self._result_type })
def PrintFooter(self):
- print """}
+ print("""}
} // namespace
-""" % {'mapname': self._mapname}
+""" % {'mapname': self._mapname})
def Print(self):
self.PrintHeader()
- for line in file(self._filename):
- self.PrintLine(line)
+ with open(self._filename, 'rb') as file:
+ for line in file:
+ self.PrintLine(line)
self.PrintFooter()
def ParseOption():
@@ -110,14 +112,14 @@ def ParseOption():
(options, unused_args) = parser.parse_args()
if not options.mapname:
- print "Error: the output map name should be specified."
+ print("Error: the output map name should be specified.")
sys.exit(2)
if not options.key_type:
options.key_type = 'unsigned short'
if not options.result_type:
- print "Error: the result type of the output map should be speicified."
+ print("Error: the result type of the output map should be specified.")
if not options.filename:
- print "Error: the file name is not specified."
+ print("Error: the file name is not specified.")
return options
def main():
diff --git a/src/prediction/gen_zero_query_data.py b/src/prediction/gen_zero_query_data.py
index 1ffb2fbb..26726185 100644
--- a/src/prediction/gen_zero_query_data.py
+++ b/src/prediction/gen_zero_query_data.py
@@ -59,20 +59,20 @@ def ParseCodePoint(s):
Returns:
A integer indicating parsed pua.
"""
- if not s or s[0] == '>':
+ if not s or s[0:1] == b'>':
return 0
return int(s, 16)
def NormalizeString(string):
return unicodedata.normalize(
- 'NFKC', string.decode('utf-8')).encode('utf-8').replace('~', '〜')
+ 'NFKC', string.decode('utf-8')).replace('~', '〜').encode('utf-8')
def RemoveTrailingNumber(string):
if not string:
- return ''
- return re.sub(r'^([^0-9]+)[0-9]+$', r'\1', string)
+ return b''
+  return re.sub(br'^([^0-9]+)[0-9]+$', br'\1', string)
def GetReadingsFromDescription(description):
@@ -84,19 +84,19 @@ def GetReadingsFromDescription(description):
# - ビル・建物
# \xE3\x83\xBB : "・"
return [RemoveTrailingNumber(token) for token
- in re.split(r'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
+ in re.split(br'(?:\(|\)|/|\xE3\x83\xBB)+', normalized)]
def ReadEmojiTsv(stream):
"""Reads emoji data from stream and returns zero query data."""
zero_query_dict = defaultdict(list)
stream = code_generator_util.SkipLineComment(stream)
- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
if len(columns) != 13:
- logging.critical('format error: %s', '\t'.join(columns))
+ logging.critical('format error: %s', b'\t'.join(columns))
sys.exit(1)
- code_points = columns[0].split(' ')
+ code_points = columns[0].split(b' ')
# Emoji code point.
emoji = columns[1]
@@ -114,12 +114,12 @@ def ReadEmojiTsv(stream):
# - Composite emoji which has multiple code point.
# NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
# TODO(hsumita): Check the availability of such emoji and enable it.
- logging.info('Skip %s', ' '.join(code_points))
+ logging.info('Skip %s', b' '.join(code_points))
continue
reading_list = []
# \xe3\x80\x80 is a full-width space
- for reading in re.split(r'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
+ for reading in re.split(br'(?: |\xe3\x80\x80)+', NormalizeString(readings)):
if not reading:
continue
reading_list.append(reading)
@@ -158,15 +158,15 @@ def ReadZeroQueryRuleData(input_stream):
zero_query_dict = defaultdict(list)
for line in input_stream:
- if line.startswith('#'):
+ if line.startswith(b'#'):
continue
- line = line.rstrip('\r\n')
+ line = line.rstrip(b'\r\n')
if not line:
continue
- tokens = line.split('\t')
+ tokens = line.split(b'\t')
key = tokens[0]
- values = tokens[1].split(',')
+ values = tokens[1].split(b',')
for value in values:
zero_query_dict[key].append(
@@ -179,16 +179,16 @@ def ReadEmoticonTsv(stream):
"""Reads emoticon data from stream and returns zero query data."""
zero_query_dict = defaultdict(list)
stream = code_generator_util.SkipLineComment(stream)
- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
if len(columns) != 3:
- logging.critical('format error: %s', '\t'.join(columns))
+ logging.critical('format error: %s', b'\t'.join(columns))
sys.exit(1)
emoticon = columns[0]
readings = columns[2]
# \xe3\x80\x80 is a full-width space
- for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
+ for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
if not reading:
continue
zero_query_dict[reading].append(
@@ -202,9 +202,9 @@ def ReadSymbolTsv(stream):
"""Reads emoji data from stream and returns zero query data."""
zero_query_dict = defaultdict(list)
stream = code_generator_util.SkipLineComment(stream)
- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
if len(columns) < 3:
- logging.warning('format error: %s', '\t'.join(columns))
+ logging.warning('format error: %s', b'\t'.join(columns))
continue
symbol = columns[1]
@@ -222,7 +222,7 @@ def ReadSymbolTsv(stream):
continue
# \xe3\x80\x80 is a full-width space
- for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
+ for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
if not reading:
continue
zero_query_dict[reading].append(
@@ -247,7 +247,7 @@ def ReadSymbolTsv(stream):
def IsValidKeyForZeroQuery(key):
"""Returns if the key is valid for zero query trigger."""
- is_ascii = all(ord(char) < 128 for char in key)
+ is_ascii = all(char < 128 for char in key)
return not is_ascii
@@ -301,13 +301,13 @@ def ParseOptions():
def main():
options = ParseOptions()
- with open(options.input_rule, 'r') as input_stream:
+ with open(options.input_rule, 'rb') as input_stream:
zero_query_rule_dict = ReadZeroQueryRuleData(input_stream)
- with open(options.input_symbol, 'r') as input_stream:
+ with open(options.input_symbol, 'rb') as input_stream:
zero_query_symbol_dict = ReadSymbolTsv(input_stream)
- with open(options.input_emoji, 'r') as input_stream:
+ with open(options.input_emoji, 'rb') as input_stream:
zero_query_emoji_dict = ReadEmojiTsv(input_stream)
- with open(options.input_emoticon, 'r') as input_stream:
+ with open(options.input_emoticon, 'rb') as input_stream:
zero_query_emoticon_dict = ReadEmoticonTsv(input_stream)
merged_zero_query_dict = MergeZeroQueryData(
diff --git a/src/prediction/gen_zero_query_number_data.py b/src/prediction/gen_zero_query_number_data.py
index 1fe3e4e8..5596e34e 100644
--- a/src/prediction/gen_zero_query_number_data.py
+++ b/src/prediction/gen_zero_query_number_data.py
@@ -41,15 +41,15 @@ def ReadZeroQueryNumberData(input_stream):
zero_query_dict = defaultdict(list)
for line in input_stream:
- if line.startswith('#'):
+ if line.startswith(b'#'):
continue
- line = line.rstrip('\r\n')
+ line = line.rstrip(b'\r\n')
if not line:
continue
- tokens = line.split('\t')
+ tokens = line.split(b'\t')
key = tokens[0]
- values = tokens[1].split(',')
+ values = tokens[1].split(b',')
for value in values:
zero_query_dict[key].append(
@@ -71,7 +71,7 @@ def ParseOption():
def main():
options = ParseOption()
- with open(options.input, 'r') as input_stream:
+ with open(options.input, 'rb') as input_stream:
zero_query_dict = ReadZeroQueryNumberData(input_stream)
util.WriteZeroQueryData(zero_query_dict,
options.output_token_array,
diff --git a/src/prediction/gen_zero_query_util.py b/src/prediction/gen_zero_query_util.py
index 733ca2f4..098387a4 100644
--- a/src/prediction/gen_zero_query_util.py
+++ b/src/prediction/gen_zero_query_util.py
@@ -69,7 +69,7 @@ def WriteZeroQueryData(zero_query_dict, output_token_array,
output_string_array):
# Collect all the strings and assing index in ascending order
string_index = {}
- for key, entry_list in zero_query_dict.iteritems():
+ for key, entry_list in zero_query_dict.items():
string_index[key] = 0
for entry in entry_list:
string_index[entry.value] = 0
diff --git a/src/rewriter/gen_counter_suffix_array.py b/src/rewriter/gen_counter_suffix_array.py
index 4f4dd597..98e10cf5 100644
--- a/src/rewriter/gen_counter_suffix_array.py
+++ b/src/rewriter/gen_counter_suffix_array.py
@@ -43,7 +43,7 @@ def ReadCounterSuffixPosIds(id_file):
with codecs.open(id_file, 'r', encoding='utf-8') as stream:
stream = code_generator_util.ParseColumnStream(stream, num_column=2)
for pos_id, pos_name in stream:
- if pos_name.startswith(u'名詞,接尾,助数詞'):
+ if pos_name.startswith('名詞,接尾,助数詞'):
pos_ids.add(pos_id)
return pos_ids
diff --git a/src/rewriter/gen_emoji_rewriter_data.py b/src/rewriter/gen_emoji_rewriter_data.py
index 210d5af6..be5a94cb 100644
--- a/src/rewriter/gen_emoji_rewriter_data.py
+++ b/src/rewriter/gen_emoji_rewriter_data.py
@@ -74,19 +74,19 @@ def ParseCodePoint(s):
the glyph (in other words, it has alternative (primary) code point, which
doesn't lead '>' and that's why we'll ignore it).
"""
- if not s or s[0] == '>':
+ if not s or s[0:1] == b'>':
return None
return int(s, 16)
-_FULLWIDTH_RE = re.compile(ur'[！-～]')  # U+FF01 - U+FF5E
+_FULLWIDTH_RE = re.compile(r'[！-～]')  # U+FF01 - U+FF5E
def NormalizeString(string):
  """Normalize full width ascii characters to half width characters."""
-  offset = ord(u'Ａ') - ord(u'A')
-  return _FULLWIDTH_RE.sub(lambda x: unichr(ord(x.group(0)) - offset),
-                           unicode(string, 'utf-8')).encode('utf-8')
+  offset = ord('Ａ') - ord('A')
+  return _FULLWIDTH_RE.sub(lambda x: chr(ord(x.group(0)) - offset),
+                           string.decode('utf-8')).encode('utf-8')
def ReadEmojiTsv(stream):
@@ -96,14 +96,14 @@ def ReadEmojiTsv(stream):
token_dict = defaultdict(list)
stream = code_generator_util.SkipLineComment(stream)
- for columns in code_generator_util.ParseColumnStream(stream, delimiter='\t'):
+ for columns in code_generator_util.ParseColumnStream(stream, delimiter=b'\t'):
if len(columns) != 13:
- logging.critical('format error: %s', '\t'.join(columns))
+ logging.critical('format error: %s', b'\t'.join(columns))
sys.exit(1)
- code_points = columns[0].split(' ')
+ code_points = columns[0].split(b' ')
# Emoji code point.
- emoji = columns[1] if columns[1] else ''
+ emoji = columns[1] if columns[1] else b''
android_pua = ParseCodePoint(columns[2])
docomo_pua = ParseCodePoint(columns[3])
softbank_pua = ParseCodePoint(columns[4])
@@ -112,10 +112,10 @@ def ReadEmojiTsv(stream):
readings = columns[6]
# [7]: Name defined in Unicode. It is ignored in current implementation.
- utf8_description = columns[8] if columns[8] else ''
- docomo_description = columns[9] if columns[9] else ''
- softbank_description = columns[10] if columns[10] else ''
- kddi_description = columns[11] if columns[11] else ''
+ utf8_description = columns[8] if columns[8] else b''
+ docomo_description = columns[9] if columns[9] else b''
+ softbank_description = columns[10] if columns[10] else b''
+ kddi_description = columns[11] if columns[11] else b''
if not android_pua or len(code_points) > 1:
# Skip some emoji, which is not supported on old devices.
@@ -123,7 +123,7 @@ def ReadEmojiTsv(stream):
# - Composite emoji which has multiple code point.
# NOTE: Some Unicode 6.0 emoji don't have PUA, and it is also omitted.
# TODO(hsumita): Check the availability of such emoji and enable it.
- logging.info('Skip %s', ' '.join(code_points))
+ logging.info('Skip %s', b' '.join(code_points))
continue
# Check consistency between carrier PUA codes and descriptions for Android
@@ -132,7 +132,7 @@ def ReadEmojiTsv(stream):
(bool(softbank_pua) != bool(softbank_description)) or
(bool(kddi_pua) != bool(kddi_description))):
logging.warning('carrier PUA and description conflict: %s',
- '\t'.join(columns))
+ b'\t'.join(columns))
continue
# Check if the character is usable on Android.
@@ -140,7 +140,7 @@ def ReadEmojiTsv(stream):
android_pua = 0 # Replace None with 0.
if not emoji and not android_pua:
- logging.info('Skip: %s', '\t'.join(columns))
+ logging.info('Skip: %s', b'\t'.join(columns))
continue
index = len(emoji_data_list)
@@ -149,7 +149,7 @@ def ReadEmojiTsv(stream):
kddi_description))
# \xe3\x80\x80 is a full-width space
- for reading in re.split(r'(?: |\xe3\x80\x80)+', readings.strip()):
+ for reading in re.split(br'(?: |\xe3\x80\x80)+', readings.strip()):
if reading:
token_dict[NormalizeString(reading)].append(index)
@@ -159,7 +159,7 @@ def ReadEmojiTsv(stream):
def OutputData(emoji_data_list, token_dict,
token_array_file, string_array_file):
"""Output token and string arrays to files."""
- sorted_token_dict = sorted(token_dict.iteritems())
+ sorted_token_dict = sorted(token_dict.items())
strings = {}
for reading, _ in sorted_token_dict:
@@ -171,7 +171,7 @@ def OutputData(emoji_data_list, token_dict,
strings[docomo_description] = 0
strings[softbank_description] = 0
strings[kddi_description] = 0
- sorted_strings = sorted(strings.iterkeys())
+ sorted_strings = sorted(strings.keys())
for index, s in enumerate(sorted_strings):
strings[s] = index
@@ -205,7 +205,7 @@ def ParseOptions():
def main():
options = ParseOptions()
- with open(options.input, 'r') as input_stream:
+ with open(options.input, 'rb') as input_stream:
(emoji_data_list, token_dict) = ReadEmojiTsv(input_stream)
OutputData(emoji_data_list, token_dict,
diff --git a/src/rewriter/gen_reading_correction_data.py b/src/rewriter/gen_reading_correction_data.py
index 97a09b9b..99997ed7 100644
--- a/src/rewriter/gen_reading_correction_data.py
+++ b/src/rewriter/gen_reading_correction_data.py
@@ -63,7 +63,7 @@ def ParseOptions():
def WriteData(input_path, output_value_array_path, output_error_array_path,
output_correction_array_path):
outputs = []
- with open(input_path) as input_stream:
+ with open(input_path, 'rb') as input_stream:
input_stream = code_generator_util.SkipLineComment(input_stream)
input_stream = code_generator_util.ParseColumnStream(input_stream,
num_column=3)
@@ -73,7 +73,7 @@ def WriteData(input_path, output_value_array_path, output_error_array_path,
# In order to lookup the entries via |error| with binary search,
# sort outputs here.
- outputs.sort(lambda x, y: cmp(x[1], y[1]) or cmp(x[0], y[0]))
+ outputs.sort(key=lambda x: (x[1], x[0]))
serialized_string_array_builder.SerializeToFile(
[value for (value, _, _) in outputs], output_value_array_path)
diff --git a/src/rewriter/gen_single_kanji_rewriter_data.py b/src/rewriter/gen_single_kanji_rewriter_data.py
index 5c87006d..97c5b491 100644
--- a/src/rewriter/gen_single_kanji_rewriter_data.py
+++ b/src/rewriter/gen_single_kanji_rewriter_data.py
@@ -52,7 +52,7 @@ def ReadSingleKanji(stream):
stream = code_generator_util.ParseColumnStream(stream, num_column=2)
outputs = list(stream)
# For binary search by |key|, sort outputs here.
- outputs.sort(lambda x, y: cmp(x[0], y[0]))
+ outputs.sort(key=lambda x: x[0])
return outputs
@@ -72,7 +72,7 @@ def ReadVariant(stream):
variant_items.append([target, original, len(variant_types) - 1])
# For binary search by |target|, sort variant items here.
- variant_items.sort(lambda x, y: cmp(x[0], y[0]))
+ variant_items.sort(key=lambda x: x[0])
return (variant_types, variant_items)
@@ -151,10 +151,10 @@ def _ParseOptions():
def main():
options = _ParseOptions()
- with open(options.single_kanji_file, 'r') as single_kanji_stream:
+ with open(options.single_kanji_file, 'rb') as single_kanji_stream:
single_kanji = ReadSingleKanji(single_kanji_stream)
- with open(options.variant_file, 'r') as variant_stream:
+ with open(options.variant_file, 'rb') as variant_stream:
variant_info = ReadVariant(variant_stream)
WriteSingleKanji(single_kanji,
diff --git a/src/session/gen_session_stress_test_data.py b/src/session/gen_session_stress_test_data.py
index 1e27d52f..b3f86c27 100644
--- a/src/session/gen_session_stress_test_data.py
+++ b/src/session/gen_session_stress_test_data.py
@@ -50,24 +50,26 @@ def escape_string(s):
"""
result = ''
for c in s:
- hexstr = hex(ord(c))
+ hexstr = hex(c)
# because hexstr contains '0x', remove the prefix and add our prefix
result += '\\x' + hexstr[2:]
return result
def GenerateHeader(file):
try:
- print "const char *kTestSentences[] = {"
- for line in open(file, "r"):
- if line.startswith('#'):
+ print("const char *kTestSentences[] = {")
+ fh = open(file, "rb")
+ for line in fh:
+ if line.startswith(b'#'):
continue
- line = line.rstrip('\r\n')
+ line = line.rstrip(b'\r\n')
if not line:
continue
- print " \"%s\"," % escape_string(line)
- print "};"
+ print(" \"%s\"," % escape_string(line))
+ fh.close()
+ print("};")
except:
- print "cannot open %s" % (file)
+ print("cannot open %s" % (file))
sys.exit(1)
def main():
diff --git a/src/unix/ibus/gen_mozc_xml.py b/src/unix/ibus/gen_mozc_xml.py
index 4a450408..f3b2718b 100644
--- a/src/unix/ibus/gen_mozc_xml.py
+++ b/src/unix/ibus/gen_mozc_xml.py
@@ -74,7 +74,7 @@ CPP_FOOTER = """} // namespace
def OutputXmlElement(param_dict, element_name, value):
- print ' <%s>%s</%s>' % (element_name, (value % param_dict), element_name)
+ print(' <%s>%s</%s>' % (element_name, (value % param_dict), element_name))
def OutputXml(param_dict, component, engine_common, engines, setup_arg):
@@ -90,26 +90,26 @@ def OutputXml(param_dict, component, engine_common, engines, setup_arg):
engines: A dictionary from a property name to a list of property values of
engines. For example, {'name': ['mozc-jp', 'mozc', 'mozc-dv']}.
"""
- print '<component>'
- for key in component:
+ print('<component>')
+ for key in sorted(component):
OutputXmlElement(param_dict, key, component[key])
- print '<engines>'
+ print('<engines>')
for i in range(len(engines['name'])):
- print '<engine>'
- for key in engine_common:
+ print('<engine>')
+ for key in sorted(engine_common):
OutputXmlElement(param_dict, key, engine_common[key])
if setup_arg:
OutputXmlElement(param_dict, 'setup', ' '.join(setup_arg))
- for key in engines:
+ for key in sorted(engines):
OutputXmlElement(param_dict, key, engines[key][i])
- print '</engine>'
- print '</engines>'
- print '</component>'
+ print('</engine>')
+ print('</engines>')
+ print('</component>')
def OutputCppVariable(param_dict, prefix, variable_name, value):
- print 'const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
- (value % param_dict))
+ print('const char k%s%s[] = "%s";' % (prefix, variable_name.capitalize(),
+ (value % param_dict)))
def OutputCpp(param_dict, component, engine_common, engines):
@@ -122,18 +122,18 @@ def OutputCpp(param_dict, component, engine_common, engines):
engines: ditto.
"""
guard_name = 'MOZC_UNIX_IBUS_MAIN_H_'
- print CPP_HEADER % (guard_name, guard_name)
- for key in component:
+ print(CPP_HEADER % (guard_name, guard_name))
+ for key in sorted(component):
OutputCppVariable(param_dict, 'Component', key, component[key])
- for key in engine_common:
+ for key in sorted(engine_common):
OutputCppVariable(param_dict, 'Engine', key, engine_common[key])
- for key in engines:
- print 'const char* kEngine%sArray[] = {' % key.capitalize()
+ for key in sorted(engines):
+ print('const char* kEngine%sArray[] = {' % key.capitalize())
for i in range(len(engines[key])):
- print '"%s",' % (engines[key][i] % param_dict)
- print '};'
- print 'const size_t kEngineArrayLen = %s;' % len(engines['name'])
- print CPP_FOOTER % guard_name
+ print('"%s",' % (engines[key][i] % param_dict))
+ print('};')
+ print('const size_t kEngineArrayLen = %s;' % len(engines['name']))
+ print(CPP_FOOTER % guard_name)
def CheckIBusVersion(options, minimum_version):
diff --git a/src/usage_stats/gen_stats_list.py b/src/usage_stats/gen_stats_list.py
index 00051dfd..b63e1bc9 100644
--- a/src/usage_stats/gen_stats_list.py
+++ b/src/usage_stats/gen_stats_list.py
@@ -37,23 +37,24 @@ import sys
def GetStatsNameList(filename):
stats = []
- for line in open(filename, 'r'):
- stat = line.strip()
- if not stat or stat[0] == '#':
- continue
- stats.append(stat)
+ with open(filename, 'r') as file:
+ for line in file:
+ stat = line.strip()
+ if not stat or stat[0] == '#':
+ continue
+ stats.append(stat)
return stats
def main():
stats_list = GetStatsNameList(sys.argv[1])
- print '// This header file is generated by gen_stats_list.py'
+ print('// This header file is generated by gen_stats_list.py')
for stats in stats_list:
- print 'const char k%s[] = "%s";' % (stats, stats)
- print 'const char *kStatsList[] = {'
+ print('const char k%s[] = "%s";' % (stats, stats))
+ print('const char *kStatsList[] = {')
for stats in stats_list:
- print ' k%s,' % (stats)
- print '};'
+ print(' k%s,' % (stats))
+ print('};')
if __name__ == '__main__':
diff --git a/src/win32/installer/postbuilds_win.py b/src/win32/installer/postbuilds_win.py
index f37f1c36..a7ef4152 100644
--- a/src/win32/installer/postbuilds_win.py
+++ b/src/win32/installer/postbuilds_win.py
@@ -28,7 +28,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-"""Script post-prosessing executables for Windows.
+"""Script post-processing executables for Windows.
postbuilds_win.py --targetpath=my_binary.exe
"""
@@ -66,11 +66,11 @@ def ParseOption():
open(abs_touch_file_path, 'w').close()
-class RunOrDieError(StandardError):
+class RunOrDieError(Exception):
"""The exception class for RunOrDie."""
def __init__(self, message):
- StandardError.__init__(self, message)
+ Exception.__init__(self, message)
def RunOrDie(argv):
@@ -94,15 +94,15 @@ def RunOrDie(argv):
def PrintErrorAndExit(error_message):
"""Prints the error message and exists."""
- print error_message
+ print(error_message)
sys.exit(1)
def ShowHelpAndExit():
"""Shows the help message."""
- print 'Usage: postbuilds_win.py [ARGS]'
- print 'This script only supports Windows'
- print 'See also the comment in the script for typical usage.'
+ print('Usage: postbuilds_win.py [ARGS]')
+ print('This script only supports Windows')
+ print('See also the comment in the script for typical usage.')
sys.exit(1)
@@ -110,7 +110,7 @@ def main():
opts = ParseOption()
if not opts.targetpath:
- print '--targetpath is not specified'
+ print('--targetpath is not specified')
sys.exit(1)
if IsWindows():
--
2.26.1