File 0001-id3-always-use-little-endian-order-for-utf-16-with-BOM.patch of Package python3-mutagen

From c27b957e242c24b5f247a51fd1807e332fe7ef24 Mon Sep 17 00:00:00 2001
From: Christoph Reiter <reiter.christoph@gmail.com>
Date: Wed, 25 Jan 2017 13:05:45 +0100
Subject: [PATCH] id3: always use little-endian order for utf-16 with BOM. See
 #289

While both variants are valid for ID3, this makes tags the same
on all platforms and reduces the risk that tests break on big-endian
machines.
---
 mutagen/_util.py        | 32 ++++++++++++++++++++++++++++++++
 mutagen/id3/_specs.py   | 10 +++++++---
 tests/test__id3specs.py |  9 +--------
 tests/test__util.py     | 29 ++++++++++++++++++++++++++++-
 4 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/mutagen/_util.py b/mutagen/_util.py
index 499729e..2a28995 100644
--- a/mutagen/_util.py
+++ b/mutagen/_util.py
@@ -888,6 +888,38 @@ def dict_match(d, key, default=None):
     return default
 
 
+def encode_endian(text, encoding, errors="strict", le=True):
+    """Like text.encode(encoding), but for BOM-prefixed "utf-16"/"utf-32" the
+    byte order follows `le` instead of the platform's native byte order.
+
+    Args:
+        text (text): the text to encode
+        encoding (str): codec name; aliases are normalized via codecs.lookup
+        errors (str): error handler passed through to text.encode
+        le (boolean): little endian BOM and data if True, big endian if False
+    Returns:
+        bytes: BOM + data for "utf-16"/"utf-32", else text.encode() output
+    Raises:
+        UnicodeEncodeError: e.g. unencodable text with errors="strict"
+        LookupError: the encoding is unknown
+    """
+
+    encoding = codecs.lookup(encoding).name  # canonical codec name
+
+    if encoding == "utf-16":
+        if le:
+            return codecs.BOM_UTF16_LE + text.encode("utf-16-le", errors)
+        else:
+            return codecs.BOM_UTF16_BE + text.encode("utf-16-be", errors)
+    elif encoding == "utf-32":
+        if le:
+            return codecs.BOM_UTF32_LE + text.encode("utf-32-le", errors)
+        else:
+            return codecs.BOM_UTF32_BE + text.encode("utf-32-be", errors)
+    else:
+        return text.encode(encoding, errors)
+
+
 def decode_terminated(data, encoding, strict=True):
     """Returns the decoded data until the first NULL terminator
     and all data after it.
diff --git a/mutagen/id3/_specs.py b/mutagen/id3/_specs.py
index 9454596..9b5dce7 100644
--- a/mutagen/id3/_specs.py
+++ b/mutagen/id3/_specs.py
@@ -12,7 +12,8 @@
 
 from .._compat import text_type, chr_, PY3, swap_to_string, string_types, \
     xrange
-from .._util import total_ordering, decode_terminated, enum, izip, flags, cdata
+from .._util import total_ordering, decode_terminated, enum, izip, flags, \
+    cdata, encode_endian
 from ._util import BitPaddedInt, is_valid_frame_id
 
 
@@ -487,7 +488,7 @@ def read(self, header, frame, data):
     def write(self, config, frame, value):
         enc, term = self._encodings[frame.encoding]
         try:
-            return value.encode(enc) + term
+            return encode_endian(value, enc, le=True) + term
         except UnicodeEncodeError as e:
             raise SpecError(e)
 
@@ -815,7 +816,10 @@ def write(self, config, frame, value):
         data = []
         encoding, term = self._encodings[frame.encoding]
         for text, time in value:
-            text = text.encode(encoding) + term
+            try:
+                text = encode_endian(text, encoding, le=True) + term
+            except UnicodeEncodeError as e:
+                raise SpecError(e)
             data.append(text + struct.pack(">I", time))
         return b"".join(data)
 
diff --git a/tests/test__id3specs.py b/tests/test__id3specs.py
index 5ca00cd..c14083a 100644
--- a/tests/test__id3specs.py
+++ b/tests/test__id3specs.py
@@ -1,7 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import sys
-
 from tests import TestCase
 
 from mutagen._compat import PY3
@@ -27,12 +25,7 @@ def test_write(self):
         self.assertEqual(
             s.read(None, f, s.write(None, f, values)), (values, b""))
         data = s.write(None, f, [(u"A", 100)])
-        if sys.byteorder == 'little':
-            self.assertEquals(
-                data, b"\xff\xfeA\x00\x00\x00\x00\x00\x00d")
-        else:
-            self.assertEquals(
-                data, b"\xfe\xff\x00A\x00\x00\x00\x00\x00d")
+        self.assertEquals(data, b"\xff\xfeA\x00\x00\x00\x00\x00\x00d")
 
         # utf-16be
         f.encoding = 2
diff --git a/tests/test__util.py b/tests/test__util.py
index f2e3196..76d1095 100644
--- a/tests/test__util.py
+++ b/tests/test__util.py
@@ -3,7 +3,7 @@
 from mutagen._util import DictMixin, cdata, insert_bytes, delete_bytes, \
     decode_terminated, dict_match, enum, get_size, BitReader, BitReaderError, \
     resize_bytes, seek_end, mmap_move, verify_fileobj, fileobj_name, \
-    read_full, flags, resize_file, fallback_move
+    read_full, flags, resize_file, fallback_move, encode_endian
 from mutagen._compat import text_type, itervalues, iterkeys, iteritems, PY2, \
     cBytesIO, xrange, BytesIO
 from tests import TestCase, get_temp_empty
@@ -742,6 +742,33 @@ def test_get_size(self):
         self.assertEqual(f.tell(), 1)
 
 
+class Tencode_endian(TestCase):
+
+    def test_other(self):
+        assert encode_endian(u"\xe4", "latin-1") == b"\xe4"
+        assert encode_endian(u"\xe4", "utf-8") == b"\xc3\xa4"
+        with self.assertRaises(LookupError):
+            encode_endian(u"", "nopenope")  # unknown codec name
+        with self.assertRaises(UnicodeEncodeError):
+            assert encode_endian(u"\u2714", "latin-1")  # not in latin-1
+        assert encode_endian(u"\u2714", "latin-1", "replace") == b"?"
+
+    def test_utf_16(self):
+        assert encode_endian(u"\xe4", "utf-16", le=True) == b"\xff\xfe\xe4\x00"
+        assert encode_endian(u"\xe4", "utf-16-le") == b"\xe4\x00"  # no BOM
+        assert encode_endian(
+            u"\xe4", "utf-16", le=False) == b"\xfe\xff\x00\xe4"
+        assert encode_endian(u"\xe4", "utf-16-be") == b"\x00\xe4"  # no BOM
+
+    def test_utf_32(self):
+        assert encode_endian(u"\xe4", "utf-32", le=True) == \
+            b"\xff\xfe\x00\x00\xe4\x00\x00\x00"
+        assert encode_endian(u"\xe4", "utf-32-le") == b"\xe4\x00\x00\x00"
+        assert encode_endian(
+            u"\xe4", "utf-32", le=False) == b"\x00\x00\xfe\xff\x00\x00\x00\xe4"
+        assert encode_endian(u"\xe4", "utf-32-be") == b"\x00\x00\x00\xe4"
+
+
 class Tdecode_terminated(TestCase):
 
     def test_all(self):