Kaydet (Commit) 00f402bf authored tarafından Eli Bendersky's avatar Eli Bendersky

Close #1767933: Badly formed XML using etree and utf-16. Patch by Serhiy…

Close #1767933: Badly formed XML using etree and utf-16. Patch by Serhiy Storchaka, with some minor fixes by me
üst 1191709b
...@@ -659,7 +659,6 @@ ElementTree Objects ...@@ -659,7 +659,6 @@ ElementTree Objects
should be added to the file. Use False for never, True for always, None should be added to the file. Use False for never, True for always, None
for only if not US-ASCII or UTF-8 or Unicode (default is None). *method* is for only if not US-ASCII or UTF-8 or Unicode (default is None). *method* is
either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``). either ``"xml"``, ``"html"`` or ``"text"`` (default is ``"xml"``).
Returns an (optionally) encoded string.
This is the XML file that is going to be manipulated:: This is the XML file that is going to be manipulated::
......
This diff is collapsed.
...@@ -100,6 +100,8 @@ VERSION = "1.3.0" ...@@ -100,6 +100,8 @@ VERSION = "1.3.0"
import sys import sys
import re import re
import warnings import warnings
import io
import contextlib
from . import ElementPath from . import ElementPath
...@@ -792,59 +794,38 @@ class ElementTree: ...@@ -792,59 +794,38 @@ class ElementTree:
# None for only if not US-ASCII or UTF-8 or Unicode. None is default. # None for only if not US-ASCII or UTF-8 or Unicode. None is default.
def write(self, file_or_filename, def write(self, file_or_filename,
# keyword arguments
encoding=None, encoding=None,
xml_declaration=None, xml_declaration=None,
default_namespace=None, default_namespace=None,
method=None): method=None):
# assert self._root is not None
if not method: if not method:
method = "xml" method = "xml"
elif method not in _serialize: elif method not in _serialize:
# FIXME: raise an ImportError for c14n if ElementC14N is missing?
raise ValueError("unknown method %r" % method) raise ValueError("unknown method %r" % method)
if not encoding: if not encoding:
if method == "c14n": if method == "c14n":
encoding = "utf-8" encoding = "utf-8"
else: else:
encoding = "us-ascii" encoding = "us-ascii"
elif encoding == str: # lxml.etree compatibility.
encoding = "unicode"
else: else:
encoding = encoding.lower() encoding = encoding.lower()
if hasattr(file_or_filename, "write"): with _get_writer(file_or_filename, encoding) as write:
file = file_or_filename if method == "xml" and (xml_declaration or
else: (xml_declaration is None and
if encoding != "unicode": encoding not in ("utf-8", "us-ascii", "unicode"))):
file = open(file_or_filename, "wb") declared_encoding = encoding
if encoding == "unicode":
# Retrieve the default encoding for the xml declaration
import locale
declared_encoding = locale.getpreferredencoding()
write("<?xml version='1.0' encoding='%s'?>\n" % (
declared_encoding,))
if method == "text":
_serialize_text(write, self._root)
else: else:
file = open(file_or_filename, "w") qnames, namespaces = _namespaces(self._root, default_namespace)
if encoding != "unicode": serialize = _serialize[method]
def write(text): serialize(write, self._root, qnames, namespaces)
try:
return file.write(text.encode(encoding,
"xmlcharrefreplace"))
except (TypeError, AttributeError):
_raise_serialization_error(text)
else:
write = file.write
if method == "xml" and (xml_declaration or
(xml_declaration is None and
encoding not in ("utf-8", "us-ascii", "unicode"))):
declared_encoding = encoding
if encoding == "unicode":
# Retrieve the default encoding for the xml declaration
import locale
declared_encoding = locale.getpreferredencoding()
write("<?xml version='1.0' encoding='%s'?>\n" % declared_encoding)
if method == "text":
_serialize_text(write, self._root)
else:
qnames, namespaces = _namespaces(self._root, default_namespace)
serialize = _serialize[method]
serialize(write, self._root, qnames, namespaces)
if file_or_filename is not file:
file.close()
def write_c14n(self, file): def write_c14n(self, file):
# lxml.etree compatibility. use output method instead # lxml.etree compatibility. use output method instead
...@@ -853,6 +834,58 @@ class ElementTree: ...@@ -853,6 +834,58 @@ class ElementTree:
# -------------------------------------------------------------------- # --------------------------------------------------------------------
# serialization support # serialization support
@contextlib.contextmanager
def _get_writer(file_or_filename, encoding):
# returns text write method and release all resourses after using
try:
write = file_or_filename.write
except AttributeError:
# file_or_filename is a file name
if encoding == "unicode":
file = open(file_or_filename, "w")
else:
file = open(file_or_filename, "w", encoding=encoding,
errors="xmlcharrefreplace")
with file:
yield file.write
else:
# file_or_filename is a file-like object
# encoding determines if it is a text or binary writer
if encoding == "unicode":
# use a text writer as is
yield write
else:
# wrap a binary writer with TextIOWrapper
with contextlib.ExitStack() as stack:
if isinstance(file_or_filename, io.BufferedIOBase):
file = file_or_filename
elif isinstance(file_or_filename, io.RawIOBase):
file = io.BufferedWriter(file_or_filename)
# Keep the original file open when the BufferedWriter is
# destroyed
stack.callback(file.detach)
else:
# This is to handle passed objects that aren't in the
# IOBase hierarchy, but just have a write method
file = io.BufferedIOBase()
file.writable = lambda: True
file.write = write
try:
# TextIOWrapper uses this methods to determine
# if BOM (for UTF-16, etc) should be added
file.seekable = file_or_filename.seekable
file.tell = file_or_filename.tell
except AttributeError:
pass
file = io.TextIOWrapper(file,
encoding=encoding,
errors="xmlcharrefreplace",
newline="\n")
# Keep the original file open when the TextIOWrapper is
# destroyed
stack.callback(file.detach)
yield file.write
def _namespaces(elem, default_namespace=None): def _namespaces(elem, default_namespace=None):
# identify namespaces used in this tree # identify namespaces used in this tree
...@@ -1134,22 +1167,13 @@ def _escape_attrib_html(text): ...@@ -1134,22 +1167,13 @@ def _escape_attrib_html(text):
# @defreturn string # @defreturn string
def tostring(element, encoding=None, method=None): def tostring(element, encoding=None, method=None):
class dummy: stream = io.StringIO() if encoding == 'unicode' else io.BytesIO()
pass ElementTree(element).write(stream, encoding, method=method)
data = [] return stream.getvalue()
file = dummy()
file.write = data.append
ElementTree(element).write(file, encoding, method=method)
if encoding in (str, "unicode"):
return "".join(data)
else:
return b"".join(data)
## ##
# Generates a string representation of an XML element, including all # Generates a string representation of an XML element, including all
# subelements. If encoding is False, the string is returned as a # subelements.
# sequence of string fragments; otherwise it is a sequence of
# bytestrings.
# #
# @param element An Element instance. # @param element An Element instance.
# @keyparam encoding Optional output encoding (default is US-ASCII). # @keyparam encoding Optional output encoding (default is US-ASCII).
...@@ -1161,13 +1185,15 @@ def tostring(element, encoding=None, method=None): ...@@ -1161,13 +1185,15 @@ def tostring(element, encoding=None, method=None):
# @since 1.3 # @since 1.3
def tostringlist(element, encoding=None, method=None): def tostringlist(element, encoding=None, method=None):
class dummy:
pass
data = [] data = []
file = dummy() class DataStream(io.BufferedIOBase):
file.write = data.append def writable(self):
ElementTree(element).write(file, encoding, method=method) return True
# FIXME: merge small fragments into larger parts
def write(self, b):
data.append(b)
ElementTree(element).write(DataStream(), encoding, method=method)
return data return data
## ##
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment