开发者社区> 问答> 正文





  • imports
  • preparations (creating src.zip, these ZIPs are a given in my actual application)
  • actual work of program (modifying XMLs), starting at ` # read XMLs from zip `


import os
import tempfile
import zipfile
from xml.etree.ElementTree import Element, ElementTree, parse, tostring

# All scratch files used by this example live in the system temp directory.
tmp_dir = tempfile.gettempdir()
src_1 = os.path.join(tmp_dir, "one.xml")
src_2 = os.path.join(tmp_dir, "two.xml")
src_zip = os.path.join(tmp_dir, "src.zip")
trgt_appr1_zip = os.path.join(tmp_dir, "trgt_appr1.zip")
trgt_appr2_zip = os.path.join(tmp_dir, "trgt_appr2.zip")
trgt_appr3_zip = os.path.join(tmp_dir, "trgt_appr3.zip")

# file on hard disk that must be used due to ElementTree insufficiencies
tmp_xml_name = os.path.join(tmp_dir, "curr_xml.tmp")

# prepare src.zip: serialize two small documents to disk first
tree1 = ElementTree(Element('hello', {'beer': 'good'}))
tree1.write(src_1, encoding="UTF-8", xml_declaration=True)
tree2 = ElementTree(Element('scnd', {'äkey': 'a value'}))
tree2.write(src_2, encoding="UTF-8", xml_declaration=True)

# 'w' (not 'a') so that re-running the script rebuilds the archive instead of
# appending a second copy of every member to the existing one.
with zipfile.ZipFile(src_zip, 'w') as src:
    for arcname, xml_path in (("one.xml", src_1), ("two.xml", src_2)):
        # The files were written as UTF-8 bytes; copy them verbatim.
        with open(xml_path, 'rb') as xml_file:
            src.writestr(zinfo_or_arcname=arcname, data=xml_file.read())

# read XMLs from zip
updated_trees = []
with zipfile.ZipFile(src_zip, 'r') as zfile:
    for xml_name in zfile.namelist():
        # Close each member handle as soon as the document is parsed.
        with zfile.open(xml_name, 'r') as curr_file:
            tree = parse(curr_file)
        # modify tree
        tree.getroot().append(Element('new', {'newkey': 'new value'}))
        updated_trees.append((xml_name, tree))

# write to target files: open every target archive once for the whole batch,
# in 'w' mode so that repeated runs do not accumulate duplicate members.
with zipfile.ZipFile(trgt_appr1_zip, 'w') as trgt1_zip, \
        zipfile.ZipFile(trgt_appr2_zip, 'w') as trgt2_zip, \
        zipfile.ZipFile(trgt_appr3_zip, 'w') as trgt3_zip:

    for xml_name, updated_tree in updated_trees:

        # APPROACH 1 [DESIRED, BUT DOES NOT WORK]: write tree to zip-file
        # encoding in XML declaration missing
        # create byte representation of elementtree
        byte_representation = tostring(element=updated_tree.getroot(), encoding='UTF-8', method='xml')
        # write XML directly to zip
        trgt1_zip.writestr(zinfo_or_arcname=xml_name, data=byte_representation)

        # APPROACH 2 [WORKS IN THEORY, BUT DOES NOT WORK]: write tree to zip-file
        # encoding in XML declaration is faulty (is 'utf8', should be 'utf-8' or 'UTF-8')
        # create byte representation of elementtree
        byte_representation = tostring(element=updated_tree.getroot(), encoding='utf8', method='xml')
        # write XML directly to zip
        trgt2_zip.writestr(zinfo_or_arcname=xml_name, data=byte_representation)

        # APPROACH 3 [WORKS, BUT LACKS PERFORMANCE]: write to file, then read from file, then write to zip
        # write to file
        updated_tree.write(tmp_xml_name, encoding="UTF-8", method="xml", xml_declaration=True)
        # read from file
        with open(tmp_xml_name, 'r', encoding="utf-8") as tmp:
            string_representation = tmp.read()
        # write to zip
        trgt3_zip.writestr(zinfo_or_arcname=xml_name, data=string_representation.encode("utf-8"))



APPROACH 2 is the only way I could get an ElementTree object written with an actual XML declaration -- which then turns out to be invalid (`utf8` instead of `UTF-8`/`utf-8`).

APPROACH 1 would be most desired -- but fails during reading later in the pipeline, as the XML declaration is missing.

Question: How can I avoid writing the whole XML to disk first, only to read it back, write it to the zip, and delete it once the zip is done? What am I missing?

问题来源: stackoverflow

is大龙 2020-03-23 16:16:25 1204 0
1 条回答
取消 提交回答
  • You can use an io.BytesIO object. This lets you use ElementTree.write while avoiding writing the tree to disk:

    import zipfile
    from io import BytesIO
    from xml.etree.ElementTree import ElementTree, Element
    # Serialize into an in-memory buffer, so no temporary file is needed.
    buffer = BytesIO()
    document = ElementTree(Element('hello', {'beer': 'good'}))
    document.write(buffer, encoding='UTF-8', xml_declaration=True)
    # Store the buffered bytes as a single archive member.
    with zipfile.ZipFile('/tmp/test.zip', 'w') as archive:
        archive.writestr('test.xml', buffer.getvalue())

    If you are using Python 3.6 or higher, there's an even shorter solution: you can get a writable file object from the ZipFile object and pass it to ElementTree.write:

    import zipfile
    from xml.etree.ElementTree import ElementTree, Element
    document = ElementTree(Element('hello', {'beer': 'good'}))
    # Open the archive and a writable member in one statement; the member is
    # closed first, then the archive.
    with zipfile.ZipFile('/tmp/test.zip', 'w') as archive, archive.open('test.xml', 'w') as member:
        # Stream the serialized document straight into the archive member.
        document.write(member, encoding='UTF-8', xml_declaration=True)



    2020-03-23 16:16:29
    赞同 展开评论 打赏


低代码开发师(初级)实战教程 立即下载
冬季实战营第三期:MySQL数据库进阶实战 立即下载
阿里巴巴DevOps 最佳实践手册 立即下载