Python – How to manage an XML file

By | 22/05/2024

In this post, we will see how to manage an XML file in Python using the “xml.etree.ElementTree” module.
The “xml.etree.ElementTree” module is part of Python’s standard library, offering a simple and effective way to work with XML data. This module supports parsing XML from strings or files, navigating and searching the XML tree, and modifying or creating new XML documents. It’s a go-to choice for basic XML file operations due to its availability in the standard Python installation.
For more complex XML processing needs, the ‘lxml’ library is a powerful alternative. It is a third-party library that provides extensive functionalities for working with XML and HTML. It’s known for its performance and comprehensive feature set, including full XPath support, XSLT transformation, and schema validation.

Let’s see some examples of how to manage an XML file with “xml.etree.ElementTree”:

First of all, we define an XML file called data.xml:
[DATA.XML]

<library>
    <book id="1">
        <title>Python Essentials</title>
        <author>John Doe</author>
        <year>2020</year>
    </book>
    <book id="2">
        <title>Learning XML</title>
        <author>Jane Smith</author>
        <year>2019</year>
    </book>
</library>


Then, we define a file called CoreXML.py, containing the CoreXML class used to manage the XML file:
[COREXML.PY]

import xml.etree.ElementTree as ET


class CoreXML:
    """Holds an XML file parsed with ``xml.etree.ElementTree``."""

    def __init__(self, file_name) -> None:
        """Parse *file_name* and keep the resulting tree and its root element."""
        self.file_name = file_name
        # Parse the file once and keep the ElementTree object.
        tree = ET.parse(file_name)
        self.file_xml = tree
        # For data.xml the root element is <library>.
        self.root_xml = tree.getroot()


Finally, we define the file main.py:
[MAIN.PY]

from CoreXML import CoreXML

# Parse data.xml; the loaded document is reachable through obj_core_xml.
obj_core_xml = CoreXML(file_name='data.xml')



Now, we will add to the CoreXML.py file all the methods needed to manage the XML file.

READ:

def read_values(self):
    """Print one line for every <book> element directly under the root.

    Each line has the form
    ``ID: <id> - Title: <title>, Author: <author>, Year: <year>``.
    A missing ``id`` attribute or a missing/empty <title>, <author> or
    <year> child is printed as an empty field instead of raising
    ``AttributeError``.
    """
    for book in self.root_xml.findall('book'):
        # book_id rather than id, so the builtin id() is not shadowed.
        book_id = book.get('id', '')
        # findtext() returns the default when the child element is absent,
        # whereas find(...).text fails on the None returned by find().
        title = book.findtext('title', default='')
        author = book.findtext('author', default='')
        year = book.findtext('year', default='')
        print(f'ID: {book_id} - Title: {title}, Author: {author}, Year: {year}')

from CoreXML import CoreXML

# Load the sample document and print every book it contains.
obj_core_xml = CoreXML(file_name='data.xml')
obj_core_xml.read_values()


UPDATE:

def update_value(self, id, year):
    """Set the <year> of every book whose ``id`` attribute equals *id*.

    Both *id* and *year* are converted with ``str()`` before use. A matching
    book that has no <year> child gets one created. The file is written back
    only when at least one book matched, so a call with an unknown id leaves
    the file on disk untouched.
    """
    target_id = str(id)
    year_text = str(year)
    changed = False

    for book in self.root_xml.findall('book'):
        if book.get('id') != target_id:
            continue
        year_node = book.find('year')
        if year_node is None:
            # find() returns None for a missing child; create the element
            # instead of failing on ``None.text``.
            year_node = ET.SubElement(book, 'year')
        year_node.text = year_text
        changed = True

    if changed:
        # save the file with the updated value
        self.file_xml.write(self.file_name)

from CoreXML import CoreXML

# Load the sample document.
obj_core_xml = CoreXML(file_name='data.xml')

# Print the books, change the year of book 1, then print them again.
obj_core_xml.read_values()
obj_core_xml.update_value(id=1, year=2030)
obj_core_xml.read_values()


INSERT:

def add_book(self, id, title, author, year):
    """Append a new <book> element to the root and save the file.

    *id* becomes the ``id`` attribute; *title*, *author* and *year* become
    child elements. Values are converted with ``str()`` (a ``None`` title or
    author stays an empty element) because ElementTree can only serialise
    string text.

    Any error is reported with ``print`` rather than raised. On failure the
    new element is removed again, so the in-memory tree does not contain a
    book that was never saved.
    """
    new_book = None
    try:
        new_book = ET.SubElement(self.root_xml, 'book', id=str(id))
        for tag, value in (('title', title), ('author', author)):
            child = ET.SubElement(new_book, tag)
            # Non-string text would make write() fail part-way through.
            child.text = None if value is None else str(value)
        ET.SubElement(new_book, 'year').text = str(year)

        # save the file with the new book
        self.file_xml.write(self.file_name)
    except Exception as e:
        # Deliberately best-effort: report the problem and undo the change.
        if new_book is not None:
            self.root_xml.remove(new_book)
        print(f"Failed to add book: {e}")

from CoreXML import CoreXML

# Load the sample document.
obj_core_xml = CoreXML(file_name='data.xml')

# Print the books, add a third one, then print them again.
obj_core_xml.read_values()
obj_core_xml.add_book(id=3, title="Learning C#", author="Damiano Abballe", year=2024)
obj_core_xml.read_values()


DELETE:

def delete_book(self, id):
    """Remove every <book> whose ``id`` attribute equals *id* and save the file.

    *id* is converted with ``str()`` before comparison. The file is written
    back only when at least one book was removed, so a call with an unknown
    id leaves the file on disk untouched.
    """
    target_id = str(id)
    # select the books by id
    matches = [
        book for book in self.root_xml.findall('book')
        if book.get('id') == target_id
    ]
    for book in matches:
        # detach the book from the root element
        self.root_xml.remove(book)

    if matches:
        # save the file without the removed books
        self.file_xml.write(self.file_name)

from CoreXML import CoreXML

# Load the sample document.
obj_core_xml = CoreXML(file_name='data.xml')

# Print the books, delete book 3, then print them again.
obj_core_xml.read_values()
obj_core_xml.delete_book(id=3)
obj_core_xml.read_values()


[COREXML.PY]

import xml.etree.ElementTree as ET


class CoreXML:
    """Read and edit the <book> entries of an XML file.

    The file is parsed once in ``__init__``. ``update_value`` and
    ``delete_book`` write the tree back to ``file_name`` only when they
    actually changed something; ``add_book`` writes after adding the book.
    """

    def __init__(self, file_name) -> None:
        """Parse *file_name* and keep the resulting tree and its root element."""
        self.file_name = file_name
        # loading file xml
        self.file_xml = ET.parse(file_name)
        # in this case, the root of the xml file is <library>
        self.root_xml = self.file_xml.getroot()

    def _save(self):
        """Write the in-memory tree back to ``self.file_name``."""
        self.file_xml.write(self.file_name)

    def _books_with_id(self, book_id):
        """Return the <book> children of the root whose ``id`` equals ``str(book_id)``."""
        wanted = str(book_id)
        return [
            book for book in self.root_xml.findall('book')
            if book.get('id') == wanted
        ]

    def read_values(self):
        """Print one line for every <book> element directly under the root.

        A missing ``id`` attribute or a missing/empty <title>, <author> or
        <year> child is printed as an empty field instead of raising
        ``AttributeError``.
        """
        for book in self.root_xml.findall('book'):
            # book_id rather than id, so the builtin id() is not shadowed.
            book_id = book.get('id', '')
            # findtext() returns the default when the child element is absent.
            title = book.findtext('title', default='')
            author = book.findtext('author', default='')
            year = book.findtext('year', default='')
            print(f'ID: {book_id} - Title: {title}, Author: {author}, Year: {year}')

    def update_value(self, id, year):
        """Set the <year> of every book whose ``id`` attribute equals *id*.

        A matching book without a <year> child gets one created. Nothing is
        written when no book matched.
        """
        matches = self._books_with_id(id)
        year_text = str(year)
        for book in matches:
            year_node = book.find('year')
            if year_node is None:
                # find() returns None for a missing child; create the element
                # instead of failing on ``None.text``.
                year_node = ET.SubElement(book, 'year')
            year_node.text = year_text

        if matches:
            # save the file with the updated value
            self._save()

    def add_book(self, id, title, author, year):
        """Append a new <book> element to the root and save the file.

        Values are converted with ``str()`` (a ``None`` title or author stays
        an empty element) because ElementTree can only serialise string text.
        Any error is reported with ``print`` rather than raised, and the new
        element is removed again so the tree matches what was saved.
        """
        new_book = None
        try:
            new_book = ET.SubElement(self.root_xml, 'book', id=str(id))
            for tag, value in (('title', title), ('author', author)):
                child = ET.SubElement(new_book, tag)
                # Non-string text would make write() fail part-way through.
                child.text = None if value is None else str(value)
            ET.SubElement(new_book, 'year').text = str(year)

            # save the file with the new book
            self._save()
        except Exception as e:
            # Deliberately best-effort: report the problem and undo the change.
            if new_book is not None:
                self.root_xml.remove(new_book)
            print(f"Failed to add book: {e}")

    def delete_book(self, id):
        """Remove every <book> whose ``id`` attribute equals *id* and save the file.

        Nothing is written when no book matched.
        """
        matches = self._books_with_id(id)
        for book in matches:
            # detach the book from the root element
            self.root_xml.remove(book)

        if matches:
            # save the file without the removed books
            self._save()



Leave a Reply

Your email address will not be published. Required fields are marked *