Amara 2.0 quick reference
Contents
Core node trees
Parsing
import amara
doc = amara.parse("<spam/>") #string
doc = amara.parse(open("spam.xml", "r")) #stream (file-like object)
doc = amara.parse("spam.xml") #OS file path
doc = amara.parse("http://example.com/spam.xml") #URL
First sample document
The next two sections are based on this sample document:
<a x="1"><b>i</b><c>j<d/>k</c><b>l</b></a>
Basic XML data access
doc.xml_type #"document"
amara.tree.entity.xml_type #"document"
a = doc.xml_elements[0] #element node
a = doc.xml_children[0] #element node
a.xml_type #"element"
amara.tree.element.xml_type #"element"
a.xml_attributes[u'x'] #u"1"
a.xml_local #u'a'
a.xml_qname #u'a', may not match xml_local if doc uses namespaces
a.xml_name #(None, u'a') - namespace/local name tuple (universal name)
a.xml_children #list with two b instances and one c instance
a.xml_parent #the doc entity -- returns the immediate parent
a.xml_root #the doc entity -- returns the overall entity (root node)
doc.xml_index(a) #0: the index of a among doc's children; supports optional start and stop args, like lists
b1 = a.xml_first_child #The first b child
b2 = a.xml_last_child #The second b child
b1.xml_following_sibling #The c child
b2.xml_preceding_sibling #The c child
a.xml_base #Base URI of the node
a.xml_attributes #{(None, u"x"): u"1"}, None => no namespace
XPath
doc.xml_select(u"//b") #sequence of 2 b instances (XPath node set = Python sequence)
doc.xml_select(u"count(//b)") #Number(2.0)
doc.xml_select(u"/a/@x") #String(u'1')
doc.xml_select(u"/a/@x = '1'") #Boolean(True)
b1 = a.xml_select(u"b")[0]
Basic document update
new_text = amara.tree.text(u'New Content')
a.xml_append(new_text) #Add a new text node to a
#Add a new text node to a immediately after its first b element child
new_text = amara.tree.text(u'New Content')
a.xml_insert(1, new_text) #Add a child to an absolute location
a.xml_insert(a.xml_index(b1) + 1, new_text) #Add a child relative to another (immediately after b1)
#Remove the first e child from b
e1 = b.xml_select(u"e")[0]
b.xml_remove(e1)
a.xml_replace(b1, new_text) #Replace the b1 element with a text node
doc.xml_normalize() #Ensure all descendants have normalized text nodes (i.e. no adjacent text nodes)
New document creation
#Create a document with a single top-level element named root
doc = amara.tree.entity()
doc.xml_append(amara.tree.element(None, u'root'))
Namespaces
Sample document:
<n:a xmlns:n="urn:x-bogus1" n:x="1"><b xmlns="urn:x-bogus2">c</b></n:a>
Code snippets:
NS1 = u'urn:x-bogus1'
NS2 = u'urn:x-bogus2'
a = doc.xml_children[0] #element node
a.xml_attributes[(NS1, u'x')] #u"1", the attribute is in namespace NS1 so the key is a (namespace, local name) tuple
a.xml_local #u'a'
a.xml_prefix #u'n'
a.xml_qname #u'n:a', may not match xml_local if doc uses namespaces
a.xml_name #(u'urn:x-bogus1', u'a')
a.xml_namespace #u'urn:x-bogus1'
Amara bindery
Parsing the sample document
from amara import bindery
XML = '<a x="1"><b>i</b><c>j<d/>k</c><b>l</b></a>'
doc = bindery.parse(XML)
Additional features for basic XML data access
doc.a #object representing a element (instance of class a)
a = doc.xml_elements[0] #object representing a element (instance of class a)
doc[u'a'] #object representing a element (instance of a)
doc.a.b #first instance of b
a = doc.a.xml_elements[0] #first instance of b
doc.a.x #u"1"
doc.a[u'x'] #u"1"
doc.a.b[0] #first instance of b
doc.a.b[1] #second instance of b
doc.a[u'b'][1] #second instance of b
iter(doc.a.b) #iterator over both instances of b
unicode(doc.a.b) #u"i"
unicode(doc.a.b[1]) #u"l"
unicode(doc.a) #u"ijkl"
Additional features for manipulation
doc.a.xml_clear() #Remove all children from a
doc.a.c.d = u'hello' #Adds a text node (removes all children of d if any)
del doc.a.c.d #Delete node
Parts below are work in progress. Please ignore...
Reserialize to XML
xmlstr = doc.xml() #Reserialize the document to a string
xmlstr = doc.xml(indent=u"yes") #Pretty-print the document to a string
