2016-12-03 11 views
1

Я пытаюсь разобрать XML в Python с помощью lxml и ElementTree, но это не работает из-за пространств имен. Не удается разобрать XML с пространствами имен в Python.

Я пробовал XPath, но безуспешно. Кроме того, как преобразовать XML-документ в кодировку UTF-8? Сейчас, чтобы его разобрать, мне приходится добавлять в XML объявление `<?xml version='1.0' encoding='UTF-8'?>`.

data = """<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/"> 
<![CDATA[<?xml version='1.0' encoding='UTF-8'?> 
<soapenv:Header> 
<messageHeader:messageHeader xmlns:messageHeader="http://www.xyx.co.nz/ismm/common/messageHeader/v1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="messageHeader:MessageHeader"> 
<messageHeader:application>THOM</messageHeader:application> 
<messageHeader:transactionId>BVCQWAC</messageHeader:transactionId> 
<messageHeader:correlationId>1771518</messageHeader:correlationId> 
<messageHeader:timeStamp>2016-11-18T20:41:16</messageHeader:timeStamp> 
</messageHeader:messageHeader> 
</soapenv:Header> 
<soapenv:Body> 
<submitSupplierPartner xmlns:customerBill="http://www.xyzaaa.com/ismm/common/customerBill/v1" xsi:type="messaging_supplierPartner:SubmitSupplierPartner"> 
<logisticsOrder> 
<interactionDateTime>2016-11-18T20:41:16</interactionDateTime> 
<businessInteractionRole xsi:type="bi:PartyInteractionRole"> 
<interactionRole>Customer</interactionRole> 
<partyRole xsi:type="customer:Customer"> 
<contactMedium xsi:type="party:DeliveryContact"> 
..... 
..... 

# SOAP envelope namespace URI, its "{uri}" form, and a prefix map for it.
namespace = "http://schemas.xmlsoap.org/soap/envelope/"
namespace_c = "{%s}" % namespace
NSMAP = dict(soapenv=namespace)
root = lxml.etree.fromstring(data)

# Uncomment to list every tag in the parsed tree:
# for i, element in enumerate(root.getiterator()):
#  print(element.tag)

# Header: print the .text of every correlationId found inside a messageHeader.
records = root.xpath(
    '//messageHeader:messageHeader/messageHeader:correlationId',
    namespaces=dict(messageHeader='http://www.xyx.co.nz/ismm/common/messageHeader/v1'),
)
for record in records:
    print(record.text)

# Body: print the .text of every un-prefixed submitSupplierPartner element.
# The customerBill mapping is passed along, but the expression itself does
# not use that prefix.
records = root.xpath(
    '//submitSupplierPartner',
    namespaces=dict(customerBill="http://www.xyzaaa.com/ismm/common/customerBill/v1"),
)
for record in records:
    print(record.text)
+0

Вы читали/пробовали http://stackoverflow.com/questions/14853243/parsing-xml-with-namespace-in-python-via-elementtree?rq=1 или http://stackoverflow.com/questions/5572247/how-to-find-xml-elements-via-xpath-in-python-in-a-namespace-agnostic-way?rq=1? –

ответ

0

Это сработало для меня.

Здесь `data` — это XML-документ.

# Parse the XML string held in `data` into an element tree.
root = lxml.etree.fromstring(data)

# orderId/uniqueCreatorId
# The body elements are addressed without a prefix. Start from None so the
# name is always bound, even when the document has no orderId.
orderID = None
records = root.xpath('//submitSupplierPartner/logisticsOrder/orderId')
for record in records:
    orderID = record.text
    print(orderID)

# sim and devices ID
records = root.xpath('//submitSupplierPartner/logisticsOrder/resourceOrderItem/resourceSpecification/ID')
hardwareID = [record.text for record in records]
print(hardwareID)

# get the no of items for shipping
noOfItems = len(hardwareID)
print("Total items for shipping are :")
print(noOfItems)

# sim and devices skuNumber
records = root.xpath('//submitSupplierPartner/logisticsOrder/resourceOrderItem/resourceSpecification/skuNumber')
hardwaresku = [record.text for record in records]
print(hardwaresku)

# sim and devices itemId
records = root.xpath('//submitSupplierPartner/logisticsOrder/resourceOrderItem/itemId')
hardwareitemID = [record.text for record in records]
print(hardwareitemID)

# correlation ID/Rom
# The URI mapped to the messageHeader prefix must equal the document's
# xmlns:messageHeader declaration exactly ("xyx", not "xyz"); a mismatch
# raises no error, the query just returns an empty list.
correlationID = None
records = root.xpath(
    '//messageHeader:messageHeader/messageHeader:correlationId/text()',
    namespaces={"messageHeader": "http://www.xyx.co.nz/ismm/common/messageHeader/v1"},
)
for record in records:
    # text() yields strings directly, so no .text access is needed here.
    correlationID = record
    print(record)