Parsing XML attrib with Python, ' and " character splitting

Parsing XML attrib with Python, ' and " character splitting - python

I'm working with the NVD XML feed and attempting to parse and split it so that I can eventually load it into a DB. The issue I'm running into is that the values in the parsed XML attrib are wrapped in either " or ', and I'm not able to split those strings reliably. I've included the code and the entry that it currently fails on. The expected output is
product,america's_first_federal_credit_union,america's_first_fcu_mobile_banking
Code
#!/usr/bin/env python
import os
import sys
import time
from subprocess import call
import xml.etree.ElementTree
import re
range_from = 2017
range_to = 2017
def process_entry(entry):
cve = entry.attrib.get("name")
print cve
cpes = get_cpes_affected(entry)
def get_cpes_affected(entry):
child = []
for e in entry.iter():
if "}prod" in e.tag:
print e.attrib
print unichr(34)
if unichr(34) in e.attrib:
print "hey yo"
child.append("product," + str(e.attrib).split('"')[1] + "," + str(e.attrib).split('"')[3])
else:
child.append("product," + str(e.attrib).split("'")[3] + "," + str(e.attrib).split("'")[7])
#print e.tag, e.attrib
if "'prev'" in e.attrib:
child.append("version," + str(e.attrib).split("'")[7] + "," + str(e.attrib).split("'")[3])
if "}vers" in e.tag and "'prev'" not in e.attrib:
child.append("version," + str(e.attrib).split("'")[3] + ",")
#print e.tag, e.attrib
for derp in child:
print derp
for i in range(range_from, range_to+1):
os.system("wget -O tmp.zip https://nvd.nist.gov/download/nvdcve-%i.xml.zip" % i)
os.system("unzip -o tmp.zip")
e = xml.etree.ElementTree.parse('nvdcve-%i.xml' % i).getroot()
for entry in e:
process_entry(entry)
Example of an XML entry that is being parsed
<entry type="CVE" name="CVE-2017-5916" seq="2017-5916" published="2017-05-05" modified="2017-05-16" severity="Medium" CVSS_version="2.0" CVSS_score="4.3" CVSS_base_score="4.3" CVSS_impact_subscore="2.9" CVSS_exploit_subscore="8.6" CVSS_vector="(AV:N/AC:M/Au:N/C:P/I:N/A:N)">
<desc>
<descript source="cve">The America's First Federal Credit Union (FCU) Mobile Banking app 3.1.0 for iOS does not verify X.509 certificates from SSL servers, which allows man-in-the-middle attackers to spoof servers and obtain sensitive information via a crafted certificate.</descript>
</desc>
<loss_types>
<conf/>
</loss_types>
<range>
<network/>
</range>
<refs>
<ref source="MISC" url="https://medium.com/#chronic_9612/follow-up-76-popular-apps-confirmed-vulnerable-to-silent-interception-of-tls-protected-data-64185035029f" adv="1">https://medium.com/#chronic_9612/follow-up-76-popular-apps-confirmed-vulnerable-to-silent-interception-of-tls-protected-data-64185035029f</ref>
</refs>
<vuln_soft>
<prod name="america's_first_fcu_mobile_banking" vendor="america's_first_federal_credit_union">
<vers num="3.1.0" prev="1" edition=":~~~iphone_os~~"/>
</prod>
</vuln_soft>
Entry it fails on
{'vendor': "america's_first_federal_credit_union", 'name': "america's_first_fcu_mobile_banking"}
And just to include an example of a string it's able to split without issue
{'vendor': 'emirates_nbd_bank_p.j.s.c', 'name': 'emirates_nbd_ksa'}
Sorry forgot to include the error
Traceback (most recent call last):
File "prev-version-load.py", line 49, in <module>
process_entry(entry)
File "prev-version-load.py", line 18, in process_entry
cpes = get_cpes_affected(entry)
File "prev-version-load.py", line 33, in get_cpes_affected
child.append("product," + str(e.attrib).split("'")[3] + "," + str(e.attrib).split("'")[7])
IndexError: list index out of range

This has nothing to do with parsing xml, but with how you format the output.
Unlike in shell scripting, where most things are just strings and you can fiddle with strings to get the output you want, Python is an object-oriented language, and objects in Python have types. In particular, e.attrib is a dictionary, and you can't do string operations on a dictionary.
I'd suggest using ElementTree's findall() method instead of doing what I think you were trying to do. For example, I think this is what you are really trying to do:
#!/usr/bin/env python
from xml.etree import ElementTree as ET
range_from = 2017
range_to = 2017

# Namespace map for the NVD CVE 1.2 feed. Defined ahead of the functions
# that read it so the module can be read top to bottom.
namespaces = {'nvd': 'http://nvd.nist.gov/feeds/cve/1.2'}
# OPTIONAL: registering namespace is useful for outputting XML with ET.tostring()/ET.dump()
#for prefix, ns in namespaces.items():
#    ET.register_namespace(prefix, ns)


def process_entry(entry):
    """Print the CVE name of *entry* and report its affected products.

    entry: an ElementTree element for one <entry> of the feed.
    Returns the list of CSV lines produced by get_cpes_affected().
    """
    cve = entry.attrib.get("name")
    print(cve)
    return get_cpes_affected(entry)


def get_cpes_affected(entry):
    """Print and return the product/version CSV lines for *entry*.

    Every <prod> under <vuln_soft> yields "product,<vendor>,<name>",
    followed by one "version,<value>," line per <vers> child. Attribute
    values are read straight from the attrib dictionaries, so quotes or
    apostrophes inside a value need no special handling.

    Raises KeyError if a <prod> lacks 'vendor' or 'name', or if a <vers>
    lacks the attribute selected for it.
    """
    prods = entry.findall('nvd:vuln_soft/nvd:prod', namespaces=namespaces)
    # Diagnostic dump of the raw attribute dictionaries.
    for prod in prods:
        print(prod.attrib)
        print('"')

    lines = []
    for prod in prods:
        line = "product,{},{}".format(prod.attrib['vendor'], prod.attrib['name'])
        print(line)
        lines.append(line)
        for vers in prod.findall('nvd:vers', namespaces=namespaces):
            # A non-empty edition wins, then the prev flag, then the number.
            if vers.get('edition'):
                line = "version,{},".format(vers.attrib['edition'])
            elif vers.get('prev') == '1':
                line = "version,{},".format(vers.attrib['prev'])
            else:
                line = "version,{},".format(vers.attrib['num'])
            print(line)
            lines.append(line)
    return lines


# Only parse the feed files when run as a script, so the functions above
# can be imported without touching the filesystem.
if __name__ == "__main__":
    for i in range(range_from, range_to+1):
        e = ET.parse('nvdcve-%i.xml' % i).getroot()
        for entry in e:
            process_entry(entry)

Consider replacing...
if "}prod" in e.tag:
print unichr(34)
if unichr(34) in e.attrib:
print "hey yo"
child.append("product," + str(e.attrib).split('"')[1] + "," + str(e.attrib).split('"')[3])
else:
child.append("product," + str(e.attrib).split("'")[3] + "," + str(e.attrib).split("'")[7])
#print e.tag, e.attrib
if "'prev'" in e.attrib:
child.append("version," + str(e.attrib).split("'")[7] + "," + str(e.attrib).split("'")[3])
if "}vers" in e.tag and "'prev'" not in e.attrib:
child.append("version," + str(e.attrib).split("'")[3] + ",")
With...
reg=r"\"|'(?=[^\"]*')|'(?=\W*\")"
if "prod" in e.tag:
#print(re.split(reg,str(e.attrib)))
child.append("product," + re.split(reg,str(e.attrib))[3] + "," + re.split(reg,str(e.attrib))[7])
#print e.tag, e.attrib
if "prev" in e.attrib:
child.append("version," + re.split(reg,str(e.attrib))[7] + "," + re.split(reg,str(e.attrib))[3])
if "vers" in e.tag and "prev" not in e.attrib:
child.append("version," + re.split(reg,str(e.attrib))[3] + ",")
Let me know if this works, I will explain.
UPDATE
Even better solution is below:-
if "prod" in e.tag:
    # Vendor first, then product name: the expected output is
    # "product,<vendor>,<name>". Reading the attrib dictionary directly
    # means quotes inside the values never matter.
    child.append("product," + e.attrib['vendor'] + "," + e.attrib['name'])
if "prev" in e.attrib:
    # Element carries a 'prev' attribute: emit it followed by the number.
    child.append("version," + e.attrib['prev'] + "," + e.attrib['num'])
if "vers" in e.tag and "prev" not in e.attrib:
    # Plain version element: number only, with an empty trailing field.
    child.append("version," + e.attrib['num'] + ",")
A working example with your given xml is here for all three cases yours, my original solution and updated solution.

Related

Why do I have different api responses from Openweathermap JSON and PyOWM library?

I am using two different ways to get current weather and I have got different data from two API.
I suspect PyOWM doesn't work properly: if I change the city and run the script several times, it keeps returning the same data and shows the same figures no matter which city I type into the script. At least PyOWM shows weather pretty close to the real conditions the first time it is launched. The web API from https://openweathermap.org/ works accurately and I don't have any problems with its JSON response, but PyOWM's response seems to show random data. Of course, I could forget about PyOWM and never use it, but I am new to this sort of discrepancy between API responses, and I would like to know whether I am doing something wrong or where I screwed up.
web API https://openweathermap.org/current
import json, requests
# Query the OpenWeatherMap current-weather endpoint for `place` and print
# a short summary (description plus current/max/min temperature).
place = "London"
apikey = "e4784f34c74efe649018567223752b21"
lang = "en"
# Let requests build the query string so that values such as city names
# containing spaces are URL-encoded correctly.
r = requests.get(
    "http://api.openweathermap.org/data/2.5/weather",
    params={"q": place, "appid": apikey, "lang": lang, "units": "metric"},
    timeout=20
)
# Decode the JSON body once instead of serialising it and re-parsing it
# for every field that is read.
api_answer = r.json()
main = api_answer["main"]
weather_is = "Now in " + place + ": " + api_answer["weather"][0]["description"] + ".\n"
t_txt = "Temperature:\n"
t_now = "now: " + str(main["temp"]) + "\n"
t_max = "maximum: " + str(main["temp_max"]) + "\n"
t_min = "minimum: " + str(main["temp_min"])
final_txt = weather_is + t_txt + t_now + t_max + t_min
print(final_txt)
PyOWM API https://pyowm.readthedocs.io/en/latest/usage-examples-v2/weather-api-usage-examples.html
import pyowm
# Fetch the current weather for `place` through PyOWM and print a short
# summary (status plus current/max/min temperature in Celsius).
owm = pyowm.OWM('e4784f34c74efe649018567223752b21', language = "en")
place = "London"
# Pass the variable, not the literal string 'place': with the literal, the
# same location was looked up on every run regardless of the value of
# `place`, which is why the output never followed the chosen city.
observation = owm.weather_at_place(place)
w = observation.get_weather()
print("Now in " + place + ": " + w.get_detailed_status() + ".")
# Read the temperature dictionary once and pick the three values from it.
temperature = w.get_temperature('celsius')
temperature_at_place_now = temperature["temp"]
temperature_at_place_max = temperature["temp_max"]
temperature_at_place_min = temperature["temp_min"]
print("Temperature:")
print("now: " + str(temperature_at_place_now))
print("maximum: " + str(temperature_at_place_max))
print("minimum: " + str(temperature_at_place_min))
[web api output] 1 [pyowm api output] 2

Arcmap script will not print messages in arcmap console

I have a Python script for Arcmap that I wrote. I'm trying to create a tool that reprojects all the feature classes within the workspace to a specified feature class.
The problem that I'm having is that I cannot get Arcmap to print the "completed" messages. The messages that I want to have appear will print when I hard-code the variables and run it as a script, but they will not print in Arcmap. You can see in the code below that I have specific printed messages that I want printed, but they just won't appear.
Code:
#Import modules
import arcpy, os
#Set workspace directory
from arcpy import env
#Define workspace
# Reproject every feature class in the workspace (tool parameter 0) whose
# spatial reference differs from that of the target feature class (tool
# parameter 1), then report what was done.
inWorkspace = arcpy.GetParameterAsText(0)
env.workspace = inWorkspace
env.overwriteOutput = True
try:
    #Define local feature class to reproject to:
    targetFeature = arcpy.GetParameterAsText(1)
    #Describe the target feature class and take its spatial reference
    inFc = arcpy.Describe(targetFeature)
    sRef = inFc.spatialReference
    #List the feature classes in the workspace
    fcList = arcpy.ListFeatureClasses()
    #Loop to re-define the feature classes and print the messages:
    for fc in fcList:
        desc = arcpy.Describe(fc)
        if desc.spatialReference.name != sRef.name:
            print("Projection of " + str(fc) + " is " + desc.spatialReference.name + ", so re-defining projection now:\n")
            newFc = arcpy.Project_management(fc, "projected_" + fc, sRef)
            newFeat = arcpy.Describe(newFc)
            # The last geoprocessing message describes the outcome of the
            # Project call that has just run.
            count = arcpy.GetMessageCount()
            print("The reprojection of " + str(newFeat.baseName) + " " + arcpy.GetMessage(count-1) + "\n")
    #Find out which feature classes have been reprojected
    outFc = arcpy.ListFeatureClasses("projected_*")
    #Print a custom message describing which feature classes were reprojected
    for fc in outFc:
        desc = arcpy.Describe(fc)
        # Drop everything from the first "." onwards. partition() leaves
        # the name intact when there is no ".", whereas slicing with
        # find() (which returns -1 then) would cut off the last character.
        name = desc.name.partition(".")[0]
        name = name.split("_")
        name = name[1] + "_" + name[0]
        print("The new file that has been reprojected is named " + name + "\n")
except arcpy.ExecuteError:
    # Geoprocessing failures are reported by the severity check below.
    pass
severity = arcpy.GetMaxSeverity()
if severity == 2:
    # GetMessages(severity) returns all messages of that severity;
    # GetMessage(i) would return only the single message at index i.
    print("Error occurred:\n{0}".format(arcpy.GetMessages(2)))
elif severity == 1:
    # Only one argument is passed to format(), so the placeholder must be
    # {0}; {1} raised IndexError.
    print("Warning raised:\n{0}".format(arcpy.GetMessages(1)))
else:
    print("Script complete")
When I upload a script into an Arcmap toolbox, the following lines (From the above code) will NOT print:
print "Projection of " + str(fc) + " is " + desc.spatialReference.name + ", so re-defining projection now:\n"
print "The reprojection of " + str(newFeat.baseName) + " " + arcpy.GetMessage(count-1) + "\n"
print "The new file that has been reprojected is named " + name + "\n"
How can I fix this?

print only shows messages when your script is run in a Python interpreter. To emit messages while the script is running as a tool in an ArcGIS toolbox, you need to use arcpy.AddMessage():
# AddMessage() sends the text to the geoprocessing tool's message output.
arcpy.AddMessage("Projection of {0} is {1}, so re-defining projection now: ".format(str(fc), desc.spatialReference.name))

Websphere 9X unable to deploy ear file via jython script

I'm writing due to an error I just cannot seem to work around.
WASX7017E: Exception received while running file "/root/wsDeploy.py"; exception information: com.ibm.ws.scripting.ScriptingException: WASX7115E: Cannot read input file "/opt/IBM/WebSphere/AppServer/profiles/AppSrv01/installableApps/my_ear_file.ear,'[-node DefaultNode01 -cell DefaultCell01 -server server1 -MapWebModToVH [[ "Our War One" first_war.war,WEB-INF/web.xml default_host ]["Our War Two" second_war.war,WEB-INF/web.xml default_host]["Our War Three" third_war.war,WEB-INF/web.xml default_host]]]'"
Now that script has the following variables and syntax:
ParameterStr = "-node DefaultNode01 -cell DefaultCell01 -server server1 -MapWebModToVH [[ \"Our War One\" first_war.war,WEB-INF/web.xml default_host ][\"Our War Two\" second_war.war,WEB-INF/web.xml default_host][\"Our War Three\" third_war.war,WEB-INF/web.xml default_host]]"
EAR_FILE=/path/to/file/my_ear_file.ear
This is the portion of code which is choking when executed by websphere in jython(2.7) (Also fails in 2.1)
elif UpdateExistingorNewApp == "INITIAL" and ConditionForUpdate == 0:
AdminApp.install(EAR_FILE + "," + "'" + "[" + ParameterStr + "]" + "'")
AdminConfig.save()
I have tried
AdminApp.install( 'EAR_FILE' + "," + "'" + "[" + ParameterStr + "]" + "'")
AdminApp.install( "'" + EAR_FILE + "'" +"," + "'" + "[" + ParameterStr + "]" + "'")
AdminApp.install( \' EAR_FILE \' + "," + "'" + "[" + ParameterStr + "]" + "'")
I have even tried adding the "[ ]" pair inside the ParameterStr variables as well.
I have looked at the following documents for guidance:
https://www.ibm.com/developerworks/community/forums/html/topic?id=43cee700-9074-49e1-9223-7c4db2d89680
https://developer.ibm.com/answers/questions/258458/ucd-install-application-fails-with-wasx7115e-canno/
I have verified the path to the ear, the permissions on the ear, and the ownership of the ear.
Am I having an issue similar to globbing? The input file is there, and is world readable. I have even run the script from the same location as the (installableApps) folder for the AppSrv01 Profile.
Any help would be highly appreciated.
EDIT:
So we're past this part now. I imagine that I will need to start escaping any metacharacters. I'm posting what a fully constructed argument looks like:
AdminApp.install('/opt/IBM/WebSphere/AppServer/installableApps/my_ear_file.ear','[-node DefaultNode01 -cell DefaultCell01 -server server1 -MapWebModToVH [[ \"Our War One\" first_war.war,WEB-INF/web.xml default_host ][\"Our War Two\" second_war.war,WEB-INF/web.xml default_host][\"Our War Three\" third_war.war,WEB-INF/web.xml default_host]]]')
Our argument is slightly different at this time, but that appears to still be acceptable to the interpreter
AdminApp.install( /opt/IBM/WebSphere/AppServer/installableApps/my_ear_file.ear,'[-node DefaultNode01 -cell DefaultCell01 -server server1 -MapWebModToVH [[ \"Our War One\" first_war.war,WEB-INF/web.xml default_host ][\"Our War Two\" second_war.war,WEB-INF/web.xml default_host][\"Our War Three\" third_war.war,WEB-INF/web.xml default_host]]]')
So that generates a
java.lang.IllegalArgumentException: java.lang.IllegalArgumentException: WASX7122E: Expected "-" not found.
So I am now trying to see which meta characters I can deal with via trial and errors.
I am basing this off of the comment response by kgibm.

You're concatenating the ear file name with the options, whereas those are two parameters separated by a comma. Try:
# Two separate arguments: the EAR path and the bracketed option string.
# The options must begin with "[" -- wrapping them in literal apostrophes
# puts a quote character in front of the first "-option".
AdminApp.install(EAR_FILE, "[" + ParameterStr + "]")

Parsing JSON in Python

I have below query stored in a variable I got and I need to fetch value of 'resource_status'.
I need 'UPDATE_IN_PROGRESS'
As requested, putting the code here. The variable evntsdata is storing the events list.
try:
evntsdata = str(hc.events.list(stack_name)[0]).split(" ") # this is the variable that is getting the JSON response (or so)
#print(evntsdata[715:733])
#event_handle = evntsdata[715:733]
if event_handle == 'UPDATE_IN_PROGRESS':
loopcontinue = True
while loopcontinue:
evntsdata = str(hc.events.list(stack_name)[0]).split(" ")
#event_handle = evntsdata[715:733]
if (event_handle == 'UPDATE_COMPLETE'):
loopcontinue = False
print(str(timestamp()) + " " + "Stack Update is Completed!" + ' - ' + evntsdata[-3] + ' = ' + evntsdata[-1])
else:
print(str(timestamp()) + " " + "Stack Update in Progress!" + ' - ' + evntsdata[-3] + ' = ' + evntsdata[-1])
time.sleep(10)
else:
print("No updates to perform")
exit(0)
except AttributeError as e:
print(str(timestamp()) + " " + "ERROR: Stack Update Failure")
raise
print(evntsdata) has below result
['<Event', "{'resource_name':", "'Stackstack1',", "'event_time':", "'2017-05-26T12:10:43',", "'links':", "[{'href':", "'x',", "'rel':", "'self'},", "{'href':", "'x',", "'rel':", "'resource'},", "{'href':", "'x',", "'rel':", "'stack'}],", "'logical_resource_id':", "'Stackstack1',", "'resource_status':", "'UPDATE_IN_PROGRESS',", "'resource_status_reason':", "'Stack", 'UPDATE', "started',", "'physical_resource_id':", "'xxx',", "'id':", "'xxx'}>"]

Do not serialize and parse objects when the data is in front of you. This is inefficient and hard to understand and maintain. The solution is quite trivial:
data = hc.events.list(stack_name)[0].to_dict()
event_handle = data['resource_status']

It's not JSON, it's a class that you've printed
class Event(base.Resource):
def __repr__(self):
return "<Event %s>" % self._info
Try poking around the source code to get access to the dictionary self._info, then access your fields accordingly.
For example,
event_info = hc.events.list(stack_name)[0]._info
event_handle = event_info['resource_status']
Though, there may be another way like calling to_dict() instead, since the underscore indicates a private variable

How do I find the offset and signature of the PE header in python?

I am not sure how to go about this. I do know that the signature is 50 45 00 00 but I am not sure how to take an .exe file and count the amount of times it is used in python.
By the end of it, it should have the magic number, offset of the PE header, PE signature, entrypoint, image base, number of sections with the PE, name of each sections with offset.
Here is what I have so far (it is only for the magic number):
def sig(content):
    """Print and return the two-byte magic number at the start of *content*.

    content: the raw bytes of the file (for example, read from a file
        opened in "rb" mode).
    Returns the text printed after the "Magic Number: " label, e.g.
    "4D 5A" for a DOS/PE executable.
    """
    # Local import keeps the snippet self-contained.
    import binascii

    # binascii.hexlify works on Python 2 and Python 3 alike, whereas
    # str.encode("hex") exists only on Python 2. Only the first two bytes
    # are needed, so the rest of the file is not converted.
    hex_text = binascii.hexlify(content[:2]).decode("ascii").upper()
    magic = hex_text[0:2] + " " + hex_text[2:4]
    print("Magic Number: " + magic)
    return magic
If you can help, please let me know!

it's everything besides the offset
import struct
import pefile
import pydasm
# Parse the executable; `filename` is the path of the file to inspect.
pe = pefile.PE(filename)
# The PE signature ("PE\0\0", i.e. 50 45 00 00) is the Signature field of
# the NT headers. VS_FIXEDFILEINFO belongs to the version resource and
# carries an unrelated signature.
print("PE Signature: " + hex(pe.NT_HEADERS.Signature))
print("Image Base: " + hex(pe.OPTIONAL_HEADER.ImageBase))
print("Address of EntryPoint: " + hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint))
print("RVA Number and Size: " + hex(pe.OPTIONAL_HEADER.NumberOfRvaAndSizes))
print("Number of Sections within PE: " + hex(pe.FILE_HEADER.NumberOfSections))
for section in pe.sections:
    # Section names are fixed-width byte strings padded with NULs; decode
    # them and strip the padding before printing.
    print('Section Name: ' + section.Name.decode('utf-8', 'replace').rstrip('\x00'))

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Parsing XML attrib with Python, ' and " character splitting - python

Related

Why do I have different api responses from Openweathermap JSON and PyOWM library?

Arcmap script will not print messages in arcmap console

Websphere 9X unable to deploy ear file via jython script

Parsing JSON in Python

How do I find the offset and signature of the PE header in python?

Categories

Resources