I get this error when I run my script
AttributeError: Element instance has no attribute '__float__'
My code looks like this:
# Parses the street-tree XML with minidom, reads the fields of each
# <StreetTree> element and passes them to add_tree().
# NOTE(review): the original indentation of this snippet was lost; the
# add_tree() call presumably sits inside the for loop - confirm.
def populate():
# NOTE(review): the path is a non-raw string containing backslashes; the
# ElementTree version of this code further down uses an r'...' literal.
parsedfiles = minidom.parse('C:\Users\User\Downloads\New folder\StreetTrees_ArbutusRidge.xml')
treelist = parsedfiles.getElementsByTagName('StreetTree')
for alltrees in treelist:
treeId = alltrees.getAttribute('TreeID')
# No [0] on these lookups, so whatever getElementsByTagName returns is
# passed on unchanged - NOTE(review): confirm add_tree expects that.
neighbourhood = alltrees.getElementsByTagName('NeighbourhoodName')
commonName = alltrees.getElementsByTagName('CommonName')
# [0] picks the first <Diameter> match, which is a DOM Element, not its text.
diameter = alltrees.getElementsByTagName('Diameter')[0]
# float() on that Element is the call that raises the AttributeError
# quoted at the top of the question.
diameter = float(diameter)
streetNumber = alltrees.getElementsByTagName('CivicNumber')
street = alltrees.getElementsByTagName('StdStreet')
# lat and lon are assigned here, but the call below passes literal 0 for both.
lat = 0
lon = 0
add_tree(treeId=treeId, neighbourhood=neighbourhood, commonName=commonName,
diameter=diameter, streetNumber=streetNumber, street=street, lat=0, lon=0)
I think I'm misinterpreting the diameter but I don't know how to fix it.
diameter is a DOM Element:
diameter = alltrees.getElementsByTagName('Diameter')[0]
It is not directly convertible to a float, because an Element is a node rather than textual data; you probably want the text contained in the element:
diameter = alltrees.getElementsByTagName('Diameter')[0]
diameter = float(diameter.firstChild.nodeValue)
Note that the W3C DOM is rather a pain to work with; you may want to look into the more Pythonic ElementTree API instead:
# Parse the document once, then visit every <StreetTree> element at any depth.
parsedfiles = etree.parse(
    r'C:\Users\User\Downloads\New folder\StreetTrees_ArbutusRidge.xml'
)
for tree_elem in parsedfiles.findall('.//StreetTree'):
    # Diameter is the only field converted (element text -> float); the other
    # child lookups are handed to add_tree exactly as findall() returns them.
    add_tree(
        treeId=tree_elem.attrib['TreeID'],
        neighbourhood=tree_elem.findall('NeighbourhoodName'),
        commonName=tree_elem.findall('CommonName'),
        diameter=float(tree_elem.find('Diameter').text),
        streetNumber=tree_elem.findall('CivicNumber'),
        street=tree_elem.findall('StdStreet'),
        lat=0,
        lon=0,
    )
where I am assuming that the various elements you are looking for are directly contained in the <StreetTree> element.
Related
I'm trying to merge all the spans of a line into a single row using PyMuPDF:
import fitz


def _bracketed(bbox):
    """Return str(bbox) with parentheses swapped for square brackets."""
    return str(bbox).replace("(", "[").replace(")", "]")


# Dump the bounding box of every block on the first page, followed by one
# "row:" line (bbox, tab, text) for every span inside that block.
with fitz.open("0003001v1.pdf") as doc:
    page = doc[0]
    # Named page_dict rather than `dict` so the builtin is not shadowed.
    page_dict = page.get_text("dict")

    # A missing "blocks" / "lines" / "spans" key is treated as an empty list,
    # so entries without that key are skipped, as the membership tests did.
    for block in page_dict.get("blocks", []):
        print("--------------------------".strip())
        print("block: ", _bracketed(block["bbox"]))
        print("")
        for line in block.get("lines", []):
            for span in line.get("spans", []):
                print("row:" + _bracketed(span["bbox"]) + "\t" + span["text"])
the output will be:
block: [71.99899291992188, 630.993408203125, 502.38116455078125, 700.308837890625]
row:[71.99905395507812, 630.993408203125, 502.36865234375, 642.9486083984375] and look for the explicit form of the function Φ from the experimental data on the
row:[71.99905395507812, 645.2735595703125, 107.62599182128906, 657.228759765625] system
row:[107.62599182128906, 645.2735595703125, 119.32400512695312, 657.228759765625] S
row:[120.1189956665039, 645.2735595703125, 502.3509826660156, 657.228759765625] . However, the function Φ may depend on time, it means that there are
row:[71.99899291992188, 659.673583984375, 344.1631774902344, 671.6287841796875] some hidden parameters, which control the system
row:[344.1631774902344, 659.673583984375, 356.683837890625, 671.6287841796875] S
row:[356.683837890625, 659.673583984375, 502.38116455078125, 671.6287841796875] and its evolution is of the
row:[71.99899291992188, 673.95361328125, 96.2470474243164, 685.9088134765625] form
row:[257.9989929199219, 688.3536376953125, 261.3225402832031, 700.308837890625] ˙
row:[254.6388397216797, 688.1612548828125, 262.4575500488281, 700.116455078125] ϕ
row:[262.4575500488281, 688.3536376953125, 291.689697265625, 700.308837890625] = Φ(
row:[291.71893310546875, 688.1612548828125, 311.758056640625, 700.116455078125] ϕ, u
row:[311.75872802734375, 688.3536376953125, 316.4093017578125, 700.308837890625] )
row:[316.4388122558594, 688.1612548828125, 319.7623596191406, 700.116455078125] ,
How can I merge the text and coordinates of the spans that lie on a single line, so that I get one bounding box and one text string per line?
I'm working with nested JSON data in pandas, but I run into a problem once I extract a DataFrame from the nested data.
The data looks like:
[{"export_id":"COL-EXP-1894","origin_office":"EXAMPLE","destination_office":"","incoterms":"","shipment_date":"","export_date":"2023-01-01","origin_port":"Buenaventura","destination_port":"New York/New Jersey","bl_number":null,"shipping_line":null,"shipping_mode":null,"vessel_name":null,"voyage_number":null,"reservation_number":null,"container_number":null,"seal_number":null,"eta":null,"etd":null,"export_status":"in_progress","ico_list":[]}]
Reading data like that works fine, but in some records ico_list is populated, like this:
[{"export_id":"COL-EXP-1894","origin_office":"EXAMPLE","destination_office":"","incoterms":"","shipment_date":"","export_date":"2023-01-01","origin_port":"Buenaventura","destination_port":"New York/New Jersey","bl_number":null,"shipping_line":null,"shipping_mode":null,"vessel_name":null,"voyage_number":null,"reservation_number":null,"container_number":null,"seal_number":null,"eta":null,"etd":null,"export_status":"in_progress","ico_list":[{"ico_id":"03-0178-436-23","contract_id":"CI-1046","customer":null,"origin_office":"example","destination_office":"example","incoterm":"CIF","quality":"ML","mark":"example","packaging_type":"Nitrogen-Flushed Vac-Packed Boxes - 35KG","packaging_capacity":35.0,"units":1,"quantity":35.0,"certification":null}]}]
ico_list can hold more than one entry, not just one as in the example, so I implemented this:
# Flattens the API response into two lists: data_list gets one row per export
# and data_ico_list gets one row per entry of that export's 'ico_list'
# (each prefixed with the export_id).
# NOTE(review): the original indentation of this snippet was lost.
if response.status_code == 200:
data_str = response.text
try:
atlas_api_data = json.loads(data_str)
df_atlas = pd.json_normalize(atlas_api_data)
#print(df_atlas)
# NOTE(review): bare except catches every exception and only prints a message.
except:
print('ErrorOccured While Parsing JSON ATLAS API TO Dataframe')
# NOTE(review): hard-coded row label 95; df_atlas2 is reassigned for every
# row inside the loop below.
df_atlas2 = pd.json_normalize(df_atlas['ico_list'].loc[95])
# iterrows() yields (index, row) pairs: i is the row label, row the Series.
for i, row in df_atlas.iterrows():
export_id = row['export_id']
origin_office = row['origin_office']
destination_office = row['destination_office']
export_date = row['export_date']
origin_port = row['origin_port']
destination_port = row['destination_port']
bl_number = row['bl_number']
shipping_line = row['shipping_line']
shipping_mode = row['shipping_mode']
vessel_name = row['vessel_name']
voyage_number = row['voyage_number']
reservation_number = row['reservation_number']
container_number = row['container_number']
seal_number = row['seal_number']
export_status = row['export_status']
values = [export_id,origin_office,destination_office,export_date,origin_port,destination_port,
bl_number,shipping_line,shipping_mode,vessel_name,voyage_number,reservation_number,container_number,
seal_number,export_status]
data_list.append(values)
# Normalize this export's ico_list (a list of dicts in the sample data)
# into its own small DataFrame.
df_atlas2 = pd.json_normalize(df_atlas['ico_list'].loc[i])
if df_atlas2.empty:
print('Empty DF')
else:
# The unpacking order here is reversed compared with the outer loop:
# row_ico receives the integer index and j receives the row Series, so
# row_ico['ico_id'] is what raises "'int' object is not subscriptable".
# Writing `for j, row_ico in df_atlas2.iterrows():` binds the Series to row_ico.
for row_ico, j in df_atlas2.iterrows():
ico_id = row_ico['ico_id']
contract_id = row_ico['contract_id']
customer = row_ico['customer']
incoterm = row_ico['incoterm']
quality = row_ico['quality']
mark = row_ico['mark']
packaging_type = row_ico['packaging_type']
packaging_capacity = row_ico['packaging_capacity']
units = row_ico['units']
quantity = row_ico['quantity']
certification = row_ico['certification']
ico_values = [export_id,ico_id,contract_id,customer,incoterm,quality,mark,packaging_type,packaging_capacity,units,quantity,certification]
data_ico_list.append(ico_values)
This way I extract only the data that I need. It works for the first level, but when execution reaches the second iterrows() it fails with:
TypeError Traceback (most recent call last)
Cell In [4], line 43
41 else:
42 for row_ico, j in df_atlas2.iterrows():
---> 43 ico_id = row_ico['ico_id']
44 contract_id = row_ico['contract_id']
45 customer = row_ico['customer']
TypeError: 'int' object is not subscriptable
When printing the df_atlas2 it looks normal, like this:
variable: df_atlas2 before goes into iterrrows()
I tried using df_atlas2['ico_id'].astype(str) on all the columns and ico_id = str(row_ico['ico_id']), but I still get the same error.
If you know how to solve this, hundred thanks!
I am trying to parse a very large XML file which I downloaded from OSHA's website and convert it into a CSV so I can use it in a SQLite database along with some other spreadsheets. I would just use an online converter, but the osha file is apparently too big for all of them.
I wrote a script in Python which looks like this:
# Converts data.xml to Out.csv: the first matched record is meant to supply
# the header row, and every matched record contributes one row of field text.
# NOTE(review): the original indentation of this snippet was lost.
import csv
# NOTE(review): xml.etree.cElementTree was removed in Python 3.9; the answer
# further down imports xml.etree.ElementTree instead.
import xml.etree.cElementTree as ET
tree = ET.parse('data.xml')
root = tree.getroot()
xml_data_to_csv =open('Out.csv', 'w')
list_head=[]
Csv_writer=csv.writer(xml_data_to_csv)
count=0
# findall('data') matches only direct children of the root whose tag is
# exactly 'data'. NOTE(review): the sample in the answer below uses
# <DATA_RECORD> records; if the real file does too, this loop never runs and
# nothing is written, which would explain the empty CSV.
for element in root.findall('data'):
List_nodes =[]
# count is 0 only on the first pass, so the header is built once.
if count== 0:
inspection_number = element.find('inspection_number').tag
list_head.append(inspection_number)
establishment_name = element.find('establishment_name').tag
list_head.append(establishment_name)
# From here on the header list receives the Element returned by find(),
# not its .tag string as for the two fields above.
city = element.find('city')
list_head.append(city)
state = element.find('state')
list_head.append(state)
zip_code = element.find('zip_code')
list_head.append(zip_code)
sic_code = element.find('sic_code')
list_head.append(sic_code)
naics_code = element.find('naics_code')
list_head.append(naics_code)
sampling_number = element.find('sampling_number')
list_head.append(sampling_number)
office_id = element.find('office_id')
list_head.append(office_id)
date_sampled = element.find('date_sampled')
list_head.append(date_sampled)
date_reported = element.find('date_reported')
list_head.append(date_reported)
eight_hour_twa_calc = element.find('eight_hour_twa_calc')
list_head.append(eight_hour_twa_calc)
instrument_type = element.find('instrument_type')
list_head.append(instrument_type)
lab_number = element.find('lab_number')
list_head.append(lab_number)
field_number = element.find('field_number')
list_head.append(field_number)
sample_type = element.find('sample_type')
list_head.append(sample_type)
blank_used = element.find('blank_used')
list_head.append(blank_used)
time_sampled = element.find('time_sampled')
list_head.append(time_sampled)
air_volume_sampled = element.find('air_volume_sampled')
list_head.append(air_volume_sampled)
sample_weight = element.find('sample_weight')
list_head.append(sample_weight)
imis_substance_code = element.find('imis_substance_code')
list_head.append(imis_substance_code)
substance = element.find('substance')
list_head.append(substance)
sample_result = element.find('sample_result')
list_head.append(sample_result)
unit_of_measurement = element.find('unit_of_measurement')
list_head.append(unit_of_measurement)
qualifier = element.find('qualifier')
list_head.append(qualifier)
Csv_writer.writerow(list_head)
# Parsed as count = (+1): assigns 1 rather than incrementing, which is still
# enough to skip the header branch on later passes.
count = +1
# Data row: the text content of each field, in the same order as the header.
inspection_number = element.find('inspection_number').text
List_nodes.append(inspection_number)
establishment_name = element.find('establishment_name').text
List_nodes.append(establishment_name)
city = element.find('city').text
List_nodes.append(city)
state = element.find('state').text
List_nodes.append(state)
zip_code = element.find('zip_code').text
List_nodes.append(zip_code)
sic_code = element.find('sic_code').text
List_nodes.append(sic_code)
naics_code = element.find('naics_code').text
List_nodes.append(naics_code)
sampling_number = element.find('sampling_number').text
List_nodes.append(sampling_number)
office_id = element.find('office_id').text
List_nodes.append(office_id)
date_sampled = element.find('date_sampled').text
List_nodes.append(date_sampled)
date_reported = element.find('date_reported').text
List_nodes.append(date_reported)
eight_hour_twa_calc = element.find('eight_hour_twa_calc').text
List_nodes.append(eight_hour_twa_calc)
instrument_type = element.find('instrument_type').text
List_nodes.append(instrument_type)
lab_number = element.find('lab_number').text
List_nodes.append(lab_number)
field_number = element.find('field_number').text
List_nodes.append(field_number)
sample_type = element.find('sample_type').text
List_nodes.append(sample_type)
blank_used = element.find('blank_used').text
# NOTE(review): append() is called without an argument, so blank_used is
# never added; list.append requires one argument, so this line raises
# TypeError as soon as it is reached.
List_nodes.append()
time_sampled = element.find('time_sampled').text
List_nodes.append(time_sampled)
air_volume_sampled = element.find('air_volume_sampled').text
List_nodes.append(air_volume_sampled)
sample_weight = element.find('sample_weight').text
List_nodes.append(sample_weight)
imis_substance_code = element.find('imis_substance_code').text
List_nodes.append(imis_substance_code)
substance = element.find('substance').text
List_nodes.append(substance)
sample_result = element.find('sample_result').text
List_nodes.append(sample_result)
unit_of_measurement = element.find('unit_of_measurement').text
List_nodes.append(unit_of_measurement)
qualifier= element.find('qualifier').text
List_nodes.append(qualifier)
Csv_writer.writerow(List_nodes)
# The output file is closed explicitly; no with-block is used.
xml_data_to_csv.close()
But when I run the code I get a CSV with nothing in it. I suspect this may have something to do with the XSD file associated with the XML, but I'm not totally sure.
Does anyone know what the issue is here?
The code below is a 'compact' version of your code.
It assumes that the XML structure looks like in the script variable xml. (Based on https://www.osha.gov/opengov/sample_data_2011.zip)
The main difference between this sample code and yours is that I define the fields I want to collect once (see FIELDS) and use that definition throughout the script.
import csv
import xml.etree.ElementTree as ET
FIELDS = ['lab_number', 'instrument_type'] # TODO add more fields
xml = '''<main xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="health_sample_data.xsd">
<DATA_RECORD>
<inspection_number>316180165</inspection_number>
<establishment_name>PROFESSIONAL ENGINEERING SERVICES, LLC.</establishment_name>
<city>EUFAULA</city>
<state>AL</state>
<zip_code>36027</zip_code>
<sic_code>1799</sic_code>
<naics_code>238990</naics_code>
<sampling_number>434866166</sampling_number>
<office_id>418600</office_id>
<date_sampled>2011-12-30</date_sampled>
<date_reported>2011-12-30</date_reported>
<eight_hour_twa_calc>N</eight_hour_twa_calc>
<instrument_type>TBD</instrument_type>
<lab_number>L13645</lab_number>
<field_number>S1</field_number>
<sample_type>B</sample_type>
<blank_used>N</blank_used>
<time_sampled></time_sampled>
<air_volume_sampled></air_volume_sampled>
<sample_weight></sample_weight>
<imis_substance_code>S777</imis_substance_code>
<substance>Soil</substance>
<sample_result>0</sample_result>
<unit_of_measurement>AAAAA</unit_of_measurement>
<qualifier></qualifier>
</DATA_RECORD>
<DATA_RECORD>
<inspection_number>315516757</inspection_number>
<establishment_name>MARGUERITE CONCRETE CO.</establishment_name>
<city>WORCESTER</city>
<state>MA</state>
<zip_code>1608</zip_code>
<sic_code>1771</sic_code>
<naics_code>238110</naics_code>
<sampling_number>423259902</sampling_number>
<office_id>112600</office_id>
<date_sampled>2011-12-30</date_sampled>
<date_reported>2011-12-30</date_reported>
<eight_hour_twa_calc>N</eight_hour_twa_calc>
<instrument_type>GRAV</instrument_type>
<lab_number>L13355</lab_number>
<field_number>9831B</field_number>
<sample_type>P</sample_type>
<blank_used>N</blank_used>
<time_sampled>184</time_sampled>
<air_volume_sampled>340.4</air_volume_sampled>
<sample_weight>.06</sample_weight>
<imis_substance_code>9135</imis_substance_code>
<substance>Particulates not otherwise regulated (Total Dust)</substance>
<sample_result>0.176</sample_result>
<unit_of_measurement>M</unit_of_measurement>
<qualifier></qualifier>
</DATA_RECORD></main>'''
def _write_records(records, fields, out):
    """Write a header row plus one CSV row per record to the open file *out*.

    records: iterable of elements, each expected to hold one child per field.
    fields: child tag names; used as the header row and as the lookup keys.

    The csv module quotes values that contain commas (the sample data has
    "PROFESSIONAL ENGINEERING SERVICES, LLC."), which a plain ','.join would
    split into extra columns.
    """
    # lineterminator='\n' keeps the line endings the original script wrote.
    writer = csv.writer(out, lineterminator='\n')
    writer.writerow(fields)
    for record in records:
        # findtext() returns the default for a missing child and '' for an
        # element without text (e.g. <time_sampled></time_sampled>), so no
        # cell is ever None.
        writer.writerow([record.findtext(f, default='') for f in fields])


root = ET.fromstring(xml)
records = root.findall('.//DATA_RECORD')
# newline='' leaves line-ending handling to the csv writer.
with open('out.csv', 'w', newline='') as out:
    _write_records(records, FIELDS, out)
out.csv
lab_number,instrument_type
L13645,TBD
L13355,GRAV
Reading mouse coordinate values was successful. But I need to read the stored coordinate values through the xml.
Value was retrieved using ElementTree.
But once the values are in a list, each coordinate is a single string of the form 'x,y': the comma in the middle prevents a direct integer conversion, and because the whole pair is one quoted string I cannot use it as two numbers.
Please advise me.
<?xml version='1.0' encoding='utf-8'?>
<DA>
<DetectionAreas>2</DetectionAreas>
<DetectArea>
<Point>0,0</Point>
<Point>1280,0</Point>
<Point>1280,720</Point>
<Point>0,720</Point>
</DetectArea>
<Loitering>
<Point>625,564</Point>
<Point>625,0</Point>
<Point>1280,0</Point>
<Point>1280,631</Point>
</Loitering>
</DA>
import xml.etree.ElementTree as ET

# Read the stored area definitions and collect the raw 'x,y' text of every
# <Point> for the detect, loitering and intrusion areas.
tree = ET.parse('./MapFile/C001101.map')
root = tree.getroot()

# Look the area elements up by tag rather than by child position:
# Element.getchildren() was removed in Python 3.9, and positional access
# fails when an area is absent or the order differs. Each name is None when
# the file has no such area.
DetectPoint = root.find('DetectArea')
LoiteringPoint = root.find('Loitering')
IntrusionPoint = root.find('Intrusion')

Ipointvalue = []
Lpointvalue = []
Dpointvalue = []

# findall() returns an empty list for a missing area, so no tag checks are
# needed and each area is read independently of the others.
for DPoint in root.findall("DetectArea/Point"):
    Dpointvalue.append(DPoint.text)
for LPoint in root.findall("Loitering/Point"):
    Lpointvalue.append(LPoint.text)
for IPoint in root.findall("Intrusion/Point"):
    Ipointvalue.append(IPoint.text)

ip = len(Ipointvalue)
lp = len(Lpointvalue)
dp = len(Dpointvalue)

# Print the collected strings in the original order: detect, loitering, intrusion.
for values in (Dpointvalue, Lpointvalue, Ipointvalue):
    for value in values:
        print(value)
'
'
'
def onMouseCallback(self, event, x, y, flags, idx):
    """Record a clicked point while one of the "append" view states is active.

    When event is EVENT_LBUTTONUP and flags equals EVENT_FLAG_LBUTTON,
    [x, y] is appended to works[idx].area_tmp; otherwise nothing happens.
    """
    appending = self.view_state in (
        'intrusion append',
        'loitering append',
        'counting append',
    )
    if not appending:
        return
    left_released = (
        event == cv2.EVENT_LBUTTONUP and flags == cv2.EVENT_FLAG_LBUTTON
    )
    if left_released:
        works[idx].area_tmp.append([x, y])
I need these points to create a polyline.
The coordinate values I want are separate x and y integers, but each point is read as the single string 'x,y', so I would like advice on how to convert it.
Define a 'namedtuple' named Point. This object has two int properties x and y. There is a helper method that will help you to translate the data you have (x and y as string) to a Point object
See below
from collections import namedtuple

# Lightweight record for one coordinate pair; fields are x and y.
Point = namedtuple('Point', 'x y')


def make_point(point_str):
    """Convert an 'x,y' string such as '1280,720' into a Point of two ints.

    Whitespace around either number is ignored (int() strips it).

    Raises:
        ValueError: if the string does not hold exactly two comma-separated
            integers.
    """
    parts = point_str.split(',')
    if len(parts) != 2:
        # Fail loudly instead of silently dropping extra components or
        # surfacing an unrelated IndexError for a missing one.
        raise ValueError("expected 'x,y', got %r" % (point_str,))
    return Point(int(parts[0]), int(parts[1]))
point_str = '3,4'
point = make_point(point_str)
print(point)
print(point.x)
print(point.y)
output
Point(x=3, y=4)
3
4
Now your code may look like:
...
Lpointvalue.append(make_point(LPoint.text))
...
I have an intSliderGrp that should create a given number of spheres, but the approach I tried gives me the error "'int' object is not iterable". Does anyone know how to write a for-loop driven by the value from the slider group?
# Button callback: queries the slider sldr2 and is meant to create one sphere
# per unit of the value it returns.
# NOTE(review): the original indentation of this snippet was lost.
def givenLights(*args):
wantedLights = cmds.intSliderGrp( sldr2, query=True,value=lights)
# print statement syntax: this snippet is written for Python 2.
print wantedLights
# Per the error quoted in the question, wantedLights is a single int, so
# iterating over it directly raises "'int' object is not iterable"; the
# follow-up below loops over range(...) instead.
for item in wantedLights:
cmds.polySphere(sx=5, sy=5)
# UI setup: a window with a column layout, an int slider (0-100, initial
# value taken from lights) and a button whose command is givenLights.
win = cmds.window(title="electrical chords", widthHeight =(300,400),
topLeftCorner= (200,350))
cmds.columnLayout(adj = True, rs=(10))
lights = 0
sldr2 = cmds.intSliderGrp( field=True, value=lights,minValue=0,maxValue=100)
btn6 = cmds.button(label="Allign given lights",command=givenLights)
cmds.showWindow(win)
I found it myself
# The slider query yields one int, so iterate over range(count) to repeat the
# call that many times. wantedLights is the value queried in givenLights.
for _ in range(wantedLights):
    cmds.polySphere(sx=5, sy=5)
The problem was that I had not wrapped the count in range().