Problem with encoding in Python - python

I'm fairly new to python, and I'm with some problems with encoding.
Please see the code:
# -*- coding: utf-8 -*-
import config # Ficheiro de configuracao
import twitter
import random
import sqlite3
import time
import bitly_api #https://github.com/bitly/bitly-api-python
import feedparser
class TwitterC:
def logToDatabase(self, tweet, timestamp):
# Will log to the database
database = sqlite3.connect('database.db') # Create a database file
cursor = database.cursor() # Create a cursor
cursor.execute("CREATE TABLE IF NOT EXISTS twitter(id_tweet INTEGER AUTO_INCREMENT PRIMARY KEY, tweet TEXT, timestamp TEXT);") # Make a table
# Assign the values for the insert into
msg_ins = tweet
timestamp_ins = timestamp
values = [msg_ins, timestamp_ins]
# Insert data into the table
cursor.execute("INSERT INTO twitter(tweet, timestamp) VALUES(?, ?)", values)
database.commit() # Save our changes
database.close() # Close the connection to the database
def shortUrl(self, url):
bit = bitly_api.Connection(config.bitly_username, config.bitly_key) # Instanciar a API
return bit.shorten(url) # Encurtar o URL
def updateTwitterStatus(self, update):
short = self.shortUrl(update["url"]) # Vou encurtar o URL
update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
# I will see how much characters have the message, if more than 140, delete some chars
length_message = len(update_str)
if length_message > 140:
length_url = len(short['url'])
count_message = 136 - length_url
shorten_msg = update["msg"][0:count_message] + '... '
update_str = shorten_msg + short['url']
# Will post to twitter and print the posted text
api = twitter.Api(consumer_key=config.consumer_key,
consumer_secret=config.consumer_secret,
access_token_key=config.access_token_key,
access_token_secret=config.access_token_secret)
status = api.PostUpdate(update_str) # Fazer o update
msg = status.text # Vou gravar o texto enviado para a variavel 'msg'
# Vou gravar p a Base de Dados
self.logToDatabase(msg, time.time())
print msg # So p mostrar o texto enviado. Comentar esta linha de futuro.
# Exemplo base
#x = TwitterC()
#x.updateTwitterStatus({"url": "http://xyz.com/?cat=28", "msg": "Some tips about PostgreSQL Administration?"})
# Solucao para um misto de feeds e frases feitas
# Vou escolher uma fonte ao acaso
p = range(2) # tem o 0 e o 1
p = random.choice(p)
if p == 0: # Escolhe TEXT UPDATES
# Vou escolher um text update ao acaso
text_a_enviar = random.choice(config.text_updates)
update_to_send = text_a_enviar
elif p == 1: # Escolhe FEEDS UPDATES
'''# Vou escolher um feed ao acaso
feed_a_enviar = random.choice(config.feeds_updates)
# Vou apanhar o conteudo do feed
d = feedparser.parse(feed_a_enviar["feedurl"])
# Vou definir quantos feeds quero ter no i
i = range(8)
# Vou meter para "updates" 10 entradas do feed
updates = []
for i in range(8):
updates.append([{"url": feed_a_enviar["linktoourpage"], "msg": d.entries[i].summary + ", "}])
# Vou escolher ums entrada ao acaso
update_to_send = random.choice(updates)'''
# Vou postar p o Twitter
x = TwitterC()
x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})
The code have some lines but the problem is in this line:
x.updateTwitterStatus({"url": "http://xyz.com/", "msg": "favoritos à distancia"})
This line have a character with an accent "à" and this causes the problem here:
def updateTwitterStatus(self, update):
short = self.shortUrl(update["url"]) # Vou encurtar o URL
update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
...
More precisely in this line:
update_str = update["msg"] + " " + short['url'] # Mensagem em bruto, sem tratamento de contagem de caracteres
The output of the error is this:
x.updateTwitterStatus({"url": "http://xyz.com", "msg": "favoritos à distancia"})
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 48: ordinal not in range(128)
Any clues on how to solve this?

Try adding from __future__ import unicode_literals at the top if your file. Alternatively you can prefix every string with a ´u´, ie u"favoritos à distancia"
Make sure your file is actually saved as utf-8 too!

Related

PYSIMPLEGUI - Window not working properly

I'm creating a GUI and I had an issue. My GUI is supposed to get the user's ID and then show him a button to direct him to another window with some tutorials. If the user has already watched all the tutorials, an input box will appear asking if he wants to watch any tutorial again, if he refuses it (by typing "N") the window will be cleared for another user put his ID.
The code is working, but I noticed that when the user has already watched all tutorials and then refuses to watch again, if the next user has already watched all tutorials too the code doesn't work.
Notice that the input box clears it self, when it was supposed to make the same thing it did in the first picture.
Some curious fact is that if i put the ID of some user that didn't finish the tutorials, it will work fine. And if I put the ID of someone who finished, after puting the ID of someone who didn't, it will work again.
Here is the code, could somone help me?
import pandas as pd
import os
import PySimpleGUI as sg
from window3 import window3
from window2 import window2
from window4 import window4
users_path = r"D:\Users\raulc\Documents\AMBIENTES\TESTE\21 - Ilha de treinamento\Users.csv"
watched_videos_path = r"D:\Users\raulc\Documents\AMBIENTES\TESTE\21 - Ilha de treinamento\watched_videos.csv"
# caminho para a pasta dos videos da 111
path_111 = "D:\\Users\\raulc\\Documents\\AMBIENTES\\videos\\111"
# caminho para a pasta dos videos da 113
path_113 = "D:\\Users\\raulc\\Documents\\AMBIENTES\\videos\\113"
# variavel que armazena o nome dos arquivos que estao na 111
p111 = os.listdir(path_111)
# variavel que armazena o nome dos arquivos que estao na 113
p113 = os.listdir(path_113)
# verifica se o usuario ja esta registrado no banco de dados
def is_registered(ID): #OK
df = read_any_csv(users_path)
x = df.loc[df["ID"] == ID]
if x.empty:
return False
else:
return True
# faz o cadastro do usuario
def register(ID,name,section): # OK
global users_df # need it to add to external variable
# com esse "global" nao precisa ler o csv de novo
data = pd.DataFrame({'NAME': [name], 'ID': [ID], 'SECTION': [section]})
users_df = pd.concat([users_df, data]) # <-- add to original `users_df`
users_df.to_csv(r"D:\Users\raulc\Documents\AMBIENTES\TESTE\21 - Ilha de treinamento\Users.csv", index=False)
# mostrar os videos ja assistidos
def already_watched(ID): #OK
df = read_any_csv(watched_videos_path)
videos = df.loc[df["ID"] == ID,"LINK"]
return videos
# retorna os links que nao foram assistidos ainda
def videos_to_watch(section,ID): #OK
list_already_watched = already_watched(ID).tolist()
if section == 111:
list_to_watch = set(p111)-set(list_already_watched)
elif section == 113:
list_to_watch = set(p113)-set(list_already_watched)
elif section == 000:
list_to_watch = []
return list(list_to_watch)
# retorna setor do usuario
def current_user_section(df,ID):
current_user = df.loc[df["ID"] == ID]
section = int(current_user["SECTION"])
return section
# retorna se a lista de videos a serem assistidos esta vazia ou nao
def is_list_empty(list,section,ID):
list = videos_to_watch(section,ID)
length = len(list)
if length == 0:
return True
else:
return False
# retorna o nome do usuario
def current_user_name(df,ID):
current_user = df.loc[df["ID"] == ID]
name = current_user["NAME"].to_string(index = False)
return name
# retorna os dados do usuario -> nao da para ser essa funcao direto, pois ai o python nao consegue converte o section para int
def current_user_data(df,ID):
name = current_user_name(df,ID)
section = current_user_section(df,ID)
return name,section
# faz a funcao read_csv do pandas
def read_any_csv(path): #OK
df = pd.read_csv(path)
return df
def window():
global users_df
# Definindo o fundo da tela como preto
sg.theme('Black')
# Declarando o logo da mercedes
myImg = sg.Image(filename='logo_meca_pret.png',size=(200,200))
# Declarando os outputs
output = sg.Text(font=("Arial",20),key="output")
output2 = sg.Text(font=("Arial",20),key="output2")
output3 = sg.Text(font=("Arial",20),key="output3")
layout = [
[myImg,sg.Text('PROGRAMA DE TREINAMENTOS',font=("Arial",60),justification="center")],
[sg.Text("Passe o cracha no leitor: ",font=("Arial",20)),sg.InputText(size=(60),key="ID")],
[sg.Text("Escreva seu nome: ",font=("Arial",20),visible=False,key="NAMETEXT"),sg.InputText(size=(60),visible=False,key="NAME")],
[sg.Text("Digite seu setor(111/112/113): ",font=("Arial",20),visible=False,key="SECTIONTEXT"),sg.Input(size=(5),visible=False,key="SECTION")],
[sg.Button('SubmitData', visible=False)],
[output],
[output2],
[output3,sg.InputText(size=(1),key="w_a",visible=False)],
[sg.Text("CLIQUE AQUI E FECHE A JANELA",font=("Arial",20),visible=False,key="BOTAOERROR1"),sg.Text("CLIQUE NO BOTAO PARA ABRIR AS TELAS DOS TUTORIAIS",font=("Arial",20),visible=False,key="BOTAOW3"),sg.Button("W3",visible=False)],
[sg.Text("CLIQUE NO BOTAO PARA ASSISTIR ALGUM TUTORIAL NOVAMENTE",font=("Arial",20),visible=False,key="BOTAOW5"),sg.Button("W5",size=(5),visible=False)],
[sg.Button('Submit', visible=False, bind_return_key=True)],
#[sg.Button("ERROR1",visible=False)],
]
window = sg.Window('PROGRAMA DE TREINAMENTOS MERCEDES BENZ', layout,element_justification="center").Finalize()
window.Maximize()
while True:
event, values = window.read()
if event == sg.WIN_CLOSED or event == 'Cancel': # if user closes window or clicks cancel
break
#print('You entered ', values[0])
if event == 'Submit':
ID = values["ID"]
ID = ID.upper()
if is_registered(ID) == True:
if ID == "X":
name,section = current_user_data(users_df,ID)
window["BOTAOERROR1"].update(visible=True)
#window["ERROR1"].update(visible=True)
window["W3"].update(visible=True)
else:
name,section = current_user_data(users_df,ID)
output.update(f"Ola, {name}, bem vindo ao programa de treinamento Mercedes Benz Brasil!\n")
videos = videos_to_watch(section,ID)
if is_list_empty(videos,section,ID) == True:
output2.update("Nao ha novos tutoriais disponiveis.")
output3.update("Deseja assistir algum tutorial novamente (S/N)?")
window['w_a'].update(visible = True)
w_a = values["w_a"]
if w_a == "s" or w_a == "S":
# abre a tela com todos os tutoriais da pasta daquela secao
window2(ID,section)
window.find_element("ID").update("")
window.find_element("output").update("")
window.find_element("output2").update("")
window.find_element("output3").update("")
window.find_element("w_a").update("")
window['w_a'].update(visible = False)
if w_a == "n" or w_a == "N":
# usa esses comandos para limpar a tela, para que um novo usuario use
window.find_element("ID").update("")
window.find_element("output").update("")
window.find_element("output2").update("")
window.find_element("output3").update("")
window['w_a'].update(visible = False) # deixa o input do w_a invisivel de novo
else:
# se tiverem videos a serem assistidos abrir a WINDOW3
window["BOTAOW3"].update(visible = True)
window["W3"].update(visible = True)
if section == 113:
folder = p113
elif section == 111:
folder = p111
if len(videos_to_watch(section,ID)) != len(folder):
window["BOTAOW5"].update(visible = True)
window["W5"].update(visible=True)
else:
window["NAMETEXT"].update(visible = True)
window["NAME"].update(visible = True)
window["SECTIONTEXT"].update(visible = True)
window["SECTION"].update(visible = True)
window["SubmitData"].update(visible = True)
if event == "W5":
window2(ID,section)
# if event == "ERROR1":
# window3(ID,section)
# window.find_element("ID").update("")
# window['BOTAOERROR1'].update(visible=False)
# window['ERROR1'].update(visible=False)
if event == 'SubmitData' :
name = values["NAME"]
name = name.title()
section = values["SECTION"]
output.update(f"Ola, {name}, bem vindo ao programa de treinamento Mercedes Benz Brasil!\n")
register(ID,name,section)
users_df = pd.read_csv(users_path)
window["BOTAOW3"].update(visible = True)
window["W3"].update(visible = True)
if event == "W3":
window3(ID,section)
window.find_element("ID").update("")
window.find_element("output").update("")
window.find_element("output2").update("")
window.find_element("output3").update("")
window.find_element("w_a").update("")
window["W3"].update(visible = False)
window["BOTAOW3"].update(visible = False)
window["NAME"].update(visible = False)
window["SECTION"].update(visible = False)
window["NAMETEXT"].update(visible = False)
window["SECTIONTEXT"].update(visible = False)
window["SubmitData"].update(visible = False)
window["W5"].update(visible = False)
window["BOTAOW5"].update(visible = False)
window["BOTAOERROR1"].update(visible=False)
window.close()
users_df = pd.read_csv(users_path)
watched_videos_df = pd.read_csv(watched_videos_path)
window()

How to register and than search a value in CSV file?

I'm trying to register an user and then read the data I just appended. It looks like the csv file is appending the value, but it's not saving it. But this is awkward, because I used the "with open" function at the "registered" function.
def is_registered(ID): #OK
df = read_any_csv(users_path)
x = df.loc[df["ID"] == ID]
if x.empty:
return False
else:
return True
#faz o cadastro do usuario
def register(ID): #OK
x = str(input("Escreva seu nome: "))
name = x.title()
section = int(input("Digite seu setor(111/112/113): "))
data = [name,ID,section]
with open (users_path,'a') as file:
writer = csv.writer(file)
writer.writerow(data)
def start():
#Se o usuario estiver registrado, da as boas vindas a ele, caso nao, registra ele e depois da as boas vindas
if is_registered(ID) == True: #OK
current_user = users_df.loc[users_df["ID"] == ID]
name = current_user["NAME"]
name2 =(name.to_string(index=False))
section = current_user["SECTION"]
print(f"Ola, {name2}, bem vindo ao programa de treinamento Mercedes Benz Brasil!\n")
videos = videos_to_watch(section)
print("Esses sao os videos que faltam serem assistidos:\n")
print(*videos,sep = '\n')
else: #OK
register(ID)
users_df.to_csv("Users.csv",index = False)
current_user = users_df.loc[users_df["ID"] == ID]
print(current_user)
But the csv can't find the data, the result that I got is that:
Digite o ID: aaaaa
Escreva seu nome: leo
Digite seu setor(111/112/113): 113
Empty DataFrame
Columns: [NAME, ID, SECTION]
Index: []
What I actually want is:
Digite o ID: 5DBEF04B
Escreva seu nome: Raul Lopes Camina
Digite seu setor(111/112/113): 113
Ola, Raul Lopes Camina, bem vindo ao programa de treinamento Mercedes Benz Brasil!
You write new data to file but this can't change original users_df and you would have to read it again (instead of write again)
register(ID)
users_df = pd.read_csv("Users.csv") # <-- read instead of write
current_user = users_df.loc[users_df["ID"] == ID]
print(current_user)
Or you should first add to users_df and later save it with to_csv - and then you don't have to read it again.
def register(ID): # OK
global users_df # need it to add to external variable
name = str(input("Escreva seu nome: "))
name = name.title()
section = int(input("Digite seu setor(111/112/113): "))
data = pd.DataFrame({'NAME': [name], 'ID': [ID], 'SECTION': [section]})
users_df = pd.concat([users_df, data]) # <-- add to original `users_df`
users_df.to_csv("Users.csv", index=False)
and later
register(ID)
# without `to_csv()` and `read_csv()`
current_user = users_df.loc[users_df["ID"] == ID]
print(current_user)

How to extract a substring using this regex pattern? It's give a ValueError: too many values to unpack (expected 1)

import re, random, os, datetime, time
from os import remove
from unicodedata import normalize
from glob import glob
def learn_in_real_time(input_text, text):
#Quita acentos y demas diacríticos excepto la ñ
input_text = re.sub(
r"([^n\u0300-\u036f]|n(?!\u0303(?![\u0300-\u036f])))[\u0300-\u036f]+", r"\1",
normalize("NFD", input_text), 0, re.I
)
input_text = normalize( 'NFC', input_text) # -> NFC
input_text_to_check = input_text.lower() #Convierte a minuscula todo
words = []
words_associations = []
regex_what_who = r"(.*)\¿?(que sabes|que sabias|que sabrias|que te referis|que te refieres|que te referias|que te habias referido|que habias referido|a que|que|quienes|quien)\s*(con que|con lo que|con la que|con|acerca de que|acerca de quienes|acerca de quien|sobre de que|sobre que|sobre de quienes|sobre quienes|sobre de quien|sobre quien|)\s*(son|sean|es|serian|seria)\s*(iguales|igual|similares|similar|parecidos|parecido|comparables|comparable|asociables|asociable|distinguibles|distinguible|distintos|distinto|diferentes|diferente|diferenciables|diferenciable|)\s*(a |del |de |)\s*((?:\w+\s*)+)?"
l = re.search(regex_what_who, input_text_to_check, re.IGNORECASE) #Con esto valido la regex haber si entra o no en el bloque de code
if l:
#print("C")
association, = l.groups()
association = association.strip()
association_check = association + "\n" #Uso estas para las comparaciones, ya que sino las consideraria erroneamente como palabras que no estan en la lista solo por no tener el \n
return text
return text
I need it to extract the word that is in ((?: \ W + \ s *) +) and save it to a variable as a string, but the problem is that it gives me this error:
Traceback (most recent call last):
File "answer_about_learned_in_txt.py", line 106, in <module>
print(learn_in_real_time(input_t, text))
File "answer_about_learned_in_txt.py", line 72, in learn_in_real_time
association, = l.groups()
ValueError: too many values to unpack (expected 1)
How do I extract all what is in ((?: \ W + \ s *) +), and save it in a variable?
Taking advantage now that I ask how I would do to:
a) to extract everything that is in ((?: \ W + \ s *) +) and if there are blank spaces that it does not cut and save everything, for example: "Hello, how are you?"
b) to extract everything that is in ((?: \ W + \ s *) +) but to save up to the first white space, for example: "Hello"
I have the problem that if I put the following, position 6 of the tuple does not catch me
if l:
#print("C")
#association, = l.groups()
print(l.groups())
association, _temp = l.group(6)
And it gives me this error
File "answer_about_learned_in_txt.py", line 74, in learn_in_real_time
association, _temp = l.group(6)
ValueError: not enough values to unpack (expected 2, got 0)
In the end I was able to solve it with the following
If you enter
Que son los cometas
print (l.groups ())
('', 'que', '', 'son', '', '', 'los cometas')
I'm interested in the seventh position of the tuple, counting from 1
association = l.group (7)
And this give me :
'los cometas'
let's update patterns string to a logical view and follow main feature.
regex_what_who = r"(que sabes|que sabias|que sabrias|que te referis|que te refieres|que te referias|que te habias referido|que habias referido|a que|que|quienes|quien|con que|con lo que|con la que|con|acerca de que|acerca de quienes|acerca de quien|sobre de que|sobre que|sobre de quienes|sobre quienes|sobre de quien|sobre quien|son|sean|es|serian|seria|iguales|igual|similares|similar|parecidos|parecido|comparables|comparable|asociables|asociable|distinguibles|distinguible|distintos|distinto|diferentes|diferente|diferenciables|diferenciable).*(a|del|de)\s*((?:\w+\s*)+)?"
then, fix error first error in case if we got one result or many:
association, _temp = l.groups()
It Work's! -)

python - Change one-row table value

I'm developing a script for a tables. I had a problem, that the table came with decimal values ​​and I'm trying to make a change of these values ​​in the code.
To do that, just take the decimal value and multiply it by 1000, but there's a problem that I really can't understand. It works up to a certain number, then the values ​​of some
rows of the table change and the result at the end is of immense value. I don't know what's going on, someone can give me a hand.
Put the entire code of the function
def DESTINOS_CLUBE_CLIENTE_MOD072():
# Caminho do arquivo xlsx # Sheet_name Nome da tabela ou planilha. Obs: Tem que ser EXATAMENTE como está escrito no excel.
table = pd.read_excel('./planilha.xlsx', sheet_name='LP')
oneCard = ""
# Remoção de linhas que não contém valores
for i in range(table.shape[0]):
if pd.isna(table[1][i]) == True:
table = table.drop(labels=i, axis=0)
table.reset_index(inplace=True, drop=True)
# Criar nova tabela
table2 = pd.DataFrame(table)
# Resetar o index da nova tabela
table2.reset_index(inplace=True, drop=True)
for i in range(8):
print(table2[1][i])
valor2 = int(table2[1][i] * 1000)
table2[1] = table2[1].replace(table2[1][i], valor2)
print("------")
# table2 = table2.astype({1 : 'int32'})
# table2 = table2.astype({'1.1': 'int32'})
print(table2)
# Pegando os valores da tabela e inserindo em seus determinados campos
for i in range(table2.shape[0]):
values = 'element{}.querySelector("{}").value="{}"; element{}.querySelector("{}").value="{}"; element{}.querySelector("{}").value="{}"; element{}.querySelector("{}").value="{}"; element{}.querySelector("{}").value="{}";' . format(i, "input[name*='Origem']", table2['ORIGEM'][i], i, "input[name*='Destino']", table2['DESTINOS'][i], i, "input[name*='Clube_valor']", table2[1][i], i, "input[name*='Geral_valor']", table2['1.1'][i], i, "input[name*='Link_botao']", table2['LINKS'][i])
oneCard += " let element"+str(i)+" = document.querySelectorAll('[data-fieldname=Itens]')"+str([i])+"; setTimeout(()=>{"+values+"},10000);"
# Código completo
cod = "let totalElements = document.querySelectorAll('[data-fieldname=Itens]').length == "+str(table2.shape[0])+" ? 0 : "+str(table2.shape[0])+" - document.querySelectorAll('[data-fieldname=Itens]').length; let element = document.querySelector('[data-fieldname=Itens]'); for(i = 0; i < totalElements; i++){element.children[13].click()} setTimeout(() => {"+oneCard+"}, 10000); setTimeout(() => {console.log('Pronto, pode publicar!')}, 20050);"
# Inserir o código dentro de um arquivo de texto
file(cod)
# Mensagem de sucesso
message()
DESTINOS_CLUBE_CLIENTE_MOD072()
the problem was that I was just passing the value, so it got all the equal values ​​from the table. So I used loc, to select the line
for i in range(table2.shape[0]):
valor2 = int(table2[1][i] * 1000)
table2.loc[i] = table2.loc[i].replace(table2.loc[i][1], valor2)

Python XML DOM collecting elements data

I was trying to retrieve some info of an XML tag with Python. My goal is to have a dictionary which saves for each situation tag id, all child data, but I don't know how to deal with the fact that extract data from text nodes.
My code:
from xml.dom.minidom import *
import requests
print("GETTING XML...")
resp = requests.get('http://infocar.dgt.es/datex2/dgt/SituationPublication/all/content.xml', stream = True) #XML that I need
if resp.status_code != 200:
raise ApiError('GET /tasks/ {}'.format(resp.status_code))
print("XML RECIBIDO 200 OK")
#resp.raw.decode_content = True
print("GUARDANDO XML")
with open("DGT_DATEX.xml", "wb") as handle:
for data in (resp.iter_content()):
handle.write(data)
print("XML GUARDADO")
print("INICIANDO PARSEO..")
dom3 = parse("DGT_DATEX.xml")
print(dom3)#memory dir
print("DATEX PARSEADO")
def getText(nodelist):
dict = {}
listofdata = list()
for node in nodelistofPayloadTag:
if node.nodeType != node.TEXT_NODE:
dict[node.getAttribute('id')] = listofdata
listofdata = goDeep(node.childNodes ,listofdata)
print(str.format("El diccionario antes de ser retornado es {0}", dict))
return dict
def goDeep(childsOfElement, l):
for i in childsOfElement:
if i.nodeType != i.TEXT_NODE:
goDeep(i.childNodes, l)
else:
l.append(i.data)
return l
def getSituation(payloadTag):
getText(payloadTag.childNodes)
def getPayLoad(dom):
print(str.format("Tag to be processed:{0}",dom.getElementsByTagNameNS('*', 'payloadPublication')[0]))
getSituation(dom.getElementsByTagNameNS('*', 'payloadPublication')[0])
print(str.format("Verificando que el dato retornado es un diccionario, {0}, y contiene {1}", type(getPayLoad(dom3)), getPayLoad(dom3)))
I came to this code, is it what you were looking for?
def getText(element):
return element.data.encode('utf-8').strip()
def getPayLoad(dom):
attrs = ['confidentiality', 'informationStatus', 'situationRecordCreationReference', 'situationRecordCreationTime', 'situationRecordVersion', 'situationRecordVersionTime', 'situationRecordFirstSupplierVersionTime', 'probabilityOfOccurrence', 'sourceCountry', 'sourceIdentification', 'validityStatus', 'overallStartTime', 'overallEndTime', 'impactOnTraffic', 'locationDescriptor', 'tpegDirection', 'latitude', 'longitude', 'tpegDescriptorType', 'from']
for index, node in enumerate(dom.getElementsByTagNameNS('*', 'situation'), 1):
print("\nSituation ID: {0} numero {1}".format(getAttributeID(node), index))
for attr in attrs:
key = node.getElementsByTagNameNS('*', attr)
if key:
value = getText(key[0].firstChild)
if value:
print('{0}: {1}'.format(attr, value))
Here is the way which allow me to collect data from childs, thanks
import xml.etree.ElementTree as ET
from xml.dom.minidom import *
import requests
print("GETTING XML...")
resp = requests.get('http://infocar.dgt.es/datex2/dgt/SituationPublication/all/content.xml', stream = True) #XML that I need
if resp.status_code != 200:
raise ApiError('GET /tasks/ {}'.format(resp.status_code))
print("XML RECIBIDO 200 OK")
#resp.raw.decode_content = True
print("GUARDANDO XML")
with open("DGT_DATEX.xml", "wb") as handle:
for data in (resp.iter_content()):
handle.write(data)
print("XML GUARDADO")
print("INICIANDO PARSEO..")
dom3 = parse("DGT_DATEX.xml")
print(dom3)#memory dir
print("DATEX PARSEADO")
def getAttributeID(element):
return element.getAttribute('id')
def getText(element):
return element.data
def getPayLoad(dom):
dict = {}
index = 1 #esto sirve para relacionar los atributos con el situation que les corresponde
indexRecord = 1 #esto sirve para relacionar los atributos con el situationRecord que les corresponde
for i in dom.getElementsByTagNameNS('*', 'situation'):
#Por cada situation del XML vamos a sacar el situation id y todos los campos que pertecen a este de la siguiente manera
print(str.format("Situation ID: {0} numero {1}", getAttributeID(i), index))
print(getText(dom.getElementsByTagNameNS('*','confidentiality')[index].firstChild))#por ejemplo aquí, se coge el first text de la lista de atributos confidentiality dado el index, que nos indica la relacion con el situation
print(getText(dom.getElementsByTagNameNS('*', 'informationStatus')[index].firstChild))
for record in dom.getElementsByTagNameNS('*', 'situation')[index].childNodes:#buscamos el hijo del corespondiente situation que tenga un ID, lo que nos deveulve elsituationRecord
if record.nodeType != record.TEXT_NODE:
print(str.format("SituationRecord ID: {0} numero {1}", getAttributeID(record), indexRecord))
print(getText(dom.getElementsByTagNameNS('*', 'situationRecordCreationReference')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'situationRecordCreationTime')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'situationRecordVersion')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'situationRecordVersionTime')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'situationRecordFirstSupplierVersionTime')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'probabilityOfOccurrence')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'sourceCountry')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'sourceIdentification')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'validityStatus')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'overallStartTime')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'overallEndTime')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'impactOnTraffic')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'locationDescriptor')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'tpegDirection')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'latitude')[indexRecord].firstChild))
print(getText(dom.getElementsByTagNameNS('*', 'longitude')[indexRecord].firstChild))
print(str.format("VALUE FIELD: {0}", getText(dom.getElementsByTagNameNS('*', 'descriptor')[indexRecord].firstChild)))
indexRecord = indexRecord + 1
index = index + 1
getPayLoad(dom3)

Categories

Resources