Generate a batch ID for each XML files import - python

I hope I can make clear what I've been struggling with for quite some time.
I'm writing a Python script that lets me upload a batch of XML files at once to my SQLite database.
So far, no problems. The point is that I want to generate a unique ID for each batch of XML files when uploading the data to SQLite. Here is my code.
This part reads each XML file and picks out specific values as strings:
itemNota = 1
notas = []
for item in root.findall("./ns:NFe/ns:infNFe/ns:det", nsNFE):
    remessa = str(uuid.uuid4())
    nfe = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:ide/ns:nNF", nsNFE))
    data_emissao = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:ide/ns:dhEmi", nsNFE))
    data_emissao = f"{data_emissao[8:10]}/{data_emissao[5:7]}/{data_emissao[:4]}"
    data_venc = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:cobr/ns:dup/ns:dVenc", nsNFE))
    data_venc = f"{data_venc[8:10]}/{data_venc[5:7]}/{data_venc[:4]}"
    # Issuer (emitente) data
    chave = self.check_none(root.find("./ns:protNFe/ns:infProt/ns:chNFe", nsNFE))
    cnpj_emitente = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:emit/ns:CNPJ", nsNFE))
    nome_emitente = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:emit/ns:xNome", nsNFE))
    ie_emitente = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:emit/ns:IE", nsNFE))
    cnpj_emitente = self.format_cnpj(cnpj_emitente)
    uf_emitente = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:emit/ns:enderEmit/ns:UF", nsNFE))
    valorNfe = self.check_none(root.find("./ns:NFe/ns:infNFe/ns:total/ns:ICMSTot/ns:vNF", nsNFE))
    data_importacao = date.today()
    data_importacao = data_importacao.strftime('%d/%m/%Y')
    dados = [chave, cnpj_emitente, nome_emitente, ie_emitente, uf_emitente, cnpj_destinatario, cpf_destinatario,
             nome_destinatario, ie_destinatario, uf_destinatario, nfe,
             data_emissao, data_venc, valorNfe, data_importacao, remessa]
    notas.append(dados)
    itemNota += 1
return dados
I want my batch ID to be the "remessa" string, generated with the uuid library:
remessa = str(uuid.uuid4())
This is the code that inserts the data into the SQLite table:
def insert_data(self, full_dataset):
    cursor = self.connection.cursor()
    campos_tabela = ('chave', 'cnpj_emitente', 'nome_emitente', 'ie_emitente',
                     'uf_emitente', 'cnpj_destinatario', 'cpf_destinatario', 'nome_destinatario',
                     'ie_destinatario', 'uf_destinatario', 'nfe', 'data_emissao', 'data_venc',
                     'valorNfe', 'data_importacao', 'remessa')
    columns = str(campos_tabela).replace("'", "")
    values = ','.join('?' * 16)
    query_insert_data = self.query_insert_data(columns, values)
    cursor.execute(query_insert_data, full_dataset)
    self.connection.commit()
After executing, the table looks like this:
(screenshot: table of the imported XML files)
But I want the "remessa" column to hold the same ID for every row of a batch, changing only when I import a new batch of XML files.
Has anyone come across something like this?
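One way to get that behaviour (a minimal sketch, not your exact class: parse_xml_rows below is a hypothetical stand-in for the parsing shown above) is to generate the UUID once per import run, outside the per-file/per-item loop, and append the same value to every row of that run:

import uuid

def import_batch(xml_paths):
    # One batch ID, generated once per call (assumption: one call = one batch of XML files).
    remessa = str(uuid.uuid4())
    notas = []
    for path in xml_paths:
        dados = parse_xml_rows(path)   # hypothetical helper: builds the value list as in the loop above
        dados.append(remessa)          # every row of this batch gets the identical remessa
        notas.append(dados)
    return notas

Because remessa is created before the loop, every row written by insert_data during that run carries the same value, and the next import produces a new one.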

Related

Removing data from an SQL table using Python

I'm trying to achieve something with a function I have.
As you can see here (the screenshot of the SQL table isn't included): when I upload 2 docs they land in doc0 and doc1, and all the other doc columns are null.
What I want is that if I upload only 2 docs, the rest are removed completely from the SQL table.
This is my code:
def submit_quality_dept_application(request, application_id):
    n = int(request.data['length'])
    application = Application.objects.get(id=application_id)
    application_state = application.application_state
    teaching_feedback = request.FILES['teaching-feedback']
    application_state['teaching_feedback'] = teaching_feedback.name
    now = datetime.now()
    dt_string = now.strftime("%Y-%m-%d %H:%M:%S")
    application_state['edited_time'] = dt_string
    for i in range(5):
        application_state[f'doc{i}'] = None
    for i in range(n):
        doc = request.FILES[f'doc{i}']
        application_state[f'doc{i}'] = doc.name
        copy_to_application_directory(doc, application.id)
    copy_to_application_directory(teaching_feedback, application.id)
    ApplicationStep.objects.update_or_create(
        application=application, step_name=Step.STEP_7
    )
    Application.objects.filter(id=application_id).update(application_state=application_state)
    return Response(n, status=status.HTTP_200_OK)
What should I do to achieve this?
Thank you so much for your help!
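One possible approach (a sketch, assuming application_state behaves like a plain dict, e.g. a Django JSONField, and that 5 is the maximum number of doc slots as in the range(5) above): instead of pre-filling the unused doc keys with None, delete them from the dict before saving, for example with dict.pop:

MAX_DOCS = 5  # assumed upper bound, taken from the original range(5)

# Fill only the slots that were actually uploaded.
for i in range(n):
    doc = request.FILES[f'doc{i}']
    application_state[f'doc{i}'] = doc.name
    copy_to_application_directory(doc, application.id)

# Remove the remaining doc keys entirely instead of leaving them as null.
for i in range(n, MAX_DOCS):
    application_state.pop(f'doc{i}', None)

After Application.objects.filter(...).update(application_state=application_state), the unused docN entries no longer exist in the stored state at all.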

Decryption not working - how to get raw data from csv/pandas - python

Below is my code for decrypting from a csv file stored on DropBox. I get the user to type in their ID, I match this with a database containing hashed values, and then I use the ID typed in to search my stored csv file for the matching row. I then place all the row values into my decryption function.
Also, I'm aware my variable names/formatting are awful; I'm just using this code as a prototype right now.
My results are printed like this:
b'b\xebS\x1b\xc8v\xe2\xf8\xa2\\x84\x0e7M~\x1b'
b'\x01B#6i\x1b\xfc]\xc3\xca{\xd5{B\xbe!'
b'in*V7\xf3P\xa0\xb2\xc5\xd2\xb7\x1dz~\x95'
I store my key and IV so they are always the same, yet the decryption doesn't seem to work. My only thought is that perhaps my data is changed somehow when stored in a CSV or pandas table. Does anyone know what the issue might be, or whether the bytes can be altered when stored in or imported into a dataframe?
Also, maybe I am extracting the data from my CSV into pandas incorrectly?
def login():
    import sqlite3
    import os.path

    def decoder():
        from Crypto.Cipher import AES
        import hashlib
        from secrets import token_bytes

        cursor.execute(
            '''
            Select enc_key FROM Login where ID = (?);
            ''',
            (L_ID_entry.get(), ))
        row = cursor.fetchone()
        if row is not None:
            keys = row[0]

        # design padding function for encryption
        def padded_text(data_in):
            while len(data_in) % 16 != 0:
                data_in = data_in + b"0"
            return data_in

        # calling stored key from main file and reverting back to bytes
        key_original = bytes.fromhex(keys)
        mode = AES.MODE_CBC
        # model
        cipher = AES.new(key_original, mode, IV3)
        # padding data
        p4 = padded_text(df1.tobytes())
        p5 = padded_text(df2.tobytes())
        p6 = padded_text(df3.tobytes())
        # decrypting data
        d_fname = cipher.decrypt(p4)
        d_sname = cipher.decrypt(p5)
        d_email = cipher.decrypt(p6)
        print(d_fname)
        print(d_sname)
        print(d_email)

    # connecting to db
    try:
        conn = sqlite3.connect('login_details.db')
        cursor = conn.cursor()
        print("Connected to SQLite")
    except sqlite3.Error as error:
        print("Failure, error: ", error)
    finally:
        # downloading txt from dropbox and converting to dataframe to operate on
        import New_user
        import ast
        _, res = client.files_download("/user_details/enc_logins.csv")
        with io.BytesIO(res.content) as csvfile:
            with open("enc_logins.csv", 'rb'):
                df = pd.read_csv(csvfile, names=['ID', 'Fname', 'Sname', 'Email'], encoding='unicode_escape')
                newdf = df[(df == L_ID_entry.get()).any(axis=1)]
                print(newdf)
                df1 = newdf['Fname'].to_numpy()
                df2 = newdf['Sname'].to_numpy()
                df3 = newdf['Email'].to_numpy()
                print(df1)
                print(df2)
                print(df3)
                csvfile.close()
                decoder()
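A likely culprit (an assumption, since the encryption/writing side isn't shown): writing raw ciphertext bytes into a CSV stores their string representation, and reading them back with pandas gives you strings (here further mangled by encoding='unicode_escape' and .to_numpy().tobytes()), not the original bytes, so decrypting those buffers cannot recover the plaintext. A common fix is to hex-encode the ciphertext before storing it and convert it back with bytes.fromhex when decrypting. A minimal round-trip sketch, assuming PyCryptodome:

from Crypto.Cipher import AES
from secrets import token_bytes

key = token_bytes(16)
iv = token_bytes(16)

def pad16(data):
    # Same idea as the padded_text helper above: pad with b"0" to a multiple of 16.
    while len(data) % 16 != 0:
        data += b"0"
    return data

ciphertext = AES.new(key, AES.MODE_CBC, iv).encrypt(pad16(b"Alice"))

# Store as hex text so a CSV / pandas round trip cannot mangle the bytes.
stored_value = ciphertext.hex()

# Later: read the cell back as a string, convert to bytes, then decrypt.
recovered = bytes.fromhex(stored_value)
plaintext = AES.new(key, AES.MODE_CBC, iv).decrypt(recovered)
print(plaintext)  # b'Alice00000000000' (padding still attached)

The same applies to the key: you already call bytes.fromhex(keys) for it, so storing the ciphertext the same way keeps both sides consistent.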

Issue with reading from a txt file using csv

I'm working on a project and I'm stuck: when I try to read nrwolek, nrwolbiz and nrwolpr from the file, instead of getting [1E, 2E, 3E, 4E, 5E], [1B,2B,3B,4B,5B], [1P, 2P, 3P] I get nrwolek = 1E, nrwolbiz = 2E, nrwolpr = 3E. It seems that it doesn't read the whole list, only single elements from it. Is there a way to correct this? Or would it be a good solution to solve this with JSON? Code for reading:
import csv
from lot import DatabaseofLoty, Lot

def read_from_csv(path):
    loty = []
    with open(path, "r") as file_handle:
        reader = csv.DictReader(file_handle)
        for row in reader:
            numer_lotu = row["numer_lotu"]
            id_samolotu = row["id_samolotu"]
            czas_lotu = row['czas_lotu']
            trasa = row['trasa']
            wolne_miejscaek = row['wolne_miejscaek']
            wolne_miejscabiz = row['wolne_miejscabiz']
            wolne_miejscapr = row['wolne_miejscapr']
            bramka = row['bramka']
            cenaek = row['cenaek']
            cenabiz = row['cenabiz']
            cenapr = row['cenapr']
            nrwolek = row['nrwolek']
            nrwolbiz = row['nrwolbiz']
            nrwolpr = row['nrwolpr']
            lot = Lot(numer_lotu, id_samolotu, czas_lotu, trasa,
                      wolne_miejscaek,
                      wolne_miejscabiz, wolne_miejscapr, bramka,
                      cenaek, cenabiz, cenapr)
            loty.append(lot)
    database = DatabaseofLoty(loty)
    return database

print(read_from_csv("loty.txt"))
Text file:
numer_lotu,id_samolotu,czas_lotu,trasa,wolne_miejscaek,wolne_miejscabiz,wolne_miejscapr,bramka,cenaek,cenabiz,cenapr,nrwolek,nrwolbiz,nrwolpr
1,3,3:52,Amsterdam-Berlin,129,92,192,8,52,68,75, [1E, 2E, 3E, 4E, 5E], [1B,2B,3B,4B,5B], [1P, 2P, 3P]
2,3,3:52,Tokio-Berlin,129,92,192,8,580,720,1234
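The issue is most likely that the list-valued columns are not quoted: csv treats the commas inside [1E, 2E, 3E, 4E, 5E] as column separators, so each header only receives one element. One fix (a sketch, assuming you can change the file so those fields are wrapped in double quotes) is to quote the lists and then split the quoted string back into a Python list when reading:

import csv

# Assumed data line, with the list columns quoted:
# 1,3,3:52,Amsterdam-Berlin,129,92,192,8,52,68,75,"[1E, 2E, 3E, 4E, 5E]","[1B,2B,3B,4B,5B]","[1P, 2P, 3P]"

def parse_seat_list(field):
    # Turn the string "[1E, 2E, 3E]" into the list ['1E', '2E', '3E'].
    field = (field or "").strip()
    if not field:
        return []
    return [item.strip() for item in field.strip("[]").split(",") if item.strip()]

with open("loty.txt", newline="") as file_handle:
    reader = csv.DictReader(file_handle)
    for row in reader:
        nrwolek = parse_seat_list(row.get("nrwolek"))
        nrwolbiz = parse_seat_list(row.get("nrwolbiz"))
        nrwolpr = parse_seat_list(row.get("nrwolpr"))
        print(nrwolek, nrwolbiz, nrwolpr)

Storing each flight as JSON, as you suggest, would also work, since JSON has native support for lists.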

Python- Flask CSV File upload error handling [duplicate]

This question already has an answer here:
Python: One Try Multiple Except
(1 answer)
Closed 4 years ago.
Essentially I am able to do what I want with my current code, which is to upload a CSV file, manipulate it with pandas and then update a MSQL database. I would like to add error handling somehow. At the moment the upload function only works for one particular file and throws different errors for all others.
Is there a way that I can catch multiple errors and return an error message to the user?
Possibly something like a check on the input CSV file's column headers.
@app.route('/upload', methods=['GET', 'POST'])
def csv_input():
    tempfile_path = tempfile.NamedTemporaryFile().name
    #file.save(tempfile_path)
    #sheet = pd.read_csv(tempfile_path)
    if request.method == 'POST':
        file = request.files['file']
        if file:  # and allowed_filename(file.filename):
            #filename = secure_filename(file.filename)
            file.save(tempfile_path)
            input_csv = pd.read_csv(tempfile_path, sep=",", engine='python')
            #### Data cleansing of the uploaded data
            col_titles = ['id', 'title', 'vote_average', 'w_average', 'vote_count', 'year', 'runtime',
                          'budget', 'revenue', 'profit']
            # Only keep data where the original language is English
            input_csv = input_csv[input_csv['original_language'] == 'en']
            # New dataframe that only contains data with vote count >= 10
            input_csv = input_csv[input_csv['vote_count'] >= 10]
            # Fill all NA values with 0 - needed to set datatypes
            input_csv = input_csv.fillna(0)
            # Remove all rows with no runtime
            input_csv = input_csv[input_csv['runtime'] != 0]
            # Remove all duplicate rows
            input_csv = input_csv.drop_duplicates()
            input_csv['vote_average'] = input_csv.vote_average.astype(float).round(1)
            input_csv.vote_average.round(1)
            input_csv['runtime'] = input_csv.runtime.astype(int)
            input_csv['vote_count'] = input_csv.vote_count.astype(int)
            input_csv['revenue'] = input_csv.revenue.astype('int64')
            input_csv['budget'] = input_csv.budget.astype('int64')
            profit_cal(input_csv, 'revenue', 'budget', 'profit')
            input_csv['profit'] = input_csv.profit.astype('int64')
            input_csv['profit'] = input_csv.profit.replace(0, 'No Data')
            #reorder_data = pd.DataFrame(input_csv)
            # Year cleaning
            input_csv['year'] = pd.to_datetime(input_csv['release_date'], errors='coerce').apply(
                lambda x: str(x).split('-')[0] if x != np.nan else np.nan)
            #C = reorder_data['vote_average'].mean()
            #m = reorder_data['vote_count'].quantile(0.10)
            #w_average = org_data.copy().loc[reorder_data['vote_count'] >= m]
            #### IMDB data calculation
            V = input_csv['vote_count']
            R = input_csv['vote_average']
            C = input_csv['vote_average'].mean()
            m = input_csv['vote_count'].quantile(0.10)
            input_csv['w_average'] = (V / (V + m) * R) + (m / (m + V) * C)
            #C = input_csv['vote_average'].mean()
            #m = input_csv['vote_count'].quantile(0.10)
            #input_csv['w_average'] = input_csv.apply(weighted_rating, axis=1)
            input_csv['w_average'] = input_csv.w_average.astype(float).round(1)
            # Reorder the data and output in the correct order
            reorder_data = input_csv[col_titles]
            reorder_data.to_sql(name='title_data', con=engine, if_exists='replace', index=False)
            ##### Genre loads == DataFrame 2
            df = input_csv
            v = df.genres.apply(json.loads)
            df = pd.DataFrame(
                {
                    'id': df['id'].values.repeat(v.str.len(), axis=0),
                    'genre': np.concatenate(v.tolist())
                })
            df['genre'] = df['genre'].map(lambda x: x.get('name'))
            genre_data = df.genre.str.get_dummies().sum(level=0)
            genre_data = df.loc[(df != 0).any(1)]
            #genre_data = genre_data.set_index('id')
            genre_order = ['id', 'genre']
            ## Dataframe to SQL
            genre_data[genre_order].to_sql(name='genre_data', con=engine, if_exists='replace', index=False)
            ####### Keyword search ### Dataframe
            #genre_data.to_csv("genre_data.csv")
            #return genre_data[genre_order].to_html()
            flash('Database has been updated successfully', 'success')
            #return reorder_data[col_titles].to_html()
            #stream = io.StringIO(file.stream.read().decode("UTF8"), newline=None)
            #csv_input = csv.reader(stream)
            #return reorder_data.to_html(index=False)
            #flash('File Uploaded Successfully')
            #return redirect(url_for('index'))
    return render_template('upload.html')
There are several approaches.
The Python way: just wrap the parsing and processing in try:/except blocks with the relevant exception classes.
from pandas.errors import EmptyDataError, ParserError

try:
    # parsing & processing logic here
    pass
except EmptyDataError as ex:
    # tell the user we don't accept empty data
    pass
except ParserError as ex:
    # tell the user we failed to parse their input
    pass
except Exception as ex:
    # tell the user that something went wrong
    pass
The Flask way: register error handlers with Flask for specific exceptions (this affects the whole Flask application):
@app.errorhandler(pandas.errors.EmptyDataError)
def handle_empty_data(error):
    return 'Failed parsing Input', 200
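A header check along the lines the asker mentions could be added before the cleaning step. A sketch (the required column names below are assumptions read off the cleaning code, not a definitive list):

import pandas as pd

# Columns the cleaning code above appears to rely on (assumed).
REQUIRED_COLUMNS = {'id', 'title', 'original_language', 'vote_average',
                    'vote_count', 'runtime', 'budget', 'revenue',
                    'release_date', 'genres'}

def validate_headers(csv_path):
    # Returns (ok, message) after checking only the CSV's header row.
    try:
        header_df = pd.read_csv(csv_path, nrows=0)
    except pd.errors.EmptyDataError:
        return False, "The uploaded file is empty."
    except pd.errors.ParserError:
        return False, "The uploaded file could not be parsed as CSV."
    missing = REQUIRED_COLUMNS - set(header_df.columns)
    if missing:
        return False, "Missing required columns: " + ", ".join(sorted(missing))
    return True, "OK"

In csv_input you could call this right after file.save(tempfile_path), flash the returned message and skip the processing when ok is False.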

Datastore Asynchronous Write Operation, not saving

I'm trying to write data to my local Datastore like this:
drivingJson = json.loads(drivingdata)
for data in drivingJson:
    keys = getKey()
    index = 1
    dataList = list()
    for nodeData in data:
        self.response.write(keys)
        self.response.write("<br>")
        lat = nodeData['lat']
        lng = nodeData['long']
        color = nodeData['color']
        timestamp = datetime.datetime.strptime(nodeData['timestamp'], "%Y-%m-%d %H:%M:%S")
        saveDrivingData = DrivingObject(
            index=index,
            lat=float(lat),
            lng=float(lng),
            timestamp=timestamp,
            sessionKey=str(keys),
            color=int(color)
        )
        dataList.append(saveDrivingData)
        index += 1
    ndb.put_multi_async(dataList)
This doesn't populate the Datastore with anything. But when I use
ndb.put_multi(dataList)
the Datastore populates fine. How do I handle the asynchronous call? Thanks.
put_multi_async returns a list of Future objects.
You need to wait on those futures (for example with Future.wait_all, or by calling get_result on each) to make sure the puts complete before you return from the request.
Have a read about async; all work has to complete before you return:
https://cloud.google.com/appengine/docs/python/ndb/async#using
All through the document it talks about waiting.
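A sketch of how that could look in the handler, assuming the classic App Engine ndb library (Future.wait_all and get_result come from its Future API):

# Keep the futures returned by the async put and wait on them before the
# handler returns, so the writes actually complete.
futures = ndb.put_multi_async(dataList)
ndb.Future.wait_all(futures)                  # block until every put has finished
keys = [f.get_result() for f in futures]      # optional: raises if any put failed, returns the keys

If nothing waits on the futures, the request can end before the RPCs finish, which matches the "nothing gets saved" behaviour described above.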
