Saving data from Arduino using Python - loss of data

With the help of the web, I have created a script that collects data from an Arduino Uno and saves it to a CSV file.
The data collected are raw values from MEMS accelerometers.
The problem with the code is that I often lose a lot of data, if not all of it, when I terminate the Python script. I noticed that at a random time, the output CSV file has zero bytes.
A temporary workaround is to start the Arduino IDE's Serial Monitor; this way most of the measured data is saved.
import serial
import time
import csv
import numpy as np
import pandas as pd
timeHr = []
timeT = []
mem1xD = []
mem1yD = []
mem1zD = []
#
mem2xD = []
mem2yD = []
mem2zD = []
arduinoData = serial.Serial('COM4',9600)
df = pd.DataFrame({
    'timeHr': 0,
    'timeT': 0,
    'mem1xD': 0,
    'mem1yD': 0,
    'mem1zD': 0,
    'mem2xD': 0,
    'mem2yD': 0,
    'mem2zD': 0,
    },
    index=[0]
)
while True:
    while arduinoData.inWaiting() == 0:
        pass
    arduinoString = arduinoData.readline().decode("utf-8")
    dataArray = arduinoString.split(",")
    timehr = dataArray[0]
    time = float(dataArray[1]) / 1000
    mem1x = float(dataArray[2])
    mem1y = float(dataArray[3])
    mem1z = float(dataArray[4])
    #
    mem2x = float(dataArray[5])
    mem2y = float(dataArray[6])
    mem2z = float(dataArray[7])
    timeHr.append(timehr)
    timeT.append(time)
    mem1xD.append(mem1x)
    mem1yD.append(mem1y)
    mem1zD.append(mem1z)
    #
    mem2xD.append(mem2x)
    mem2yD.append(mem2y)
    mem2zD.append(mem2z)
    df = pd.DataFrame({
        'timeHr': timeHr,
        'timeT': timeT,
        'mem1xD': mem1xD,
        'mem1yD': mem1yD,
        'mem1zD': mem1zD,
        'mem2xD': mem2xD,
        'mem2yD': mem2yD,
        'mem2zD': mem2zD,
    })
    df.to_csv(r'time4.csv')

You need to append new data to the CSV file rather than rewrite the whole file on every pass. Passing mode='a' to pd.DataFrame.to_csv will allow you to do that.
import time

tStart = str(time.time()).split('.')[0]
fileOut = tStart + '.csv'
headerWritten = False

while True:
    while arduinoData.inWaiting() == 0:
        pass
    arduinoString = arduinoData.readline().decode("utf-8")
    dataArray = arduinoString.split(",")
    timehr = dataArray[0]
    timeSec = float(dataArray[1]) / 1000  # renamed from "time" so the time module is not shadowed
    mem1x = float(dataArray[2])
    mem1y = float(dataArray[3])
    mem1z = float(dataArray[4])
    #
    mem2x = float(dataArray[5])
    mem2y = float(dataArray[6])
    mem2z = float(dataArray[7])
    # Build a one-row frame for the newest sample only; appending the whole
    # accumulated history each pass would duplicate rows in the output file.
    df = pd.DataFrame({
        'timeHr': [timehr],
        'timeT': [timeSec],
        'mem1xD': [mem1x],
        'mem1yD': [mem1y],
        'mem1zD': [mem1z],
        'mem2xD': [mem2x],
        'mem2yD': [mem2y],
        'mem2zD': [mem2z],
    })
    df.to_csv(fileOut, mode='a', header=not headerWritten, index=False)
    headerWritten = True
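If data still vanishes when the script is killed mid-write, a lower-level variant (a sketch under the same assumptions about the comma-separated serial format, not part of the original answer) is to write each line with the standard csv module and flush after every row, so an interrupt loses at most one sample:

import csv
import serial

arduinoData = serial.Serial('COM4', 9600)
with open('time4.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['timeHr', 'timeT', 'mem1xD', 'mem1yD', 'mem1zD',
                     'mem2xD', 'mem2yD', 'mem2zD'])
    while True:
        fields = arduinoData.readline().decode('utf-8').strip().split(',')
        if len(fields) < 8:
            continue  # skip incomplete or garbled serial lines
        writer.writerow(fields[:8])
        f.flush()  # push each row to the OS so little is lost on termination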

Related

Is there a proper way to append JSON Data to a Numpy array

I am trying to add data that I am reading from a series of JSON files to a NumPy array (or whatever data collection would work best). My idea is to sort a collection of episodes of a TV show by episode title.
The problem I have encountered is actually creating the collection from the data.
The intent is to end up with a collection of the items found within the for loop, [a, b, c, d], for each episode of the show.
Is a NumPy array the best way to go about making this collection, or should I use something else?
import json as j
import numpy as np

season1 = open('THEJSONFILES\seasonone.json', 'r')
season_array = np.array(['episodeTitle', 'seasonNum', 'episodeNum', 'plotContents'])

def ReadTheDarnJsonFile(jsonTitle):
    seasondata = jsonTitle.read()
    seasonobj = j.loads(seasondata)
    list = (seasonobj['episodes'])
    for i in range(len(list)):
        a = str(list[i].get('title'))
        b = str(list[i].get('seasonNumber'))
        c = str(list[i].get('episodeNumber'))
        d = str(list[i].get('plot'))
        print(a, b, c, d)
        print("----------------")
        # np.append(season_array, [a,b,c,d]) this is not correct

ReadTheDarnJsonFile(season1)
print(season_array)
Two notes. First, I would avoid using list as a variable name, because it shadows the built-in list type in Python. Second, I would recommend using a custom class for your data for maximum readability.
import json as j

class Episode:
    def __init__(self, title, seasonNumber, episodeNumber, plot):
        self.title = title
        self.seasonNumber = seasonNumber
        self.episodeNumber = episodeNumber
        self.plot = plot

    def summary(self):
        print("Season " + str(self.seasonNumber) + " Episode " + str(self.episodeNumber))
        print(self.title)
        print(self.plot)

def ReadTheDarnJsonFile(jsonTitle):
    seasondata = jsonTitle.read()
    seasonobj = j.loads(seasondata)
    episodes = seasonobj['episodes']
    season_array = []
    for i in range(len(episodes)):
        a = str(episodes[i].get('title'))
        b = str(episodes[i].get('seasonNumber'))
        c = str(episodes[i].get('episodeNumber'))
        d = str(episodes[i].get('plot'))
        season_array.append(Episode(a, b, c, d))
    return season_array

season1 = open('THEJSONFILES\seasonone.json', 'r')
season_array = ReadTheDarnJsonFile(season1)
for item in season_array:
    item.summary()
Here is what I ended up doing.
import json as j
import pandas as pd
emptyArray = []
season1 = open('THEJSONFILES\seasonone.json', 'r')
season2 = open('THEJSONFILES\seasontwo.json', 'r')
season3 = open('THEJSONFILES\seasonthree.json', 'r')
season4 = open('THEJSONFILES\seasonfour.json', 'r')
season5 = open('THEJSONFILES\seasonfive.json', 'r')
season6 = open('THEJSONFILES\seasonsix.json', 'r')
season7 = open('THEJSONFILES\seasonseven.json', 'r')
columnData = ["episodeTitle", "seasonIndex", "episodeIndex", "plot", "imageURL"]
finalDf = pd.DataFrame()

def ReadTheDarnJsonFile(jsonTitle):
    df = pd.DataFrame(columns=columnData)
    seasonData = jsonTitle.read()
    seasonObj = j.loads(seasonData)
    currentSeasonList = seasonObj['episodes']
    for i in range(len(currentSeasonList)):
        tempTitle = str(currentSeasonList[i].get('title'))
        tempSN = str(currentSeasonList[i].get('seasonNumber'))
        tempEN = str(currentSeasonList[i].get('episodeNumber'))
        tempPlot = str(currentSeasonList[i].get('plot'))
        tempImage = str(currentSeasonList[i].get('image'))
        dataObj = pd.Series([tempTitle, tempSN, tempEN, tempPlot, tempImage], index=df.columns)
        df.loc[i] = dataObj
    emptyArray.append(df)

ReadTheDarnJsonFile(season1)
ReadTheDarnJsonFile(season2)
ReadTheDarnJsonFile(season3)
ReadTheDarnJsonFile(season4)
ReadTheDarnJsonFile(season5)
ReadTheDarnJsonFile(season6)
ReadTheDarnJsonFile(season7)

finalDf = pd.concat(emptyArray)
print(emptyArray)
holyOutput = finalDf.sort_values(by=['episodeTitle'])
holyOutput.reset_index(inplace=True)
holyOutput.to_json("P:\\ProjectForStarWarsCloneWarsJson\JSON\OutputJsonV2.json")

Convert excel to XML in python

I am trying to convert an Excel database into XML with Python.
I have trading data which I need to import into the system in XML format.
My code is the following:
import pandas as pd
import xml.etree.ElementTree as ET

df = pd.read_excel("C:/Users/junag/Documents/XML/Portfolio2.xlsx", sheet_name="Sheet1", dtype=object)

root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
tree = ET.ElementTree(root)
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")

for row in df.itertuples():
    Portfolio = ET.SubElement(Portfolios, "Portfolio", Name=row.Name, BaseCurrency=row.BaseCurrency2,
                              TradingPower=str(row.TradingPower), ValidationProfile=row.ValidationProfile,
                              CommissionProfile=row.CommissionProfile)
    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")
    if row.Type == "Cash":
        PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume))
        Cash = ET.SubElement(PortfolioPosition, 'Cash', Currency=str(row.Currency))
    else:
        PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume),
                                          Invested=str(row.Invested), BaseInvested=str(row.BaseInvested))
        Instrument = ET.SubElement(PortfolioPosition, 'Instrument', Ticker=str(row.Ticker), ISIN=str(row.ISIN), Market=str(row.Market),
                                   Currency=str(row.Currency2), CFI=str(row.CFI))

ET.indent(tree, space="\t", level=0)
tree.write("Portfolios_converted2.xml", encoding="utf-8")
The output looks like this (screenshot omitted): every Excel row produces its own <Portfolio> element. I need it to look like this instead (screenshot omitted): a single <Portfolio> per portfolio, with all of its positions grouped under one <PortfolioPositions>. How can I improve my code to make the output XML look like that? Please advise.
Here is the Excel data (screenshot omitted):
Since you need a single <Portfolio> and <PortfolioPositions> as the parent grouping, consider a nested loop: iterate through a list of data frames, then, within each data frame, loop through its rows:
import xml.etree.ElementTree as ET
import pandas as pd
import xml.dom.minidom as md

df = pd.read_excel("Input.xlsx", sheet_name="Sheet1", dtype=object)

# LIST OF DATA FRAME SPLITS
df_list = [g for i, g in df.groupby(
    ["Name", "BaseCurrency2", "TradingPower", "ValidationProfile", "CommissionProfile"]
)]

# ROOT LEVEL
root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')

# ROOT CHILD LEVEL
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")

# GROUP LEVEL ITERATION
for df in df_list:
    # .iloc[0]: each group keeps its original row labels, so positional
    # access is needed rather than label-based df["Name"][0]
    Portfolio = ET.SubElement(
        Portfolios,
        "Portfolio",
        Name = df["Name"].iloc[0],
        BaseCurrency = df["BaseCurrency2"].iloc[0],
        TradingPower = str(df["TradingPower"].iloc[0]),
        ValidationProfile = df["ValidationProfile"].iloc[0],
        CommissionProfile = df["CommissionProfile"].iloc[0]
    )
    PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")

    # ROW LEVEL ITERATION
    for row in df.itertuples():
        if row.Type == "Cash":
            PortfolioPosition = ET.SubElement(
                PortfolioPositions,
                "PortfolioPosition",
                Type = row.Type,
                Volume = str(row.Volume)
            )
            Cash = ET.SubElement(
                PortfolioPosition,
                "Cash",
                Currency = str(row.Currency)
            )
        else:
            PortfolioPosition = ET.SubElement(
                PortfolioPositions,
                "PortfolioPosition",
                Type = row.Type,
                Volume = str(row.Volume),
                Invested = str(row.Invested),
                BaseInvested = str(row.BaseInvested)
            )
            Instrument = ET.SubElement(
                PortfolioPosition,
                "Instrument",
                Ticker = str(row.Ticker),
                ISIN = str(row.ISIN),
                Market = str(row.Market),
                Currency = str(row.Currency2),
                CFI = str(row.CFI)
            )

# SAVE PRETTY PRINT OUTPUT
with open("Output.xml", "wb") as f:
    dom = md.parseString(ET.tostring(root))
    f.write(dom.toprettyxml().encode("utf-8"))
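A side note on the pretty-printing step (my addition, not part of the original answer): on Python 3.9+, ElementTree has a built-in indenter, which the question's own code already uses, so the minidom round-trip is optional and the ending can be reduced to:

# Python 3.9+: indent the tree in place and write it directly
tree = ET.ElementTree(root)
ET.indent(tree)
tree.write("Output.xml", encoding="utf-8", xml_declaration=True)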
Converting excel to XML in python
import openpyxl
import xml.etree.ElementTree as ET

def convert_excel_to_xml(file_name, sheet_name):
    wb = openpyxl.load_workbook(file_name)
    sheet = wb[sheet_name]
    root = ET.Element("root")
    for row in sheet.rows:
        for cell in row:
            # attribute values must be strings; cell.value may be a number or None
            ET.SubElement(root, "cell", value=str(cell.value))
    tree = ET.ElementTree(root)
    tree.write("{}.xml".format(sheet_name))
Run the function
convert_excel_to_xml("test.xlsx", "Sheet1")

How to read large NetCDF data sets without using a for loop - Python

Good morning. I have a problem reading a large NetCDF file in Python that contains meteorological information. I have to iterate over the file to assemble the information and then insert it into a database, but iterating and assembling take far too long. I know there must be more efficient ways to perform the same process; currently I access the information through for loops, as in the code below.
content = nc.Dataset(pathFile + file)
XLONG, XLAT = content.variables["XLONG"], content.variables["XLAT"]
Times = content.variables["Times"]    # Times (byte strings)
RAINC = content.variables["RAINC"]    # Rain
Q2 = content.variables["Q2"]          # Specific humidity
T2 = content.variables["T2"]          # Temperature
U10 = content.variables["U10"]        # Zonal wind
V10 = content.variables["V10"]        # Meridional wind
SWDOWN = content.variables["SWDOWN"]  # Incident radiation
PSFC = content.variables["PSFC"]      # Surface pressure
SST = content.variables["SST"]        # Sea surface temperature
CLDFRA = content.variables["CLDFRA"]  # Cloud fraction

for c2 in range(len(XLONG[0])):
    for c3 in range(len(XLONG[0][c2])):
        position += 1
        for hour in range(len(Times)):
            dateH = getDatetimeInit(dateFormatFile.hour) if hour == 0 else getDatetimeForHour(hour, dateFormatFile.hour)
            hourUTC = getHourUTC(hour)
            RAINH = str(RAINC[hour][0][c2][c3])
            Q2H = str(Q2[hour][0][c2][c3])
            T2H = str(convertKelvinToCelsius(T2[hour][0][c2][c3]))
            U10H = str(U10[hour][0][c2][c3])
            V10H = str(V10[hour][0][c2][c3])
            SWDOWNH = str(SWDOWN[hour][0][c2][c3])
            PSFCH = str(PSFC[hour][0][c2][c3])
            SSTH = str(SST[hour][0][c2][c3])
            CLDFRAH = str(CLDFRA[hour][0][c2][c3])
            rowData = [idRun, functions.IDMODEL, idTime, position, dateH.year, dateH.month, dateH.day, dateH.hour,
                       RAINH, Q2H, T2H, U10H, V10H, SWDOWNH, PSFCH, SSTH, CLDFRAH]
            dataProcess.append(rowData)
I would use NumPy. Let us assume your netCDF file has two variables, "t2" and "slp". Then you could use the following code to vectorize your data:
#!//usr/bin/env ipython
# ---------------------
import numpy as np
from netCDF4 import Dataset
# ---------------------
filein = 'test.nc'
ncin = Dataset(filein);
tair = ncin.variables['t2'][:];
slp = ncin.variables['slp'][:];
ncin.close();
# -------------------------
tairseries = np.reshape(tair,(np.size(tair),1));
slpseries = np.reshape(slp,(np.size(slp),1));
# --------------------------
## if you want characters:
#tairseries = np.array([str(val) for val in tairseries]);
#slpseries = np.array([str(val) for val in slpseries]);
# --------------------------
rowdata = np.concatenate((tairseries,slpseries),axis=1);
# if you want characters, do this in the end:
row_asstrings = [[str(vv) for vv in val] for val in rowdata]
# ---------------------------
Nevertheless, I have a feeling that using strings is not a very good idea. In my example, the conversion from numerical arrays to strings took quite a long time, and therefore I did not implement it before the concatenation.
If you also want some time/location information, you can do it like this:
#!//usr/bin/env ipython
# ---------------------
import numpy as np
from netCDF4 import Dataset
# ---------------------
filein = 'test.nc'
ncin = Dataset(filein);
xin = ncin.variables['lon'][:]
yin = ncin.variables['lat'][:]
timein = ncin.variables['time'][:]
tair = ncin.variables['t2'][:];
slp = ncin.variables['slp'][:];
ncin.close();
# -------------------------
tairseries = np.reshape(tair,(np.size(tair),1));
slpseries = np.reshape(slp,(np.size(slp),1));
# --------------------------
## if you want characters:
#tairseries = np.array([str(val) for val in tairseries]);
#slpseries = np.array([str(val) for val in slpseries]);
# --------------------------
rowdata = np.concatenate((tairseries,slpseries),axis=1);
# if you want characters, do this in the end:
#row_asstrings = [[str(vv) for vv in val] for val in rowdata]
# ---------------------------
# =========================================================
nx = np.size(xin);ny = np.size(yin);ntime = np.size(timein);
xm,ym = np.meshgrid(xin,yin);
xmt = np.tile(xm,(ntime,1,1));ymt = np.tile(ym,(ntime,1,1))
timem = np.tile(timein[:,np.newaxis,np.newaxis],(1,ny,nx));
xvec = np.reshape(xmt,(np.size(tair),1));yvec = np.reshape(ymt,(np.size(tair),1));timevec = np.reshape(timem,(np.size(tair),1)); # to make sure that array's size match, I am using the size of one of the variables
rowdata = np.concatenate((xvec,yvec,timevec,tairseries,slpseries),axis=1);
In any case, with variables of size (744, 150, 150), it took less than 2 seconds to vectorize the 2 variables.
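As a further side note (my addition, not from the original answers): the xarray library can produce the same long-format table without any manual reshaping or tiling, since to_dataframe() expands every (time, lat, lon) cell into one row. A minimal sketch, assuming the same variable names t2 and slp as above:

import xarray as xr

ds = xr.open_dataset("test.nc")
# flatten both variables plus their time/lat/lon coordinates into columns
df = ds[["t2", "slp"]].to_dataframe().reset_index()
print(df.head())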

Pandas Dataframe Only Returning first Row of JSON Data

I'm working on a web-scraping project and have code that returns the JSON data in the format I want if I use the #print command below, but when I run the same code through a pandas DataFrame, it only returns the first row of the data I'm looking for. Just running the print returns the expected 17 rows of data; DataFrame-to-CSV gives me the first row only. Totally stumped! So grateful for anyone's help!
for item in response['body']:
    DepartureDate = item['legs'][0][0]['departDate']
    ReturnDate = item['legs'][1][0]['departDate']
    Airline = item['legs'][0][0]['airline']['code']
    Origin = item['legs'][0][0]['depart']
    Destination = item['legs'][0][0]['destination']
    OD = (Origin + Destination)
    TrueBaseFare = item['breakdown']['baseFareAmount']
    YQYR = item['breakdown']['fuelSurcharge']
    TAX = item['breakdown']['totalTax']
    TTL = item['breakdown']['totalFareAmount']
    MARKEDUPTTL = item['breakdown']['totalCalculatedFareAmount']
    MARKUP = ((MARKEDUPTTL - TTL) / (TTL) * 100)
    FBC = item['fareBasisCode']
    #print(DepartureDate, ReturnDate, Airline, OD, TrueBaseFare, YQYR, TAX, TTL, MARKEDUPTTL, MARKUP, FBC)

# by this point the loop variables hold only the values from the final
# iteration, so the frame below ends up with a single row
MI = pd.DataFrame(
    {'Dept': [DepartureDate],
     'Ret': [ReturnDate],
     'AirlineCode': [Airline],
     'Routing': [OD],
     'RealFare': [TrueBaseFare],
     'Fuel': [YQYR],
     'Taxes': [TAX],
     'RealTotal': [TTL],
     'AgencyTotal': [MARKEDUPTTL],
     'Margin': [MARKUP],
     'FareBasis': [FBC],
     })
df = pd.DataFrame(MI)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
df.to_csv('MITest7.csv')
If you print your values after the loop, you will see that you get only the last values. To resolve this problem you need to create lists and append your values to them.
Try this:
DepartureDate = []
ReturnDate = []
Airline = []
Origin = []
Destination = []
OD = []
TrueBaseFare = []
YQYR = []
TAX = []
TTL = []
MARKEDUPTTL = []
MARKUP = []
FBC = []

for item in response['body']:
    DepartureDate.append(item['legs'][0][0]['departDate'])
    ReturnDate.append(item['legs'][1][0]['departDate'])
    Airline.append(item['legs'][0][0]['airline']['code'])
    Origin.append(item['legs'][0][0]['depart'])
    Destination.append(item['legs'][0][0]['destination'])
    OD.append(Origin[-1] + Destination[-1])
    TrueBaseFare.append(item['breakdown']['baseFareAmount'])
    YQYR.append(item['breakdown']['fuelSurcharge'])
    TAX.append(item['breakdown']['totalTax'])
    TTL.append(item['breakdown']['totalFareAmount'])
    MARKEDUPTTL.append(item['breakdown']['totalCalculatedFareAmount'])
    MARKUP.append((MARKEDUPTTL[-1] - TTL[-1]) / TTL[-1] * 100)
    FBC.append(item['fareBasisCode'])
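The answer stops at filling the lists; a minimal completion sketch (assuming the same column names as the question) that builds the frame from them and writes the CSV:

MI = pd.DataFrame({
    'Dept': DepartureDate,
    'Ret': ReturnDate,
    'AirlineCode': Airline,
    'Routing': OD,
    'RealFare': TrueBaseFare,
    'Fuel': YQYR,
    'Taxes': TAX,
    'RealTotal': TTL,
    'AgencyTotal': MARKEDUPTTL,
    'Margin': MARKUP,
    'FareBasis': FBC,
})
MI.to_csv('MITest7.csv')  # one row per item in response['body']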

Strange variable behaviour in Python

My first post here.
So I'm loading data into a variable called f1_data, then passing it to the pm.removeDC() function to do some signal processing and keeping the result in the same variable. But then I want to replace only column 8 with the original f1_data, which I saved as raw_data, and I can't figure out why it doesn't work. Here are the functions. Can anyone help?
inside file pm.py
from scipy import signal
import matplotlib.pyplot as plt

def removeDC(data):
    # define the filter (fs_Hz is assumed to be a module-level constant)
    butter_order = 2
    hp_cutoff_Hz = 1.0
    b, a = signal.butter(butter_order, hp_cutoff_Hz / (fs_Hz / 2.0), 'highpass')
    for i in range(1, 9):
        data[:, i] = signal.lfilter(b, a, data[:, i], 0)
    return (data)

def get_epoch1(data, t_sec, epoch, f_tup, col):
    # f_tup = (f_wdir, f_name, f_columns, out_save, out_dir, out_number, fig_width)
    f_name = f_tup[1]
    fig_width = f_tup[6]
    epoch_boolvector = (t_sec >= epoch[0][0]) & (t_sec <= epoch[0][1])
    epoch_timescale = t_sec[epoch_boolvector]
    epoch_data = data[epoch_boolvector]
    plt.figure(figsize=(fig_width, 8), dpi=96)
    plt.plot(epoch_timescale, epoch_data[:, col])
    plt.xlim(epoch_timescale[0], epoch_timescale[-1])
    plt.show()
    return (epoch_boolvector, epoch_timescale, epoch_data)
inside main file
#load the whole data
(f1_data, f1_data_indices, f1_timescale) = pm.load_data(f1_wdir, f1_name)
raw_data = f1_data[:] #create copy of f1_data
(f1ep1_boolvector, f1ep1_timescale, f1ep1_data) = pm.get_epoch1(f1_data, f1_timescale, f1_epochs[1], f1_tup, 8)
#--- filter data to remove DC (1Hz)
f1_data = pm.removeDC(f1_data)
# replace only channel 8 with original data
f1_data[:,8] = raw_data[:,8]
(f1ep2_boolvector, f1ep2_timescale, f1ep2_data) = pm.get_epoch1(f1_data, f1_timescale, f1_epochs[1], f1_tup, 8)
The solution is to import copy and use the copy.deepcopy function.
For further info check this link:
docs.python.org/2/library/copy.html
When I have raw_data = f1_data[:] I get, after pm.removeDC():
raw_data is f1_data: False
(raw_data == f1_data).all(): True
But when I have raw_data = copy.deepcopy(f1_data) I get, after pm.removeDC():
raw_data is f1_data: False
(raw_data == f1_data).all(): False
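The underlying reason (my note, not part of the original answer): for a NumPy array, basic slicing such as f1_data[:] returns a view that shares memory with the original, so the in-place filtering inside removeDC() also overwrites raw_data; a deep copy (or ndarray.copy()) allocates independent memory. A minimal sketch:

import numpy as np

a = np.arange(6.0)
view = a[:]      # basic slice: a view sharing a's memory
snap = a.copy()  # independent copy (copy.deepcopy(a) behaves the same here)

a *= 10          # in-place modification, like removeDC()
print(view)      # [ 0. 10. 20. 30. 40. 50.]  -> changed along with a
print(snap)      # [0. 1. 2. 3. 4. 5.]        -> preserved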
