Python read in file: ERROR: line contains NULL byte

I would like to parse a .ubx file (my input file). This file contains many different NMEA sentences as well as raw receiver data. The output file should contain only information from the GGA sentences. This works fine as long as the .ubx file does not contain any raw messages. However, if it contains raw data,
I get the following error:
Traceback (most recent call last):
File "C:...myParser.py", line 25, in
for row in reader:
Error: line contains NULL byte
My code looks like this:
import csv
from datetime import datetime
import math

# adapt this to your file
INPUT_FILENAME = 'Rover.ubx'
OUTPUT_FILENAME = 'out2.csv'

# open the input file in read mode
with open(INPUT_FILENAME, 'r') as input_file:
    # open the output file in write mode
    with open(OUTPUT_FILENAME, 'wt') as output_file:
        # create a csv reader object from the input file (NMEA files are basically csv)
        reader = csv.reader(input_file)
        # create a csv writer object for the output file
        writer = csv.writer(output_file, delimiter=',', lineterminator='\n')
        # write the header line to the csv file
        writer.writerow(['Time', 'Longitude', 'Latitude', 'Altitude', 'Quality', 'Number of Sat.', 'HDOP', 'Geoid separation', 'diffAge'])
        # iterate over all the rows in the nmea file
        for row in reader:
            if row[0].startswith('$GNGGA'):
                time = row[1]
                # parse the time column into a Python datetime object (usually more convenient than a raw string)
                date_and_time = datetime.strptime(time, '%H%M%S.%f')
                date_and_time = date_and_time.strftime('%H:%M:%S.%f')[:-6]
                writer.writerow([date_and_time])
My .ubx file looks like this:
My .ubx file looks like this:
$GNGSA,A,3,16,25,29,20,31,26,05,21,,,,,1.30,0.70,1.10*10
$GNGSA,A,3,88,79,78,81,82,80,72,,,,,,1.30,0.70,1.10*16
$GPGSV,4,1,13,02,08,040,17,04,,,47,05,18,071,44,09,02,348,24*49
$GPGSV,4,2,13,12,03,118,24,16,12,298,36,20,15,118,30,21,44,179,51*74
$GPGSV,4,3,13,23,06,324,35,25,37,121,47,26,40,299,48,29,60,061,49*73
$GPGSV,4,4,13,31,52,239,51*42
$GLGSV,3,1,10,65,07,076,24,70,01,085,,71,04,342,34,72,13,029,35*64
$GLGSV,3,2,10,78,35,164,41,79,75,214,48,80,34,322,46,81,79,269,49*64
$GLGSV,3,3,10,82,28,235,52,88,39,043,43*6D
$GNGLL,4951.69412,N,00839.03672,E,124610.00,A,D*71
$GNGST,124610.00,12,,,,0.010,0.010,0.010*4B
$GNZDA,124610.00,03,07,2016,00,00*79
[raw binary UBX receiver data: several lines of unprintable bytes omitted]
$GNRMC,124611.00,A,4951.69413,N,00839.03672,E,0.009,,030716,,,D*62
$GNVTG,,T,,M,0.009,N,0.016,K,D*36
$GNGNS,124611.00,4951.69413,N,00839.03672,E,RR,15,0.70,162.5,47.6,1.0,0000*42
$GNGGA,124611.00,4951.69413,N,00839.03672,E,4,12,0.70,162.5,M,47.6,M,1.0,0000*6A
$GNGSA,A,3,16,25,29,20,31,26,05,21,,,,,1.31,0.70,1.10*11
$GNGSA,A,3,88,79,78,81,82,80,72,,,,,,1.31,0.70,1.10*17
$GPGSV,4,1,13,02,08,040,18,04,,,47,05,18,071,44,09,02,348,21*43
$GPGSV,4,2,13,12,03,118,24,16,
I have already searched for similar problems, but I was not able to find a solution that works for me.
I ended up with code like this:
import csv

def unfussy_reader(csv_reader):
    while True:
        try:
            yield next(csv_reader)
        except csv.Error:
            # log the problem or whatever
            print("Problem with some row")
            continue

if __name__ == '__main__':
    #
    # Generate malformed csv file for
    # demonstration purposes
    #
    with open("temp.csv", "w") as fout:
        fout.write("abc,def\nghi\x00,klm\n123,456")

    #
    # Open the malformed file for reading, fire up a
    # conventional CSV reader over it, wrap that reader
    # in our "unfussy" generator and enumerate over that
    # generator.
    #
    with open("Rover.ubx") as fin:
        reader = unfussy_reader(csv.reader(fin))
        for n, row in enumerate(reader):
            fout.write(row[0])
However, I was not able to simply write a file containing all the rows read in with the unfussy_reader wrapper using the above code.
I would be glad if you could help me.
Here is an image of how the .ubx file looks in Notepad++.
Thanks!

I am not quite sure, but your file looks pretty binary. You should try to open it as such:
with open(INPUT_FILENAME, 'rb') as input_file:
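For example, a minimal sketch (assuming the NMEA sentences are plain ASCII lines embedded in the binary stream):

INPUT_FILENAME = 'Rover.ubx'

with open(INPUT_FILENAME, 'rb') as input_file:
    for raw_line in input_file:
        # binary mode yields bytes, so compare against a bytes literal
        if raw_line.startswith(b'$GNGGA'):
            # NMEA is plain ASCII; drop any stray undecodable bytes
            fields = raw_line.decode('ascii', errors='ignore').strip().split(',')
            print(fields[1])  # the time field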

It seems like you did not open the file with the correct encoding,
so the raw messages cannot be read correctly.
If it is encoded as UTF-8, you need to open the file with the encoding option:
with open(INPUT_FILENAME, 'r', newline='', encoding='utf8') as input_file:
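Note, though, that the raw UBX blocks are not valid UTF-8, and even with an error handler the csv module will still reject any line containing a NUL byte. A common workaround (a sketch, assuming the NMEA lines themselves are clean) is to filter those bytes out before they reach the reader:

import csv

with open(INPUT_FILENAME, 'r', newline='', encoding='utf8', errors='ignore') as input_file:
    # csv.reader accepts any iterable of strings, so NUL bytes can be stripped first
    reader = csv.reader(line.replace('\0', '') for line in input_file)
    for row in reader:
        if row and row[0].startswith('$GNGGA'):
            print(row)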

Hey, if anyone else has this problem reading NMEA sentences from u-blox .ubx files,
this Python code worked for me:

def read_in():
    with open('GNGGA.txt', 'wb') as GNGGA:
        with open('GNRMC.txt', 'wb') as GNRMC:
            with open('rover.ubx', 'rb') as f:
                for line in f:
                    # binary mode yields bytes, so compare against bytes literals
                    if line.startswith(b'$GNGGA'):
                        GNGGA.write(line)
                    if line.startswith(b'$GNRMC'):
                        GNRMC.write(line)

read_in()
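If you prefer the output files to be regular text, a variant that decodes only the matching lines (a sketch, assuming the NMEA sentences are plain ASCII):

def read_in_text():
    with open('GNGGA.txt', 'w') as GNGGA, open('rover.ubx', 'rb') as f:
        for line in f:
            if line.startswith(b'$GNGGA'):
                # decode only the NMEA lines; the binary blocks never reach here
                GNGGA.write(line.decode('ascii', errors='ignore'))

read_in_text()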

You could also use the gnssdump command line utility which is installed with the PyGPSClient and pygnssutils Python packages.
e.g.
gnssdump filename=Rover.ubx msgfilter=GNGGA
See gnssdump -h for help.
Alternatively, if you want a simple Python script, you could use the pyubx2 Python package, e.g.

from pyubx2 import UBXReader

with open("Rover.ubx", "rb") as stream:
    ubr = UBXReader(stream)
    for (_, parsed_data) in ubr.iterate():
        if parsed_data.identity in ("GNGGA", "GNRMC"):
            print(parsed_data)

Related

Overwrite a .txt file in python

I have a .txt file that is being written to by a python script.
Adam,3,2,4
Sorin,3,2,4
Sorin,0,0,0
new_record = studentName + "," + str(Score1) + "," + str(Score2) + "," + str(Score3)
student_class = 0
while student_class != 1 or student_class != 2 or student_class != 3:
    student_class = input("What class are you in?(1/2/3): ")
    if student_class == "1":
        file = open("Class1.txt", "a+")
        file.write(new_record)
        file.write("\n")
        file.close()
        with open("Class1.txt", 'r') as fp:
            for count, line in enumerate(fp):
                pass
    break
I want the scores to be overwritten if the student name is the same. For example, if I run the script again and Sorin gets a score of "3,3,3", the .txt file would look like this:
Adam,3,2,4
Sorin,3,2,4
Sorin,0,0,0
Sorin 3,3,3
However I want it to turn out like this:
Adam,3,2,4
Sorin 3,3,3
There are some things missing from your code, like how we know which student we are working on, etc.
But either way, this is the approach I would take if the files you are working on are not too big, as the file contents will be stored in memory while editing.
It uses a StringIO as an intermediate location where the rows are appended, except for those matching the current student, and then the content of the StringIO is put in place of the original file.
Starting with:
Adam,3,2,4
Sorin,3,2,4
Sorin,0,0,0
And running the following:

import csv
from io import StringIO

current_student = "Sorin"
current_scores = (3, 3, 3)

# obtain a temporary file-like object in memory with a csv writer
with StringIO() as f_tmp:
    writer = csv.writer(f_tmp)
    # open the input file for reading with a csv reader
    with open("/tmp/classes.csv", "r", newline="") as f_in:
        reader = csv.reader(f_in)
        for row in reader:
            # skip the rows of current_student
            if row[0] == current_student:
                continue
            writer.writerow(row)
    # add current_student and their scores
    writer.writerow((current_student,) + current_scores)
    # reopen the same file, this time for writing
    with open("/tmp/classes.csv", "w") as f_out:
        f_out.write(f_tmp.getvalue())
You get
Adam,3,2,4
Sorin,3,3,3
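If several names may need updating, a variant that keeps only the last record per student name (a sketch, assuming the new records have already been appended to the file and the name in the first column is the key):

import csv

latest = {}
with open("/tmp/classes.csv", "r", newline="") as f_in:
    for row in csv.reader(f_in):
        latest[row[0]] = row  # a later row overwrites an earlier one with the same name

with open("/tmp/classes.csv", "w", newline="") as f_out:
    csv.writer(f_out).writerows(latest.values())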

error while writing to csv file with python

I am trying to write some output to a csv file, line by line.
Here is what I tried:
import csv
import datetime

today = datetime.datetime.now().date()
filter = "eventTimestamp ge {}".format(today)
select = ",".join([
    "eventTimestamp",
    "eventName",
    "operationName",
    "resourceGroupName",
])
activity_logs = client.activity_logs.list(
    filter=filter,
    select=select
)
with open(r"C:\scripts\logs.csv", 'w', newline='') as f:
    for log in activity_logs:
        result = " ".join([
            str(log.event_timestamp),
            str(log.resource_group_name),
            log.event_name.localized_value,
            log.operation_name.localized_value
        ])
        f.writerow(result)
It's throwing this error:
AttributeError: '_io.TextIOWrapper' object has no attribute 'writerow'
How can I fix this error, possibly with another module?
This:

with open(r"C:\scripts\logs.csv", 'w', newline='') as f:

is creating just a text file handle. You need to create a csv.writer using f, and then you can use writerow, that is:

import csv
...
with open(r"C:\scripts\logs.csv", 'w', newline='') as f:
    writer = csv.writer(f)
    for log in activity_logs:
        result = (str(log.event_timestamp), str(log.resource_group_name), log.event_name.localized_value, log.operation_name.localized_value)
        writer.writerow(result)

You might find useful examples of usage in the csv article at PyMOTW-3.
The error is coming from the line:
f.writerow(result)
and it's telling you that the f object does not have a function named writerow.
As Jannes has commented, use the write function instead:
f.write(result)
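Note that, unlike csv.writer's writerow, a plain file write does not add a line terminator, so append one explicitly if you want one record per line:

f.write(result + "\n")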
csv.writer is required when you're trying to write to a CSV file. The code can then be:
import csv
import datetime

today = datetime.datetime.now().date()
filter = "eventTimestamp ge {}".format(today)
select = ",".join([
    "eventTimestamp",
    "eventName",
    "operationName",
    "resourceGroupName",
])
activity_logs = client.activity_logs.list(
    filter=filter,
    select=select
)
with open(r"C:\scripts\logs.csv", 'w', newline='') as file:
    f = csv.writer(file)
    for log in activity_logs:
        result = (str(log.event_timestamp),
                  str(log.resource_group_name),
                  log.event_name.localized_value,
                  log.operation_name.localized_value)
        f.writerow(result)
When the csv.writer is created after opening the csv file, it works without the TextIOWrapper error.

Python requests data in file, line by line

I am trying to get the output of this request (https://api.opendota.com/api/players/7841909) into a file, line by line.
For some reason the output is stored as bytes and not str, which I can change with str().
I tried to use a regular expression to store just the information between the {}, and I also tried the csv module, which led to storing just digits.
What did I do wrong? The following version ignores the line breaks and the delimiters. :/
import requests
import csv
import re

dotaId = "7841909"  # stored as a string
pfad = "https://api.opendota.com/api/players/" + dotaId + "/matches"
req = requests.get(pfad)

with open('%s.csv' % dotaId, 'w') as file:
    clean_line = re.findall(r'\{(.*?)\}', req.text)
    file.write(str(clean_line))
Your object clean_line is a list, which you are writing as a one-liner into the file.
It is better to use the csv writer module and write the content row by row:
with open('new_file.csv', 'w', newline='') as file:
    writer = csv.writer(file, quotechar="'")
    clean_lines = re.findall(r'\{(.*?)\}', req.text)
    for line in clean_lines:
        writer.writerow([str(line)])
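Since the endpoint returns JSON, it may be more robust to parse it with req.json() rather than a regex. A sketch (assuming every match record has the same keys):

import csv
import requests

req = requests.get("https://api.opendota.com/api/players/7841909/matches")
matches = req.json()  # a list of dicts, one per match

with open("7841909.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=matches[0].keys())
    writer.writeheader()
    writer.writerows(matches)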

How can I pickle a python object into a csv file?

I am trying to pickle a python object into a csv file. I want to write the pickle of an object as the third column in my file. I want to use pickle to avoid writing serialization code for my complex objects.
Code to write to csv:
with open(self.file_path, 'a') as csv_file:
    wr = csv.writer(csv_file, delimiter='|')
    row = ['klines', symbol]
    row.extend(pickle.dumps(object))
    wr.writerow(row)
Code to read csv:
with open(self.simulation_file_name, 'r') as csv_file:
    line = csv_file.readline()
    while line != '':
        line = line.strip('\n')
        columns = line.split('|')
        event_type = line.pop(0)
        symbol = line.pop(0)
        pickled = line.pop(0)
        klines = pickle.loads(klines)
I get the following error:
TypeError: a bytes-like object is required, not 'str'
To write bytes/binary in a text file like CSV, use base64 or other methods to avoid escaping problems. Code simplified & Python 3 assumed.
import base64
import csv
import pickle

with open('a.csv', 'a', encoding='utf8') as csv_file:
    wr = csv.writer(csv_file, delimiter='|')
    pickle_bytes = pickle.dumps(obj)            # unsafe to write
    b64_bytes = base64.b64encode(pickle_bytes)  # safe to write but still bytes
    b64_str = b64_bytes.decode('utf8')          # safe and in utf8
    wr.writerow(['col1', 'col2', b64_str])

# the file contains
# col1|col2|gANdcQAu

with open('a.csv', 'r') as csv_file:
    for line in csv_file:
        line = line.strip('\n')
        b64_str = line.split('|')[2]  # take the pickled obj
        obj = pickle.loads(base64.b64decode(b64_str))  # retrieve
P.S. If you are not writing a utf8 file (e.g. an ascii file), simply replace the encoding method.
P.P.S. Writing bytes in CSV is possible, yet hardly elegant. One alternative is dumping a whole dict with the dumped objects as values and storing only its keys in the CSV.
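A minimal sketch of that alternative (the file name objects.pkl and the key scheme are illustrative, not part of the question):

import csv
import pickle

# write: keep only a lookup key in the CSV, pickle the objects separately
objects = {'klines-BTCUSDT': {'open': 1.0, 'close': 2.0}}  # any picklable object
with open('a.csv', 'a', newline='') as f:
    csv.writer(f, delimiter='|').writerow(['klines', 'BTCUSDT', 'klines-BTCUSDT'])
with open('objects.pkl', 'wb') as f:
    pickle.dump(objects, f)

# read: look each object up by the key stored in the third column
with open('objects.pkl', 'rb') as f:
    objects = pickle.load(f)
with open('a.csv', newline='') as f:
    for row in csv.reader(f, delimiter='|'):
        obj = objects[row[2]]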

How to divide csv file with condition?

I have this csv file:
89,Network activity,ip-dst,80.179.42.44,,1,20160929
89,Payload delivery,md5,4ad2924ced722ab65ff978f83a40448e,,1,20160929
89,Network activity,domain,alkamaihd.net,,1,20160929
90,Payload delivery,md5,197c018922237828683783654d3c632a,,1,20160929
90,Network activity,domain,dnsrecordsolver.tk,,1,20160929
90,Network activity,ip-dst,178.33.94.47,,1,20160929
90,Payload delivery,filename,Airline.xls,,1,20160929
91,Payload delivery,md5,23a9bbf8d64ae893db17777bedccdc05,,1,20160929
91,Payload delivery,md5,07e47f06c5ed05a062e674f8d11b01d8,,1,20160929
91,Payload delivery,md5,bd75af219f417413a4e0fae8cd89febd,,1,20160929
91,Payload delivery,md5,9f4023f2aefc8c4c261bfdd4bd911952,,1,20160929
91,Network activity,domain,mailsinfo.net,,1,20160929
91,Payload delivery,md5,1e4653631feebf507faeb9406664792f,,1,20160929
92,Payload delivery,md5,6fa869f17b703a1282b8f386d0d87bd4,,1,20160929
92,Payload delivery,md5,24befa319fd96dea587f82eb945f5d2a,,1,20160929
I need to divide this csv file into 4 csv files, where the condition is the event number at the beginning of every row. So far I have created a set that includes all the event numbers {89, 90, 91, 92}, and I know that I need to make a loop in a loop and copy each row to its dedicated csv file.
data = {
    '89': [],
    '90': [],
    '91': [],
    '92': []
}

with open('yourfile.csv') as infile:
    for line in infile:
        prefix = line[:2]
        data[prefix].append(line)

for prefix in data.keys():
    with open('csv' + prefix + '.csv', 'w') as outfile:
        outfile.writelines(''.join(data[prefix]))
However, if you are open to solutions other than Python, then this can be easily accomplished by running four commands:
grep ^89 file.csv > 89.csv
grep ^90 file.csv > 90.csv
Similarly for other values.
It would be best not to hardcode the event numbers in your code, so it's not dependent on the values in the data. I also prefer to use the csv module, which has been optimized to read and write .csv files.
Here's a way to do that:
import csv

prefix = 'events'  # prefix for the output csv file names
data = {}

with open('conditions.csv', 'r', newline='') as conditions:
    reader = csv.reader(conditions)
    for row in reader:
        data.setdefault(row[0], []).append(row)

for event in sorted(data):
    csv_filename = '{}_{}.csv'.format(prefix, event)
    print(csv_filename)
    with open(csv_filename, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data[event])
Update
The approach implemented above first reads the entire csv file into memory, and then writes all the rows associated with each event value into a separate output file, one at a time.
A more memory-efficient approach would be to open multiple output files simultaneously and write each row immediately after it has been read out to the proper destination file. Doing this requires keeping track of what files are already open. Something else the file managing code needs to do is make sure all the files are closed when processing is complete.
In the code below all of this has been accomplished by defining and using a Python Context Manager type to centralize the handling of all the csv output files that might be generated depending on how many different event values there are in the input file.
import csv
import sys

PY3 = sys.version_info.major > 2

class MultiCSVOutputFileManager(object):
    """Context manager to open and close multiple csv files and csv writers."""

    def __enter__(self):
        self.files = {}
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        for file, csv_writer in self.files.values():
            print('closing file: {}'.format(file.name))
            file.close()
        self.files.clear()
        return None

    def get_csv_writer(self, filename):
        if filename not in self.files:  # new file?
            open_kwargs = dict(mode='w', newline='') if PY3 else dict(mode='wb')
            print('opening file: {}'.format(filename))
            file = open(filename, **open_kwargs)
            self.files[filename] = file, csv.writer(file)
        return self.files[filename][1]  # return associated csv.writer object
And here's how to use it:
prefix = 'events' # to name of each csv output file
with open('conditions.csv', 'rb') as conditions:
reader = csv.reader(conditions)
with MultiCSVOutputFileManager() as file_manager:
for row in reader:
csv_filename = '{}_{}.csv'.format(prefix, row[0]) # row[0] is event
writer = file_manager.get_csv_writer(csv_filename)
writer.writerow(row)
You can even create the resulting files dynamically, by keeping a mapping from the first field to the associated file and opening a new one whenever an unseen value appears:
files = {}
with open('file.csv') as fd:
    for line in fd:
        if 0 == len(line.strip()):
            continue  # skip empty lines
        try:
            id_field = line.split(',', 1)[0]  # extract first field
            if id_field not in files:  # if not encountered before, open a new result file
                files[id_field] = open(id_field + '.csv', 'w')
            files[id_field].write(line)  # write the line to the proper file
        except Exception as e:
            print('ERR', line, e)  # catch-all in case of problems...
for f in files.values():
    f.close()
