Open multiple file directories in Python

I would like to loop over and access my files of type .bin, each of which contains three values of type double (pitch, yaw, roll).
So far I have only been able to access a single file, using: with open('annotations/01/frame_00004_pose.bin', 'rb') as fid:
I am aware that I need to change that line for my loop to work properly; I am just unsure how to proceed. My annotations folder contains subfolders 01-24, each of which contains many .bin files.
Here is what I have done so far.
import pandas as pd
import numpy as np
import os

pyr = pd.DataFrame(columns=['pitch', 'yaw', 'roll'])
with os.scandir('annotations') as entries:
    for i in entries:
        with open('annotations/01/frame_00004_pose.bin', 'rb') as fid:
            data_array = np.fromfile(fid, np.float32)
        para = data_array[3:]
        pyr = pyr.append({'pitch': para[0], 'yaw': para[1], 'roll': para[2]}, ignore_index=True)
print(pyr)
Any help would be appreciated.

Yes, using glob is a good idea; see below:
import pandas as pd
import numpy as np
import glob

pyr = pd.DataFrame(columns=['pitch', 'yaw', 'roll'])
entries = glob.glob('annotations/**/*.bin', recursive=True)
for entry in entries:
    with open(entry, 'rb') as fid:
        data_array = np.fromfile(fid, np.float32)
    para = data_array[3:]
    pyr = pyr.append({'pitch': para[0], 'yaw': para[1], 'roll': para[2]}, ignore_index=True)
print(pyr)
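Note that DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, and appending one row at a time is slow anyway. A minimal sketch of the same loop that collects rows in a plain list and builds the frame once at the end, assuming the same annotations layout as above:

import glob
import numpy as np
import pandas as pd

rows = []
for entry in glob.glob('annotations/**/*.bin', recursive=True):
    with open(entry, 'rb') as fid:
        data_array = np.fromfile(fid, np.float32)
    # the pose values follow the first three floats, as in the original code
    para = data_array[3:]
    rows.append({'pitch': para[0], 'yaw': para[1], 'roll': para[2]})

# build the DataFrame once instead of appending inside the loop
pyr = pd.DataFrame(rows, columns=['pitch', 'yaw', 'roll'])
print(pyr)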

Related

How can I insert a path in pandas .read_table or read a CSV file as a variable

I'm trying to pass a variable holding my target path to pd.read_table, but it is not working.
Is it possible to do this?
import pandas as pd
import os
TXT = "C:\Users\ricar\OneDrive\Área de Trabalho\Infoprex - Olivais.txt"
DfOlivais = pd.read_table(f'{TXT}', sep="\t", encoding = "ISO-8859-1", engine='python',header=0)
You should try something like this if you are reading a .txt file:
import pandas as pd
import os
TXT = r"C:\Users\ricar\OneDrive\Área de Trabalho\Infoprex - Olivais.txt"
DfOlivais = pd.read_table(TXT, sep="\t", encoding = "ISO-8859-1")
I believe this should get you the same results and is a lot easier to read.
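The key change is the r prefix. In a normal string literal, backslash sequences such as the \U in C:\Users are treated as escape sequences (here, the start of a Unicode escape), which raises a SyntaxError. A small sketch of equivalent ways to spell a Windows path, using a hypothetical file name:

from pathlib import Path

# raw string: backslashes are taken literally
p1 = r"C:\Users\ricar\data.txt"
# escaped backslashes
p2 = "C:\\Users\\ricar\\data.txt"
# forward slashes, which Windows also accepts
p3 = "C:/Users/ricar/data.txt"
# pathlib joins path parts portably
p4 = Path("C:/Users/ricar") / "data.txt"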

Error reading CSV files because of the delimiter

I need your help with a problem reading files.
Some of my CSV files use a different delimiter (;) instead of ,. In general, for those cases, I do the following:
pd.read_csv('path/filename.csv', sep=';', engine='python')
and for those with no issues:
pd.read_csv('path/filename.csv')
Since I have a list of files, I do not know which one is causing the error, so I need to edit the code below a bit to handle both cases when an error occurs.
The current error is:
ParserError: Error tokenizing data. C error: Expected 3 fields in line 9, saw 9
The code that I need to edit to include the conditions above is the following:
import pandas as pd
from pathlib import Path
from os.path import join
import matplotlib.pyplot as plt
import glob

def create_dataset():
    country = 'UK'
    base_path = Path('' + country)
    glob_pattern = str(base_path.joinpath("*.csv"))
    all_csv_filenames = glob.glob(glob_pattern)
    dataframes = {
        (str(Path(filename).parent), str(Path(filename).stem)): pd.read_csv(
            filename, sep=','
        )
        for filename in all_csv_filenames
    }
    data = pd.concat(dataframes, names=['Country', 'FileName', '_'])
    return data
Thank you for your help
You could use try and except; sadly, AFAIK there is no way of doing that in a comprehension, so use a regular for loop, something along these lines:
import pandas as pd
from pathlib import Path
import glob

def create_dataset():
    country = 'UK'
    base_path = Path('' + country)
    glob_pattern = str(base_path.joinpath("*.csv"))
    all_csv_filenames = glob.glob(glob_pattern)
    dataframes = {}
    for filename in all_csv_filenames:
        try:
            v = pd.read_csv(filename, sep=',')
        except pd.errors.ParserError:  # ParserError lives in pandas.errors
            v = pd.read_csv(filename, sep=';')
        dataframes[(str(Path(filename).parent), str(Path(filename).stem))] = v
    data = pd.concat(dataframes, names=['Country', 'FileName', '_'])
    return data
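Another option, if you do not want to special-case the delimiters at all, is to let pandas sniff the separator: passing sep=None with engine='python' makes read_csv detect the delimiter per file via the standard library's csv.Sniffer. A minimal sketch:

import pandas as pd

# sep=None asks the python engine to infer the delimiter (',' or ';') per file
df = pd.read_csv('path/filename.csv', sep=None, engine='python')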
If you change all instances of sep to delimiter, it should work:
import pandas as pd
from pathlib import Path
from os.path import join
import matplotlib.pyplot as plt
import glob

def create_dataset():
    country = 'UK'
    base_path = Path('' + country)
    glob_pattern = str(base_path.joinpath("*.csv"))
    all_csv_filenames = glob.glob(glob_pattern)
    dataframes = {
        (str(Path(filename).parent), str(Path(filename).stem)): pd.read_csv(
            filename, delimiter=','
        )
        for filename in all_csv_filenames
    }
    data = pd.concat(dataframes, names=['Country', 'FileName', '_'])
    return data
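If the delimiter genuinely varies from file to file, you can also detect it up front with the standard library's csv.Sniffer before handing the file to pandas. A hedged sketch of that approach:

import csv
import pandas as pd

def read_with_sniffed_delimiter(filename):
    # read a small sample and let csv.Sniffer guess between ',' and ';'
    with open(filename, newline='') as f:
        sample = f.read(4096)
    dialect = csv.Sniffer().sniff(sample, delimiters=',;')
    return pd.read_csv(filename, sep=dialect.delimiter)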

Iterate through Time Series data from .txt file using Numpy Array

My background is VBA and I am very new to Python, so please forgive me at the outset.
I have a .txt file with time series data.
My goal is to loop through the data and do simple comparisons, such as High - Close, etc. Coming from VBA, this is straightforward for me, namely (in simple terms):
Sub Loop()
    Dim arrTS() As Variant, i As Long
    arrTS = Array("Date", "Time", ..)
    For i = LBound(arrTS, 1) To UBound(arrTS, 1)
        Debug.Print arrTS(i, "High") - arrTS(i, "Close")
    Next i
End Sub
Now what I have in python is:
import os
import numpy as np
import urllib.request
import matplotlib.pyplot as plt

# load the .txt file
ES_D1 = np.loadtxt(fname=os.getcwd() + "\ES\D1\ES_10122007_04122019_D1.txt", dtype='str')

# now get the shape
print(ES_D1.shape)
Out: (3025, 8)
Can anyone recommend the best way to iterate through this file line by line, with reference to specific columns, and not iterate through each element?
Something like:
For i = 0 To 3025
    print(ES_D1[i,4] - ES_D1[i,5])
Next i
The regular way to read csv/tsv files for me is this:
import os

filename = '...'
filepath = '...'
infile = os.path.join(filepath, filename)

with open(infile) as fin:
    for line in fin:
        parts = line.split('\t')
        # do something with the list "parts"
But in your case, using the pandas function read_csv() might be a better way:
import pandas as pd
# Control delimiters, rows, column names with read_csv
data = pd.read_csv(infile)
# View the first 5 lines
data.head()
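Once the data is in a DataFrame, the comparison from the VBA loop becomes a single vectorized expression. A minimal sketch, assuming the file is tab-separated with header columns actually named 'High' and 'Close' (adjust to the real header):

import pandas as pd

data = pd.read_csv(infile, sep='\t')  # infile as defined in the snippet above
diff = data['High'] - data['Close']   # vectorized: one difference per row
print(diff.head())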
Creating the simple for loop was easier than I thought; here it is for others.
import os
import numpy as np
import urllib.request
import matplotlib.pyplot as plt

# load the .txt file (raw string so the backslashes are taken literally)
ES_D1 = np.loadtxt(fname=os.getcwd() + r"\ES\D1\ES_10122007_04122019_D1.txt", dtype='str')

# now loop through the array
# this is the engine
for i in range(ES_D1.shape[0]):
    if ES_D1[i,3] > ES_D1[i,6]:
        print(ES_D1[i,0])
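One caveat with this self-answer: because the array was loaded with dtype='str', the > in ES_D1[i,3] > ES_D1[i,6] compares strings lexicographically, not numerically (for example, '9' > '10' is True). A minimal sketch that converts the compared columns to floats first, assuming columns 3 and 6 hold numeric values and column 0 holds the date:

import numpy as np

# convert the two columns being compared to floats before comparing
col3 = ES_D1[:, 3].astype(float)
col6 = ES_D1[:, 6].astype(float)

# vectorized: print the date (column 0) wherever column 3 exceeds column 6
for date in ES_D1[col3 > col6, 0]:
    print(date)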

How to take a column from a txt file and save it in a new matrix

I wrote this code to go through a folder, find all the .txt files, take the 4th column from each file (which has a lot of columns), and put it in a new numpy array (data):
import os
import numpy as np
import pandas as pd
from scipy.constants import mu_0
from scipy.interpolate import griddata
import matplotlib.pyplot as plt

data = np.zeros((44, 14))  # there are 14 .txt files and the 4th column has 44 lines
indx = 0
Path = "my path"
filelist = os.listdir(Path)
for i in filelist:
    if i.endswith(".txt"):
        newpath = Path + '/' + i
        print(newpath)  # check that the path and file are right
        dados = pd.read_table(newpath, header=None)
        data[:, indx] = dados[:][4]
        indx = indx + 1
The problems I'm getting are:
First: I have a problem with the index, because it starts at 1 when it should start at 0.
Second: it only takes the 4th column from the first .txt file and puts it in the array data, but then it stops and does not run through the other files.
This is the error: ParserError: Error tokenizing data. C error: Expected 5 fields in line 49, saw 7
Try this:
import os
import pandas as pd

workingpath = os.getcwd()
files = []
for file in os.listdir(workingpath):
    if file.endswith(".txt"):
        files.append(os.path.join(workingpath, file))

data = pd.DataFrame()
for col, file in enumerate(files):
    dados = pd.read_csv(file, header=None)
    data[col] = dados.iloc[:, 4]
data = data.to_numpy()
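As for the ParserError ("Expected 5 fields in line 49, saw 7"): it usually means the default delimiter does not match the file's layout. If the columns are separated by a varying number of spaces, a regex separator can help. A hedged sketch, assuming whitespace-delimited files and a hypothetical file name:

import pandas as pd

# sep=r'\s+' treats any run of whitespace as a single delimiter,
# which avoids the "Expected N fields" error on space-padded columns
dados = pd.read_csv("my path/example.txt", header=None, sep=r'\s+')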

How to write the output of a Python script, which is a set of floats, to a text file?

I have a Python script which prints the first 105 columns of a text file. All the values are floats, and I want to write them to a text file. I tried the code below, but the text file is empty.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys

data = pd.read_csv('E:\Python_trials\Trial_codes\Lab_trial.txt', delimiter='\t')
df = data.ix[:, :110]
print(df)
with open("test.txt", "w") as f:
    sys.stdout.writelines(str(f))
    f.flush()
    f.close()
You don't write to f; you are writing to standard output. Try f.write(...):
import pandas as pd

data = pd.read_csv(r'E:\Python_trials\Trial_codes\Lab_trial.txt', delimiter='\t')
df = data.iloc[:, :110]  # .ix was removed from pandas; use .iloc for positional slicing
print(df)
with open("test.txt", "w") as f:
    f.write(str(df))  # write the DataFrame's text, not the file object
# f.close() is unnecessary: as soon as the "with" block ends, the file closes automatically
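For completeness, pandas can also write the frame directly, which avoids the manual file handling entirely. A minimal sketch using to_csv:

# assuming df is the sliced DataFrame from above;
# to_csv writes it as tab-separated text, here without the index column
df.to_csv("test.txt", sep="\t", index=False)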
