Python loadtxt function is not working

I am attempting to run the following program:
from numpy import zeros, loadtxt
from pylab import plot, xlim, show
from cmath import exp, pi

def dft(y):
    N = len(y)
    c = zeros(N//2+1, complex)
    for k in range(N//2+1):
        for n in range(N):
            c[k] += y[n]*exp(-2j*pi*k*n/N)
    return c

y = loadtxt("pitch.txt", float)
c = dft(y)
plot(abs(c))
xlim(0, 500)
show()
However, when I attempt to run the program, I receive an error for the line:
y = loadtxt("pitch.txt",float)
File "C:\Python32\lib\site-packages\numpy\lib\npyio.py", line 689, in loadtxt
fh = iter(open(fname, 'U'))
IOError: [Errno 2] No such file or directory: 'pitch.txt'
I was given a file that has all the resources needed to run the program, and I uploaded them into the same folder my Python program is saved in. The pitch.txt file is a text file with a single column of numbers. I'm wondering if there is something wrong with the program, or did I put the files in the wrong place?
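The traceback points at the file lookup rather than the program itself: loadtxt resolves a relative filename against the current working directory, which is not necessarily the folder the script is saved in. A minimal sketch to check and work around this, assuming pitch.txt sits next to the script:

import os
from numpy import loadtxt

# Show where Python is actually looking for relative paths
print(os.getcwd())

# Build the path relative to the script's own folder instead,
# so it works no matter where the interpreter was started from
script_dir = os.path.dirname(os.path.abspath(__file__))
y = loadtxt(os.path.join(script_dir, "pitch.txt"), float)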

Related

Appending numpy arrays into two binary files

I want to create two binary files and append numpy arrays to each of them during a loop. I wrote the following code (I use Python 2.7):
import numpy as np

for _ in range(5):
    C = np.random.rand(1, 5)
    r = np.random.rand(1, 5)
    with open("C.bin", "ab") as file1, open("r.bin", "ab") as file2:
        # Append to binary files
        np.array(C).tofile(file1)
        np.array(r).tofile(file2)

# Now printing to check if appending is successful
C = np.load("C.bin")
r = np.load("r.bin")
print(C)
print(r)
However, I keep getting this error:
Traceback (most recent call last):
File "test.py", line 15, in <module>
C = np.load("C.bin")
File "/anaconda/lib/python2.7/site-packages/numpy/lib/npyio.py", line 429, in load
"Failed to interpret file %s as a pickle" % repr(file))
IOError: Failed to interpret file 'C.bin' as a pickle
I tried to fix it but cannot see what is wrong. Any help is appreciated.
NOTE: I intentionally want to use np.load because later on I will be loading the dataset from the disk into a numpy array for further processing.
You should use the save function built into NumPy to store arrays in files. Here is what your code should look like:
for _ in range(5):
    C = np.random.rand(1, 5)
    r = np.random.rand(1, 5)
    np.save('C', C)
    np.save('r', r)

    # Now printing to check if saving is successful
    C = np.load("C.npy")
    r = np.load("r.npy")
    print(C)
    print(r)
    del C, r
Please refer to the documentation https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.load.html
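For completeness, the original error happens because tofile() writes raw bytes with no header, while np.load() expects the .npy format that np.save() produces. If appending in a loop is the behaviour you actually want, a minimal sketch (assuming float64 data of a known row width) is to keep tofile() for appending and read everything back with np.fromfile():

import numpy as np

# Append raw float64 bytes across loop iterations, as in the original code
for _ in range(5):
    C = np.random.rand(1, 5)
    with open("C.bin", "ab") as f:
        C.tofile(f)

# tofile() stores no shape or dtype, so both must be supplied when reading back
data = np.fromfile("C.bin", dtype=np.float64).reshape(-1, 5)
print(data)  # one 5-wide row per loop iteration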

How to return a DataFrame in Azure ML from a Python script

I have the Python script below, and I'm trying to return the new DataFrame it creates. Unfortunately, I'm getting a NotImplementedError.
Please let me know how to return a DataFrame from a Python script in Azure ML.
Code:
import json
import pandas as pd

# thermostat_phase and light_phase are defined elsewhere in the full script
def azureml_main(df):
    therm_patterns, therm_missed_patterns = thermostat_phase(df)
    th_pat = json.loads(therm_patterns)
    missed_th_pat = json.loads(therm_missed_patterns)

    light_patterns, light_missed_patterns = light_phase(df)
    lt_pat = json.loads(light_patterns)
    missed_lt_pat = json.loads(light_missed_patterns)

    for j in range(0, len(lt_pat)):
        for i in range(0, len(lt_pat[0]['John'])):
            th_pat[0]['John'].append(lt_pat[0]['John'][i])
    for j in range(0, len(lt_pat)):
        for i in range(0, len(lt_pat[0]['Jane'])):
            th_pat[0]['Jane'].append(lt_pat[0]['Jane'][i])
    for j in range(0, len(lt_pat)):
        for i in range(0, len(lt_pat[0]['Jen'])):
            th_pat[0]['Jen'].append(lt_pat[0]['Jen'][i])

    for j in range(0, len(missed_lt_pat)):
        for i in range(0, len(missed_lt_pat[0]['John'])):
            missed_th_pat[0]['John'].append(missed_lt_pat[0]['John'][i])
    for j in range(0, len(missed_lt_pat)):
        for i in range(0, len(missed_lt_pat[0]['Jane'])):
            missed_th_pat[0]['Jane'].append(missed_lt_pat[0]['Jane'][i])
    for j in range(0, len(missed_lt_pat)):
        for i in range(0, len(missed_lt_pat[0]['Jen'])):
            missed_th_pat[0]['Jen'].append(missed_lt_pat[0]['Jen'][i])

    output = json.dumps(th_pat)
    df = pd.DataFrame.from_dict(
        {k: v[0] for k, v in json.loads(output)[0].items()},
        'index').rename_axis('User').reset_index()
    return df
Error:
Error 0085: The following error occurred during script evaluation, please view the output log for more information:
---------- Start of error message from Python interpreter ----------
Caught exception while executing function: Traceback (most recent call last):
  File "C:\server\invokepy.py", line 211, in batch
    xdrutils.XDRUtils.DataFrameToRFile(outlist[i], outfiles[i], True)
  File "C:\server\XDRReader\xdrutils.py", line 54, in DataFrameToRFile
    xdrwriter.write_attribute_list(attributes)
  File "C:\server\XDRReader\xdrwriter2.py", line 155, in write_attribute_list
    self.write_object(value)
  File "C:\server\XDRReader\xdrwriter2.py", line 215, in write_object
    write_func(flags, converted, missingIndices)
  File "C:\server\XDRReader\xdrwriter2.py", line 185, in write_objects
    self.write_object(value)
  File "C:\server\XDRReader\xdrwriter2.py", line 200, in write_object
    raise NotImplementedError('Python Bridge conversion table not implemented for type [{0}]'.format(value.getType()))
NotImplementedError: Python Bridge conversion table not implemented for type [<type 'list'>]
Process returned with non-zero exit code 1
It looks like your dataframe contains a list as the value of a column; the Azure Machine Learning workspace doesn't support nested values like lists or tuples in a dataframe as of now.
The best idea is to return it as a CSV file (a genuine option in the Azure Python script module) and later transform it back to a dataframe if you wish. But I believe the CSV file is the final product you need.
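If returning the dataframe directly is still preferred, one workaround (a sketch, not an Azure-specific API) is to serialize the offending list values into plain strings before returning, so every column holds scalars the bridge can convert. flatten_lists below is a hypothetical helper:

import json
import pandas as pd

def flatten_lists(df):
    # Replace list-valued cells with their JSON string representation so the
    # dataframe only contains scalar types the Python bridge can serialize
    return df.applymap(lambda v: json.dumps(v) if isinstance(v, (list, tuple)) else v)

# at the end of azureml_main:
#     return flatten_lists(df)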

Python binary files

Hi, I am having an issue using unpack in Python:
import struct
import numpy as np

fileID = open('B1b1_t100000.beam', 'r')
npart = 1E6
ncoord = 7
coords = np.reshape(struct.unpack('d'*int(ncoord*npart), fileID.read()), (npart, ncoord))
fileID.close()
And I am getting the error:
Traceback (most recent call last):
File "transfer_lev_B1.py", line 30, in <module>
coords = np.reshape(struct.unpack('d'*int(ncoord*npart),fileID.read()),(npart,ncoord));
struct.error: unpack requires a string argument of length 56000000
I can't really see where the problem is. The file is 56000000 bytes. In a previous attempt with npart=1E4, the code worked for a different file with the same format (fewer total lines), but I have the problem when I go to a larger file with more lines.
OK, I solved my problem:
import struct
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    npart = int(1E6)
    ncoord = 7

    # np.fromfile reads the raw float64 data directly, with no need to
    # open the file manually or build a struct format string
    coords = np.fromfile('B1b1_t100000.beam', dtype=np.float64)
    coords = coords[:npart*ncoord]
    coords = np.reshape(coords, (npart, ncoord))

    # Beam 1
    b1_x = coords[:, 0]
    b1_y = coords[:, 2]
    b1_z = coords[:, 4]
    b1_px = coords[:, 1]
    b1_py = coords[:, 3]
    b1_deltap = coords[:, 5]
    beam1 = np.array([b1_x, b1_px, b1_y, b1_py, b1_z, b1_deltap, coords[:, 6]], np.float64)
    beam1 = beam1.T

    # Map applied and new coordinates calculated.
    # foc and defoc are focusing/defocusing constants defined elsewhere in the full script.
    x_mod = np.sqrt(foc)*coords[:, 0]
    y_mod = np.sqrt(foc)*coords[:, 2]
    px_mod = np.sqrt(defoc)*coords[:, 1]
    py_mod = np.sqrt(defoc)*coords[:, 3]
    beam1_mod = np.array([x_mod, px_mod, y_mod, py_mod, b1_z, b1_deltap, coords[:, 6]], np.float64)
    beam1_mod = beam1_mod.T

    # ---------------Check shape of matrix----------------
    # print coords.shape
    # print beam1_mod.shape
    # print beam1.shape
    # print 'beam1= \n', beam1
    # print 'modified \n', beam1_mod
    # -----------------------------------------------------

    # New coordinates printed to binary file ('wb' for binary output).
    fileMod = open("B1b1_t100000_mod.beam", "wb")
    beam1_mod.tofile(fileMod)
    fileMod.close()
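A likely reason the original struct.unpack version failed, by the way, is the text-mode open: with open(..., 'r'), newline translation (notably '\r\n' handling on Windows) can make read() return fewer than the expected 56000000 bytes. If unpack is preferred over np.fromfile, a sketch with binary mode and integer counts would be:

import struct
import numpy as np

npart = int(1E6)
ncoord = 7

# 'rb' avoids any newline translation that could shorten a binary read
with open('B1b1_t100000.beam', 'rb') as fileID:
    raw = fileID.read()

coords = np.reshape(struct.unpack('d' * (npart * ncoord), raw), (npart, ncoord))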

Converting files to digital image failing with "tile cannot extend outside image"

I am trying to recreate some of the work from the blog posting http://sarvamblog.blogspot.com/2013/04/clustering-malware-corpus.html
import itertools
import glob
import numpy, scipy, os, array
from scipy.misc import imsave

for filename in list(glob.glob('file/*.file')):
    f = open(filename, 'rb')
    # just want to make sure I get the right file
    print filename

    ln = os.path.getsize(filename)  # length of file in bytes
    width = 256
    rem = ln % width

    a = array.array("B")  # uint8 array
    a.fromfile(f, ln-rem)
    f.close()

    g = numpy.reshape(a, (len(a)/width, width))
    g = numpy.uint8(g)

    fpng = filename + ".png"
    # make sure the png process and everything else is going
    print fpng
    scipy.misc.imsave(fpng, g)
And although this runs great on one or two files, I run into problems once I expand to dozens:
Traceback (most recent call last):
File "<stdin>", line 14, in <module>
File "/usr/lib/python2.7/dist-packages/scipy/misc/pilutil.py", line 120, in imsave
im = toimage(arr)
File "/usr/lib/python2.7/dist-packages/scipy/misc/pilutil.py", line 183, in toimage
image = Image.fromstring('L',shape,bytedata.tostring())
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 1797, in fromstring
im.fromstring(data, decoder_name, args)
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 590, in fromstring
d.setimage(self.im)
ValueError: tile cannot extend outside image
I assume that my issue is either A: not closing scipy.misc.imsave, or B: not resetting the arrays. Any help would be greatly appreciated.
I managed to figure it out with a try/except loop. Once I did that, I was able to determine that only certain files were failing. These files were extremely small (125 bytes). My assumption is that they were too small to contain all the info needed by scipy.
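A sketch of that guard, assuming the loop body above is wrapped in a hypothetical helper convert_to_png(): files smaller than one 256-byte row are skipped up front, and anything PIL still rejects is caught.

import glob
import os

def convert_to_png(filename):
    # hypothetical wrapper around the conversion loop body shown above
    pass

for filename in glob.glob('file/*.file'):
    if os.path.getsize(filename) < 256:
        continue  # too small to fill even one 256-byte image row
    try:
        convert_to_png(filename)
    except ValueError as e:
        print('%s failed: %s' % (filename, e))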
im.crop(box) ⇒ image
The box is a 4-tuple defining the left, upper, right, and lower pixel coordinate.
When lower was smaller than upper in my code, this error happened.

Read multiple HDF5 files in Python using multiprocessing

I'm trying to read a bunch of HDF5 files ("a bunch" meaning N > 1000 files) using PyTables and multiprocessing. Basically, I create a class to read and store my data in RAM; it works perfectly fine in sequential mode, and I'd like to parallelize it to gain some performance.
I tried a dummy approach for now, adding a new method flatten() to my class to parallelize the file reading. The following is a simplified example of what I'm trying to do: listf is a list of strings containing the names of the files to read, and nx and ny are the dimensions of the array I want to read from each file:
import numpy as np
import multiprocessing as mp
import tables

class data:
    def __init__(self, listf, nx, ny, nproc=0):
        self.listinc = []
        for i in range(len(listf)):
            self.listinc.append((listf[i], nx, ny))

    def __del__(self):
        del self.listinc

    def get_dsets(self, tuple_inc):
        listf, nx, ny = tuple_inc
        x = np.zeros((nx, ny))
        f = tables.openFile(listf)
        x = np.transpose(f.root.x[:ny, :nx])
        f.close()
        return(x)

    def flatten(self):
        nproc = mp.cpu_count()*2

        def worker(tasks, results):
            for i, x in iter(tasks.get, 'STOP'):
                print i, x
                results.put(i, self.get_dsets(x))

        tasks = mp.Queue()
        results = mp.Queue()
        manager = mp.Manager()
        lx = manager.list()

        for i, out in enumerate(self.listinc):
            tasks.put((i, out))
        for i in range(nproc):
            mp.Process(target=worker, args=(tasks, results)).start()
        for i in range(len(self.listinc)):
            j, res = results.get()
            lx.append(res)
        for i in range(nproc):
            tasks.put('STOP')
I tried different things (including, like I did in this simple example, the use of a manager to retrieve the data) but I always get a TypeError: an integer is required.
I do not use ctypes arrays because I don't really need shared arrays (I just want to retrieve my data), and after retrieving the data I want to work with it in NumPy.
Any thought, hint or help would be highly appreciated!
Edit: The complete error I get is the following:
Process Process-341:
Traceback (most recent call last):
File "/usr/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/home/toto/test/rd_para.py", line 81, in worker
results.put(i, self.get_dsets(x))
File "/usr/lib/python2.7/multiprocessing/queues.py", line 101, in put
if not self._sem.acquire(block, timeout):
TypeError: an integer is required
The answer was actually very simple. In the worker, since it is a tuple that I want to put on the queue, I can't do:
results.put(i, self.get_dsets(x))
but instead I have to do:
results.put((i, self.get_dsets(x)))
which then works perfectly well. Queue.put() takes the object as its first argument and an optional block flag as its second, so the two-argument call was passing the array where the blocking machinery expected an integer.
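For reference, multiprocessing.Pool removes the need to manage the queues by hand: pool.map() distributes the work items and returns the results already in input order. A minimal sketch (file names here are hypothetical) using the same per-file reader:

import multiprocessing as mp
import numpy as np
import tables

def read_dset(tuple_inc):
    # Same per-file logic as get_dsets() above
    fname, nx, ny = tuple_inc
    f = tables.openFile(fname)  # open_file() in newer PyTables
    x = np.transpose(f.root.x[:ny, :nx])
    f.close()
    return x

if __name__ == '__main__':
    listinc = [('data_%d.h5' % i, 100, 100) for i in range(8)]  # hypothetical files
    pool = mp.Pool(mp.cpu_count())
    arrays = pool.map(read_dset, listinc)  # results come back in input order
    pool.close()
    pool.join()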
