python ignore empty files - python

We prepared the following Python script (Python 2.7) to make histograms.
histogram.py
#!/usr/bin/env python
import sys
import numpy as np
import matplotlib as mpl
import matplotlib.mlab as mlab
mpl.use('Agg')
import matplotlib.pyplot as plt
sys.argv[1] # Define input name
sys.argv[2] # Define output name
sys.argv[3] # Define title
# Open the file name called "input_file"
input_file=sys.argv[1]
inp = open (input_file,"r")
lines = inp.readlines()
if len(lines) >= 20:
x = []
#numpoints = []
for line in lines:
# if int(line) > -10000: # Activate this line if you would like to filter any date (filter out values smaller than -10000 here)
x.append(float(line))
# the histogram of the data
n, bins, patches = plt.hist(x, 50, normed=False, facecolor='gray')
plt.xlabel('Differences')
numpoints = len(lines)
plt.ylabel('Frequency ( n =' + str(numpoints) + ' ) ' )
title=sys.argv[3]
plt.title(title)
plt.grid(True)
save_file=sys.argv[2]
plt.savefig(save_file+".png")
plt.clf()
inp.close()
example: input
1
2
3
The script will do the following
python histogram.py input ${output_file_name}.png ${title_name}
We added the line "if len(lines) >= 20:" so that no plot is made when there are fewer than 20 data points.
However, if the file is empty, this Python script freezes.
We added a bash line to remove any empty files before running "python histogram.py input ${output_file_name}.png ${title_name}":
find . -size 0 -delete
For some reason, this line always works in small-scale tests but not in real production runs inside several loops. So we would like to make "histogram.py" itself ignore any empty files, if possible.
Searching only turned up this link, which doesn't seem to be very helpful :(
Ignoring empty files from coverage report
Could anyone kindly offer some comments? Thanks!

Check that input_file is not empty with os.path.getsize(input_file) > 0.
os.path.getsize
You will need the full path, which I presume you have. It raises an error if the file does not exist or is inaccessible, so you may want to handle those cases.
This code works, ignoring empty files:
#!/usr/bin/env python
"""Plot a histogram of the numbers stored one per line in a text file.

Usage: python histogram.py INPUT OUTPUT TITLE

INPUT is the data file, OUTPUT is the image name (".png" is appended to it)
and TITLE is the plot title.  Nothing is plotted when INPUT is empty or
holds fewer than MIN_POINTS values.  Works on Python 2.7 and Python 3.
"""
import sys
import numpy as np
import matplotlib as mpl
import matplotlib.mlab as mlab
import os
mpl.use('Agg')  # select the non-interactive backend before importing pyplot
import matplotlib.pyplot as plt

# Files with fewer data points than this are skipped.
MIN_POINTS = 20


def main(argv):
    """Create the histogram described by argv and return an exit status.

    argv -- [program name, input name, output name, title], i.e. sys.argv.
            Extra trailing arguments are ignored.

    Returns 0 on success (including the "nothing to plot" cases) and 2 when
    arguments are missing.  os.path.getsize raises OSError if the input file
    does not exist or is inaccessible; float() raises ValueError on a
    non-numeric line.
    """
    if len(argv) < 4:
        sys.stderr.write("usage: histogram.py INPUT OUTPUT TITLE\n")
        return 2
    input_file, save_file, title = argv[1:4]

    # A zero-byte file has nothing to plot: report it and stop early.
    if os.path.getsize(input_file) == 0:
        print("Empty file")
        return 0

    # "with" closes the file even if a line fails to parse.
    with open(input_file, "r") as inp:
        # Skip blank lines: float() cannot parse them, and a trailing empty
        # line at the end of a data file is common.
        # To filter the data, extend the condition, e.g.
        # "and float(line) > -10000" drops values of -10000 and below.
        x = [float(line) for line in inp if line.strip()]

    # Too few points for a meaningful histogram: make no plot.
    if len(x) < MIN_POINTS:
        return 0

    # The "normed" keyword no longer exists in current matplotlib; plain
    # counts (the former normed=False) are the default.
    plt.hist(x, 50, facecolor='gray')
    plt.xlabel('Differences')
    plt.ylabel('Frequency ( n =' + str(len(x)) + ' ) ')
    plt.title(title)
    plt.grid(True)
    plt.savefig(save_file + ".png")
    plt.clf()
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
~$ python test.py empty.txt foo bar
Empty file

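Check beforehand that the file exists and is not empty. Note that, despite its name, emptyfile() below returns True when the file exists and is NOT empty.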
import os
def emptyfile(filepath):
    """Return True if filepath is an existing regular file with size > 0.

    NOTE: despite the name, a True result means the file is usable, i.e. it
    exists and is NOT empty.  Missing paths, directories and zero-byte files
    all give False, so callers can write ``if emptyfile(path): process(path)``.
    """
    # isfile() already returns a bool; getsize() is only evaluated when the
    # file exists, so this never raises for a missing path.
    return os.path.isfile(filepath) and os.path.getsize(filepath) > 0

Related

How to make details of a graph sorted

I have a directory that has 6 folders within. I am plotting folders automatically but when I see the result, it is a bit weird. While the folders are sorted in the computer, the plot is not ordered. For example, I want to have the result of C_r 0.05 before C_r 0.1 and so on. I have plotted using my folder path and I do not know how to make an example of this since I am plotting from my computer but I will put the graph that I have obtained and the code which plots the graph.
import os
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
sns.set(style="darkgrid")
#matplotlib qt
root = r'/home/hossein/Desktop/Out/INTERSECTION/BETA 15'
xx=[]
percentage=[]
labels = []
gg=[]
my_list = os.listdir(root)
my_list = [file for file in my_list if os.path.isdir(os.path.join(root, file))]
percetanges = []
for directory in my_list:
CASES = [file for file in os.listdir(os.path.join(root, directory)) if file.startswith('config')]
if len(CASES)==0:
continue
CASES.sort()
#print(CASES)
percentage=[]
for filename in CASES:
# print(filename)
with open(os.path.join(root, directory,filename), "r") as file:
#files[filename] = file.read()
lines = file.readlines()
x = [float(line.split()[0]) for line in lines]
y = [float(line.split()[1]) for line in lines]
#_new = np.array(y)
g = np.linspace(min(y),max(y),100)
h = min(y)*0.9
t = max(y)*0.9
xx=[]
for i in range(1,len(x)):
if (y[i] < h or y[i] > t):
xx.append(x[i])
percent = len(xx)/len(y)
percentage.append(percent)
labels.append(directory)
labels=sorted(labels)
percetanges.append(percentage)
percetanges=sorted(percetanges)
for i, x in enumerate(percetanges):
plt.boxplot(x,positions=[i],whis=0.001)
plt.xticks(np.arange(len(labels)),labels)
The answer is easy: you just need to sort the directory list before plotting. os.listdir returns entries in arbitrary order, so call my_list.sort() right after reading the directory; then the plot will come out in the right order.

How to solve the error when i draw graphic in python with using datas in csv file?

I think the problem is in the following steps, but just in case, I will also write the whole body of my code down below. The strangest thing is that this code can read over 6000 csv files and successfully show a graphic, but when I want more files to be read, an error occurs. The screenshot shows the graphic and the content of the csv files. As you can see, path = r'C:\Users\AK6PRAKT\Desktop\6daten' includes all the data and path = r'C:\Users\AK6PRAKT\Desktop\daten' includes only part of it. (enter image description here)
import os
from matplotlib import pyplot as pyplot
from collections import defaultdict
import csv
import numpy as np
path = r'C:\Users\AK6PRAKT\Desktop\6daten'
dirs = os.listdir(path)
s = []
x = []
y = []
names = []
...(ignore some steps for reading the datas from csv files)
print(list_temp1,list_temp2) #list_temp1 is the datas of xaxise, and list_temp2 of yaxise.
y.append(float(list_temp2))
names.append(list_temp1)
x = range(len(names))
pyplot.ylim((0, 40))
my_y_ticks = np.arange(0, 40, 10)
pyplot.plot(x,y, linewidth=2)
pyplot.xticks(x,names,rotation = 90)
fig = pyplot.figure(figsize=(10,10))
pyplot.show()
And then... the whole body. I should say something first: I had no background in computer science before, so it is really a little hard for me to deal with so much data at the very beginning. I am currently doing an internship at a German company and I started to learn Python one week ago. I got an assignment from my mentor, tried to divide it into several steps, searched for the commands for each step, and then combined them with some revisions. So it may seem that I did a lot of useless work. Please be kind in the comments (if you have suggestions, I am always glad to hear them, of course).
import os
from matplotlib import pyplot as pyplot
from collections import defaultdict
import csv
import numpy as np
path = r'C:\Users\AK6PRAKT\Desktop\6daten'
dirs = os.listdir(path)
s = []
x = []
y = []
names = []
fig = pyplot.figure()
for i in dirs:
if os.path.splitext(i)[1] == ".csv":
f = open(path+"/"+i)
iter_f = iter(f);
str = ""
for line in iter_f:
str = str + line
s.append(str)
with open(path+"/"+i,'r') as r:
lines=r.readlines()
with open(path+"/"+i,'w') as w:
for row in lines:
if 'Date' not in row:
w.write(row)
columns = defaultdict(list)
with open(path+"/"+i) as f:
reader = csv.reader(f)
for row in reader:
for (i,v) in enumerate(row):
columns[i].append(v)
list_temp1 = columns[0]
list_temp1 = np.array(list_temp1)
list_temp2 = columns[1]
list_temp2 = np.array(list_temp2)
print(list_temp1,list_temp2)
y.append(float(list_temp2))
names.append(list_temp1)
x = range(len(names))
pyplot.ylim((0, 40))
my_y_ticks = np.arange(0, 40, 10)
pyplot.plot(x,y, linewidth=2)
pyplot.xticks(x,names,rotation = 90)
pyplot.yticks(my_y_ticks)
fig = pyplot.figure(figsize=(10,10))
pyplot.show()
the graphic from parts of datas
the graphic can not show while reading all datas

Choose File names automatically based on a calculation and then import them to python

I have run into a wall and don't know how to proceed. I generate a lot of raw data from my CFD simulations. All the raw data is in text format. The text files are named "hA-'timestep'.txt", where A is 0,1,2,3,4,5,6,7,8,9. For example, h1-0500.txt refers to data obtained along h1 at the 500th time step. All the hA files are saved in a single folder. In my post-processing, I want to import files at different flow times and do some analysis. I have written code that calculates the time step from an equation that takes the flow time as user input.
What I would like to do is import all the files that correspond to the particular time step calculated through that equation. For example, if I give an input of 2400 for the flow time, the equation gives a time step of 16144. I want the file names that correspond to this time step to be imported automatically. Please see the code below.
I have uploaded the files corresponding to 16144. How do I choose the file names automatically based on the calculated time step? Currently, after getting the time step from the equation, I have to change the file names manually. I would really appreciate it if someone could guide me on this.
Samplefiles
# Notes about the Simulation#
# Total No. of Time Steps completed = 16152
# No. of Time Steps completed in HPC = 165
# Flow Time before HPC = 3.1212s
# Total Flow time of Fill Cycle = 2401.2s
import numpy as np
from matplotlib import pyplot as plt
import os
FT_init = 3.1212
delt = 0.15 # Timestep size
TS_init = 165
flowtime = input("Enter the flow time required: ") # This is user input. Timestep will be calculated based on the flow time entered.
timestep = (flowtime-FT_init)/delt
timestep = round(timestep + TS_init)
print timestep
def xlineplots(X1,Y1,V1,Tr1):
plt.figure(1)
plt.plot(X1,Tr1)
plt.legend(['h0','h3','h5','h7','h9'],loc=0)
plt.ylabel('Tracer Concentration')
plt.xlabel('X (m)')
plt.title('Tracer Concentration Variation along the Tank width')
plt.figtext(0.6,0.6,"Flow Time = 2400s",style= 'normal',alpha = 0.5)
plt.figtext(0.6,0.55,"Case: ddn110B",style= 'normal')
plt.savefig('hp1.png', format='png', dpi=600)
plt.figure(2)
plt.plot(X1,V1)
plt.legend(['h0','h3','h5','h7','h9'],loc=0)
plt.ylabel('V (m/s)')
plt.xlabel('X (m)')
plt.title('Vertical Velocity Variation along the Tank width')
plt.figtext(0.6,0.6,"Flow Time = 2400s",style= 'normal',alpha = 0.5)
plt.figtext(0.6,0.55,"Case: ddn110B",style= 'normal',alpha = 0.5)
plt.savefig('hv1.png', format='png', dpi=600)
path1='Location of the Directory' # Location where the files are located
filename1=np.array(['h0-16144.txt','h3-16144.txt','h5-16144.txt','h7-16144.txt','h9-16144.txt'])
for i in filename1:
format_name= i
data1 = os.path.join(path1,format_name)
data2 = np.loadtxt(data1,skiprows=1)
data2 = data2[data2[:,1].argsort()]
X1 = data2[:,1] # Assign x-coordinate from the imported text file
Y1 = data2[:,2] # Assign y-coordinate from the imported text file
V1 = data2[:,4] # Assign y-velocity from the imported text file
Tr1 = data2[:,5] # Assign Tracer Concentration from the imported text file
xlineplots(X1,Y1,V1,Tr1)
Error Message:
Enter the flow time required: 1250
8477
timestep: 8477
file(s) found: ['E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h0-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h1-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h2-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h3-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h4-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h5-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h6-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h7-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h8-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h9-8477.txt']
working in: E:/Fall2015/Research/CFD/ddn110B/Transfer/xline on: h0-8477
Traceback (most recent call last):
File "<ipython-input-52-0503f720722f>", line 54, in <module>
data2 = np.loadtxt(filename, skiprows=1)
File "E:\WinPython-64bit-2.7.10.3\python-2.7.10.amd64\lib\site-packages\numpy\lib\npyio.py", line 691, in loadtxt
fh = iter(open(fname, 'U'))
IOError: [Errno 2] No such file or directory: 'h9-8477.txt'
Is the issue with generating file names, or finding file names that match a certain pattern?
I could rework your code with:
# Build the expected file names for one time step and load each file that
# actually exists, skipping the ones that do not.
hs = [0, 3, 5, 7, 9]
timestep = 16144
# Names look like "h1-0500.txt": the time step is zero-padded to at least
# four digits and the ".txt" extension is part of the name.
filenames = ['h%d-%04d.txt' % (h, timestep) for h in hs]
for name in filenames:
    fname = os.path.join(path1, name)
    try:
        data = np.loadtxt(fname, skiprows=1)
    except IOError:
        # cannot open this file, most likely because it does not exist
        # continue with the next
        continue
    ...
Here I'm generating filenames with the desired format, and loading and using each one, if possible.
I could do searches with glob or re applied to directory listings, but there's nothing wrong with my try-except approach. It is good Python style.
========================
Here's an example of using glob (in an Ipython session):
First a testdir with a bunch of files (created with `touch`):
In [9]: ls testdir
h1-123.txt h12-1234.txt h2-123.txt h2-124.txt h3-124.txt h343.txt
In [10]: import glob
general search for files starting with h, ending with .txt:
In [11]: glob.glob('testdir/h*.txt')
Out[11]:
['testdir/h2-124.txt',
'testdir/h3-124.txt',
'testdir/h12-1234.txt',
'testdir/h343.txt',
'testdir/h1-123.txt',
'testdir/h2-123.txt']
narrow it to ones with 2 fields separated by dash
In [12]: glob.glob('testdir/h*-*.txt')
Out[12]:
['testdir/h2-124.txt',
'testdir/h3-124.txt',
'testdir/h12-1234.txt',
'testdir/h1-123.txt',
'testdir/h2-123.txt']
restrict the 1st field to single character
In [13]: glob.glob('testdir/h?-*.txt')
Out[13]:
['testdir/h2-124.txt',
'testdir/h3-124.txt',
'testdir/h1-123.txt',
'testdir/h2-123.txt']
for a specific 'time' string:
In [14]: glob.glob('testdir/h?-123.txt')
Out[14]: ['testdir/h1-123.txt', 'testdir/h2-123.txt']
The search string could be created with string formatting
In [15]: times=123
In [16]: glob.glob('testdir/h?-%s.txt'%times)
========================
With os and re I could search like:
In [28]: import os
In [29]: import re
In [30]: filelist=os.listdir('./testdir')
In [31]: [n for n in filelist if re.match('h[1-9]-123',n) is not None]
Out[31]: ['h1-123.txt', 'h2-123.txt']
======================
If the file names have to have 4 digits (or whatever) in the name then use something like:
'h%d-%04d'%(3,123) # 'h3-0123'
'testdir/h?-%04d.txt'%times
You need this sort of padding regardless of whether you use the try, glob or re.
Add zeros as prefix to a calculated value based on the number of digits
I hope I got what you meant but it wasn't that clear. When the user inputs the timestep, then only the files corresponding to that timestep are loaded and used further with your plotting function:
I considered the following structure:
project/
| cfd_plot.py
+ sample/
| | h0-16144.txt
| | h1-16144.txt
| | h3-16144.txt
| | h0-25611.txt
| | h1-25611.txt
| | <...>
and here is cfd_plot.py
from __future__ import print_function
import numpy as np
from matplotlib import pyplot as plt
import os
import re
# pth is a path for plt to save the image
def xlineplots(X1, Y1, V1, Tr1n, pth):
    """Save tracer-concentration and vertical-velocity line plots.

    X1   -- x-coordinates used as the horizontal axis of both plots
    Y1   -- y-coordinates (accepted for interface compatibility, not plotted)
    V1   -- vertical velocity values plotted against X1
    Tr1n -- tracer concentration values plotted against X1
    pth  -- output path prefix; writes pth + '-hp1.png' (tracer) and
            pth + '-hv1.png' (velocity)
    """
    # NOTE(review): each call draws a single curve, so only the first legend
    # entry is shown; the legend labels and the "Flow Time = 2400s" caption
    # are hard-coded -- confirm they match the data being plotted.
    fig, ax = plt.subplots()
    # Plot the tracer argument itself, not a same-named global variable.
    ax.plot(X1, Tr1n)
    ax.legend(['h0', 'h3', 'h5', 'h7', 'h9'], loc=0)
    ax.set_ylabel('Tracer Concentration')
    ax.set_xlabel('X (m)')
    ax.set_title('Tracer Concentration Variation along the Tank width')
    fig.text(.6, .6, "Flow Time = 2400s", style='normal', alpha=.5)
    fig.text(.6, .55, "Case: ddn110B", style='normal')
    fig.savefig(pth + '-hp1.png', format='png', dpi=600)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)

    fig, ax = plt.subplots()
    ax.plot(X1, V1)
    ax.legend(['h0', 'h3', 'h5', 'h7', 'h9'], loc=0)
    ax.set_ylabel('V (m/s)')
    ax.set_xlabel('X (m)')
    ax.set_title('Vertical Velocity Variation along the Tank width')
    fig.text(.6, .6, "Flow Time = 2400s", style='normal', alpha=.5)
    fig.text(.6, .55, "Case: ddn110B", style='normal', alpha=.5)
    fig.savefig(pth + '-hv1.png', format='png', dpi=600)
    plt.close(fig)
# Convert the requested flow time to a time step, find every data file
# recorded at that time step, and plot each one.
FT_init = 3.1212
delt = .15  # Timestep size
TS_init = 165

flowtime = input("Enter the flow time required: ")
# float() accepts whole and fractional flow times, and works whether input()
# returned a string (Python 3) or an already-evaluated number (Python 2).
timestep = (float(flowtime) - FT_init) / delt
# round() returns a float on Python 2; the '{:d}' format below needs an int.
timestep = int(round(timestep + TS_init))

reps = ['sample']  # location where the files are located

# first simple version
# files = []
# for rep in reps:  # recursive search for the files that match the timestep
#     for dirpath, dirnames, filenames in os.walk(rep):
#         for filename in [f for f in filenames if str(timestep) in f and f.endswith('.txt')]:
#             files.append(os.path.join(dirpath, filename))

# second version, using regular expressions
# Raw string so that '\.' reaches the regex engine unchanged.
reg_exp = r'^.*-({:d})\.txt'.format(timestep)
matcher = re.compile(reg_exp)
files = []
for rep in reps:  # recursive search for the files that match the timestep
    for dirpath, dirnames, filenames in os.walk(rep):
        for filename in filenames:
            if matcher.search(filename):
                files.append(os.path.join(dirpath, filename))

print('timestep:', timestep)
print('file(s) found: ', files)

for path in files:
    directory = os.path.dirname(path)  # directory of the .txt file
    name = os.path.splitext(os.path.basename(path))[0]  # basename of the .txt file
    print('working in:', directory, 'on:', name)
    # Load with the full path (a bare file name is resolved against the
    # current directory); ndmin=2 keeps a single-row file two-dimensional so
    # the column indexing below still works.
    data2 = np.loadtxt(path, skiprows=1, ndmin=2)
    data2 = data2[data2[:, 1].argsort()]  # order the rows by x-coordinate
    X1 = data2[:, 1]  # Assign x-coordinate from the imported text file
    Y1 = data2[:, 2]  # Assign y-coordinate from the imported text file
    V1 = data2[:, 4]  # Assign y-velocity from the imported text file
    Tr1 = data2[:, 5]  # Assign Tracer Concentration from the imported text file
    # here you can give directory + name or just name to xlineplots
    xlineplots(X1, Y1, V1, Tr1, os.path.join(directory, name))
    # xlineplots(X1, Y1, V1, Tr1, name)
UPDATE: made some edits (comments)
UPDATE2: using regular expressions on file search, the filter is '^.*-({:d})\.txt'.format(timestep):
^ match beginning of the line
.* match any character (except newline), zero or multiple times
- match the character -
({:d}) match the timestep, formatted as an integer
\. match the character .
txt match characters txt

Plotting in Python via matplotlib.pyplot (calculate the area)

I have a question/problem. I need to plot a graph from the numbers that I read from a file (which I have done), then draw a line connecting the start and the end, and calculate the area between these two lines. I have tried a lot of variations, but I have no idea how to do it.
I'm trying to do it with the matplotlib.pyplot library.
Here is the figure I should get after adding the connecting line between the beginning and the end; now I need to calculate the area between the black line and the blue one.
PS: the black one is kind of straight :)
Here is the source code and my data file...
http://pastebin.com/g40bAzPR
#!/path/to/python -tt
# numerical data
# python GraphicalPart.py ../dataFile.txt
import sys
import matplotlib.pyplot as plt
import numpy as np
def startDivide(fileName):
for i in range(1,2):
inputFile = open(fileName)
outputFile = open(fileName + "_" + str(i) + "_out.csv", "w")
floatList = []
for line in inputFile.readlines():
data = line.split(" ")
string = data[i]
if string.startswith('-'): #remove '-'
string = string[1:]
floatList.append(float(string))
floatList.sort() #sorting the list of data
for item in floatList:
outputFile.write("%s\n" % item)
outputFile.close()
inputFile.close()
data1=np.genfromtxt(fileName + "_" + str(i) + '_out.csv', skip_header=1)
plt.plot(data1)
plt.savefig(fileName + "_" + str(i) + "_.png")
plt.clf()
def main():
if len(sys.argv) != 2:
print "Not enough arguments. *_data.txt file only!"
else:
startDivide(sys.argv[1])
if __name__ == "__main__":
main()
for i in range(1,2) is a loop which only iterates once. Maybe you plan on increasing the number of iterations? If so, bear in mind that it's quicker to load the data once, rather than multiple times in a for-loop. You can do that using np.genfromtxt with the usecols parameter to specify the desired columns.
To find the area under the curve, you could use
np.trapz.
To find the area between two curves, subtract the area under the lower curve from the area under the upper curve. Assuming the diagonal line is always above the data curve:
import sys
import matplotlib.pyplot as plt
import numpy as np
def startDivide(filename):
    """Plot the sorted magnitudes of column 1 and measure the enclosed area.

    Reads the second whitespace-separated column of *filename*, takes
    absolute values and sorts them ascending.  Writes the sorted values to
    "<filename>_1_out.csv" and a plot of the data plus the straight line
    joining its first and last points to "<filename>_1_.png".

    Prints and returns the area between that straight line and the data
    curve (line minus curve), integrated with the trapezoidal rule over the
    sample index.
    """
    data = np.genfromtxt(filename, dtype=None, usecols=[1])
    # atleast_1d: a file with a single row yields a 0-d array, which cannot
    # be sorted or indexed.
    data = np.sort(np.abs(np.atleast_1d(data)))
    np.savetxt("{}_1_out.csv".format(filename), data)

    last = len(data) - 1
    plt.plot(data)
    plt.plot([0, last], [data[0], data[-1]])
    plt.savefig("{}_1_.png".format(filename))
    # Clear the figure so a later call does not draw on top of this plot.
    plt.clf()

    # np.trapz is named np.trapezoid in NumPy 2.0+; use whichever exists.
    integrate = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    # Area under the chord (two points spaced `last` apart) minus the area
    # under the data curve (unit spacing).
    area = integrate([data[0], data[-1]], dx=last) - integrate(data)
    print(area)
    return area


if __name__ == "__main__":
    startDivide(sys.argv[1])

matplotlib: Have axis maintaining ratio

I am new to matplotlib, and I have a very simple (I'm guessing) question.
I have some data that need to be represented in a rectangle of 50x70 "units" (they're feet, actually representing a room) but I don't seem to be able to get matplotlib drawing a rectangle with the same scale on both axis and keeping the 50x70 "dimensions" at the same time.
I've tried the following:
import json
import matplotlib
import os
import sys
import traceback
import matplotlib.pyplot as plt
DATA_FILE = os.path.join(os.path.expanduser("~"), "results.json")
FLOOR_DIMENSIONS = (50, 70)
if __name__ == "__main__":
if len(sys.argv) > 1:
DATA_FILE = os.path.abspath(sys.argv[0])
print "Gonna see what happens with file %s" % DATA_FILE
try:
with open(DATA_FILE, 'r') as f:
result_dict = json.load(f)
except (IOError, OSError, ValueError), e:
print "Received %s %s when trying to parse json from %s\n"\
"Showing traceback: %s" % (type(e), e, DATA_FILE, traceback.format_exc())
result_dict = {}
for d_mac in result_dict:
data = result_dict[d_mac]
if len(data) < 3:
continue
x_s = list(d['x'] for d in data)
y_s = list(d['y'] for d in data)
plt.scatter(x_s, y_s, marker='o', c=numpy.random.rand(5,1), s=15)
plt.xlim([0, FLOOR_DIMENSIONS[0]])
plt.ylim([0, FLOOR_DIMENSIONS[1]])
#plt.axis('equal')
plt.show()
sys.exit(0)
Doing that, I get:
Which draws my data inside a square, changing the X-Y scale (X is 50 points and Y is 70, therefore Y looks "shrunk").
Another option I tried was uncommenting the line saying plt.axis('equal'), but that "cuts" the Y axis (doesn't start in 0 and finishes in 70, but starts in 15 and ends in 55, probably because there's no data with y < 15 and y > 55)
But I don't want that either, I want the "canvas" starting in Y=0 and ending in Y=70, and if there's no data just show an empty space.
What I need is to draw something like this:
which I got by manually re-sizing the window where the plot was rendered :-D
Thank you in advance!
Add plt.axis('scaled').
edit: axis('image') may be better for your needs.
More axis settings can be found in the documentation.
import matplotlib.pyplot as plt
import numpy as np
xs = np.arange(50)
ys = (np.random.random(50)*70) + 15
plt.scatter(xs,ys)
plt.axis('image')
plt.axis([0, 50, 0, 70])
plt.show()
gives:
In the updated example I know the ys actually has a maximum of ~85, the offset was just to demonstrate proper axis enforcement.

Categories

Resources