How to make details of a graph sorted - python

I have a directory that has 6 folders within. I am plotting folders automatically but when I see the result, it is a bit weird. While the folders are sorted in the computer, the plot is not ordered. For example, I want to have the result of C_r 0.05 before C_r 0.1 and so on. I have plotted using my folder path and I do not know how to make an example of this since I am plotting from my computer but I will put the graph that I have obtained and the code which plots the graph.
import os
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
sns.set(style="darkgrid")
#matplotlib qt
# Root folder; each immediate sub-folder that contains 'config*' files is
# turned into one box of the box plot.
root = r'/home/hossein/Desktop/Out/INTERSECTION/BETA 15'
xx=[]
percentage=[]
labels = []
# gg is never used in the visible code.
gg=[]
# os.listdir() returns names in arbitrary order and nothing below sorts
# my_list, so the directories are visited in that arbitrary order.
my_list = os.listdir(root)
# Keep only the entries that are directories.
my_list = [file for file in my_list if os.path.isdir(os.path.join(root, file))]
# One list of per-file fractions for every processed directory.
percetanges = []
for directory in my_list:
# Data files of this directory: every entry whose name starts with 'config'.
CASES = [file for file in os.listdir(os.path.join(root, directory)) if file.startswith('config')]
# Directories without any 'config*' file are skipped.
if len(CASES)==0:
continue
CASES.sort()
#print(CASES)
# Start a fresh per-directory list of fractions.
percentage=[]
for filename in CASES:
# print(filename)
with open(os.path.join(root, directory,filename), "r") as file:
#files[filename] = file.read()
lines = file.readlines()
# First and second whitespace-separated column of every line, as floats.
x = [float(line.split()[0]) for line in lines]
y = [float(line.split()[1]) for line in lines]
#_new = np.array(y)
# g is computed but not used anywhere in the visible code.
g = np.linspace(min(y),max(y),100)
# Thresholds at 90 % of the smallest and of the largest y value.
h = min(y)*0.9
t = max(y)*0.9
xx=[]
# The range starts at index 1, so the first sample is never tested.
for i in range(1,len(x)):
# Collect the x values whose y lies below h or above t.
if (y[i] < h or y[i] > t):
xx.append(x[i])
# Fraction of samples outside the [h, t] band for this file.
percent = len(xx)/len(y)
percentage.append(percent)
labels.append(directory)
# NOTE(review): labels and percetanges are each sorted on their own (here and
# three lines below), so entry i of one list is no longer guaranteed to belong
# to the same directory as entry i of the other.
labels=sorted(labels)
percetanges.append(percentage)
percetanges=sorted(percetanges)
# One box per entry of percetanges, drawn at x position i.
for i, x in enumerate(percetanges):
plt.boxplot(x,positions=[i],whis=0.001)
# Tick i is labelled with labels[i].
plt.xticks(np.arange(len(labels)),labels)

The answer is simple: the directory list just needs to be sorted before plotting. Right after reading the folder names, call my_list.sort(); the plot will then come out in the right order.

Related

How to automate loading multiple files into numpy arrays using a simple "for" loop?

I usually load my data, which in most cases consists of only two columns, using the np.loadtxt command as follows:
x0, y0 = np.loadtxt('file_0.txt', delimiter='\t', unpack=True)
x1, y1 = np.loadtxt('file_1.txt', delimiter='\t', unpack=True)
.
.
xn, yn = np.loadtxt('file_n.txt', delimiter='\t', unpack=True)
then plot each pair on its own, which is not ideal!
I want to make a simple "for" loop that goes for all text files in the same directory, load the files and plot them on the same figure.
import os

import matplotlib.pyplot as plt
import numpy as np  # needed for np.loadtxt below

# All file names in the current working directory that end with .txt.
# os.listdir() order is arbitrary, so sort to get a reproducible plot order.
myfiles = sorted(myfile for myfile in os.listdir() if myfile.endswith(".txt"))

# Create one figure that all curves share.
plt.figure()

# Load and plot every file in turn.
for myfile in myfiles:
    # unpack=True returns the columns as separate arrays (x, y); the files
    # are expected to hold two tab-separated columns.
    x, y = np.loadtxt(myfile, delimiter='\t', unpack=True)
    plt.plot(x, y)

# Show the figure once, after every file has been plotted.
plt.show()
Load all the files in a dictionary using:
# Maps the file index i to the result of loading 'file_<i>.txt'.
d = {}
# n must be defined beforehand; range(n) loads file_0.txt ... file_<n-1>.txt.
for i in range(n):
    # With unpack=True each value holds the file's columns as separate arrays.
    d[i] = np.loadtxt('file_' + str(i) + '.txt', delimiter='\t', unpack=True)
Now, to access kth file, use d[k] or:
xk, yk = d[k]
Since you have not described the data in the files or the plot you want to create, it is hard to tell exactly what to do. For plotting, you can refer to the Matplotlib or Seaborn libraries.
You can also use glob to get all the files -
from glob import glob
import os

import numpy as np

# One (x, y) tuple per loaded file, in the same order as files_list.
res = []
file_path = "YOUR PATH"
file_pattern = "file_*.txt"
# glob() returns matches in arbitrary order; sorting keeps res reproducible.
files_list = sorted(glob(os.path.join(file_path, file_pattern)))
for f in files_list:
    print(f'----- Loading {f} -----')
    # unpack=True returns the two tab-separated columns as separate arrays.
    x, y = np.loadtxt(f, delimiter='\t', unpack=True)
    res.append((x, y))
res will contain your file contents at each index value corresponding to f

How to solve the error when i draw graphic in python with using datas in csv file?

I think the problem is in the following steps, but just in case, I will also write the whole body of my code down below. The strangest thing is that this code can read over 6000 csv files and a graph is shown successfully, but when I want more files to be read, an error occurs. The screenshot shows the graph and the content of the csv files. As you can see, the path = r'C:\Users\AK6PRAKT\Desktop\6daten' includes all the data and path = r'C:\Users\AK6PRAKT\Desktop\daten' includes only part of it. enter image description here
import os
from matplotlib import pyplot as pyplot
from collections import defaultdict
import csv
import numpy as np
path = r'C:\Users\AK6PRAKT\Desktop\6daten'
dirs = os.listdir(path)
s = []
x = []
y = []
names = []
...(ignore some steps for reading the datas from csv files)
print(list_temp1,list_temp2) #list_temp1 is the datas of xaxise, and list_temp2 of yaxise.
y.append(float(list_temp2))
names.append(list_temp1)
x = range(len(names))
pyplot.ylim((0, 40))
my_y_ticks = np.arange(0, 40, 10)
pyplot.plot(x,y, linewidth=2)
pyplot.xticks(x,names,rotation = 90)
fig = pyplot.figure(figsize=(10,10))
pyplot.show()
And then... the whole body. I must say something first: I had no background in computer science before, so it is really a bit hard for me to deal with so much data at the very beginning. I am currently doing an internship in a German company and I started to learn Python one week ago. I got an assignment from my mentor; I tried to divide the whole assignment into several steps, searched for the commands for each step, and then combined them with some revisions. So it may seem that I did a lot of useless work. Please be kind in the comments (if you have suggestions about that, I am always glad to hear them, of course).
import os
from matplotlib import pyplot as pyplot
from collections import defaultdict
import csv
import numpy as np
# Folder that is scanned for .csv files.
path = r'C:\Users\AK6PRAKT\Desktop\6daten'
dirs = os.listdir(path)
# s collects the full text of every csv file; y and names collect the values
# that are plotted at the end.
s = []
x = []
y = []
names = []
fig = pyplot.figure()
for i in dirs:
# Only entries with a '.csv' extension are processed.
if os.path.splitext(i)[1] == ".csv":
# NOTE(review): this handle is opened without a with-block and is never
# explicitly closed in the visible code.
f = open(path+"/"+i)
iter_f = iter(f);
# NOTE(review): the name 'str' shadows the built-in str type from here on.
str = ""
# Concatenate the whole file into one string.
for line in iter_f:
str = str + line
s.append(str)
# Read every line of the same file again ...
with open(path+"/"+i,'r') as r:
lines=r.readlines()
# ... and rewrite the file in place, dropping each line that contains 'Date'.
with open(path+"/"+i,'w') as w:
for row in lines:
if 'Date' not in row:
w.write(row)
# Column index -> list of the cell values found in that column.
columns = defaultdict(list)
with open(path+"/"+i) as f:
reader = csv.reader(f)
for row in reader:
# NOTE(review): this inner 'i' rebinds the outer loop variable 'i' (the file
# name) to a column index.
for (i,v) in enumerate(row):
columns[i].append(v)
# First column becomes the tick label, second column the plotted value.
list_temp1 = columns[0]
list_temp1 = np.array(list_temp1)
list_temp2 = columns[1]
list_temp2 = np.array(list_temp2)
print(list_temp1,list_temp2)
# NOTE(review): float() is applied to a whole NumPy array; presumably this
# only works when the array holds exactly one value - confirm.
y.append(float(list_temp2))
names.append(list_temp1)
# One x position per collected name.
x = range(len(names))
pyplot.ylim((0, 40))
my_y_ticks = np.arange(0, 40, 10)
pyplot.plot(x,y, linewidth=2)
pyplot.xticks(x,names,rotation = 90)
pyplot.yticks(my_y_ticks)
# NOTE(review): a second figure is created here, after all plotting calls and
# just before show().
fig = pyplot.figure(figsize=(10,10))
pyplot.show()
the graphic from parts of datas
the graphic can not show while reading all datas

having problems with matplotlib and spectroscopy data

I am trying to plot a .dat file from an stellar catalog using this code
# pyfits is optional: remember whether it could be imported. Only a failed
# import is caught, so unrelated errors are not silently hidden.
try:
    import pyfits
    noPyfits = False
except ImportError:
    noPyfits = True

import matplotlib.pyplot as plt
import numpy as np

# The with-block closes the file even if reading raises.
with open('/home/mcditoos/Desktop/Astrophysics_programs/Data_LAFT/ESPECTROS/165401.dat', 'r') as f2:
    lines = f2.readlines()

x1 = []
y1 = []
for line in lines:
    p = line.split()
    # A line with fewer than two whitespace-separated fields (for example a
    # blank line) makes p[0] / p[1] raise IndexError.
    x1.append(float(p[0]))
    y1.append(float(p[1]))

xv = np.array(x1)
yv = np.array(y1)
plt.plot(xv, yv)
plt.show()
however i get the following error:
x1.append(float(p[0]))
IndexError: list index out of range
I would also like to know whether there is any way to make the program open the next .dat file given an input.
I may not understand fully your question but why don't you use
# Requires numpy to be imported under the name 'numpy'.
# Unpacking into X, Y iterates over the first axis of the returned array, so
# this form needs an array with exactly two rows.
X, Y = numpy.genfromtxt('yourfile', dtype='str')
# The values were read as strings (dtype='str'); convert them to floats.
X = X.astype('float')
Y = Y.astype('float')
If in your file you have 2 columns you can transpose your table with
X, Y = numpy.genfromtxt('yourfile', dtype='str').T

Choose File names automatically based on a calculation and then import them to python

I have run into a wall where I don't know how to proceed further. I generate a lot of Raw Data from my CFD simulations. All the raw data will be in text format. The format of the text file will be "hA-'timestep'.txt" where A equals 0,1,2,3,4,5,6,7,8,9. For Eg h1-0500.txt will refer to data obtained along h1 at 500th time step.All the files of hA will be saved in a single folder. In my post processing, I want to import files at different flow times and do some analysis. I have written a code where it will calculate the timestep based on some equation which needs the flow time as user input.
What I would like to do is import all those files which correspond to the a particular timestep calculated through an equation.For Example, if I give an input of 2400 for the flow time, then the equation will give me time step as 16144. I want those file names which correspond to this time step to be automatically imported.Please see the below code.
I have uploaded the files corresponding to 16144. How do I choose the file name automatically based on the time step that is calculated. Currently after getting the time step from equation, I have to manually change the file name. I would really appreciate if some one could guide me on this.
Samplefiles
# Notes about the Simulation#
# Total No. of Time Steps completed = 16152
# No. of Time Steps completed in HPC = 165
# Flow Time before HPC = 3.1212s
# Total Flow time of Fill Cycle = 2401.2s
import numpy as np
from matplotlib import pyplot as plt
import os
# Constants of the timestep formula below.
FT_init = 3.1212
delt = 0.15 # Timestep size
TS_init = 165
# NOTE(review): flowtime is used directly in arithmetic on the next line, so
# input() must yield a number here - consistent with the Python 2 print
# statement further down.
flowtime = input("Enter the flow time required: ") # This is user input. Timestep will be calculated based on the flow time entered.
# timestep = round((flowtime - FT_init) / delt + TS_init)
timestep = (flowtime-FT_init)/delt
timestep = round(timestep + TS_init)
# Python 2 print statement.
print timestep
def xlineplots(X1,Y1,V1,Tr1):
    """Add one tracer and one velocity curve to figures 1 and 2 and save both.

    Args:
        X1: x-coordinates used as the abscissa of both plots.
        Y1: y-coordinates; accepted but not used by this function.
        V1: values plotted against X1 on figure 2 ('V (m/s)').
        Tr1: values plotted against X1 on figure 1 ('Tracer Concentration').

    The figures are addressed by number, so repeated calls draw onto the same
    two figures and overwrite 'hp1.png' and 'hv1.png' each time.
    """
    # Figure 1: tracer concentration along the tank width.
    plt.figure(1)
    plt.plot(X1,Tr1)
    plt.legend(['h0','h3','h5','h7','h9'],loc=0)
    plt.ylabel('Tracer Concentration')
    plt.xlabel('X (m)')
    plt.title('Tracer Concentration Variation along the Tank width')
    plt.figtext(0.6,0.6,"Flow Time = 2400s",style= 'normal',alpha = 0.5)
    plt.figtext(0.6,0.55,"Case: ddn110B",style= 'normal')
    plt.savefig('hp1.png', format='png', dpi=600)

    # Figure 2: vertical velocity along the tank width.
    plt.figure(2)
    plt.plot(X1,V1)
    plt.legend(['h0','h3','h5','h7','h9'],loc=0)
    plt.ylabel('V (m/s)')
    plt.xlabel('X (m)')
    plt.title('Vertical Velocity Variation along the Tank width')
    plt.figtext(0.6,0.6,"Flow Time = 2400s",style= 'normal',alpha = 0.5)
    plt.figtext(0.6,0.55,"Case: ddn110B",style= 'normal',alpha = 0.5)
    plt.savefig('hv1.png', format='png', dpi=600)
path1='Location of the Directory' # Location where the files are located
# The timestep (16144) is hard-coded into every file name.
filename1=np.array(['h0-16144.txt','h3-16144.txt','h5-16144.txt','h7-16144.txt','h9-16144.txt'])
for i in filename1:
format_name= i
# Full path of the current file.
data1 = os.path.join(path1,format_name)
# The first line of each file is skipped.
data2 = np.loadtxt(data1,skiprows=1)
# Reorder the rows by ascending value of column 1.
data2 = data2[data2[:,1].argsort()]
X1 = data2[:,1] # Assign x-coordinate from the imported text file
Y1 = data2[:,2] # Assign y-coordinate from the imported text file
V1 = data2[:,4] # Assign y-velocity from the imported text file
Tr1 = data2[:,5] # Assign Tracer Concentration from the imported text file
# NOTE(review): presumably called once per file, inside the loop - confirm.
xlineplots(X1,Y1,V1,Tr1)
Error Message:
Enter the flow time required: 1250
8477
timestep: 8477
file(s) found: ['E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h0-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h1-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h2-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h3-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h4-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h5-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h6-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h7-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h8-8477.txt', 'E:/Fall2015/Research/CFD/ddn110B/Transfer/xline\\h9-8477.txt']
working in: E:/Fall2015/Research/CFD/ddn110B/Transfer/xline on: h0-8477
Traceback (most recent call last):
File "<ipython-input-52-0503f720722f>", line 54, in <module>
data2 = np.loadtxt(filename, skiprows=1)
File "E:\WinPython-64bit-2.7.10.3\python-2.7.10.amd64\lib\site-packages\numpy\lib\npyio.py", line 691, in loadtxt
fh = iter(open(fname, 'U'))
IOError: [Errno 2] No such file or directory: 'h9-8477.txt'
Is the issue with generating file names, or finding file names that match a certain pattern?
I could rework your code with:
hs = [0, 3, 5, 7, 9]
timestep = 16144
# Build names of the form 'h<A>-<timestep>.txt', e.g. 'h0-16144.txt'; the
# extension is part of the name, otherwise no file would ever be found.
filenames = ['h%s-%s.txt' % (h, timestep) for h in hs]
for name in filenames:
    # path1 is the data directory; it must be defined before this loop.
    fname = os.path.join(path1, name)
    try:
        data = np.loadtxt(fname, skiprows=1)
    except IOError:
        # cannot open this file, most likely because it does not exist
        # continue with the next
        continue
    # Placeholder: process / plot `data` here.
    ...
Here I'm generating filenames with the desired format, and loading and using each one, if possible.
I could do searches with glob or re applied to directory listings, but there's nothing wrong with my try-except approach. It is good Python style.
========================
Here's an example of using glob (in an Ipython session):
First, a testdir with a bunch of files (created with `touch`):
In [9]: ls testdir
h1-123.txt h12-1234.txt h2-123.txt h2-124.txt h3-124.txt h343.txt
In [10]: import glob
general search for files starting with h, ending with .txt:
In [11]: glob.glob('testdir/h*.txt')
Out[11]:
['testdir/h2-124.txt',
'testdir/h3-124.txt',
'testdir/h12-1234.txt',
'testdir/h343.txt',
'testdir/h1-123.txt',
'testdir/h2-123.txt']
narrow it to ones with 2 fields separated by dash
In [12]: glob.glob('testdir/h*-*.txt')
Out[12]:
['testdir/h2-124.txt',
'testdir/h3-124.txt',
'testdir/h12-1234.txt',
'testdir/h1-123.txt',
'testdir/h2-123.txt']
restrict the 1st field to single character
In [13]: glob.glob('testdir/h?-*.txt')
Out[13]:
['testdir/h2-124.txt',
'testdir/h3-124.txt',
'testdir/h1-123.txt',
'testdir/h2-123.txt']
for a specific 'time' string:
In [14]: glob.glob('testdir/h?-123.txt')
Out[14]: ['testdir/h1-123.txt', 'testdir/h2-123.txt']
The search string could be created with string formatting
In [15]: times=123
In [16]: glob.glob('testdir/h?-%s.txt'%times)
========================
With os and re I could search like:
In [28]: import os
In [29]: import re
In [30]: filelist=os.listdir('./testdir')
In [31]: [n for n in filelist if re.match('h[1-9]-123',n) is not None]
Out[31]: ['h1-123.txt', 'h2-123.txt']
======================
If the file names have to have 4 digits (or whatever) in the name then use something like:
'h%d-%04d'%(3,123) # 'h3-0123'
'testdir/h?-%04d.txt'%times
You need this sort of padding regardless of whether you use the try, glob or re.
Add zeros as prefix to a calculated value based on the number of digits
I hope I got what you meant but it wasn't that clear. When the user inputs the timestep, then only the files corresponding to that timestep are loaded and used further with your plotting function:
I considered the following structure:
project/
| cfd_plot.py
+ sample/
| | h0-16144.txt
| | h1-16144.txt
| | h3-16144.txt
| | h0-25611.txt
| | h1-25611.txt
| | <...>
and here is cfd_plot.py
from __future__ import print_function
import numpy as np
from matplotlib import pyplot as plt
import os
import re
# pth is a path for plt to save the image
def xlineplots(X1, Y1, V1, Tr1n, pth):
    """Save a tracer-concentration and a vertical-velocity plot as PNG files.

    Two images are written: ``pth + '-hp1.png'`` (Tr1n against X1) and
    ``pth + '-hv1.png'`` (V1 against X1).

    Args:
        X1: x-coordinates used as the abscissa of both plots.
        Y1: y-coordinates; accepted but not used by this function.
        V1: values plotted on the 'V (m/s)' axis.
        Tr1n: values plotted on the 'Tracer Concentration' axis.
        pth: path prefix of the two output images.
    """
    fig, ax = plt.subplots()
    # Plot the Tr1n argument. The body used to read a name ``Tr1`` that is
    # not a parameter, so it only worked when a global ``Tr1`` existed.
    ax.plot(X1, Tr1n)
    ax.legend(['h0', 'h3', 'h5', 'h7', 'h9'], loc=0)
    ax.set_ylabel('Tracer Concentration')
    ax.set_xlabel('X (m)')
    ax.set_title('Tracer Concentration Variation along the Tank width')
    plt.figtext(.6, .6, "Flow Time = 2400s", style='normal', alpha=.5)
    plt.figtext(.6, .55, "Case: ddn110B", style='normal')
    plt.savefig(pth + '-hp1.png', format='png', dpi=600)
    # pyplot keeps a figure alive until it is closed; release it so that one
    # call per input file does not accumulate open figures.
    plt.close(fig)

    fig, ax = plt.subplots()
    ax.plot(X1, V1)
    ax.legend(['h0', 'h3', 'h5', 'h7', 'h9'], loc=0)
    ax.set_ylabel('V (m/s)')
    ax.set_xlabel('X (m)')
    ax.set_title('Vertical Velocity Variation along the Tank width')
    plt.figtext(.6, .6, "Flow Time = 2400s", style='normal', alpha=.5)
    plt.figtext(.6, .55, "Case: ddn110B", style='normal', alpha=.5)
    plt.savefig(pth + '-hv1.png', format='png', dpi=600)
    plt.close(fig)
FT_init = 3.1212
delt = .15  # Timestep size
TS_init = 165

flowtime = input("Enter the flow time required: ")
# float() accepts whole and fractional flow times alike; int(round(...))
# guarantees an integer for the '{:d}' format below even where round()
# returns a float.
timestep = (float(flowtime) - FT_init) / delt
timestep = int(round(timestep + TS_init))

reps = ['sample']  # location where the files are located

# first simple version
# files = []
# for rep in reps:  # recursive search for the files that match the timestep
#     for dirpath, dirnames, filenames in os.walk(rep):
#         for filename in [f for f in filenames if str(timestep) in f and f.endswith('.txt')]:
#             files.append(os.path.join(dirpath, filename))

# second version, using regular expressions
# Raw string so that the backslash in '\.' reaches the regex engine as written.
reg_exp = r'^.*-({:d})\.txt'.format(timestep)
files = []
for rep in reps:  # recursive search for the files that match the timestep
    for dirpath, dirnames, filenames in os.walk(rep):
        for filename in filenames:
            if re.search(reg_exp, filename):
                # Keep the directory part so the file can be opened from anywhere.
                files.append(os.path.join(dirpath, filename))

print('timestep:', timestep)
print('file(s) found: ', files)

for file in files:
    directory = os.path.dirname(file)  # directory of the .txt file
    name = os.path.splitext(os.path.basename(file))[0]  # basename of the .txt file
    print('working in:', directory, 'on:', name)
    # Load via the full path; the first line of the file is skipped.
    data2 = np.loadtxt(file, skiprows=1)
    # Reorder the rows by ascending value of column 1.
    data2 = data2[data2[:, 1].argsort()]
    X1 = data2[:, 1]  # Assign x-coordinate from the imported text file
    Y1 = data2[:, 2]  # Assign y-coordinate from the imported text file
    V1 = data2[:, 4]  # Assign y-velocity from the imported text file
    Tr1 = data2[:, 5]  # Assign Tracer Concentration from the imported text file
    # here you can give directory + name or just name to xlineplots
    xlineplots(X1, Y1, V1, Tr1, os.path.join(directory, name))
    # xlineplots(X1, Y1, V1, Tr1, name)
UPDATE: made some edits (comments)
UPDATE2: using regular expressions on file search, the filter is '^.*-({:d})\.txt'.format(timestep):
^ match beginning of the line
.* match any character (except newline), zero or multiple times
- match the character -
({:d}) match the timestep, formatted as an integer
\. match the character .
txt match characters txt

python ignore empty files

We prepare a following python scripts (python 2.7) to make histograms.
histogram.py
#!/usr/bin/env python
import sys
import numpy as np
import matplotlib as mpl
import matplotlib.mlab as mlab
mpl.use('Agg')
import matplotlib.pyplot as plt
# These three bare expressions assign nothing; their only effect is to raise
# IndexError when fewer than three command-line arguments are given.
sys.argv[1] # Define input name
sys.argv[2] # Define output name
sys.argv[3] # Define title
# Open the file name called "input_file"
input_file=sys.argv[1]
inp = open (input_file,"r")
lines = inp.readlines()
# Fewer than 20 lines: per the accompanying text, no plot is made.
if len(lines) >= 20:
x = []
#numpoints = []
# Every line is expected to hold a single number.
for line in lines:
# if int(line) > -10000: # Activate this line if you would like to filter any date (filter out values smaller than -10000 here)
x.append(float(line))
# the histogram of the data
n, bins, patches = plt.hist(x, 50, normed=False, facecolor='gray')
plt.xlabel('Differences')
# The y label reports the number of lines read.
numpoints = len(lines)
plt.ylabel('Frequency ( n =' + str(numpoints) + ' ) ' )
title=sys.argv[3]
plt.title(title)
plt.grid(True)
# '.png' is appended to the output name given on the command line.
save_file=sys.argv[2]
plt.savefig(save_file+".png")
plt.clf()
inp.close()
example: input
1
2
3
The script will do the following
python histogram.py input ${output_file_name}.png ${title_name}
We add a line "if len(lines) >= 20:" so if the data points are less than 20, we don't make a plot.
However, if the file is empty, this Python script freezes.
We add a bash line to remove any empty files before running "python histogram.py input ${output_file_name}.png ${title_name}"
find . -size 0 -delete
For some reasons, this line always works in small scale testings but not in real production runs under several loops. So we would love to make the "histogram.py" ignore any empty files if possible.
The search only finds this link which doesn't seem to be quite helpful : (
Ignoring empty files from coverage report
Could anyone kindly offer some comments? Thanks!
Check if the input_file file is empty os.path.getsize(input_file) > 0
os.path.getsize
You will need the full path which I presume you will have and it will raise an error if the file does not exist or is inaccessible so you may want to handle those cases.
This code works, ignoring empty files:
#!/usr/bin/env python
import sys
import numpy as np
import matplotlib as mpl
import matplotlib.mlab as mlab
import os
mpl.use('Agg')
import matplotlib.pyplot as plt
# Command-line arguments: input file, output base name, plot title.
# Reading all three up front fails early when one of them is missing.
input_file = sys.argv[1]
save_file = sys.argv[2]
title = sys.argv[3]

# An empty file has nothing to plot: report it instead of reading it.
if os.path.getsize(input_file) > 0:
    # The with-block closes the file even if parsing below raises.
    with open(input_file, "r") as inp:
        lines = inp.readlines()

    # Fewer than 20 data points: deliberately produce no plot.
    if len(lines) >= 20:
        # Every line is expected to hold a single number.
        # (To filter values, add a condition such as `if int(line) > -10000`.)
        x = [float(line) for line in lines]
        # the histogram of the data
        n, bins, patches = plt.hist(x, 50, normed=False, facecolor='gray')
        plt.xlabel('Differences')
        numpoints = len(lines)
        plt.ylabel('Frequency ( n =' + str(numpoints) + ' ) ')
        plt.title(title)
        plt.grid(True)
        plt.savefig(save_file + ".png")
        plt.clf()
else:
    # Function-call form prints the same text under Python 2 and Python 3.
    print("Empty file")
~$ python test.py empty.txt foo bar
Empty file
Check if the file exists + is not empty before hand.
import os
def emptyfile(filepath):
    """Return True when *filepath* is an existing regular file of size > 0.

    NOTE: despite the name, a True result means the file exists and is NOT
    empty; False means it is missing, not a regular file, or empty.
    """
    # isfile() already returns a bool; checking it first also keeps
    # getsize() from being called on a path that does not exist.
    return os.path.isfile(filepath) and os.path.getsize(filepath) > 0

Categories

Resources