I am trying to create a time series of sea surface temperature data over the whole year for six consecutive years and plot them using subplots. I want to label the x-ticks with the months. I tried using matplotlib.dates; however, the year does not change on the subsequent subplots.
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
# Reads six yearly daily-SST text files (2014-2019), takes field 2 of every
# data row as the SST value, masks the -9.99 fill value and draws one subplot
# per year.
# NOTE(review): indentation was lost in this listing; loop bodies are not
# marked by leading whitespace.
#
# Each file is split into rows of whitespace-separated string tokens.
# NOTE(review): the file objects returned by open() are never closed explicitly.
arrays14 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2014.ascii')] #loading the data
arrays15 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2015.ascii')]
arrays16 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2016.ascii')]
arrays17 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2017.ascii')]
arrays18 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2018.ascii')]
arrays19 = [np.asarray(list(map(str, line.split()))) for line in open('/home/swadhin/project/sst/daily/sst15n90e_dy_2019.ascii')]
# Rows 0-4 of every file are removed as headers; the 2019 file additionally
# drops rows 215-217 (presumably malformed rows - confirm against the data).
arrays14 = np.delete(arrays14,[0,1,2,3,4],0) #deleting the headers
arrays15 = np.delete(arrays15,[0,1,2,3,4],0)
arrays16 = np.delete(arrays16,[0,1,2,3,4],0)
arrays17 = np.delete(arrays17,[0,1,2,3,4],0)
arrays18 = np.delete(arrays18,[0,1,2,3,4],0)
arrays19 = np.delete(arrays19,[0,1,2,3,4,215,216,217],0)
# 2014: collect field 2 of each row as SST (field 0 is read into d1).
sst14 = []
for i in arrays14:
d1 = i[0]
# d2 reads the same field (index 2) as sst1 below.
d2 = i[2]
sst1 = i[2]
sst14.append(sst1)
# NOTE(review): datetime1 and datetime2 are not initialised anywhere in the
# visible code, so these appends raise NameError unless the lists are created
# elsewhere.
datetime1.append(d1)
datetime2.append(d2)
# The string tokens are converted to float64 here.
sst14 = np.array(sst14,dtype = np.float64)
sst_14_m = np.ma.masked_equal(sst14,-9.99) #masking the fillvalues
# 2015-2018 repeat the same extract / convert / mask steps.
sst15 = []
for i in arrays15:
sst2 = i[2]
sst15.append(sst2)
sst15 = np.array(sst15,dtype = np.float64)
sst_15_m = np.ma.masked_equal(sst15,-9.99)
sst16 = []
for i in arrays16:
sst3 = i[2]
sst16.append(sst3)
sst16 = np.array(sst16,dtype = np.float64)
sst_16_m = np.ma.masked_equal(sst16,-9.99)
sst17 = []
for i in arrays17:
sst4 = i[2]
sst17.append(sst4)
sst17 = np.array(sst17,dtype = np.float64)
sst_17_m = np.ma.masked_equal(sst17,-9.99)
sst18 = []
for i in arrays18:
sst5 = i[2]
sst18.append(sst5)
sst18 = np.array(sst18,dtype = np.float64)
sst_18_m = np.ma.masked_equal(sst18,-9.99)
# The result of this call is discarded (it only displays in an interactive session).
np.shape(sst18)
sst19 = []
for i in arrays19:
sst6 = i[2]
sst19.append(sst6)
sst19 = np.array(sst19,dtype = np.float64)
# The 2019 series is padded to the length of the 2014 series: samples 0-210
# are copied, 118 fill values (-9.99) are inserted at 211-328, and
# sst19[211:247] is placed at 329-364. The fill values are masked afterwards.
sst19_u = np.zeros(len(sst14), dtype = np.float64)
sst19_fill = np.full([118],-9.99,dtype=np.float64)
sst19_u[0:211] = sst19[0:211]
sst19_u[211:329] = sst19_fill
sst19_u[329:365] = sst19[211:247]
sst_19_m = np.ma.masked_equal(sst19_u,-9.99)
##########Plotting
# Move matplotlib's date epoch to 2016-01-01 for every axis in the process.
new_epoch = '2016-01-01T00:00:00'
mdates.set_epoch(new_epoch)
fig, axs=plt.subplots(3, 2, figsize=(12, 8),constrained_layout=True)
axs = axs.ravel()
# Only y-values are passed, so each series is drawn against its sample index.
# NOTE(review): the date locators/formatter below then interpret those indices
# as days relative to the single epoch set above - presumably the reason every
# panel shows the same year.
axs[0].plot(sst_14_m)
axs[1].plot(sst_15_m)
axs[2].plot(sst_16_m)
axs[3].plot(sst_17_m)
axs[4].plot(sst_18_m)
axs[5].plot(sst_19_m)
# Same month locator / concise date formatter, y-range and y-label on all six axes.
for i in range(6):
axs[i].xaxis.set_major_locator(mdates.MonthLocator())
axs[i].xaxis.set_minor_locator(mdates.MonthLocator())
axs[i].xaxis.set_major_formatter(mdates.ConciseDateFormatter(axs[i].xaxis.get_major_locator()))
#axs[i].grid(True)
axs[i].set_ylim(bottom=25, top=32)
axs[i].set_ylabel('SST')
plt.show()
I got an output like the following:
I would like to change the xlabels as 2016,2017,2018,2019 etc.
The data can be found in the folder - https://drive.google.com/drive/folders/1bETa7PjWKIUNS13xg3RgIMa5L7bpYn5W?usp=sharing
I love NumPy as much as the next person but this is a good use case for pandas. Pandas has the advantage of being able to label rows with more meaningful things than just positional index. For example, you can use dates. This is very convenient.
First, load your data:
import pandas as pd
import glob
# Collect every yearly file next to the script. glob returns matches in
# arbitrary order, so sort them to make the load order reproducible.
fnames = sorted(glob.glob('./sst15n90e_dy_*.ascii'))
if not fnames:
    # pd.concat([]) would otherwise fail with "No objects to concatenate",
    # which hides the real problem (wrong working directory / no data files).
    raise FileNotFoundError("no files match './sst15n90e_dy_*.ascii'")

# skiprows=4 drops the leading header lines so the next line supplies the
# column names. The separator is a raw string: '\s' in a normal literal is an
# invalid escape sequence that newer Python versions warn about.
dfs = [pd.read_csv(fname, skiprows=4, sep=r'\s+') for fname in fnames]
df = pd.concat(dfs, axis=0, ignore_index=True)
Now do df.head() and you'll see this:
Let's convert that date to a 'datetime' object, and use it as the index instead of the default row numbers. We'll also deal with those -9.99 values.
import numpy as np
# Parse the YYYYMMDD column into datetimes and use it as a sorted index.
df['ds'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d')
df = df.set_index('ds').sort_index()
# -9.99 marks missing data: keep every other value, put NaN where it occurs.
df['SST'] = df['SST'].where(df['SST'] != -9.99, np.nan)
Now you have a dataset you can do all sorts of magic with, like df.resample('Y')['SST'].sum() shows you the annual sum of SST.
Anyway, now we can make plots in various ways. You can plot DataFrames directly, eg check out df.groupby(df.index.year)['SST'].plot(). Or you can use seaborn (check out the gallery!), which understands DataFrames. Or you can construct a plot with matplotlib in the usual way. For instance:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
# One panel per calendar year on a 3x2 grid with a shared y axis.
fig, axs = plt.subplots(nrows=3, ncols=2, figsize=(12, 8), sharey=True)
yearly = df.groupby(df.index.year)
# zip stops at the shorter of (panels, years).
for panel, (yr, chunk) in zip(axs.flat, yearly):
    panel.plot(chunk['SST'])
    panel.set_title(yr)
    panel.grid(c='k', alpha=0.15)
    # Abbreviated month names as tick labels; one formatter instance per axis.
    panel.xaxis.set_major_formatter(DateFormatter("%b"))
plt.tight_layout()
This is close to what you wanted, but with a more useful data structure and quite a bit less code:
I did some modifications and got the results as desired:
from pickletools import float8
import os
import numpy as np
import sys
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import set_epoch
from datetime import datetime
# for files in os.listdir('/home/swadhin/project/sst/daily'):
# path = (files)
# print(path)
# arrays = [np.asarray(list(map(str, line.split()))) for line in open(files)]
# Plot daily SST for six years, one subplot per year, with real dates on the
# x axis so that every panel is labelled with its own year.

DATA_DIR = '/home/swadhin/project/sst/daily'
FILE_TEMPLATE = 'sst15n90e_dy_{year}.ascii'
N_HEADER_ROWS = 5        # rows 0-4 of every file are header lines
FILL_VALUE = -9.99       # missing-data marker in the SST column
# Panels are drawn in this order (left to right, top to bottom).
YEARS = (2008, 2014, 2015, 2016, 2017, 2018)
# Raw row indices (counted from the top of the file, headers included) that
# are dropped in addition to the header rows.
EXTRA_SKIP_ROWS = {2008: (215, 216, 217)}


def load_year(year):
    """Return ``(dates, sst)`` for one yearly file.

    ``dates`` is a list of ``datetime`` objects parsed from field 0
    (``YYYYMMDD``); ``sst`` is a masked float64 array built from field 2
    with ``FILL_VALUE`` masked out.

    Raises ``OSError`` if the file cannot be opened and ``ValueError`` if a
    kept row holds a malformed date or a non-numeric SST token.
    """
    path = os.path.join(DATA_DIR, FILE_TEMPLATE.format(year=year))
    # The context manager closes the file; a bare open() in a comprehension
    # leaves that to the garbage collector.
    with open(path) as fh:
        rows = [line.split() for line in fh]
    # Filter plain lists instead of calling np.delete on them: header and data
    # rows have different token counts, and current NumPy refuses to build an
    # array from such ragged input.
    skip = set(range(N_HEADER_ROWS)) | set(EXTRA_SKIP_ROWS.get(year, ()))
    rows = [row for idx, row in enumerate(rows) if idx not in skip]
    dates = [datetime.strptime(row[0], '%Y%m%d') for row in rows]
    values = np.array([row[2] for row in rows], dtype=np.float64)
    return dates, np.ma.masked_equal(values, FILL_VALUE)


##########Plotting
fig, axs = plt.subplots(3, 2, figsize=(12, 8), constrained_layout=True)
for ax, year in zip(axs.ravel(), YEARS):
    # Dates stay in a per-year list; stacking them with np.asarray fails when
    # the years differ in length (leap years, dropped rows).
    dates, sst = load_year(year)
    # plot() handles datetime x-values itself; plot_date is deprecated.
    ax.plot(dates, sst, '-')
    locator = mdates.MonthLocator()
    ax.xaxis.set_major_locator(locator)
    ax.xaxis.set_minor_locator(mdates.MonthLocator())
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(locator))
    ax.grid(True)
    ax.set_ylim(bottom=25, top=32)
    ax.set_ylabel('SST')
plt.show()
And it solved the issue.
I am converting some code from MATLAB to Python, and I have encountered an issue I can't resolve. When iterating over the for loop in this section of code, the loop produces repeated values that are also incorrect. I believe this has to do with my definition of "x" and "z", but I am not quite sure. Here is my Python script; the matrices D2A1 and D2A2 contain the repeated blocks of incorrect values.
import sys
import numpy as np
import scipy as sp
import scipy.special as scl
import numpy.matlib as mat
###
#np.set_printoptions(threshold = sys.maxsize)
##
### Constants and parameters
w = np.array([0.09, 0.089])
a = np.array([0, 3])
coup = np.array([w[0], 0]) / 10
dE12 = -2 * w[0]
gs = np.array([0, 0])
ws = w ** 2
alpha = a[0] * ws[0] / a[1] / ws[1]
dEp = (dE12 + a[0] ** 2 * ws[0] / 2 + a[1] ** 2 * ws[1] / 2) / a[1] / ws[1]
ac = np.zeros(2)
ac[0] = alpha * dEp * ws[1] / (ws[0] + alpha ** 2 * ws[1])
ac[1] = dEp - alpha * ac[0]
iS = 0  # starting state
x0c, z0c = gs[0], gs[1]
Mx = Mz = 128 * 2
N = 2
dt = 0.05
# Grid lengths
Lx = Lz = 10
LxT, LzT = Lx * 2, Lz * 2
# x0 and z0 are 1 x M row vectors holding M evenly spaced points on [-L, L]
x0 = np.linspace(-Lx, Lx, Mx)[np.newaxis, :]
z0 = np.linspace(-Lz, Lz, Mz)[np.newaxis, :]
# Mx x Mz operator grids: x varies down the rows, z across the columns
x0op = np.tile(x0, (Mz, 1)).T
z0op = np.tile(z0, (Mx, 1))
## Loop over the grid: build and diagonalise the 2x2 diabatic matrix per point
VDI = np.zeros((2, 2), dtype='complex')
# Every result array needs its own buffer. `D2A2 = D2A1` does not copy (unlike
# MATLAB); it binds a second name to the same ndarray, so each write to D2A2
# also overwrote D2A1 and both ended up holding the second eigenvector.
D2A1 = np.zeros((2, Mx * Mz), dtype='complex')
D2A2 = np.zeros_like(D2A1)
V1 = np.zeros_like(D2A1)
V2 = np.zeros_like(D2A1)
VP1 = np.zeros_like(D2A1)
VP2 = np.zeros_like(D2A1)
for ig in range(Mz):
    for jg in range(Mx):
        z = z0[0, ig]
        x = x0[0, jg]
        ### Diabatic matrix ###
        VDI[0, 0] = (w[1] * z) ** 2 / 2 + (w[0] * x) ** 2 / 2
        # NOTE(review): the MATLAB reference uses w(2) for the x term of this
        # element; w[0] is kept as in the Python original - confirm which one
        # is intended.
        VDI[1, 1] = (w[1] * (z - a[1])) ** 2 / 2 + (w[0] * (x - a[0])) ** 2 / 2 + dE12
        VDI[0, 1] = coup[1] * (z + ac[1]) + coup[0] * (x + ac[0])
        VDI[1, 0] = VDI[0, 1]
        ### Adiabatization ###
        # eigh returns the eigenvalues in ascending order with the matching
        # eigenvectors as columns of U, i.e. what MATLAB's eig + sort +
        # U(:,Ind) produce. The sign/phase of each eigenvector is arbitrary
        # and may differ from MATLAB's.
        VDt, U = np.linalg.eigh(VDI)
        Ud = U.conj().T  # MATLAB's U' is the conjugate transpose, not U.T
        # exp() of the eigenvalues on a diagonal equals expm() of the diagonal
        # matrix and avoids the scipy.linalg dependency.
        UUdVP = U @ np.diag(np.exp(-1j * dt * VDt)) @ Ud
        V = U @ np.diag(VDt) @ Ud
        # 0-based flat index; MATLAB's jg + (ig-1)*Mx is the 1-based formula
        # and would yield negative (wrapping) indices here for ig == 0.
        ixz = jg + ig * Mx
        D2A1[:, ixz] = np.conj(U[:, 0])
        D2A2[:, ixz] = np.conj(U[:, 1])
print(D2A1)
Below is the MATLAB loop I am trying to recreate.
% Reference loop: for every grid point (x0(jg), z0(ig)) build the 2x2 diabatic
% matrix VDI, diagonalise it, and store the conjugated eigenvectors column by
% column in D2A1 (first eigenvector) and D2A2 (second eigenvector).
VDI=zeros(2,2);
% MATLAB assignment copies values, so each of these is an independent
% 2 x (Mx*Mz) zero matrix.
D2A1=zeros(2,Mx*Mz); D2A2=D2A1; V1=D2A1; V2=V1; VP1=V1; VP2=V1;
for ig=1:Mz,
for jg=1:Mx,
z = z0(ig); x = x0(jg);
% diabatic matrix
VDI(1,1) = (w(2)*z)^2/2+(w(1)*x)^2/2;
% NOTE(review): the x term here uses w(2), whereas VDI(1,1) pairs x with w(1);
% possibly a typo - confirm.
VDI(2,2) = (w(2)*(z-a(2)))^2/2+(w(2)*(x-a(1)))^2/2+dE12;
VDI(1,2) = coup(2)*(z+ac(2))+coup(1)*(x+ac(1)); VDI(2,1)=VDI(1,2);
% adiabatization
[U,VDt]=eig(VDI) ;
% Sort the eigenvalues in ascending order and reorder the eigenvector columns to match.
[VDt Ind]=sort(diag(VDt)); U=U(:,Ind);
% U' is the conjugate transpose.
UUdVP=U*diag(exp(-1i*dt*VDt))*U';
V=U*diag(VDt)*U';
% 1-based flat index of the grid point (jg runs fastest).
ixz = jg + (ig-1)*Mx;
D2A1(:,ixz) = conj(U(:,1)); D2A2(:,ixz) = conj(U(:,2));
end; end;
Any help would be greatly appreciated. Thanks!
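Fixed. The error was in the definition of the matrices to be generated. From what I gather, in Python you must define each array explicitly (an assignment such as D2A2 = D2A1 only binds a second name to the same NumPy array, so writing to one also changes the other), while in MATLAB you can set matrix equivalences, which copy the values, and run them through a for-loop.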
I want to cut a .wav file into multiple segments of the same length.
I found this code: https://gist.github.com/kylemcdonald/c8e62ef8cb9515d64df4
But it splits the file into parts based on onset detection with librosa. I assume that the answer to my question is simple, but I would appreciate any help.
This is the code I used with Python 3.7.6 on Ubuntu (in conda):
import matplotlib
import matplotlib.pyplot as plt # For displaying the output
import librosa
import numpy as np # For some mathematical operations
from glob import glob # To grab files
import os
# Directory that receives the exported feature tables
save_dir = './cut_4s'

### Load the audio file
data_dir = './'  # Search path; here the directory the script is started from
# Sorted so that "the first file" is the same file on every run.
audio_files = sorted(glob(os.path.join(data_dir, '*.wav')))
found = len(audio_files)
print("Audiofiles found: " + str(found))
if not audio_files:
    # audio_files[0] below would otherwise fail with a bare IndexError.
    raise SystemExit("No .wav files found in " + data_dir)
input("Press Enter to continue...")

# Only the first file found is processed.
y, sr = librosa.load(audio_files[0])
length = librosa.get_duration(y=y, sr=sr)  # Length of the file in seconds
time = np.arange(0, len(y)) / sr           # Time stamp of every sample
print(str(length))

# Plot audio over time
fig, ax = plt.subplots()
ax.plot(time, y)
ax.set(xlabel='Time (s)', ylabel='Sound Amplitude')
plt.show()

# Onset detection on the dB-scaled constant-Q magnitude spectrum
C = np.abs(librosa.cqt(y=y, sr=sr))
o_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
def prepare(y, sr=22050):
    """Return *y* as a mono signal of exactly *sr* samples, normalised.

    The signal is down-mixed with ``librosa.to_mono``, padded or trimmed to
    ``sr`` samples (one second of audio at sample rate ``sr``) and scaled
    with ``librosa.util.normalize``.
    """
    y = librosa.to_mono(y)
    # `size` is passed by keyword: librosa >= 0.10 rejects it positionally,
    # and older releases accept the keyword as well.
    y = librosa.util.fix_length(y, size=sr)  # 1 second of audio
    y = librosa.util.normalize(y)
    return y
def get_fingerprint(y, sr=22050):
    """Return the flattened constant-Q transform of the prepared signal.

    *y* is first passed through ``prepare`` (mono, ``sr`` samples,
    normalised); its CQT with a hop length of 2048 samples is then flattened
    in column-major ('F') order into a 1-D vector.
    """
    y = prepare(y, sr)
    # `y` is passed by keyword: librosa >= 0.10 rejects it positionally.
    cqt = librosa.cqt(y=y, sr=sr, hop_length=2048)
    return cqt.flatten('F')
def normalize(x):
    """Rescale *x* in place to the range [0, 1] along axis 0 and return it.

    The per-column minimum is subtracted and the result is divided by the
    per-column maximum. Columns whose values are all equal become 0 instead
    of NaN (0/0), and an empty array is returned unchanged.

    *x* must be a floating-point ndarray: the in-place division cannot store
    its result in an integer array and raises ``TypeError`` there.
    """
    if x.size == 0:
        # min()/max() raise ValueError on zero-size input.
        return x
    x -= x.min(axis=0)
    span = x.max(axis=0)
    # After the shift a constant column is all zeros; divide it by 1, not 0.
    x /= np.where(span == 0, 1, span)
    return x
def basename(file):
    """Return the final path component of *file* without its last extension.

    Only the last suffix is removed, so ``'clip.tar.gz'`` yields
    ``'clip.tar'``.
    """
    stem, _ext = os.path.splitext(os.path.basename(file))
    return stem
# librosa.output.write_wav was removed in librosa 0.8; it delegated to
# scipy.io.wavfile.write, which is called directly here.
from scipy.io import wavfile

vectors = []
words = []
filenames = []

# Segment boundaries: every detected onset plus the end of the signal.
# np.concatenate takes ONE sequence of arrays; passing len(y) as the second
# positional argument made it the `axis` and raised an error.
onset_samples = librosa.frames_to_samples(onset_frames)
onset_samples = np.concatenate([onset_samples, [len(y)]])
starts = onset_samples[0:-1]
stops = onset_samples[1:]

samples_folder = os.path.join(data_dir, 'samples')
# exist_ok replaces the bare `except: pass`, which also hid real failures
# such as missing permissions.
os.makedirs(samples_folder, exist_ok=True)
# The tables written at the end go to save_dir, which nothing created before.
os.makedirs(save_dir, exist_ok=True)

for i, (start, stop) in enumerate(zip(starts, stops)):
    audio = y[start:stop]
    filename = os.path.join(samples_folder, str(i) + '.wav')
    wavfile.write(filename, sr, audio)
    vector = get_fingerprint(audio, sr=sr)
    word = basename(filename)
    vectors.append(vector)
    words.append(word)
    filenames.append(filename)

np.savetxt(os.path.join(save_dir, 'vectors'), vectors, fmt='%.5f', delimiter='\t')
np.savetxt(os.path.join(save_dir, 'words'), words, fmt='%s')
np.savetxt(os.path.join(save_dir, 'filenames.txt'), filenames, fmt='%s')
I am trying to apply a Python script to all the files in a directory, but it gives me an error:
test_image = cv2.imread(sys.argv[1],0)
IndexError: list index out of range
I don't know what to change. I tried a few things, but they did not help, so if someone can help with this, that would be great. I am using Stack Overflow for the first time, just to see how it works.
import sys
import cv2
import os
import numpy as np
from utils import pointsInsideCircle, compare, zigzag
from math import pi as PI
# Python 2 script (print statements, dict.iteritems): slides a W x W window
# over a grayscale image, derives a feature vector from the DCT of every
# block, compares all pairs of blocks with compare() and paints the matching
# pairs into output.jpg.
# NOTE(review): indentation was lost in this listing; loop nesting is not
# marked by leading whitespace.
#
# NOTE(review): filepath is built here but never used in the visible code -
# only the single file named in sys.argv[1] is processed.
filepath = os.path.join("/Users/ssm/Desktop/X/1/Original Images", "*.tif")
W = 8 #block size for comparision
Dsim = 0.1 #threshold for symmetry
Nd = 25 #nearest block
quadrants_points = pointsInsideCircle(W/4) #(i,j) position of blocks which are partially/completely inside circle of radius W/2
zigzag_points = zigzag(W/2)
# sys.argv[1] raises IndexError when the script is started without a file
# name argument. Flag 0 loads the image as grayscale.
test_image = cv2.imread(sys.argv[1],0)
height,width = test_image.shape[:2]
#print (height,width)
vectors_list = []
# Visit every top-left corner (j = row, i = column) at which a W x W block
# still fits inside the image: width-W+1 blocks per row.
for j in range(0,height-W+1):
for i in range(0,width-W+1):
block = test_image[j:j+W,i:i+W]
dct_block = cv2.dct(np.float32(block))
# Collect the DCT coefficients at the positions listed in zigzag_points into
# four lists (one per entry of zigzag_points).
feature_block = [[],[],[],[]]
for index,coeff_list in enumerate(zigzag_points):
for coeff in coeff_list:
feature_block[index].append(dct_block[coeff[0],coeff[1]])
feature_block_np = np.array(feature_block)
# One feature per entry of quadrants_points: the sum of the selected
# feature_block_np values divided by PI.
feature_vector = []
for quadrant,points in quadrants_points.iteritems():
summ = 0
for point in points:
summ = summ + feature_block_np[point[0],point[1]]
feature_vector.append(summ/PI)
vectors_list.append(np.array(feature_vector))
# The row-wise sorted copy is only written to data.json; the pairwise
# comparison below uses the unsorted vectors_list.
vectors_list2 = cv2.sort(np.array(vectors_list),cv2.SORT_EVERY_ROW)
print "vectors calculated"
import json
with open('data.json', 'w') as outfile:
json.dump(vectors_list2.tolist(), outfile)
# The for loops below rebind i and j, so the explicit assignments to i and j
# have no effect.
i=0
blocks = []
# Compare every pair (i, j) with i < j and remember the pairs accepted by compare().
for i in range(0,len(vectors_list)):
if i%width == 0:
print i/width
# NOTE(review): blocks were enumerated with width-W+1 entries per image row,
# but the position is recovered by dividing by width - looks inconsistent,
# confirm.
posA = [i/width,i%width]
j = i+1
for j in range(i+1,len(vectors_list)):
posB = [j/width,j%width]
if compare(vectors_list[i],vectors_list[j],posA,posB,Dsim,Nd):
print (posA,posB)
blocks.append([posA,posB])
# Reload the image in colour (flag 1) and fill the two blocks of every
# matching pair with [0,0,255] and [0,255,0] respectively.
output_image = cv2.imread(sys.argv[1],1)
for block in blocks:
x1 = block[0][0]
x1_8 = block[0][0]+W
y1 = block[0][1]
y1_8 = block[0][1]+W
output_image[x1:x1_8,y1:y1_8] = [0,0,255]
x2 = block[1][0]
x2_8 = block[1][0]+W
y2 = block[1][1]
y2_8 = block[1][1]+W
output_image[x2:x2_8,y2:y2_8]=[0,255,0]
cv2.imwrite("output.jpg",output_image)
print "feature vectors extracted"
test_image = cv2.imread(sys.argv[1],0)
is checking the list provided by the commandline for a file name. For example if you invoked this script with:
$python myprog.py afilename.xxx
sys.argv would be ['myprog.py', 'afilename.xxx'], and this imread line would load an image from afilename.xxx.
If you don't provide that filename, sys.argv will only have the script name, and sys.argv[1] will raise this error.
I have some issues while reading txt files. What I have to do is read the files (about 360) and make a plot. Everything works except when there is a special character in my file such as: "". When my reading function finds that character, it crashes. Is there any way to skip it? My code:
import os
import matplotlib.pyplot as plt
import numpy as np
# Reads one tab-separated text file per i in 10, 20, ..., 350, skips lines
# that start with '$', and stores the first two fields of the remaining lines
# as rows j and j+1 of A.
# NOTE(review): indentation was lost in this listing; loop nesting is not
# marked by leading whitespace.
#
# This initial i is immediately rebound by the for loop below.
i = 10
j = 0
X = []
Y = []
# Z and k are not used in the visible code.
Z = []
k = 0
A = np.zeros([360,719])
for i in range(10,360,10):
X = []
Y = []
# Two-digit values of i get a leading zero in the file name.
# NOTE(review): `dir` is not assigned in the visible code; unless it is
# defined elsewhere it refers to the builtin function and the concatenation
# fails.
if len(str(i)) == 2:
data = open(dir + '\\150317_ScPONd_0%s_radio.txt'%i, 'r')
else:
data = open(dir + '\\150317_ScPONd_%s_radio.txt'%i, 'r')
z = data.readlines()
data.close()
for line in z:
if not line.startswith('$'):
data_2 = line.split('\t')
X.append(data_2[0])
Y.append(data_2[1])
# X and Y hold string tokens; assigning them to rows of the float array A
# requires exactly 719 numeric tokens - presumably where the reported crash
# occurs, confirm.
# NOTE(review): j is never advanced in the visible code, so every file would
# overwrite rows 0 and 1.
A[j,:] = X
A[(j+1),:] = Y
And here is how my file looks like:
Is there any way to skip those "$" lines? Sorry for that picture, I have no idea how to attach it better.
Thanks to @user1753919 I have found an answer. If someone is still interested in this, here is the working code:
# For every i in 10, 20, ..., 350: load the matching file, store its first
# two columns as rows j and j+1 of A, and add the curve to the current plot.
# A, j and dir are expected to be defined before this loop.
for i in range(10, 360, 10):
    # %03d zero-pads i to three digits, replacing the two-branch test on
    # len(str(i)).
    fname = dir + '\\150317_ScPONd_%03d_radio.txt' % i
    # skip_header=12 drops the leading non-data lines of each file.
    data = np.genfromtxt(fname, skip_header=12)
    # Column slices replace the row-by-row append loop.
    X = data[:, 0]
    Y = data[:, 1]
    A[j, :] = X
    A[j + 1, :] = Y
    plt.plot(A[j, :], A[j + 1, :], label='{} K'.format(i))
    # `plt.hold` was dropped: it was only referenced, never called, and the
    # attribute no longer exists since Matplotlib 3.0 (successive plot() calls
    # draw on the same axes by default).
    j = j + 2
genfromtxt is overkill.
np.loadtxt(file, comments='$')