Image saving with plt needs more and more time - python

I wrote a simple program that converts .wav files to spectrograms and saves each one as a PNG.
Here you go:
import numpy as np
import matplotlib.pyplot as plt
import scipy.io.wavfile as wavfile
import os
import time as t
# Root folder that holds one sub-folder of .wav files per category.
DATAPATH = 'dataset'
CATEGORIES = ['zero', 'one', 'two', 'three', 'four',
              'five', 'six', 'seven', 'eight', 'nine']

for category in CATEGORIES:
    source_dir = DATAPATH + '/' + category + '/'
    wav_names = os.listdir(source_dir)  # every file name in this category
    print(category)

    # Only the first 100 recordings of each category are converted.
    for index, wav_name in enumerate(wav_names[:100]):
        started = t.time()

        sample_rate, samples = wavfile.read(source_dir + wav_name)
        plt.specgram(samples, Fs=sample_rate)
        # Stretch the axes over the whole canvas and hide them so the PNG
        # contains nothing but the spectrogram.
        plt.subplots_adjust(left=0, right=1, bottom=0, top=1)
        plt.axis('off')
        plt.savefig('pics/' + category + '/' + str(index) + '.png')

        # Report how long this single image took.
        elapsed = t.time() - started
        print(index, str(elapsed) + 's')
        # NOTE(review): nothing resets the current figure between
        # iterations, so every specgram is drawn into the same figure.
The problem is that the time per image keeps growing (only by a few milliseconds each time), but by the thousandth picture it takes about 10 seconds per image. That's why I measure the time and print it out. Any solutions?

FTR, the solution seems to be clearing the figure after every iteration using plt.clf():
for categorie in CATEGORIES:
    # ...
    for file in filenames[:100]:
        # ...
        # plt.savefig(...)
        # Clear the current figure once the image has been written, so the
        # next specgram is not drawn on top of all the previous ones.
        plt.clf()
        # ...

Related

How do you save the images that are embedded in the placeholders of a pptx file using python?

I can extract all the images from the shapes of a slide, as shown in the code below. The problem comes when an image is embedded in a placeholder. I have no idea how to get the images from those placeholders, and the documentation to me isn't clear.
Note also that I have a minimum width limit for the kind of images I want, hence I have "shape.width > 250000" in the code
import os
import pptx
from pptx.enum.shapes import MSO_SHAPE_TYPE
ppFileName = "Test.pptx"
# abspath() keeps ``directory`` non-empty even when the script is started
# with a bare relative file name.
directory = os.path.dirname(os.path.abspath(__file__))
imageDirectory = os.path.join(directory, "Images " + ppFileName.replace(".pptx", ""))
if not os.path.exists(imageDirectory):
    os.makedirs(imageDirectory)

# Shapes whose ``width`` is not above this value are ignored.
MIN_IMAGE_WIDTH = 250000


def saveImage(shape, slideNumber, imageNumber):
    """Write the image carried by *shape* into ``imageDirectory``.

    The file is named ``Slide <slideNumber> Image <imageNumber>.<ext>``,
    where ``<ext>`` comes from ``shape.image.ext``.

    The caller owns the numbering: integers are passed by value, so
    incrementing ``imageNumber`` in here would never reach the caller.
    """
    image = shape.image
    imageFileName = f"Slide {slideNumber} Image {imageNumber}.{image.ext}"
    imagePath = os.path.join(imageDirectory, imageFileName)
    with open(imagePath, 'wb') as file:
        file.write(image.blob)


prs = pptx.Presentation(os.path.join(directory, ppFileName))
slides = prs.slides
for slideNumber, slide in enumerate(slides):
    imageNumber = 0  # restarts on every slide
    for shape in slide.shapes:
        if shape.shape_type == MSO_SHAPE_TYPE.PICTURE and shape.width > MIN_IMAGE_WIDTH:
            saveImage(shape, slideNumber, imageNumber)
            imageNumber += 1
        elif shape.shape_type == MSO_SHAPE_TYPE.GROUP and shape.width > MIN_IMAGE_WIDTH:
            for s in shape.shapes:
                # A group may also hold text boxes, connectors, ...; only
                # picture members have an ``image`` to save.
                if s.shape_type == MSO_SHAPE_TYPE.PICTURE:
                    saveImage(s, slideNumber, imageNumber)
                    imageNumber += 1
Alright, I figured it out.
Just added these three lines of code:
for shape in slide.placeholders:
    # Only placeholders that carry an ``image`` attribute and are wide
    # enough are exported.
    if hasattr(shape, "image") and shape.width > 250000:
        saveImage(shape, slideNumber, imageNumber)
        # Advance the counter here: without it every placeholder image on
        # the slide is written to the same file name.
        imageNumber += 1

Cutting .wav file into segments with the same length

I want to cut a .wav file into multiple segments of the same length.
I found this code: https://gist.github.com/kylemcdonald/c8e62ef8cb9515d64df4
But it splits the file into parts based on onset detection with librosa. I assume that the answer to my question is simple, but I would appreciate any help.
This is the code I used with Python 3.7.6 on Ubuntu (in conda):
import matplotlib
import matplotlib.pyplot as plt # For displaying the output
import librosa
import numpy as np # For some mathematical operations
from glob import glob # To grab files
import os
# Directory for the cut files
save_dir = './cut_4s'

### Load the audio file
data_dir = './'  # Path to search; here the directory the script is run from
# Grab the .wav files in data_dir; sorted so "the first file" is the same
# file on every run (glob itself does not promise an order).
audio_files = sorted(glob(os.path.join(data_dir, '*.wav')))
found = len(audio_files)
print("Audiofiles found: " + str(found))
if not audio_files:
    # Without this guard, audio_files[0] below fails with a bare IndexError.
    raise SystemExit("No .wav files found in " + data_dir)
input("Press Enter to continue...")

y, sr = librosa.load(audio_files[0])
length = librosa.get_duration(y=y, sr=sr)  # Length of the file
time = np.arange(0, len(y)) / sr  # Time axis: one entry per sample
print(str(length))

# Plot audio over time
fig, ax = plt.subplots()
ax.plot(time, y)
ax.set(xlabel='Time (s)', ylabel='Sound Amplitude')
plt.show()

# Onset detection on the dB-scaled constant-Q magnitude spectrum
C = np.abs(librosa.cqt(y=y, sr=sr))
o_env = librosa.onset.onset_strength(sr=sr, S=librosa.amplitude_to_db(C, ref=np.max))
#o_env = librosa.onset.onset_strength(y, sr=sr, feature=librosa.cqt)
onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr)
def prepare(y, sr=22050):
    """Return *y* as a mono, fixed-length, normalized signal.

    The signal is passed through ``librosa.to_mono``, then padded or
    trimmed to exactly ``sr`` samples (one second of audio at sample rate
    ``sr``), and finally scaled by ``librosa.util.normalize``.
    """
    y = librosa.to_mono(y)
    # ``size`` is passed by keyword: newer librosa releases no longer
    # accept it positionally, while older ones accept the keyword too.
    y = librosa.util.fix_length(y, size=sr)  # 1 second of audio
    y = librosa.util.normalize(y)
    return y
def get_fingerprint(y, sr=22050):
    """Return the constant-Q transform of *y* as one flat vector.

    *y* is first run through ``prepare``; the resulting CQT matrix is
    flattened in column-major ('F') order.
    """
    prepared = prepare(y, sr)
    spectrum = librosa.cqt(prepared, sr=sr, hop_length=2048)
    return spectrum.flatten('F')
def normalize(x):
    """Rescale *x* along axis 0 so that each column spans ``[0, 1]``.

    The minimum along axis 0 is subtracted, then the result is divided by
    its maximum along axis 0.

    The input is never modified: the work is done on a copy, and
    non-floating input is converted to float first (in-place true division
    is not possible on integer arrays). Columns that are constant, whose
    range is therefore zero, come back as all zeros instead of NaN.

    Returns:
        A new ``numpy.ndarray`` with the same shape as *x*.
    """
    x = np.array(x, copy=True)
    if not np.issubdtype(x.dtype, np.floating):
        x = x.astype(float)
    x -= x.min(axis=0)
    peak = x.max(axis=0)
    # Dividing by 1 where the range is zero leaves those columns at 0.
    x /= np.where(peak == 0, 1, peak)
    return x
def basename(file):
    """Return the last component of the path *file* without its extension."""
    stem, _extension = os.path.splitext(os.path.basename(file))
    return stem
vectors = []
words = []
filenames = []

# Segment boundaries: every detected onset plus the end of the signal, so
# the last segment runs up to the final sample.
onset_samples = librosa.frames_to_samples(onset_frames)
# np.append, not np.concatenate: concatenate expects a sequence of arrays
# and would treat len(y) as the axis argument.
onset_samples = np.append(onset_samples, len(y))
starts = onset_samples[0:-1]
stops = onset_samples[1:]

samples_folder = os.path.join(data_dir, 'samples')
# exist_ok replaces the former bare "except: pass", which also hid real
# failures such as missing permissions.
os.makedirs(samples_folder, exist_ok=True)
# np.savetxt below does not create directories.
os.makedirs(save_dir, exist_ok=True)

for i, (start, stop) in enumerate(zip(starts, stops)):
    audio = y[start:stop]
    filename = os.path.join(samples_folder, str(i) + '.wav')
    # NOTE(review): ``librosa.output`` is not available in newer librosa
    # releases - confirm against the installed version.
    librosa.output.write_wav(filename, audio, sr)
    vector = get_fingerprint(audio, sr=sr)
    word = basename(filename)
    vectors.append(vector)
    words.append(word)
    filenames.append(filename)

np.savetxt(os.path.join(save_dir, 'vectors'), vectors, fmt='%.5f', delimiter='\t')
np.savetxt(os.path.join(save_dir, 'words'), words, fmt='%s')
np.savetxt(os.path.join(save_dir, 'filenames.txt'), filenames, fmt='%s')

Use flask to get the URL from the user and compress the images

I want to get the url from the user for crawling and download the images. The downloaded images then need to be compressed. How can I create the UI using flask? Is it possible? I tried it many times but the application is never terminating after crawling the website due to which I am not able to compress the images. I don't have much idea about flask.
import urllib.request
from bs4 import BeautifulSoup
from PIL import Image
from io import BytesIO
def make_soup(url):
    """Download *url* and return its content parsed as HTML.

    The response is parsed with BeautifulSoup's ``"html.parser"`` backend.
    It is opened in a ``with`` block so the connection is closed as soon
    as parsing is done, even if parsing raises.
    """
    with urllib.request.urlopen(url) as thepage:
        soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata
from urllib.parse import urljoin

# Page that is crawled; image sources are resolved relative to it.
BASE_URL = "https://www.standford.com/"

soup = make_soup(BASE_URL)

# Collect every distinct image source in document order.
unique_srcs = []
for img in soup.findAll('img'):
    src = img.get('src')
    # <img> tags without a src attribute yield None and are skipped.
    if src and src not in unique_srcs:
        unique_srcs.append(src)

# Download each image as 1.png, 2.png, ...
for i, img_src in enumerate(unique_srcs, start=1):
    filename = str(i)
    # urljoin handles relative, root-relative and absolute sources alike;
    # plain string concatenation only worked for relative ones.
    image_url = urljoin(BASE_URL, img_src)
    # Fetch first, then open the file, so a failed download does not leave
    # an empty .png behind.
    with urllib.request.urlopen(image_url) as response:
        payload = response.read()
    with open(filename + '.png', 'wb') as imagefile:
        imagefile.write(payload)
    # response = urllib.request.urlopen("https://www.standford.com/" + img_src).read()
    #im = Image.open(BytesIO(response))
    #im.convert("RGB").save(filename + '.png')
For compression:
import os
import numpy as np
from skimage import io
from sklearn.cluster import MiniBatchKMeans
from matplotlib import pyplot as plt
import matplotlib.pyplot as mpimg
import cv2
algorithm = "full"  # NOTE(review): not referenced anywhere in the visible code

for f in os.listdir('.'):
    # Only PNG sources are compressed. Files this loop wrote on an earlier
    # run ('compressed_*') are skipped so they are not compressed again.
    if not f.endswith('.png') or f.startswith('compressed_'):
        continue

    image = io.imread(f, 0)
    rows = image.shape[0]
    cols = image.shape[1]
    # One row per pixel, one column per channel.
    pixels = image.reshape(image.shape[0] * image.shape[1], image.shape[2])

    # Quantize the colors: cluster all pixels into 128 centers and replace
    # every pixel by the center of its cluster.
    kmeans = MiniBatchKMeans(n_clusters=128, n_init=10, max_iter=200)
    kmeans.fit(pixels)
    clusters = np.asarray(kmeans.cluster_centers_, dtype=np.uint8)
    # 128 labels (0..127) fit into uint8.
    labels = np.asarray(kmeans.labels_, dtype=np.uint8)
    labels = labels.reshape(rows, cols)
    colored = clusters[labels]
    # np.save('codebook'+f+'.npy', clusters)
    io.imsave('compressed_' + f, colored)

    # Show original and compressed image side by side.
    img1 = mpimg.imread(f, 0)
    img2 = mpimg.imread('compressed_' + f, 0)
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 10))
    ax1.imshow(img1)
    ax1.set_title('Original image')
    ax2.imshow(img2)
    ax2.set_title('Compressed image')
    plt.show()

    # Histograms of both files as read by cv2.imread(..., 0).
    fig, ax = plt.subplots(2, 1)
    img = cv2.imread(f, 0)
    ax[0].hist(img.ravel(), 256, [0, 256])
    ax[0].set_title("Original image")
    img1 = cv2.imread('compressed_' + f, 0)
    ax[1].hist(img1.ravel(), 256, [0, 256])
    ax[1].set_title("Compressed image")
    plt.show()

    print('size of original image: ', int(os.stat(f).st_size / 1024), 'kB')
    print('size of compressed image:', int(os.stat('compressed_' + f).st_size / 1024), 'kB')
Of course, you can do what you want in flask.
You should just keep an eye on flask and scrapy.
All you need is a UI for the web crawler.

Plotting Date vs time from files

I have folders whose names are formatted as dates, ranging from
2018-08-21 to 2018-10-16
Inside each folder there are zip files containing time values on a linear scale (the time increases linearly).
For each day, which has a lot of .bz2 time-series files, I'm trying to plot the time values recorded on that date.
Right now I'm trying to do that:
# y values: the 'time' entries of data1; x values: timeStamps.
timearr = np.asarray(data1['time'])
x_values = np.asarray(timeStamps)
ax.plot(x_values, timearr)

# Axes decoration
ax.grid(True)
ax.set_ylabel('date vs time ')
ax.set_title('title')

# Rotate the x tick labels (dates) by 45 degrees, then tighten the layout.
fig.autofmt_xdate(rotation=45)
fig.tight_layout()
plt.show()
but I get an error message saying that the two dimensions don't match.
dateStamps are list[2018-08-21
2018-08-22
2018-08-23
2018-08-24
2018-08-25]
data1['time'] = list of epoch values.
Unfortunately, I don't know the detailed file structure, so I have to guess a little where the problem actually is.
Here is some code to generate some folders with generic bz2 files:
import bz2
import numpy as np
import datetime
import os
# Create five day-folders (YYYY-MM-DD), each holding one 'dat.bz2' file
# with 100 rows of "<HH:MM:SS>  <value>".
startDate = datetime.datetime(2000, 5, 2, 10, 15, 0, 0)
for day in range(5):
    theDate = startDate + datetime.timedelta(days=day)
    folder = theDate.strftime("%Y-%m-%d")
    # Re-running the generator must not fail on folders that already exist.
    if not os.path.isdir(folder):
        os.mkdir(folder)

    lines = []
    for k in range(100):
        # One sample every 137 seconds, starting at theDate.
        zzz = theDate + datetime.timedelta(seconds=137 * k)
        d = zzz.day
        m = zzz.minute
        value = .17 * d + .003 * m**2 - .001 * m
        lines.append("{}  {}\n".format(zzz.strftime("%H:%M:%S"), value))
    data = "".join(lines)

    # BZ2File takes bytes; the content is plain ASCII, so encoding works on
    # Python 2 and Python 3 alike. ``with`` closes the file even on error.
    with bz2.BZ2File(os.path.join(folder, 'dat.bz2'), 'w') as myZip:
        myZip.write(data.encode('ascii'))
I process those folders and files with:
import bz2
import numpy as np
import datetime
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
"""
SE posts I used
https://stackoverflow.com/questions/1574088/plotting-time-in-python-with-matplotlib
https://stackoverflow.com/questions/11264521/date-ticks-and-rotation-in-matplotlib
"""
def split_data( inData ):
    """Parse rows of ``HH:MM:SS value`` text into two parallel lists.

    Args:
        inData: The file content, either text or bytes (as returned by
            ``BZ2File.read()``). Rows are separated by newlines, and the
            two columns by any whitespace.

    Returns:
        ``(x, y)``: ``x`` is a list of ``datetime.datetime`` objects parsed
        with the format ``'%H:%M:%S'`` and ``y`` is a list of floats.
        Both lists are empty when *inData* holds no rows.

    Raises:
        ValueError: If a row does not have exactly two columns, or a
            column cannot be parsed.
    """
    # Parse the argument itself (not a same-named global) and accept the
    # raw bytes that BZ2File.read() returns.
    if isinstance( inData, bytes ):
        inData = inData.decode( 'ascii' )
    rowcol = [ row.split() for row in inData.strip().split('\n') if row.strip() ]
    if not rowcol:
        return [], []
    x = []
    y = []
    for stamp, value in rowcol:
        x.append( datetime.datetime.strptime( stamp, '%H:%M:%S' ) )
        y.append( float( value ) )
    return x, y
# Collect one (times, values) pair per folder that contains a .bz2 file,
# keyed by the folder name.
dataDict = dict()
for root, dirs, files in os.walk("."):
    for name in files:
        if name.split('.')[-1]=='bz2':
            base = os.path.basename( root )
            myPath = (os.path.join(root, name))
            # ``with`` closes the archive; decoding gives the parser text
            # on both Python 2 and Python 3.
            with bz2.BZ2File( myPath, 'r' ) as bz:
                data = bz.read().decode( 'ascii' )
            dataDict[ base ] = split_data( data )

myFmt = mdates.DateFormatter('%H:%M')
fig = plt.figure()
ax = fig.add_subplot( 1, 1, 1 )
# items() exists on Python 2 and 3 (iteritems() is Python 2 only); sorting
# by folder name gives a stable legend order.
for key, dt in sorted( dataDict.items() ):
    ax.plot( *dt , label=key )
ax.xaxis.set_major_formatter(myFmt)
for label in ax.get_xmajorticklabels():
    label.set_rotation(30)
ax.set_ylabel('data (arb. u.)')
ax.set_xlabel('time')
ax.legend( loc=0 )
plt.tight_layout()
plt.show()
Providing:
Hope I got it right.

I am trying to apply a python code to all the files in a directory but it gives me a error

I am trying to apply a Python script to all the files in a directory, but it gives me an error:
test_image = cv2.imread(sys.argv[1],0)
IndexError: list index out of range
I don't know what to change. I tried a few things, but they did not help, so if someone can help with this that would be great. I am also using Stack Overflow for the first time, just to see how it works.
import sys
import cv2
import os
import numpy as np
from utils import pointsInsideCircle, compare, zigzag
from math import pi as PI
# Block-matching script written in Python 2 syntax (print statements,
# dict.iteritems). It reads the image named by sys.argv[1], builds one
# feature vector per W x W block from the block's DCT, compares all pairs
# of blocks with compare(), and writes an image with the matched blocks
# painted to "output.jpg".
# NOTE(review): indentation was lost in this copy; the statements are kept
# verbatim and the comments describe the nesting the code appears to need.

# Glob-style pattern for the .tif files; not referenced again below.
filepath = os.path.join("/Users/ssm/Desktop/X/1/Original Images", "*.tif")
W = 8 #block size for comparison
Dsim = 0.1 #threshold for symmetry
Nd = 25 #nearest block
# NOTE(review): the comment below speaks of radius W/2 while the call
# passes W/4 - confirm which one is intended.
quadrants_points = pointsInsideCircle(W/4) #(i,j) position of blocks which are partially/completely inside circle of radius W/2
zigzag_points = zigzag(W/2)
# sys.argv[1] is the first command-line argument; starting the script
# without one raises "IndexError: list index out of range" here.
test_image = cv2.imread(sys.argv[1],0)
height,width = test_image.shape[:2]
#print (height,width)
# --- Feature extraction: one vector per W x W window position ---
vectors_list = []
for j in range(0,height-W+1):
for i in range(0,width-W+1):
block = test_image[j:j+W,i:i+W]
dct_block = cv2.dct(np.float32(block))
# Gather DCT coefficients into four lists, one per entry of zigzag_points.
feature_block = [[],[],[],[]]
for index,coeff_list in enumerate(zigzag_points):
for coeff in coeff_list:
feature_block[index].append(dct_block[coeff[0],coeff[1]])
feature_block_np = np.array(feature_block)
# One feature per entry of quadrants_points: the sum of the selected
# feature_block_np values divided by pi.
feature_vector = []
for quadrant,points in quadrants_points.iteritems():
summ = 0
for point in points:
summ = summ + feature_block_np[point[0],point[1]]
feature_vector.append(summ/PI)
vectors_list.append(np.array(feature_vector))
# Row-wise sorted copy; below it is only used for the JSON dump.
vectors_list2 = cv2.sort(np.array(vectors_list),cv2.SORT_EVERY_ROW)
print "vectors calculated"
import json
with open('data.json', 'w') as outfile:
json.dump(vectors_list2.tolist(), outfile)
# --- Pairwise comparison of every block with every later block ---
i=0
blocks = []
for i in range(0,len(vectors_list)):
# Progress output once every `width` vectors.
if i%width == 0:
print i/width
# NOTE(review): positions are derived from the linear index using `width`,
# but the loops above append width-W+1 vectors per image row - verify.
posA = [i/width,i%width]
j = i+1
for j in range(i+1,len(vectors_list)):
posB = [j/width,j%width]
if compare(vectors_list[i],vectors_list[j],posA,posB,Dsim,Nd):
print (posA,posB)
blocks.append([posA,posB])
# --- Output: re-read the image (flag 1) and paint each matched pair ---
output_image = cv2.imread(sys.argv[1],1)
for block in blocks:
# First block of the pair is filled with [0,0,255].
x1 = block[0][0]
x1_8 = block[0][0]+W
y1 = block[0][1]
y1_8 = block[0][1]+W
output_image[x1:x1_8,y1:y1_8] = [0,0,255]
# Second block of the pair is filled with [0,255,0].
x2 = block[1][0]
x2_8 = block[1][0]+W
y2 = block[1][1]
y2_8 = block[1][1]+W
output_image[x2:x2_8,y2:y2_8]=[0,255,0]
cv2.imwrite("output.jpg",output_image)
print "feature vectors extracted"
test_image = cv2.imread(sys.argv[1],0)
is checking the list provided by the commandline for a file name. For example if you invoked this script with:
$python myprog.py afilename.xxx
sys.argv would be ['myprog.py', 'afilename.xxx'], and this imread line would load an image from afilename.xxx.
If you don't provide that filename, sys.argv will only have the script name, and sys.argv[1] will raise this error.

Categories

Resources