python writing jpg files into a new html file - python

Please can someone help me?
I have been trying to write a bunch of jpg files into an HTML file that I create myself, just to display them, but I can't seem to get it working, and I ran into many errors before I got anywhere at all...
Can anyone please help, I have no experience in html from python.
Here's the code:
# Asker's attempt: download every image in img_urls and list them in index.html.
# NOTE(review): dest_dir is accepted but never used anywhere in this body.
def download_images(img_urls, dest_dir):
#html_file = open("index.html", 'rb')
html_file = open("index.html", 'w')
print("Retrieving...")
# "<verbatim>" is written as literal text ahead of the <html> tag.
html_file.write("""<verbatim>""")
html_file.write("""<html>""")
html_file.write("""<body>""")
# Pairs each URL with its position so the saved files are named img0.jpg, img1.jpg, ...
for url,i in zip(img_urls,range(len(img_urls))):
# Opens the URL, but image_opened is never read afterwards.
image_opened = urllib.request.urlopen(url)
# Downloads the image to the relative path img<i>.jpg.
urllib.request.urlretrieve(url, "img" + str(i) + ".jpg")
# .format() binds only to the last literal, which has no {} placeholder, so the
# result of this second download is discarded; img_tag is never written below.
img_tag = r'"""<img"""' + str(i) + r' src="/edu/python/exercises/img' + str(i) + r'"""">"""'.format(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
# Re-opens index.html in 'w' mode (which truncates it) while the first handle is still open.
html_file = open("index.html", 'w')
# urlretrieve() returns a (filename, headers) tuple, so a text-mode write() raises TypeError here.
html_file.write(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
#print('<img' + str(i) + ' src="/edu/python/exercises/img"' + str(i) + '>')
# The raw literal below evaluates to '"</body>' (note the stray leading double quote).
html_file.write(r""""</body>""""")
html_file.write("""</html>""")
html_file.close()

I will go through what you have so far and comment on it.
The first few bits look okay until
image_opened = urllib.request.urlopen(url)
This line opens a stream to the requested URL, but you never do anything with it, and you don't need it because you download the image using:
urllib.request.urlretrieve(url, "img" + str(i) + ".jpg")
You then create the html img line, which you have overcomplicated a bit. You are trying to produce a line that reads something like this:
<img src='img1.jpg' />
What you seem to be doing in:
img_tag = r'"""<img"""' + str(i) + r' src="/edu/python/exercises/img' + str(i) + r'"""">"""'.format(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
This starts to create the right string, but then you attempt to download the jpg again. You just need to create the string as follows (the file name must match the one you saved the image under):
img_tag = "<img src='img" + str(i) + ".jpg' />"
You then open the html output file again using:
html_file = open("index.html", 'w')
You don't need to do this as you still have the file open from when you opened it at the beginning of the method.
You then attempt to write the html string to the file using:
html_file.write(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
This is instead trying to download the jpg again and output the result into the html file. Instead you want to write the img_tag by:
html_file.write(img_tag)
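You write the end of the file and close it correctly, except that the first literal has too many quotes: it evaluates to "</body> with a stray leading double quote, so use a plain "</body>" instead.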
html_file.write(r""""</body>""""")
html_file.write("""</html>""")
html_file.close()
Once you fix this you should have a function that looks like:
import os
import urllib.request


def download_images(img_urls, dest_dir):
    """Download each image and build an ``index.html`` page that displays them.

    The i-th URL is saved as ``img<i>.jpg``. The images and ``index.html`` are
    written side by side, so the relative ``src`` attributes in the page
    resolve when it is opened from ``dest_dir``.

    Args:
        img_urls: Iterable of image URLs (any iterable, including generators).
        dest_dir: Directory that receives the images and ``index.html``. It is
            created when missing; an empty string (or ``None``) means the
            current working directory.

    Raises:
        ValueError: If a URL has no recognised scheme (e.g. a bare file name).
        urllib.error.URLError: If an image cannot be retrieved.
        OSError: If ``dest_dir`` or a file inside it cannot be created.
    """
    dest_dir = dest_dir or ""
    if dest_dir:
        # os.makedirs("") raises, so only create a directory that was named.
        os.makedirs(dest_dir, exist_ok=True)

    print("Retrieving...")
    # The with-block closes index.html even when a download fails midway.
    with open(os.path.join(dest_dir, "index.html"), "w") as html_file:
        html_file.write("<html>")
        html_file.write("<body>")
        for i, url in enumerate(img_urls):
            image_name = "img" + str(i) + ".jpg"
            # Downloads the image next to index.html.
            urllib.request.urlretrieve(url, os.path.join(dest_dir, image_name))
            html_file.write("<img src='" + image_name + "' />")
        html_file.write("</body>")
        html_file.write("</html>")
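You can then call it with a list of image URLs (full URLs, not bare file names, because urlretrieve rejects strings without a scheme), for example:
download_images(["https://example.com/a.jpg", "https://example.com/b.jpg"], "")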

Related

Save files to a new folder with python

This code takes sentences from a data frame and saves each one as an SSML file.
How can I get the sentences to be saved in a new folder?
# Wrap every sentence in starter/ender (presumably the SSML opening and closing
# markup - confirm) and write each result to its own text<num>.ssml file.
# NOTE(review): `max` shadows the built-in max(); `sentences`, `starter`, `ender`
# and `num` must already be defined before this loop runs.
max = len(sentences)
for i in range(0,max):
txt = sentences[i]
new_txt = starter + txt + ender
print(new_txt)
# num drives the output file name.
num = num + 1
# Relative path: the file is created in the current working directory.
with open("text" + str(num) + ".ssml", 'w+') as f:
f.writelines(new_txt)
Add this at the start:
import os
folder_name = 'my_folder'
os.makedirs(folder_name, exist_ok=True)
Then change:
with open("text" + str(num) + ".ssml", 'w+') as f:
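to (os.path.join picks the right path separator on every platform, instead of a hard-coded Windows backslash):
with open(os.path.join(folder_name, f'text{num}.ssml'), 'w+') as f: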

How to show python execution progress

I am trying to show the progress of a CSV download, but I have no idea how to do it using the code below:
# Stream every URL into its own CSV file, 256 bytes at a time.
# dia/mes/ano look like day/month/year strings defined elsewhere - confirm.
for url, filename in zip(urls, filenames):
    response = requests.get(url, stream=True, verify=False)
    date_stamp = dia.zfill(2) + '_' + mes.zfill(2) + '_' + ano
    target_path = r'C:\Users\Lucas\output\ ' + filename + ' - ' + date_stamp + '.csv'
    with open(target_path, 'wb') as csv_file:
        for piece in response.iter_content(chunk_size=256):
            csv_file.write(piece)
How could I make that? Thanks for helping

Repair a PDF file with unsupported filters

My code takes a list of PDF strings and combines them into PDFs by category. I am using PyPDF2 to combine and bookmark files. I am getting 'Unsupported PNG Filter 3' and 'Unsupported PNG Filter 4' when dealing with some files. I have found that if I manually open the file in Adobe and save it over itself with the same name, the problem is corrected most of the time. Is there a way to do this programmatically? I can open the file but cannot figure out how to save and close it. Ideally this would run in the background.
# Open the PDF at `path` by handing it to the shell.
# NOTE(review): Popen returns immediately; nothing here waits for the launched
# program or saves the file.
def repair_file(self, path):
#Open PDF
subprocess.Popen([path], shell=True)
#Want a way to simply save like this
# NOTE(review): the subprocess module has no save(); the line below is only a wish.
#subprocess.save([path], shell=True)
Alternatively, does anyone have a better solution for "repairing" files?
# Open the PDF at `path` by handing it to the shell; the save step is still missing.
# NOTE(review): Popen returns immediately and the launched program is never waited on.
def repair_file(self, path):
#Open PDF
subprocess.Popen([path], shell=True)
#subprocess.write([path], shell=True)
#Tried
# NOTE(review): the commented-out attempt below would truncate the PDF ('wb') and
# then try to write the path itself rather than any PDF content.
#with open(path, 'wb') as f:
# f.write(path)
#print('File Saved')
def combine_pdf(self, isos, pkg):
    """Merge the drawings named in ``isos`` into ``<pkg>.pdf``, one bookmark each.

    Every drawing is read from the fixed "Combined Isometrics" folder. When a
    drawing cannot be appended (for example PyPDF2's "Unsupported PNG Filter 4"
    error), the failure is recorded through ``self.add_line`` and
    ``self.write2log``, ``self.repair_file`` is called, and the append is
    attempted once more. A failing drawing never aborts the remaining ones.

    Args:
        isos: Iterable of drawing names (file names without the ``.pdf`` suffix).
        pkg: Package name; used for the output file name and in log messages.
    """
    dir_name = r"E:\Test Folder\Y\Combined Isometrics"
    suffix = ".pdf"
    merger = PdfFileMerger(strict=False)
    try:
        for iso in isos:
            path = os.path.join(dir_name, iso + suffix)
            print(pkg, " : ", path)
            bookmark = iso
            try:
                # Close the source handle deterministically once it is appended.
                with open(path, 'rb') as pdf_file:
                    merger.append(pdf_file, bookmark)
            except Exception as e:
                log = 'Error adding drawing: ' + str(pkg) + " : " + str(iso) + '\n' + '__Error: ' + str(e)
                # Join the three fields; joining the concatenated string would
                # put a comma between every single character.
                line = ",".join([str(iso), str(pkg), str(e)])
                stat = "Failed"
                self.add_line(stat, line)
                self.write2log(log)
                # Second chance: try to repair the file, then append it again.
                try:
                    self.repair_file(path)
                    with open(path, 'rb') as pdf_file:
                        merger.append(pdf_file, bookmark)
                except Exception as repair_error:
                    log = 'Error repairing: ' + str(pkg) + " : " + str(iso) + '\n' + '__Error: ' + str(repair_error)
                    self.write2log(log)
        try:
            merger.write(pkg + ".pdf")
        except Exception as e:
            log = 'Error saving Package: ' + pkg + '\n' + '__Error: ' + str(e)
            self.write2log(log)
    finally:
        # Release the merger's resources even when writing the package failed.
        merger.close()
EDIT
I know this is an old question, but I ran into a "filter" error as well. Using qpdf to copy all pages into a new file resolved it.
qpdf --empty --pages infile.pdf -- outfile.pdf

Problem opening up text file of the downloadPath with gedit

After downloading the dependencies from Nexus, I have a download path for the data to be saved in, but I wasn't able to open the text file; it's not responding. Why is this so?
# Fragment: download every asset of every item in `data` into downloadPath.
for item in data["items"]:
for asset in item["assets"]:
fileurl = asset["downloadUrl"]
print(fileurl)
downloadPath = '/home/centos/'
# The file name is the last path segment of the download URL.
filename = downloadPath + fileurl.split('/')[-1]# '\' for Windows
# NOTE(review): read() returns bytes, and str() of bytes yields its "b'...'" repr,
# so the saved file holds that repr text rather than the asset's real content.
# Writing the bytes to a file opened with "wb" would keep the data intact.
outfile = open(filename, "w")
outfile.write(str(urllib.request.urlopen(fileurl).read()))
outfile.close()
# A None continuationToken exits the program; otherwise the next page URL is built.
if data["continuationToken"] is None:
sys.exit()
else:
#construct pagination url and loop
url = baseurl + 'components?continuationToken=' + data["continuationToken"] + '&repository=' + downloadRepository
return

BeautifulSoup xml get class name value

I am using BeautifulSoup to parse Tableau twb XML files to get list of worksheets in the report.
The XML that holds the value I am looking for is
<window class='worksheet' name='ML Productivity'>
Struggling with how to get all of the class='worksheet' and then get the name value from those eg I want to get the 'ML Productivity' value.
Code I have so far is below.
import sys, os
import bs4 as bs
# Walk twbpath, parse every .twb workbook as XML and record <window> nodes in output.txt.
twbpath = "C:/tbw tbwx files/"
outpath = "C:/out/"
outFile = open(outpath + 'output.txt', "w")
#twbList = open(outpath + 'twb.txt', "w")
for subdir, dirs, files in os.walk(twbpath):
for file in files:
if file.endswith('.twb'):
print(subdir.replace(twbpath,'') + '-' + file)
# Despite its name, filepath holds the file's full text, not a path.
filepath = open(subdir + '/' + file, encoding='utf-8').read()
soup = bs.BeautifulSoup(filepath, 'xml')
classnodes = soup.findAll('window')
for classnode in classnodes:
# NOTE(review): str(classnode) is the tag's whole markup, so it never equals
# 'worksheet'; the class attribute would be read with classnode.get('class').
if str(classnode) == 'worksheet':
outFile.writelines(file + ',' + str(classnode) + '\n')
print(subdir.replace(twbpath,'') + '-' + file, classnode)
outFile.close()
You can filter the desired window element by the class attribute value and then treat the result like a dictionary to get the desired attribute:
soup.find('window', {'class': 'worksheet'})['name']
If there are multiple window elements you need to locate, use find_all():
for window in soup.find_all('window', {'class': 'worksheet'}):
print(window['name'])

Categories

Resources