I am trying to show the progress of csv download but I have no idea how to make it using the code bellow:
for url, filename in zip(urls, filenames):
r = requests.get(url, stream=True,verify=False)
with open(r'C:\Users\Lucas\output\ ' + filename + ' - ' + dia.zfill(2) + '_' + mes.zfill(2) + '_' + ano + '.csv', 'wb') as fd:
for chunk in r.iter_content(chunk_size=256):
fd.write(chunk)
How could I make that? Thanks for helping
Related
Problem: I wanted to get the data from the following url, however, I got the following error message.
I was wondering if you could guide me to fix my error. I appreciate your time!
import requests
import os
urls = {'1Q16':'https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q1_2016.zip'}
if not os.path.isdir('data'):
os.system('mkdir data')
for file in urls.keys():
if not os.path.exists('data/' + file):
os.system('mkdir ./data/' + file)
print('Requesting response from: ' + urls[file])
req = requests.get(urls[file])
print('Writing response to: /data/' + file + '/' + file + '.zip')
with open('data/' + file + '/' + file + '.zip', 'wb') as f:
f.write(req.content)
os.system('unzip ' + 'data/' + file + '/' + file + '.zip -d data/' + file + '/')
print('Unzipping data...')
os.system('rm ' + 'data/' + file + '/' + file + '.zip')
print(file + ' complete.')
print('------------------------------------------------------------------------------- \n')
Error messegae
Requesting response from: https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q1_2016.zip
Writing response to: /data/1Q16/1Q16.zip
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-9-251ee1e9c629> in <module>
9 req = requests.get(urls[file])
10 print('Writing response to: /data/' + file + '/' + file + '.zip')
---> 11 with open('data/' + file + '/' + file + '.zip', 'wb') as f:
12 f.write(req.content)
13
FileNotFoundError: [Errno 2] No such file or directory: 'data/1Q16/1Q16.zip'
The problem is your directory data/<file> is not being created and hence open() can not open a file since a part of path you provided does not exist. To ensure you have full compatibity while joining paths on python, you can use os.path.join(). For you, this would be:
import requests
import os
urls = {'1Q16':'https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q1_2016.zip'}
if not os.path.isdir('data'):
os.makedirs("data")
for file in urls.keys():
if not os.path.exists('data/' + file):
os.makedirs(os.path.join("data",file))
print('Requesting response from: ' + urls[file])
req = requests.get(urls[file])
print('Writing response to: /data/' + file + '/' + file + '.zip')
with open(os.path.join("data", file, file + '.zip', 'wb') as f:
f.write(req.content)
I have some code that I wrote that downloads images from a website. The way that it currently works it needs to guess what the file extension will be for the url it will be downloading from. The block of code that does that looks like this:
for imageLink in imageLinks:
try:
urllib.request.urlretrieve(imageLink + ".png", str(threadName) + "/" + str(count) + ".png")
except:
try:
urllib.request.urlretrieve(imageLink + ".jpg",str(threadName) + "/" + str(count) + ".png")
except:
try:
urllib.request.urlretrieve(imageLink + ".gif",str(threadName) + "/" + str(count) + ".gif")
except:
urllib.request.urlretrieve(imageLink + ".webm",str(threadName) + "/" + str(count) + ".webm")
As it stands the code is relying on a fail in order to try something else.
I wanted to know if their is a way to have this functionality but to basically just look better. These methods will give identical errors if they fail so I want to just go through them sequentially until one works
for ext in ('.png', '.jpg', '.gif', '.webm'):
try:
urllib.request.urlretrieve(imageLink + ext, str(threadName) + "/" + str(count) + ext)
break
except:
pass
You can use a try/except block inside a function and return None if the control goes to the except statement. You can optimize the for loop according to your own needs. One example is here:
def get_url(link1, link2):
try:
requestData = urllib.request.urlretrieve(link1, link2)
except:
return None
return requestData
for imageLink in imageLinks:
data = urllib.request.urlretrieve(imageLink + ".png", str(threadName) + "/" + str(count) + ".png")
if data == None:
data = urllib.request.urlretrieve(imageLink + ".jpg",str(threadName) + "/" + str(count) + ".png")
if data == None:
data = urllib.request.urlretrieve(imageLink + ".gif",str(threadName) + "/" + str(count) + ".gif")
if data == None:
urllib.request.urlretrieve(imageLink + ".webm",str(threadName) + "/" + str(count) + ".webm")
Please can someone help me?
I have been trying to write in an html file that I create myself, a bunch of jpg files just to display them, but I can't seem to do anything, and there have been so many errors till I got even anywhere...
Can anyone please help, I have no experience in html from python.
Here's the code:
def download_images(img_urls, dest_dir):
#html_file = open("index.html", 'rb')
html_file = open("index.html", 'w')
print("Retrieving...")
html_file.write("""<verbatim>""")
html_file.write("""<html>""")
html_file.write("""<body>""")
for url,i in zip(img_urls,range(len(img_urls))):
image_opened = urllib.request.urlopen(url)
urllib.request.urlretrieve(url, "img" + str(i) + ".jpg")
img_tag = r'"""<img"""' + str(i) + r' src="/edu/python/exercises/img' + str(i) + r'"""">"""'.format(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
html_file = open("index.html", 'w')
html_file.write(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
#print('<img' + str(i) + ' src="/edu/python/exercises/img"' + str(i) + '>')
html_file.write(r""""</body>""""")
html_file.write("""</html>""")
html_file.close()
I will go through what you have so far and comment on it.
The first few bits look okay until
image_opened = urllib.request.urlopen(url)
This line opens a stream to the requested url, you don't do anything with this and you don't need it as you download the image using:
urllib.request.urlretrieve(url, "img" + str(i) + ".jpg")
You then create the html img line, which you have overcomplicated a bit. You are trying to produce a line that reads something like this:
<img src='img1.jpg' />
What you seem to be doing in:
img_tag = r'"""<img"""' + str(i) + r' src="/edu/python/exercises/img' + str(i) + r'"""">"""'.format(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
Is starting to create the right string but then you attempt to download the jpg again. You just need to do create the string as follows:
img_tag = "<img src='src" + str(i) + ".jpg' />"
You then open the html output file again using:
html_file = open("index.html", 'w')
You don't need to do this as you still have the file open from when you opened it at the beginning of the method.
You then attempt to write the html string to the file doing;
html_file.write(urllib.request.urlretrieve(url, "img" + str(i) + ".jpg"))
This is instead trying to download the jpg again and output the result into the html file. Instead you want to write the img_tag by:
html_file.write(img_tag)
You write the end of the file okay and close it.
html_file.write(r""""</body>""""")
html_file.write("""</html>""")
html_file.close()
Once you fix this you should have a function that looks like:
import urllib.request
def download_images(img_urls, dest_dir):
#html_file = open("index.html", 'rb')
html_file = open("index.html", 'w')
print("Retrieving...")
html_file.write("<html>")
html_file.write("<body>")
for url,i in zip(img_urls,range(len(img_urls))):
urllib.request.urlretrieve(url, "img" + str(i) + ".jpg") # Downloads image
img_tag = "<img src='img" + str(i) + ".jpg' />"
html_file.write(img_tag)
html_file.write("</body>")
html_file.write("</html>")
html_file.close()
That you can call with something like:
download_images(["a.jpg","b.jpg"],"")
I have many subdirectories in my main directory and would like to write a script to unzip and convert all the files within it. If possible, I would also like to combine all the CSV within a single directory into a single CSV. But more importantly, I need help with my nested loop.
import gzip
import csv
import os
subdirlist = os.listdir('/home/user/Desktop/testloop')
subtotal = len(subdirlist)
subcounter = 0
for dirlist in subdirlist:
print "Working On " + dirlist
total = len(dirlist)
counter = 0
for dir in dirlist:
print "Working On " + dir
f = gzip.open('/' + str(subdirlist) + '/' + dir, 'rb')
file_content = f.read()
f.close()
print "25% Complete"
filename = '/' + str(subdirlist) + '/temp.txt'
target = open(filename, 'w')
target.write(file_content)
target.close()
print "50% Complete!"
csv_file = '/' + str(subdirlist) + '/' + str(dir) + '.csv'
in_txt = csv.reader(open(filename, "rb"), delimiter = '\t')
out_csv = csv.writer(open(csv_file, 'wb'))
out_csv.writerows(in_txt)
os.remove(filename)
os.remove('/' + str(subdirlist) + '/' + dir)
counter+=1
print str(counter) + "/" + str(total) + " " + str(dir) + " Complete!"
print "SubDirectory Converted!"
print str(subcounter) + "/" + str(subtotal) + " " + str(subdirlist) + " Complete!"
subcounter+=1
print "All Files Converted!"
Thanks in advance
To get lists of files and subdirectories, you can use os.walk. Below is an implementation I wrote to get all files (optionally, of certain type(s)) in arbitrarily nested subdirectories:
from os import walk, sep
from functools import reduce # in Python 3.x only
def get_filelist(root, extensions=None):
"""Return a list of files (path and name) within a supplied root directory.
To filter by extension(s), provide a list of strings, e.g.
get_filelist(root, ["zip", "csv"])
"""
return reduce(lambda x, y: x+y,
[[sep.join([item[0], name]) for name in item[2]
if (extensions is None or
name.split(".")[-1] in extensions)]
for item in walk(root)])
I would like to export the result of my code in a csv file (exel) and print in it but with respecting the Header,row.
Now it print like a normal text in the exel with no consider the row.
like :
i would like like this :
Any idea to make it possible ?
thanks
#Source : http://www.wunderground.com/weather/api/d/docs?d=resources/code-samples
import urllib2
import json
import time
import csv
from datetime import datetime#set the time
f = urllib2.urlopen('http://api.wunderground.com/api/8d3b5d3fa03ddb6f/conditions/weather/q/China/Beijing.json')
now = datetime.now()
current_year = now.year
current_day = now.day
current_month = now.month
current_hour = now.hour
current_minute = now.minute
current_second = now.second
json_string = f.read()
parsed_json = json.loads(json_string)
locations = parsed_json.get('locations', 'Beijing')
temp_f = parsed_json['current_observation']['temp_f']
weather = parsed_json['current_observation']['weather']
#--- Open the file + write on it ---
f = open('out.csv','a')
header = "location,temperature,weather,date\n"
date = str(now.month) + "/" + str(now.day) + "/" + str(now.year) + " " + str(now.hour) + ":" + str(now.minute) + ":" + str(now.second)
f.write(header)
f.write(','.join([locations,str(temp_f),weather,date]))
f.write('\n')
f.close()
# --- And Close the file ---
after the great help of seth
it print this :
Try something like
header = "location\ttemperature\tweather\date\n"
f.write(header)
for i in xrange(len(weather)):
f.write('\t'.join([locations[i],temp_f[i],weather[i],str(now.day)]))
f.write('\n')
f.close()
or the equivalent with no for loop, if you're writing inside the loop you're parsing with.
edit: or more explicitly:
f = open('out.csv','a') #open in append mode if you're already written some of the file.
header = "location\ttemperature\tweather\date\n" #only write this if you haven't already.
date = str(now.month) + "/" + str(now.day) + "/" + str(now.year) + " " + str(now.hour) + ":" + str(now.minute) + ":" + str(now.second)
f.write(header)
f.write('\t'.join([locations,temp_f,weather,date]))
f.write('\n')
f.close()