Getting JSON objects from website using standard json and urllib2 - python

I wrote some code to extract JSON objects from the GitHub website using json and requests:
#!/usr/bin/python
import json
import requests

r = requests.get('https://github.com/timeline.json')  # Replace with your website URL
with open("a.txt", "w") as f:
    for item in r.json or []:
        try:
            f.write(item['repository']['name'] + "\n")
        except KeyError:
            pass
This works perfectly fine. However, I want to do the same thing using urllib2 and the standard json module. How do I do that? Thanks.

Simply download the data with urlopen and parse it with Python's json module:
import json
import urllib2

r = urllib2.urlopen('https://github.com/timeline.json')
with open("a.txt", "w") as f:
    for item in json.load(r) or []:
        try:
            f.write(item['repository']['name'] + "\n")
        except KeyError:
            pass
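On Python 3 there is no urllib2; the same idea works with urllib.request and the standard json module. A minimal sketch (assuming the endpoint still returns a JSON list) would be:
import json
import urllib.request

r = urllib.request.urlopen('https://github.com/timeline.json')
with open("a.txt", "w") as f:
    # read() returns bytes, so decode before handing the text to the json module
    for item in json.loads(r.read().decode('utf-8')) or []:
        try:
            f.write(item['repository']['name'] + "\n")
        except KeyError:
            pass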

Related

save response from API to csv file

I am a new learner of Python. I am working with some Python code that calls an API and gets a response in CSV format. I would like to know how I can save that CSV response to a CSV file.
#! /usr/bin/env python
import httplib2
# These aren't needed, just for this example
from pprint import pformat
from datetime import datetime
import pytz
from pvlive_api import PVLive
import pandas as pd
import json


def post_elexon(url):
    http_obj = httplib2.Http()
    resp, content = http_obj.request(
        uri=url,
        method='GET',
        headers={'Content-Type': 'application/xml; charset=UTF-8'},
    )
    return resp, content


def main():
    resp, content = post_elexon(
        url='https://api.bmreports.com/BMRS/B1770/v1?APIKey=MY_API_KEY&SettlementDate=2015-03-01&Period=1&ServiceType=csv',
    )
    print("===Response===")
    print(resp)
    print("===Content===")
    print(pformat(content))
    print("===Finished===")


if __name__ == "__main__":
    main()
Any help or advice would be greatly appreciated. Thank you.
Try this:
import csv

with open('out.csv', 'w', newline='') as f:
    writer = csv.writer(f)  # write to the open file, not the response object
    for line in content.decode('utf-8').splitlines():  # httplib2 returns the body as bytes in `content`
        writer.writerow(line.split(','))
Edit:
I tested your request - it returns JSON, so you can save it as JSON instead:
import json

with open('response.json', 'w') as f:
    json.dump(json.loads(content), f)  # `content` holds the response body; `resp` is only the headers
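If you only need the response on disk exactly as the API sends it, a minimal self-contained sketch (the output filename here is just an assumption) could be:
import httplib2


def save_response(url, filename='bmrs_b1770.csv'):
    # Fetch the URL and write the raw body to disk; httplib2 returns the body as bytes
    http_obj = httplib2.Http()
    resp, content = http_obj.request(uri=url, method='GET')
    with open(filename, 'wb') as f:
        f.write(content)
    return resp


if __name__ == "__main__":
    save_response('https://api.bmreports.com/BMRS/B1770/v1?APIKey=MY_API_KEY'
                  '&SettlementDate=2015-03-01&Period=1&ServiceType=csv')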

TypeError: descriptor 'split' requires a 'str' object but received a 'bytes'

I am trying to scrape data from ESPN Cricinfo using a Python script available on GitHub. The code is the following:
import urllib.request as ur
import csv
import sys
import time
import os
import unicodedata
from urllib.parse import urlparse
from bs4 import BeautifulSoup

BASE_URL = 'http://www.espncricinfo.com'

for i in range(0, 6019):
    url = 'http://search.espncricinfo.com/ci/content/match/search.html?search=first%20class;all=1;page='
    soupy = BeautifulSoup(ur.urlopen(url + str(i)).read())
    time.sleep(1)
    for new_host in soupy.findAll('a', {'class': 'srchPlyrNmTxt'}):
        try:
            new_host = new_host['href']
        except:
            continue
        odiurl = BASE_URL + urlparse(new_host).geturl()
        new_host = unicodedata.normalize('NFKD', new_host).encode('ascii', 'ignore')
        print (new_host)
        print (str.split(new_host, "/"))[4]
        html = urllib2.urlopen(odiurl).read()
        if html:
            with open('espncricinfo-fc/{0!s}'.format(str.split(new_host, "/")[4]), "wb") as f:
                f.write(html)
And the error is on this line:
print (str.split(new_host, "/"))[4]
TypeError: descriptor 'split' requires a 'str' object but received a 'bytes'
Any help from you would be appreciated. Thanks.
Use
str.split(new_host.decode("utf-8"), "/")[4]
.decode("utf-8") is obviously the most important part: it turns your bytes object into a string.
On another note, be aware that urllib2 (which you're using but not importing, by the way) no longer exists in Python 3 (see this). Instead, you could use from urllib.request import urlopen.
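To see why the error appears in the first place, here is a small illustration (the path value is hypothetical): str.split is the unbound method of str, so passing it a bytes object fails, while a decoded string splits fine.
new_host = b'/ci/engine/match/12345.html'  # hypothetical bytes value, as produced by .encode()

# str.split(new_host, "/")                          # TypeError: descriptor 'split' requires a 'str' object ...
print(new_host.split(b"/"))                         # bytes.split works, but needs a bytes separator
print(str.split(new_host.decode("utf-8"), "/")[4])  # decode first, then split as str -> '12345.html'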
EDIT: This is the full code that won't give you the error you described in your question. I am highlighting that because without the espncricinfo-fc directory already existing, the with open(...) statement will give you a FileNotFoundError.
import urllib.request as ur
import csv
import sys
import time
import os
import unicodedata
from urllib.parse import urlparse
from bs4 import BeautifulSoup
from urllib.request import urlopen

BASE_URL = 'http://www.espncricinfo.com'

for i in range(0, 6019):
    url = 'http://search.espncricinfo.com/ci/content/match/search.html?search=first%20class;all=1;page='
    soupy = BeautifulSoup(ur.urlopen(url + str(i)).read())
    time.sleep(1)
    for new_host in soupy.findAll('a', {'class': 'srchPlyrNmTxt'}):
        try:
            new_host = new_host['href']
        except:
            continue
        odiurl = BASE_URL + urlparse(new_host).geturl()
        new_host = unicodedata.normalize('NFKD', new_host).encode('ascii', 'ignore')
        print(new_host)
        print(str.split(new_host.decode("utf-8"), "/")[4])
        html = urlopen(odiurl).read()
        if html:
            with open('espncricinfo-fc/{0!s}'.format(str.split(new_host.decode("utf-8"), "/")[4]), "wb") as f:
                f.write(html)
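To avoid that FileNotFoundError, you could create the target directory up front (a small addition, not part of the original script):
import os

os.makedirs('espncricinfo-fc', exist_ok=True)  # create the output folder if it doesn't exist yet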

Python-3 Trying to iterate through a csv and get http response codes

I am attempting to read a CSV file that contains a long list of URLs. I need to iterate through the list and get the URLs that throw a 301, 302, or 404 response. When I test the script it exits with code 0, so I know it is error free, but it is not doing what I need it to. I am new to Python and to working with files; my experience has been primarily UI automation. Any suggestions would be gladly appreciated. Below is the code.
import csv
import requests
import responses
from urllib.request import urlopen
from bs4 import BeautifulSoup

f = open('redirect.csv', 'r')
contents = []

with open('redirect.csv', 'r') as csvf:  # Open file in read mode
    urls = csv.reader(csvf)
    for url in urls:
        contents.append(url)  # Add each url to list contents

def run():
    resp = urllib.request.urlopen(url)
    print(self.url, resp.getcode())
    run()

print(run)
Given you have a CSV similar to the following (the heading is URL)
URL
https://duckduckgo.com
https://bing.com
You can do something like this using the requests library.
import csv
import requests

with open('urls.csv', newline='') as csvfile:
    errors = []
    reader = csv.DictReader(csvfile)
    # Iterate through each line of the csv file
    for row in reader:
        try:
            # allow_redirects=False so 301/302 responses are seen rather than silently followed
            r = requests.get(row['URL'], allow_redirects=False)
            if r.status_code in [301, 302, 404]:
                # print(f"{r.status_code}: {row['URL']}")
                errors.append([row['URL'], r.status_code])
        except:
            pass
Uncomment the print statement if you want to see the results in the terminal. The code at the moment appends a list of URL and status code to an errors list. You can print or continue processing this if you prefer.
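If you would rather have the problem URLs in a file than in the terminal, a short sketch (the errors.csv name is just an assumption) could follow the loop:
import csv

# assumes `errors` is the list of [url, status_code] pairs built above
with open('errors.csv', 'w', newline='') as out:
    writer = csv.writer(out)
    writer.writerow(['URL', 'status_code'])
    writer.writerows(errors)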

printing json value to file

#!/usr/bin/python
import os
import json
import urllib
import urllib2

url = "https://www.example.com"
parameters = {'resource': 'aaaa',
              'apikey': '1111'}
data = urllib.urlencode(parameters)
req = urllib2.Request(url, data)
response = urllib2.urlopen(req)
json_data = response.read()
with open("test.json") as json_file:
    json_file.write(json_data)
print json_data
I don't think I use the json module anymore; it was left over from before, and json_data is used now.
As Martijn Pieters pointed out, the data is already encoded, so you shouldn't need the json module at all in this case.
You can just write the output to a file:
json_data = response.read()
with open("test.json", "w") as json_file:
    json_file.write(json_data)
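If you do want the standard json module involved, for example to validate or pretty-print the payload before saving it, a small sketch could be:
import json

json_data = response.read()
parsed = json.loads(json_data)              # raises ValueError if the response isn't valid JSON
with open("test.json", "w") as json_file:
    json.dump(parsed, json_file, indent=2)  # write it back out, pretty-printed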
Since you already have the JSON text in json_data, you can also write it out directly:
with open("test.json", "w") as f:
    f.write(json_data)
Another thing to note here: you should not have a variable named json, because
import json
json = 'some string'
json.dumps("{'a':1}")
>> AttributeError: 'str' object has no attribute 'dumps'
If you want a variable named json, you can use import json as j.

Download a file providing username and password using Python

My file named 'blueberry.jpg' begins downloading when I click on the following URL manually, provided that the username and password are typed in when asked:
http://example.com/blueberry/download
How can I make that happen using Python?
import urllib.request

url = 'http://example.com/blueberry/download'
data = urllib.request.urlopen(url).read()
fo = open('E:\\quail\\' + url.split('/')[1] + '.jpg', 'w')
print(data, file=fo)
fo.close()
However, the above program does not write the required file. How can I provide the required username and password?
Use requests, which provides a friendlier interface to the various url libraries in Python:
import os
import requests
from urlparse import urlparse

username = 'foo'
password = 'sekret'
url = 'http://example.com/blueberry/download/somefile.jpg'
filename = os.path.basename(urlparse(url).path)

r = requests.get(url, auth=(username, password))
if r.status_code == 200:
    with open(filename, 'wb') as out:
        for bits in r.iter_content():
            out.write(bits)
UPDATE:
For Python 3, get urlparse with: from urllib.parse import urlparse
I'm willing to bet you are using basic auth. So try doing the following:
import urllib.request

url = 'http://username:pwd@example.com/blueberry/download'
data = urllib.request.urlopen(url).read()
fo = open('E:\\quail\\' + url.split('/')[1] + '.jpg', 'w')
print(data, file=fo)
fo.close()
Let me know if this works.
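If embedding the credentials in the URL doesn't work for you, a sketch using urllib.request's own basic-auth machinery (the username and password values are placeholders) would be:
import urllib.request

url = 'http://example.com/blueberry/download'

# Register the credentials with a password manager, then install an opener that uses them
password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
password_mgr.add_password(None, url, 'username', 'pwd')
handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
opener = urllib.request.build_opener(handler)

data = opener.open(url).read()           # bytes of the downloaded file
with open('blueberry.jpg', 'wb') as fo:  # write in binary mode
    fo.write(data)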
