Joining Keywords in a list NYT - python

For each article that a keyword list is returned for. We want to join all the words using the key --> values into a list, as shown below. I would like to strip out the ‘u’ from the list, before I do the append. Then we want to compare how many common words in both list and return the result.
Example lists returned from dic['keywords']:
Article one returns:
[
{
u'value': u'Dunford, Joseph F Jr',
u'name': u'persons',
u'rank': u'1'
},
{
u'value': u'Afghanistan',
u'name': u'glocations',
u'rank': u'1'
},
{
u'value': u'Afghan National Police',
u'name': u'organizations',
u'rank': u'1'
},
{
u'value': u'Afghanistan War (2001- )',
u'name': u'subject',
u'rank': u'1'
},
{
u'value': u'Defense and Military Forces',
u'name': u'subject',
u'rank': u'2'
}
]
Article two returns:
[
{
u'value': u'Gall, Carlotta',
u'name': u'persons',
u'rank': u'1'
},
{
u'value': u'Gannon, Kathy',
u'name': u'persons',
u'rank': u'2'
},
{
u'value': u'Niedringhaus, Anja (1965-2014)',
u'name': u'persons',
u'rank': u'3'
},
{
u'value': u'Kabul (Afghanistan)',
u'name': u'glocations',
u'rank': u'2'
},
{
u'value': u'Afghanistan',
u'name': u'glocations',
u'rank': u'1'
},
{
u'value': u'Afghan National Police',
u'name': u'organizations',
u'rank': u'1'
},
{
u'value': u'Afghanistan War (2001- )',
u'name': u'subject',
u'rank': u'1'
}
]
Desired Output:
List1 = ['Dunford, Joseph F Jr',’ Afghanistan’, ‘Afghan National Police’, ‘: Afghanistan War (2001- )’, ‘Defense and Military Forces’]
List2 = [‘Gall, Carlotta'’,’ u'Gannon, Kathy',’ Niedringhaus, Anja (1965-2014)’,’Afghanistan’]
Keywords in common: 2
My Code is as follows:
from flask import Flask, render_template, request, session, g, redirect, url_for
from nytimesarticle import articleAPI
api = articleAPI('X')
articles = api.search( q = 'Afghan War',
fq = {'headline':'', 'source':['Reuters','AP', 'The New York Times']},
begin_date = 20111231 )
def parse_articles(articles):
'''
This function takes in a response to the NYT api and parses
the articles into a list of dictionaries
'''
news = []
for i in articles['response']['docs']:
dic = {}
dic['id'] = i['_id']
if i['abstract'] is not None:
dic['abstract'] = i['abstract'].encode("utf8")
dic['headline'] = i['headline']['main'].encode("utf8")
dic['desk'] = i['news_desk']
dic['date'] = i['pub_date'][0:10] # cutting time of day.
dic['section'] = i['section_name']
dic['keywords'] = i['keywords']
print dic['keywords']
if i['snippet'] is not None:
dic['snippet'] = i['snippet'].encode("utf8")
dic['source'] = i['source']
dic['type'] = i['type_of_material']
dic['url'] = i['web_url']
dic['word_count'] = i['word_count']
# locations
locations = []
for x in range(0,len(i['keywords'])):
if 'glocations' in i['keywords'][x]['name']:
locations.append(i['keywords'][x]['value'])
dic['locations'] = locations
# subject
subjects = []
for x in range(0,len(i['keywords'])):
if 'subject' in i['keywords'][x]['name']:
subjects.append(i['keywords'][x]['value'])
dic['subjects'] = subjects
news.append(dic)
return(news)
print(parse_articles(articles))

You can use list comprehension to build lists from the given dict.
d = [{u'value': u'Dunford, Joseph F Jr', u'name': u'persons', u'rank': u'1'}, {u'value': u'Afghanistan', u'name': u'glocations', u'rank': u'1'}, {u'value': u'Afghan National Police', u'name': u'organizations', u'rank': u'1'}, {u'value': u'Afghanistan War (2001- )', u'name': u'subject', u'rank': u'1'}, {u'value': u'Defense and Military Forces', u'name': u'subject', u'rank': u'2'}]
print [v['value'] for v in d] # prints [u'Dunford, Joseph F Jr', u'Afghanistan', u'Afghan National Police', u'Afghanistan War (2001- )', u'Defense and Military Forces']

Related

how to add fields in google cloud fire store using python

i have two Json data data 1 and data 2 and i want to merge it in fire store document when i try to merge two dicts only half part is showing .
data = {
u'name': u'Los Angeles',
u'state': u'CA',
u'country': u'USA'
}
data2= {
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
}
db.collection(u'cities').document(u'LA').set(data)
city_ref = db.collection(u'cities').document(u'LA')
city_ref.set({
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
}, merge=True)
#only this part is showing
{
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
}
#when i, doing this
data = {
u'name': u'Los Angeles',
u'state': u'CA',
u'country': u'USA',
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'}
#only this much part is showing in my field
u'name': u'MIAMI',
u'state': u'CA',
u'country': u'USA'
is there any way to merge this two in python
You can only have one value per field name. Merge is useful when you want to add additional fields to the same document, for example:
data = {
u'name': u'Los Angeles',
u'state': u'CA',
u'country': u'USA'
}
db.collection(u'cities').document(u'LA').set(data)
city_ref = db.collection(u'cities').document(u'LA')
city_ref.set({
u'name2': u'MIAMI',
u'state2': u'CA',
u'country2': u'USA'
}, merge=True)
# Result
data = {
u'country': u'USA',
u'country2': u'USA',
u'name': u'Los Angeles',
u'name2': u'MIAMI',
u'state': u'CA',
u'state2': u'CA',}
You can use this code( it will add the new fields if they don't exist and update them if they exist already).
doc.reference.update({
u'newField1': newValue1
u'newField2': newValue2
})

I am trying to get answer in json format but it simply returns a String.What am i doing wrong?

I am trying to get answer in json format but it simply returns a String.What am i doing wrong?
from flask import Flask, request, Response
import mysql.connector
import json
app = Flask(__name__)
def createConnection():
connection = mysql.connector.connect(user="akshay", password="akshay", host="localhost", database="BOOKS")
return connection
#app.route("/contact")
def getContacts():
print(request)
connection = createConnection()
cursor = connection.cursor()
cursor.execute("SELECT ID, NAME, AUTHOR, SUBJECT, PRICE FROM BOOKS;")
contacts = []
for (ID, NAME, AUTHOR, SUBJECT, PRICE) in cursor:
contacts.append({
'ID': ID,
'NAME': NAME,
"AUTHOR": AUTHOR,
"SUBJECT": SUBJECT,
"PRICE": PRICE
})
cursor.close()
connection.close()
response = Response()
response.content_type ='application/json'
response.data = json.loads(json.dumps(contacts))
return response
app.run('0.0.0.0', 4000)
And this is Output::
[{u'PRICE': 9999.999, u'AUTHOR': u'AKSHAY', u'NAME': u'AKSHAY', u'ID':
4, u'SUBJECT': u'BIG_DATA'}, {u'PRICE': 123.456, u'AUTHOR': u'Yashwant
Kanetkar', u'NAME': u'Exploring C', u'ID': 1001, u'SUBJECT': u'C
Programming'}, {u'PRICE': 371.019, u'AUTHOR': u'Yashwant Kanetkar',
u'NAME': u'Pointers in C', u'ID': 1002, u'SUBJECT': u'C Programming'},
{u'PRICE': 334.215, u'AUTHOR': u'E Balaguruswami', u'NAME': u'ANSI C
Programming', u'ID': 1003, u'SUBJECT': u'C Programming'}, {u'PRICE':
140.121, u'AUTHOR': u'Dennis Ritchie', u'NAME': u'ANSI C Programming', u'ID': 1004, u'SUBJECT': u'C Programming'}, {u'PRICE': 417.764,
u'AUTHOR': u'Herbert Schildt', u'NAME': u'C++ Complete Reference',
u'ID': 2001, u'SUBJECT': u'C++ Programming'}, {u'PRICE': 620.665,
u'AUTHOR': u'Stanley Lippman', u'NAME': u'C++ Primer', u'ID': 2002,
u'SUBJECT': u'C++ Programming'}, {u'PRICE': 987.213, u'AUTHOR':
u'Bjarne Stroustrup', u'NAME': u'C++ Programming Language', u'ID':
2003, u'SUBJECT': u'C++ Programming'}, {u'PRICE': 525.121, u'AUTHOR':
u'Herbert Schildt', u'NAME': u'Java Complete Reference', u'ID': 3001,
u'SUBJECT': u'Java Programming'}, {u'PRICE': 575.651, u'AUTHOR': u'Cay
Horstmann', u'NAME': u'Core Java Volume I', u'ID': 3002, u'SUBJECT':
u'Java Programming'}, {u'PRICE': 458.238, u'AUTHOR': u'James Gosling',
u'NAME': u'Java Programming Language', u'ID': 3003, u'SUBJECT': u'Java
Programming'}, {u'PRICE': 567.391, u'AUTHOR': u'Peter Galvin',
u'NAME': u'Operatig System Concepts', u'ID': 4001, u'SUBJECT':
u'Operating Systems'}, {u'PRICE': 421.938, u'AUTHOR': u'Mauris J
Bach', u'NAME': u'Design of UNIX Operating System', u'ID': 4002,
u'SUBJECT': u'Operating Systems'}, {u'PRICE': 352.822, u'AUTHOR':
u'Uresh Vahalia', u'NAME': u'UNIX Internals', u'ID': 4003, u'SUBJECT':
u'Operating Systems'}, {u'PRICE': 872.652, u'AUTHOR': u'Ayn Rand',
u'NAME': u'Atlas Shrugged', u'ID': 8001, u'SUBJECT': u'Novell'},
{u'PRICE': 9999.999, u'AUTHOR': u'AKSHAY', u'NAME': u'AKSHAY', u'ID':
9001, u'SUBJECT': u'BIG_DATA'}]
There is nothing particularly wrong with your code, just remove the call to json.loads() so that the JSON encoded data is used as the response data.
Calling json.loads() will recreate and pass a Python data structure (list of dictionaries in this case) to the response, which will not serialise as valid JSON. In Python 2 it will serialise using the Python repr() format as produced by str() which will not always produce a JSON compatible string (the u'' string prefix for example is not valid JSON). In Python 3 is will probably cause an exception because it won't be able to convert to a bytes object.
You could improve the code by using flask.json.jsonify() which will return a Response object with the correct content type header and a JSON body:
from flask.json import jsonify
#app.route("/contact")
def getContacts():
# create contacts...
return jsonify(contacts)
Ok ,finally got the proper output::
/ 20171119101107
// http://localhost:4000/contact
[
{
"SUBJECT": "BIG_DATA",
"PRICE": 9999.999,
"AUTHOR": "AKSHAY",
"ID": 4,
"NAME": "AKSHAY"
},
{
"SUBJECT": "C Programming",
"PRICE": 123.456,
"AUTHOR": "Yashwant Kanetkar",
"ID": 1001,
"NAME": "Exploring C"
},
{
"SUBJECT": "C Programming",
"PRICE": 371.019,
"AUTHOR": "Yashwant Kanetkar",
"ID": 1002,
"NAME": "Pointers in C"
},
I removed json.loads() and installed json viewer plugin.Thank you very much for help.

SugarCRM call records REST API

I am trying to get the call records from my SugarCRM account using the REST API and I am using Python.
There I want to obtain all the attendees but all I get is the user to whom the call is assigned.
u'assigned_user_id': u'xxxxxxxx',
The response I've received is,
{u'created_by_link': {u'id': u'1', u'full_name': u'adminx', u'_acl': {u'fields': {u'last_login': {u'write': u'no', u'create': u'no'}, u'pwd_last_changed': {u'write': u'no', u'create': u'no'}}, u'_hash': u'xxxx', u'delete': u'no'}}, u'dri_workflow_task_template_link': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'', u'id': u''},
u'customer_journey_points': 10,
u'dri_subworkflow_id': u'',
u'recurrence_id': u'',
u'created_by_name': u'adminx',
u'date_end': u'2018-05-02T09:45:00+00:00',
u'dri_subworkflow_template_id': u'',
u'parent_type': u'Accounts',
u'contact_id': u'xxxx',
u'_acl': {u'fields': {}},
u'duration_minutes': 30,
u'tag': [],
u'assigned_user_name': u'xxxx',
u'repeat_ordinal': u'',
u'repeat_count': None,
u'contact_name': u'xxxx',
u'repeat_interval': 1, u'id': u'xxxx', > - u'parent_name': u'ABC',
u'customer_journey_parent_activity_id': u'',
u'date_entered': u'2017-07-17T12:49:23+00:00',
u'outlook_id': u'',
u'team_name': [{u'name_2': u'', u'selected': False, u'primary': True, u'id': u'1', u'name': u'Global'}, {u'name_2': u'', u'selected': False, u'primary': False, u'id': u'West', u'name': u'West'}],
u'contacts': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'xxx', u'id': u'xxx'},
u'dri_workflow_task_template_id': u'',
u'customer_journey_score': None,
u'date_start': u'2018-05-02T09:15:00+00:00',
u'reminder_checked': u'',
u'dri_workflow_sort_order': u'1',
u'created_by': u'1',
u'parent_id': u'xxxx',
u'dri_subworkflow_template_link': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'', u'id': u''},
u'dri_subworkflow_name': u'',
u'dri_subworkflow_link': {u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'', u'id': u''},
u'modified_by_name': u'adminx',
u'repeat_selector': u'',
u'email_reminder_sent': False,
u'dri_workflow_template_id': u'',
u'status': u'Not Held',
u'direction': u'Outbound',
u'accept_status_users': u'',
u'repeat_dow': u'',
u'description': u'',
u'parent': {u'type': u'Accounts', u'_acl': {u'fields': [], u'_hash': u'xxxx'}, u'name': u'XYZ Funding Inc', u'id': u'xxxx'},
u'repeat_unit': u'',
u'deleted': False,
u'is_customer_journey_parent_activity': False,
u'customer_journey_parent_activity_type': u'',
u'locked_fields': [],
u'email_reminder_time': -1,
u'following': False,
u'assigned_user_link': {u'id': u'xxxx', u'full_name': u'xxxx', u'_acl': {u'fields': [], u'_hash': u'xxxx'}},
u'repeat_type': u'',
u'assigned_user_id': u'seed_sally_id',
u'team_count_link': {u'team_count': u'', u'id': u'1', u'_acl': {u'fields': [], u'_hash': u'xxxx'}},
u'dri_workflow_task_template_name': u'',
u'modified_user_link': {u'id': u'1', u'full_name': u'adminx', u'_acl': {u'fields': {u'last_login': {u'write': u'no', u'create': u'no'}, u'pwd_last_changed': {u'write': u'no', u'create': u'no'}}, u'_hash': u'xxx', u'delete': u'no'}},
u'email_reminder_checked': u'',
u'_module': u'Calls',
u'modified_user_id': u'1',
u'repeat_until': u'',
u'name': u'test',
u'date_modified': u'2017-07-17T12:49:23+00:00',
u'accept_status': u'',
u'reminder_time': -1,
u'customer_journey_progress': 0,
u'dri_workflow_template_name': u'',
u'my_favorite': False,
u'dri_subworkflow_template_name': u'',
u'dri_workflow_template_link': {u'_acl': {u'fields': [], u'_hash': u'xxx'}, u'name': u'', u'id': u''}, > - u'set_accept_links': u'',
u'repeat_days': u'',
u'is_customer_journey_activity': False,
u'repeat_parent_id': u'',
u'team_count': u'',
u'duration_hours': 0,
u'recurring_source': u''},
Strangely, the object which contains the list of "Guests" is not present in the standard GET request i.e.
https://{INSTANCE}/rest/v10/Calls/{RECORD_ID}
After doing some troubleshooting, and looking at the model in the web application itself, I found that the "Guests" field in the GUI ties back to a model property called "invitees".
Running a web request specifically referencing this field provides you with an array of records linked to the Call ID. So try running a GET request to this endpoint:
https://{INSTANCE}/rest/v10/Calls/{RECORD_ID}?fields=invitees
This should provide you with JSON akin to the below:
{
"id": "ec041f60-72b1-11e7-89f0-00163ef1f82f",
"date_modified": "2017-08-08T12:26:47+00:00",
"invitees": {
"records": [
{
"id": "cf378211-2b38-4fe5-949b-a53040717f04",
"date_modified": "2017-08-01T16:12:48+00:00",
"_acl": {
"fields": {}
},
"_module": "Users",
"_link": "users"
},
{
"id": "fe1740e6-3fa4-11e7-8fef-00163ef1f82f",
"date_modified": "2017-05-23T10:45:52+00:00",
"_acl": {
"fields": {}
},
"_module": "Contacts",
"_link": "contacts"
},
{
"id": "dcc526fc-72b1-11e7-a6dd-00163ef1f82f",
"date_modified": "2017-07-27T09:57:21+00:00",
"_acl": {
"fields": {}
},
"_module": "Leads",
"_link": "leads"
},
{
"id": "89f8a6d1-7df0-0e0b-3568-58a5bb6ecf34",
"date_modified": "2017-04-06T10:36:16+00:00",
"_acl": {
"fields": {}
},
"_module": "Leads",
"_link": "leads"
}
],
"next_offset": {
"contacts": -1,
"leads": -1,
"users": -1
}
},
"_acl": {
"fields": {}
},
"contact_name": "test",
"_module": "Calls"
}

Python :: nested JSON result in Spotify

I'm having a really hard time to get a track id in Spotify search endpoint.
It is deeply nested.
So, if I do this:
results = sp.search(q='artist:' + 'Nirvava + ' track:' + 'Milk it', type='track')
pprint.pprint(results)
I am able to get:
{u'tracks': {u'href': u'https://api.spotify.com/v1/search?query=artist%3ANirvana+track%3AMilk+it&type=track&offset=0&limit=10',
u'items': [{u'album': {u'album_type': u'album',
u'artists': [{u'external_urls': {u'spotify': u'https://open.spotify.com/artist/6olE6TJLqED3rqDCT0FyPh'},
u'href': u'https://api.spotify.com/v1/artists/6olE6TJLqED3rqDCT0FyPh',
u'id': u'6olE6TJLqED3rqDCT0FyPh',
u'name': u'Nirvana',
u'type': u'artist',
u'uri': u'spotify:artist:6olE6TJLqED3rqDCT0FyPh'}],
u'available_markets': [u'CA',
u'MX',
u'US'],
u'external_urls': {u'spotify': u'https://open.spotify.com/album/7wOOA7l306K8HfBKfPoafr'},
u'href': u'https://api.spotify.com/v1/albums/7wOOA7l306K8HfBKfPoafr',
u'id': u'7wOOA7l306K8HfBKfPoafr',
u'images': [{u'height': 640,
u'url': u'https://i.scdn.co/image/3dd2699f0fcf661c35d45745313b64e50f63f91f',
u'width': 640},
{u'height': 300,
u'url': u'https://i.scdn.co/image/a6c604a82d274e4728a8660603ef31ea35e9e1bd',
u'width': 300},
{u'height': 64,
u'url': u'https://i.scdn.co/image/f52728b0ecf5b6bfc998dfd0f6e5b6b5cdfe73f1',
u'width': 64}],
u'name': u'In Utero - 20th Anniversary Remaster',
u'type': u'album',
u'uri': u'spotify:album:7wOOA7l306K8HfBKfPoafr'},
u'artists': [{u'external_urls': {u'spotify': u'https://open.spotify.com/artist/6olE6TJLqED3rqDCT0FyPh'},
u'href': u'https://api.spotify.com/v1/artists/6olE6TJLqED3rqDCT0FyPh',
u'id': u'6olE6TJLqED3rqDCT0FyPh',
u'name': u'Nirvana',
u'type': u'artist',
u'uri': u'spotify:artist:6olE6TJLqED3rqDCT0FyPh'}],
u'available_markets': [u'CA', u'MX', u'US'],
u'disc_number': 1,
u'duration_ms': 234746,
u'explicit': False,
u'external_ids': {u'isrc': u'USGF19960708'},
u'external_urls': {u'spotify': u'https://open.spotify.com/track/4rtZtLpriBscg7zta3TZxp'},
u'href': u'https://api.spotify.com/v1/tracks/4rtZtLpriBscg7zta3TZxp',
u'id': u'4rtZtLpriBscg7zta3TZxp',
u'name': u'Milk It',
u'popularity': 43,
u'preview_url': None,
u'track_number': 8,
u'type': u'track',
-----> u'uri':u'spotify:track:4rtZtLpriBscg7zta3TZxp'},
QUESTION:
now, how do I fetch the last 'uri' (u'uri': u'spotify:track:4rtZtLpriBscg7zta3TZxp'}, under the name 'Milk It'?
>>> print results['tracks']['items'][0]['uri']
spotify:track:4rtZtLpriBscg7zta3TZxp

Python value error json

I am getting the following error for the following code and I am not able to solve the error. The error occures after the loop has created some files correctly.
I am using the following code:
import urllib
import json
symbolslist = open("C:/Users/Asus/Desktop/Python/web scraper/scraping/symbols.txt").read()
symbolslist = [s for s in symbolslist.split("\n") if s]
for symbol in symbolslist:
myfile = open("C:/Users/Asus/Desktop/Python/web scraper/scraping/year_data/" + symbol +".txt" , "w+")
myfile.close()
htmltext = urllib.urlopen("http://www.bloomberg.com/markets/api/bulk-time-series/price/"+symbol+"%3AUS?timeFrame=1_YEAR")
data = json.load(htmltext)
try:
datapoints = data[0]["price"]
except KeyError:
print 'Unknown symbol: {0}'.format(symbol)
continue
myfile = open("C:/Users/Asus/Desktop/Python/web scraper/scraping/year_data/" + symbol +".txt" , "a")
for point in datapoints:
myfile.write(str(symbol +","+ str(point['date']) +","+ str(point['value'])+"\n"))
myfile.close()
A symbol is for example AAPL (for Apple) and this is the link to bloomberg data for Apple:
http://www.bloomberg.com/markets/api/bulk-time-series/price/AAPL%3AUS?timeFrame=1_YEAR
This is the data:
[{u'lastPrice': 10.18, u'price': [{u'date': u'2014-12-26', u'value': 10.01}, {u'date': u'2014-12-30', u'value': 10.01}, {u'date': u'2015-01-07', u'value': 10}, {u'date': u'2015-01-09', u'value': 9.97}, {u'date': u'2015-01-12', u'value': 10}, {u'date': u'2015-01-13', u'value': 10.02}, {u'date': u'2015-01-14', u'value': 10.04}, {u'date': u'2015-01-16', u'value': 10.1}, {u'date': u'2015-01-20', u'value': 10.39}, {u'date': u'2015-01-21', u'value': 9.98}, {u'date': u'2015-01-22', u'value': 9.98}, {u'date': u'2015-01-23', u'value': 9.98}, {u'date': u'2015-01-26', u'value': 10.08}, {u'date': u'2015-01-27', u'value': 10.05}, {u'date': u'2015-01-29', u'value': 10.02}, {u'date': u'2015-02-02', u'value': 10.08}, {u'date': u'2015-02-06', u'value': 9.98}, {u'date': u'2015-02-09', u'value': 9.97}, {u'date': u'2015-03-06', u'value': 10.07}, {u'date': u'2015-03-10', u'value': 10.1}, {u'date': u'2015-03-25', u'value': 10.3}, {u'date': u'2015-03-26', u'value': 10.01}, {u'date': u'2015-03-27', u'value': 10.02}, {u'date': u'2015-03-30', u'value': 10.02}, {u'date': u'2015-03-31', u'value': 10.02}, {u'date': u'2015-04-01', u'value': 10.02}, {u'date': u'2015-04-02', u'value': 10.19}, {u'date': u'2015-04-10', u'value': 10.3}, {u'date': u'2015-04-17', u'value': 10.4}, {u'date': u'2015-05-01', u'value': 10.39}, {u'date': u'2015-05-07', u'value': 10.35}, {u'date': u'2015-05-08', u'value': 10.35}, {u'date': u'2015-05-14', u'value': 10.28}, {u'date': u'2015-06-10', u'value': 10.15}, {u'date': u'2015-06-16', u'value': 10.34}, {u'date': u'2015-06-25', u'value': 10.3}, {u'date': u'2015-06-29', u'value': 10.27}, {u'date': u'2015-07-07', u'value': 10.3599}, {u'date': u'2015-07-08', u'value': 10.36}, {u'date': u'2015-07-15', u'value': 10.37}, {u'date': u'2015-07-30', u'value': 10.27}, {u'date': u'2015-08-13', u'value': 10.1}, {u'date': u'2015-08-14', u'value': 10.1}, {u'date': u'2015-08-24', u'value': 10.14}, {u'date': u'2015-08-25', u'value': 10.14}, {u'date': u'2015-08-27', u'value': 10.17}, {u'date': u'2015-08-31', u'value': 10.2}, {u'date': u'2015-09-01', u'value': 10.25}, {u'date': u'2015-09-03', u'value': 10.24}, {u'date': u'2015-09-04', u'value': 10.35}, {u'date': u'2015-09-15', u'value': 10.2401}, {u'date': u'2015-09-21', u'value': 10.22}, {u'date': u'2015-09-22', u'value': 10.22}, {u'date': u'2015-09-28', u'value': 10.2933}, {u'date': u'2015-10-02', u'value': 10.4}, {u'date': u'2015-10-06', u'value': 10.44}, {u'date': u'2015-10-12', u'value': 10.22}, {u'date': u'2015-10-13', u'value': 10.21}, {u'date': u'2015-10-22', u'value': 10.25}, {u'date': u'2015-10-28', u'value': 10.15}, {u'date': u'2015-11-04', u'value': 10.15}, {u'date': u'2015-12-09', u'value': 10}, {u'date': u'2015-12-16', u'value': 10.18}], u'priceMinDecimals': 2, u'nyTradeEndTime': u'16:30:00.000', u'lastUpdateDate': u'2015-12-16', u'nyTradeStartTime': u'09:30:00.000', u'id': u'TFSCU:US', u'timeZoneOffset': -5, u'dateTimeRanges': {}}]
The problem I found is that there exists no data for some symbols, that means the data shown on the URL is: {u'securityType': u'UNKNOWN'}. This occures for example for following symbols: AGNCB, AGNCP, MTGEP, ASRVP, AMSGP, AGIIL, ARWAU, BANFP, BKEPP, BPFHP, BBEPP.
The Problem still is how I can exclude missing data, without interrupting the loop? And why do I get an ValueError after the loop correctly has created 440 files?
The current Error:
Traceback (most recent call last):
File "<pyshell#27>", line 1, in <module>
execfile("C:\Users\Asus\Desktop\Python\web scraper\scraping\stock_to_file.py")
File "C:\Users\Asus\Desktop\Python\web scraper\scraping\stock_to_file.py", line 11, in <module>
data = json.load(htmltext)
File "C:\Python27\lib\json\__init__.py", line 291, in load
**kw)
File "C:\Python27\lib\json\__init__.py", line 339, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 364, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 382, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
According to your comment, the response you're getting from Bloomberg is:
{u'securityType': u'UNKNOWN'}
Bloomberg will return this if it doesn't recognize the symbol you're requesting. If you want to identify the bad symbols and continue with the script for the good ones, you can do one of:
1. Detect the error response, take any desired action, and move to the next (LBYL)
data = json.load(htmltext)
if u'securityType' in data and data[u'securityType'] == u'UNKNOWN':
print 'Unknown symbol: {0}'.format(symbol)
continue
2. Go ahead with the processing and capture the exception (EAFP)
try:
datapoints = data[0]["price"]
except KeyError:
print 'Unknown symbol: {0}'.format(symbol)
continue
It's a stylistic choice. In either case, the important thing is to continue to go to the next symbol once you detect the bad response.

Categories

Resources