KeyErrors while reading Twitter json files in Python - python

I am trying to analyze a json file with data I have collected from twitter, but when I try to search for a keyword it says it is not found, but I can see it is there. I tried this two different ways. I'll post them below. Any advice would be great.
Attempt #1:
import sys
import os
import numpy as np
import scipy
import matplotlib.pyplot as plt
import json
import pandas as pan
tweets_file = open('twitter_data.txt', "r")
for line in tweets_file:
try:
tweet = json.loads(line)
tweets_data.append(tweet)
except:
continue
tweets = pan.DataFrame()
tweets['text'] = map(lambda tweet: tweet['text'], tweets_data)
Attempt #2: Same previous steps, but did a loop instead
t=tweets[0]
tweet_text = [t['text'] for t in tweets]
Error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "<stdin>", line 1, in <lambda>
KeyError: 'text'
If I print tweets_data, this I is what I see. 'text',etc, is definitely, there. Am I missing a character?
>>> print(tweet_data[0])
{u'contributors': None, u'truncated': False, u'text': u'RT
#iHippieVibes: \u2b50\ufe0fFAV For This Lace Cardigan \n\nUSE Discount
code for 10% off: SOLO\n\nFree Shipping\n\nhttp://t.co/d8kiIt3J5f
http://t.c\u2026', u'in_reply_to_status....
(pasted only part of the output)
Thanks! Any suggestions would be greatly appreciated.

Not all your tweets have a 'text' key. Filter those out or use dict.get() to return a default:
tweet_text = [t['text'] for t in tweets if 'text' in t]
or
tweet_text = [t.get('text', '') for t in tweets]

Related

Error while writing API result to another JSON file

I am working on AZURE Cognitive API Search. While getting the result from API, I want to write it into a new JSON File. I tried to access the analyse_result variable with the line but it does not work. It shows that the object is not JSON Serializable. My code is-
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
from array import array
import os
from PIL import Image
import sys
import time
import json
import csv
subscription_key = ""
endpoint = ""
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
def azure_ocr_api(): #image_url
local_image_url = r"E:\Bank of Baroda\BOB IMAGE\Cheque309086.jpeg"
# read_response = computervision_client.read_in_stream(open("./Images/" + image_url,'rb'), raw=True)
read_response = computervision_client.read_in_stream(open(local_image_url,'rb'), raw=True)
# Get the operation location (URL with an ID at the end) from the response
read_operation_location = read_response.headers["Operation-Location"]
# Grab the ID from the URL
operation_id = read_operation_location.split("/")[-1]
# Call the "GET" API and wait for it to retrieve the results
while True:
read_result = computervision_client.get_read_result(operation_id)
if read_result.status not in ['notStarted', 'running']:
break
time.sleep(1)
list = []
if read_result.status == OperationStatusCodes.succeeded:
for text_result in read_result.analyze_result.read_results:
for line in text_result.lines:
with open('data.json', 'w', encoding='utf-8') as f:
json.dump(line, f, ensure_ascii=False, indent=4)
# print(list)
# pass
# return list
azure_ocr_api()
print("End of Computer Vision quickstart.")
The code shows a error like this -
Traceback (most recent call last):
File "e:\Bank of Baroda\m.py", line 44, in <module>
azure_ocr_api()
File "e:\Bank of Baroda\m.py", line 40, in azure_ocr_api
json.dump(line, f, ensure_ascii=False, indent=4)
File "C:\Users\Clasher\anaconda3\lib\json\__init__.py", line 179, in dump
for chunk in iterable:
File "C:\Users\Clasher\anaconda3\lib\json\encoder.py", line 438, in _iterencode
o = _default(o)
File "C:\Users\Clasher\anaconda3\lib\json\encoder.py", line 179, in default
TypeError: Object of type Line is not JSON serializable
Please help.

I don't understan why checksum error occurs

I found on Internet pynmea2 library, that used the parse(data, check=False) function, which takes a string containing a NMEA 0183 sentence and returns a NMEASentence object.
I try to write some easy (very easy) code to understand functioning:
import pynmea2
def main():
f = open("file.nmea", "r")
for line in f.readlines():
msg = pynmea2.parse(line)
print(str(msg))
So, I read sentences from a file and passed them to parse function, but an error raise:
Traceback (most recent call last):
File "/home/maestrutti15/PycharmProjects/prova/main.py", line 13, in <module>
main()
File "/home/maestrutti15/PycharmProjects/prova/main.py", line 9, in main
msg = pynmea2.parse(str(line))
File "/home/maestrutti15/PycharmProjects/prova/venv/lib/python3.8/site-packages/pynmea2/nmea.py", line 115, in parse
raise ChecksumError(
pynmea2.nmea.ChecksumError: ('checksum does not match: 17 != 3B', ['121626.10', 'A', '4608.25657', 'N', '01313.38859', 'E', '0.071', '270421', 'A', 'V'])
Can anyone tell me why this errors appears? I don't understand... if I write
msg = pynmea2.parse("$GNRMC,121626.15, ..)
in this way, it prints the result.
Thank you!

I'm getting an TypeError when trying to use read(\n) in a python script when I'm pulling data from a file

I think I'm missing something super basic here, so proper heckling is okay... I'm using a custom lib here and just trying to get this script to open a file to read out a list formatted like so:
test12.someother
test78.someother
test1014.othersome
test101.someotherother
However, when I do a line-by-line execute, I get this error
Traceback (most recent call last):
File "delete_custom_device.py", line 11, in
deletelist = [x for x in inf.read('\n') if x]
TypeError: argument should be integer or None, not 'str'
import getpass
import customlib
username = getpass.getuser()
password = getpass.getpass()
cdevice = customlib.Session(username, password)
with open('deletefromcustom_test') as inf:
deletelist = [x for x in inf.read() if x]
errorlist = []
for element in deletelist:
try:
custom.deleteDeviceByName(element)
except:
errorlist.append(element)
print('These devices that had issues:')
for x in errorlist:
print(x)
Any suggestions? Thanks in advance!

ValueError with NLTK

Using NLTK, I'm trying to print a line of text if the last word of the line has an "NN" POS tag, but I'm getting: "ValueError: too many values to unpack" on the following code. Any ideas why? Thanks in advance.
import nltk
from nltk.tokenize import word_tokenize
def end_of_line():
filename = raw_input("Please enter a text file.> ")
with open(filename) as f:
for line in f:
linewords = nltk.tokenize.word_tokenize(line)
lw_tagged = nltk.tag.pos_tag(linewords)
last_lw_tagged = lw_tagged.pop()
for (word, tag) in last_lw_tagged:
if tag == "NN":
print line
end_of_line()
Traceback (most recent call last):
File "/private/var/folders/ly/n5ph6rcx47q8zz_j4pcj3b880000gn/T/Cleanup At Startup/endofline-477697124.590.py", line 15, in <module>
end_of_line()
File "/private/var/folders/ly/n5ph6rcx47q8zz_j4pcj3b880000gn/T/Cleanup At Startup/endofline-477697124.590.py", line 11, in end_of_line
for (word, tag) in last_lw_tagged:
ValueError: too many values to unpack
logout
Instead of this:
for (word, tag) in last_lw_tagged:
if tag == "NN":
Do this:
if last_lw_tagged[1] == "NN:

extract tweets from a text file (python)

Sorry, I am just trying to store 'id_str' from each tweet to a new list called ids[]..
but getting the following error:
Traceback (most recent call last):
File "extract_tweet.py", line 17, in
print tweet['id_str']
KeyError: 'id_str'
My code is:
import json
import sys
if __name__ == '__main__':
tweets = []
for line in open (sys.argv[1]):
try:
tweets.append(json.loads(line))
except:
pass
ids = []
for tweet in tweets:
ids.append(tweet['id_str'])
The json data from tweets are sometimes missing fields. Try something like this,
ids = []
for tweet in tweets:
if 'id_str' in tweet:
ids.append(tweet['id_str'])
or equivalently,
ids = [tweet['id_str'] for tweet in tweets if 'id_str' in tweet]
import json
tweets = []
tweets.append(
json.loads('{"a": 1}')
)
tweet = tweets[0]
print(tweet)
print( tweet['id_str'] )
--output:--
{'a': 1}
Traceback (most recent call last):
File "1.py", line 9, in <module>
print( tweet['id_str'] )
KeyError: 'id_str'
And:
my_dict = {u"id_str": 1}
print my_dict["id_str"]
--output:--
1

Categories

Resources