Collecting data from instagram using json - python

I was working on a project that requires data from instagram, i am collecting it using the json.
When i use the test link: http://vocab.nic.in/rest.php/states/json - it works, but when i use: https://www.instagram.com/instagram/?__a=1 (also json) - it didn't work.
UnicodeEncodeError: 'charmap' codec can't encode characters in position 196-197: character maps to
import urllib.request
import json
def getResponse(url):
operUrl = urllib.request.urlopen(url)
if(operUrl.getcode()==200):
data = operUrl.read()
jsonData = json.loads(data)
else:
print("Error receiving data", operUrl.getcode())
return jsonData
def main():
urlData = "https://www.instagram.com/instagram/?__a=1"
jsonData = getResponse(urlData)
# print the state id and state name corresponding
print(jsonData)
if __name__ == '__main__':
main()
Full Traceback:
Traceback (most recent call last):
File "C:\Users\andrey_user\Desktop\ig\test.py", line 21, in [module] main()
File "C:\Users\andrey_user\Desktop\ig\test.py", line 18, in main print(jsonData)
File "D:\programs\programing\Python\lib\encodings\cp1251.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 196-197: character maps to [undefined]

Related

UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-3: character maps to

I'm having error but my output will be save perfectly. How?
this is the output in terminal......
Traceback (most recent call last):
File "C:\Users\Ankit\Desktop\PYTHON Project\Project App Monitoring\test1.py", line 19, in <module>
file.write(item + "\n")
File "C:\Users\Ankit\AppData\Local\Programs\Python\Python310\lib\encodings\cp1252.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode characters in position 0-3: character maps to <undefined>
Note that: my output file that i expect to save is properly fine.
this is the program......
import winreg
# Connecting to the registry key
access_registry = winreg.ConnectRegistry(None,winreg.HKEY_LOCAL_MACHINE)
access_key = winreg.OpenKey(access_registry,r"SOFTWARE\Microsoft\Windows\CurrentVersion\Uninstall")
# Enumerating all values under the key
output = []
for n in range(winreg.QueryInfoKey(access_key)[0-3]):
try:
value = winreg.EnumKey(access_key, n)
output.append(value)
except:
break
# Writing the output to a file
with open("output.txt", "w") as file:
for item in output:
file.write(item + "\n")
# Closing the connection
winreg.CloseKey(access_key)
winreg.CloseKey(access_registry)

json.loads() gives UnicodeEncodeError when parsing JSON object recived from node.js

I am trying to send some json object from my node.js server to a python script. However when trying to convert the json object to dictionary using json.loads, for many inputs there are UnicodeEncodeErrors. WHat do I need to do in order to correctly decode the js object.
Error: UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 2062: character maps to <undefined>
at PythonShell.parseError (D:\Users\Temp\Desktop\empman\node_modules\python-shell\index.js:183:17)
at terminateIfNeeded (D:\Users\Temp\Desktop\empman\node_modules\python-shell\index.js:98:28)
at ChildProcess.<anonymous> (D:\Users\Temp\Desktop\empman\node_modules\python-shell\index.js:88:9)
at emitTwo (events.js:106:13)
at ChildProcess.emit (events.js:191:7)
at Process.ChildProcess._handle.onexit (internal/child_process.js:219:12)
at Process.onexit (D:\Users\Temp\Desktop\empman\node_modules\async-listener\glue.js:188:31)
----- Python Traceback -----
File "word.py", line 38, in <module>
json_data=open('data.txt').read()
File "D:\Users\Temp\AppData\Local\Programs\Python\Python36-32\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
corresponding python code
from docx import Document
from docx.shared import Inches
import sys
import io
import json
document = Document('template.docx')
# newdocument = Document('resume.docx')
# print(sys.argv) # Note the first argument is always the script filename.
resumearray = [];
for x in range(0, 21):
resumearray.append(input())
#json_data=open('data.txt').read()
f = io.open('data','r', encoding='utf-16-le')
# #datastore = json.loads(f.read)
print(f.read())
# text = f.read()
# json_data = text
# document.add_paragraph('_______________________________________________________________________')
#document.add_paragraph(resumearray[1])
k=resumearray[1]
#document.add_paragraph(k)
jsobject = json.loads(k)
document.add_paragraph('_______________________________________________')
#document.add_paragraph(jsobject.values())
for x in range(0, 9):
if resumearray[x]=='[]':
document.add_paragraph('nothing was found')
else:
document.add_paragraph(resumearray[x])
You are running python on Windows, where the default encoding is cp1252. The json is encoded as utf-8, hence the error.
>>> with open('blob.json', encoding='cp1252') as f:
... j = json.load(f)
...
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "/usr/local/lib/python3.6/json/__init__.py", line 296, in load
return loads(fp.read(),
File "/usr/local/lib/python3.6/encodings/cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 2795: character maps to <undefined>
Use utf-8 instead:
>>> with open('blob.json', encoding='utf-8') as f:
... j = json.load(f)
...
>>> print(len(j))
29

python encoding issue, searching tweets

I have written the following code to crawl tweets with 'utf-8' encoding:
kws=[]
f=codecs.open("keywords", encoding='utf-8')
kws = f.readlines()
f.close()
print kws
for kw in kws:
timeline_endpoint ='https://api.twitter.com/1.1/search/tweets.json?q='+kw+'&count=100&lang=fr'
print timeline_endpoint
response, data = client.request(timeline_endpoint)
tweets = json.loads(data)
for tweet in tweets['statuses']:
my_on_data(json.dumps(tweet.encode('utf-8')))
time.sleep(3)
but I am getting the following error:
response, data = client.request(timeline_endpoint)
File "build/bdist.linux-x86_64/egg/oauth2/__init__.py", line 676, in request
File "build/bdist.linux-x86_64/egg/oauth2/__init__.py", line 440, in to_url
File "/usr/lib/python2.7/urllib.py", line 1357, in urlencode
l.append(k + '=' + quote_plus(str(elt)))
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe9' in position 1: ordinal not in range(128)
I would appreciate any help.
Okay, here is the solution using a different search approach:
auth = tweepy.OAuthHandler("k1", "k2")
auth.set_access_token("k3", "k4")
api = tweepy.API(auth)
for kw in kws:
max_tweets = 10
searched_tweets = [status for status in tweepy.Cursor(api.search, q=kw.encode('utf-8')).items(max_tweets)]
for tweet in searched_tweets:
my_on_data(json.dumps(tweet._json))
time.sleep(3)

UnicodeDecodeError in bs4 Python 3.4

I have a part of code
from bs4 import BeautifulSoup
for i in range(1,10):
print(str(i))
soup = BeautifulSoup(open("downloads/" + str(i) + ".html","rt"), 'html.parser')
text1 = soup.find_all("div", class_="content html_format")
text1 = text1[0].get_text()
print(text1)
After executing I get an error:
Traceback (most recent call last):
File "classifier1.py", line 6, in <module>
soup = BeautifulSoup(open("downloads/" + str(i) + ".html","rt"), 'html.parser')
File "C:\Users\K18\Desktop\redictgames\classifier\bs4\__init__.py", line 175, in __init__
markup = markup.read()
File "C:\python34\lib\encodings\cp1251.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x98
in position 11152: character maps to <undefined>
I was tried 'rb' and 'r' modes too, but this doesn't work...
In the files I have articles with Russian words.
Few days ago that was working perfectly

python : unicodeEncodeError: 'charpmap' codec can't encode character '\u2026'

I try to analyse some tweets I got from tweeter, but It seems I have a probleme of encoding, if you have any idea..
import json
#Next we will read the data in into an array that we call tweets.
tweets_data_path = 'C:/Python34/TESTS/twitter_data.txt'
tweets_data = []
tweets_file = open(tweets_data_path, "r")
for line in tweets_file:
try:
tweet = json.loads(line)
tweets_data.append(tweet)
except:
continue
print(len(tweets_data))#412 tweets
print(tweet)
I got the mistake :
File "C:\Python34\lib\encodings\cp850.py", line 19, in encode return codecs.charmap_encode(input,self.errors,encoding_map)[0]
unicodeEncodeError: 'charpmap' codec can't encode character '\u2026' in position 1345: character maps to undefined
At work, I didn't get the error, but I have python 3.3, does it make a difference, do you think ?
-----EDIT
The comment from #MarkRamson answered my question
You should specify the encoding when opening the file:
tweets_file = open(tweets_data_path, "r", encoding="utf-8-sig")

Categories

Resources