I keep getting an error when crawling data from a certain forum - python

I want to get information from a forum, Dcard.
I have also built a function to arrange the data I receive.
import json
import pandas as pd
import requests
from requests_html import HTML
import re
import openpyxl
def Crawl(ID):
    """Fetch one Dcard post by ID and return it as a one-row DataFrame.

    Args:
        ID: Post identifier; it is converted with ``str`` and appended to
            the ``https://www.dcard.tw/_api/posts/`` endpoint.

    Returns:
        pandas.DataFrame: A single row whose columns are ``ID`` followed by
        the names in ``fields`` below, in that order.

    Raises:
        JSONDecodeError: If the response body is not JSON (raised by
            ``requ.json()``; presumably this happens when ``ID`` is not a
            valid post ID -- confirm against the API).
        KeyError: If the decoded JSON lacks one of the expected keys.
    """
    link = 'https://www.dcard.tw/_api/posts/' + str(ID)
    requ = requests.get(link)
    rejs = requ.json()
    # One list drives both the keys read from the response and the column
    # names, so the two cannot drift apart. Mismatched spellings (for example
    # 'updateAt' for 'updatedAt', or 'reactionns' for 'reactions') would make
    # pandas drop the real value and emit an empty NaN column instead.
    fields = [
        'title',
        'content',
        'excerpt',
        'createdAt',
        'updatedAt',
        'commentCount',
        'forumName',
        'forumAlias',
        'gender',
        'likeCount',
        'reactions',
        'topics',
    ]
    # The API calls the identifier 'id'; the output column is named 'ID'.
    row = {'ID': rejs['id']}
    for name in fields:
        row[name] = rejs[name]
    return pd.DataFrame(data=[row], columns=['ID'] + fields)
# Driver script: fetch a Dcard search-results page, take a post ID from the
# link inside each <h2>, and print the DataFrame that Crawl() builds for it.
# NOTE(review): the indentation of the loop body (the last two lines) was lost in this paste.
# NOTE(review): BeautifulSoup is used below but no import for it is visible here -- presumably `from bs4 import BeautifulSoup`; confirm.
# Output file handle; nothing in the visible code writes to it or closes it.
test3=open('D:/Quinna/爬蟲/test4.txt','w',encoding='UTF-8')
# Session object; the visible code never uses it (the request below goes through requests.get).
p=requests.Session()
# Search page for a URL-encoded query, restricted to forum=relationship.
url=requests.get("https://www.dcard.tw/search?query=%E5%88%86%E6%89%8B&forum=relationship")
soup=BeautifulSoup(url.text,"html.parser")
# Collect every <h2> element; each one is expected to contain an <a> whose href points at a post.
a_tags=soup.find_all('h2')
for tag in a_tags:
# Characters 18..26 of the href are taken as the post ID. A fixed slice is only
# right if every href has the same prefix length and a 9-character ID -- TODO confirm.
a=tag.select_one('a').get('href')[18:27] # extract the post ID from the link's href
print(Crawl(a))
However, whenever I run it, the error message below comes up.
Is the JSON not in a valid format? How can I fix it?
Traceback (most recent call last):
File "D:\Quinna\爬蟲\test4.py", line 38, in <module>
print(Crawl(a))
File "D:\Quinna\爬蟲\test4.py", line 12, in Crawl
rejs=requ.json()
File "C:\Users\user\AppData\Roaming\Python\Python38\site-packages\requests\models.py", line 900, in json
return complexjson.loads(self.text, **kwargs)
File "C:\Program Files (x86)\Python38-32\lib\json\__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "C:\Program Files (x86)\Python38-32\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Program Files (x86)\Python38-32\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

Try this. You aren't extracting the IDs correctly: a fixed slice of the href does not reliably give you the post ID.
Replace this line:
a = tag.select_one('a').get('href')[18:27]
with this line:
a = tag.select_one('a').get('href').split('/')[-1]

Related

How fix this error on python (selenium) - json.decoder.JSONDecodeError:

Iniciado!
[WDM] - Downloading: 19.0kB [00:00, 19.5MB/s]
Traceback (most recent call last):
File "main.py", line 58, in
FIREFOX(login)
File "main.py", line 26, in FIREFOX
driver = webdriver.Firefox(executable_path=GeckoDriverManager().install())
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\site-packages\webdriver_manager\firefox.py", line 37, in install
driver_path = self.get_driver_path(self.driver)
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\site-packages\webdriver_manager\core\manager.py", line 26, in get_driver_path
binary_path = self.driver_cache.find_driver(driver)
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\site-packages\webdriver_manager\core\driver_cache.py", line 101, in find_driver
metadata = self.get_metadata()
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\site-packages\webdriver_manager\core\driver_cache.py", line 135, in get_metadata
return json.load(outfile)
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\json_init.py", line 293, in load
return loads(fp.read(),
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\json_init.py", line 357, in loads
return _default_decoder.decode(s)
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\moonl\AppData\Local\Programs\Python\Python38\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
enter image description here
I am trying to start a Selenium program, but I get this error.
It's not very clear, but this seems to be a library error. We need to see the part of your code that deals with JSON.
Check whether you have:
import json
If you are trying to read a text file as JSON, try:
with open('/xxxx/xxxxx/xxxx.xxx') as jsonfile:
data = json.load(jsonfile)
Another Stack Overflow topic may help you; please check the answer in:
Why am I getting the error: "JSONDecodeError: Expecting value: line 1 column 1 (char 0)" after iteration 28?
PS: One suggestion: wrap sections of your code in try/except blocks; this helps you see where in the code the error occurs. One example is:
try
if xxx.......
............
Finnaly
print some error

Reading in JSON file in Python

I keep having issues reading in a JSON file. I've tried everything from changing the cwd to specifying an absolute path... but nothing seems to work. Here is the current code:
from dataclasses import dataclass
import json
import os
# Switch to the project folder and list its contents, to confirm which files are present there.
os.chdir('C:/Users/NEHRU/Desktop/New Project')
cwd = os.getcwd() # Get the current working directory (cwd)
files = os.listdir(cwd) # Get all the files in that directory
print("Files in %r: %s" % (cwd, files))
# Open identities.json by absolute path and parse it as JSON.
# json.load raises JSONDecodeError when the file's content is not valid JSON.
# NOTE(review): the indentation of the `with` body was lost in this paste, so it is
# unclear whether the final print sits inside or after the `with` block.
with open('C:\\Users\\NEHRU\\Desktop\\New Project\\identities.json') as f:
data = json.load(f)
print(data)
And this is the output I keep getting:
PS C:\Users\NEHRU> & C:/Users/NEHRU/AppData/Local/Microsoft/WindowsApps/python3.10.exe "c:/Users/NEHRU/Desktop/New Project/import json.py"
Files in 'C:\\Users\\NEHRU\\Desktop\\New Project': ['collection.json', 'identities.json', 'import json.py', 'Project Instructions']
Traceback (most recent call last):
File "c:\Users\NEHRU\Desktop\New Project\import json.py", line 12, in <module>
data = json.load(f)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.1520.0_x64__qbz5n2kfra8p0\lib\json\__init__.py", line 293, in load
return loads(fp.read(),
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.1520.0_x64__qbz5n2kfra8p0\lib\json\__init__.py", line 346, in loads
return _default_decoder.decode(s)
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.1520.0_x64__qbz5n2kfra8p0\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.10_3.10.1520.0_x64__qbz5n2kfra8p0\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 27 column 3 (char 238)
PS C:\Users\NEHRU>

Rospy JSONDecodeError when loading from file with json.load()

I'm trying to store the data published on a topic to a JSON file, but I keep getting a JSONDecodeError.
# Load the JSON value stored in DB, append `data` to it, and dump the result to the same file handle.
# `data` is defined outside this snippet.
DB = '/home/path/data.json'
# NOTE(review): mode 'w+' truncates the file when it is opened, so the json.load
# call below would be given empty content -- confirm the intended mode.
f = open(DB, 'w+')
json_array = json.load(fp=f)
json_array.append(data)
# Written through the same handle, at whatever position the read above left it.
json.dump(json_array, f)
f.close()
The open() command successfully creates the file. But loading fails. I have tried running it with the file contents: [] and {}, both gave the same Exception:
[ERROR] [1637699609.562673]: bad callback: <function callback at 0x7fe36196b1f0>
Traceback (most recent call last):
File "/opt/ros/noetic/lib/python3/dist-packages/rospy/topics.py", line 750, in _invoke_callback
cb(msg)
File "/home/path/scripts/extract_info_node.py", line 43, in callback
json_array = json.load(fp=f)
File "/usr/lib/python3.8/json/__init__.py", line 293, in load
return loads(fp.read(),
File "/usr/lib/python3.8/json/__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "/usr/lib/python3.8/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python3.8/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
It's not really descriptive and I have nothing to go on. Is there something about the way subscribers are run that doesn't allow writing? Can you even do this from a callback function? I have just started working in ROS so it could be something simple or obvious to someone more experienced.
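json.load() takes a file object that supports .read(). Mode 'w+' does allow reading, but it also truncates the file as soon as it is opened, so there is nothing left for json.load() to parse. Open the file with 'r+' instead: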
f = open(DB, 'r+')
json_array = json.load(f)
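Note that after json.load() the file position is at the end of the file, so call f.seek(0) and f.truncate() before json.dump() to overwrite the old contents rather than append to them. As another note, if you're storing and re-reading topic data, I'd suggest looking into using rosbag. This is, however, dependent on your actual application.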

How can I open a json file from another folder?

I am trying to make a code in python that imports a JSON file from a folder into my program in order for it to access the data inside. However, I am facing errors
global bees
with open('data/bees.json') as f:
bees = json.load(f)["bees"]
Where in the data/bees.json I have this:
{
"bees": []
}
The error I get
Traceback (most recent call last):
File "d:/Entertainment/Coding/Python/Pygame/BUG WORLD/main.py", line 70, in <module>
bees = json.load(f)
File "C:\Users\ernes\AppData\Local\Programs\Python\Python37-32\lib\json\__init__.py", line 296, in load
parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)
File "C:\Users\ernes\AppData\Local\Programs\Python\Python37-32\lib\json\__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "C:\Users\ernes\AppData\Local\Programs\Python\Python37-32\lib\json\decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Users\ernes\AppData\Local\Programs\Python\Python37-32\lib\json\decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Is there a way I am able to import JSON files from another folder without causing any errors?? Please help
Looks like you're on Windows, so:
with open(r'c:\path\to\file\bees.json') as json_file:
bees_js = json.load(json_file)
The relative path should be open("data/bees.json"), without a leading /; starting a path with / makes it an absolute path from the root.
You might want to try the following:
import json
with open('path_to_file/bees.json') as json_file:
bees_js = json.load(json_file)

Invalid Control Character Error in Python 2.7.9

I am receiving the following error when running the below python script.
ValueError: Invalid control character at: line 7591 column 220620 (char 385678)
I did some research on this and it appeared that it would be resolved by passing 'strict=false' within json.dumps(), but I'm still receiving the same error. This is the only REST service that I have attempted to query that returns this error.
import arcgis
import json
from arcgis import ArcGIS
# Query the DeKalb County LandUse map service and write the result to a .geojson file.
service = ArcGIS("http://mapping.dekalbcountyga.gov/arcgis/rest/services/LandUse/MapServer")
# Fetch layer 0. Per the traceback in this post, the ValueError is raised inside
# this call (while the library decodes the HTTP response), before json.dumps runs.
query = service.get(0, count_only=False)
# NOTE(review): `strict` is a decoding option (json.loads / JSONDecoder), not a documented
# json.dumps parameter -- confirm whether passing it here has any effect.
json_query = json.dumps(query, strict=False)
# Write the serialized result to disk; the handle is closed explicitly below.
f = open("dekalb_parcels.geojson", "w")
f.write(json_query)
f.close()
Any help that can be provided would be very appreciated. Thank you.
UPDATE - This is the full error that I am receiving.
Traceback (most recent call last):
File "G:\Python\Scripts\dekalb_parcel_query.py", line 8, in <module>
query = service.get(0, count_only=False)
File "C:\Python27\lib\site-packages\arcgis\arcgis.py", line 146, in get
jsobj = self.get_json(layer, where, fields, count_only, srid)
File "C:\Python27\lib\site-packages\arcgis\arcgis.py", line 90, in get_json
return response.json()
File "C:\Python27\lib\site-packages\requests\models.py", line 802, in json
return json.loads(self.text, **kwargs)
File "C:\Python27\lib\json\__init__.py", line 338, in loads
return _default_decoder.decode(s)
File "C:\Python27\lib\json\decoder.py", line 366, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "C:\Python27\lib\json\decoder.py", line 382, in raw_decode
obj, end = self.scan_once(s, idx)
ValueError: Invalid control character at: line 7591 column 220620 (char 385678)
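I was able to fix this issue by passing strict=False to the response.json() call inside the library, i.e. changing that line to `return response.json(strict=False)`; requests forwards the keyword argument to json.loads.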

Categories

Resources