I am trying to make an exe file with py2exe. Here is my code:
import requests
from bs4 import BeautifulSoup
import csv

def get_html(url):
    r = requests.get(url)
    return r.text

url = 'https://loadtv.info/'
html = get_html(url)
soup = BeautifulSoup(html, 'html.parser')

# collect the text of every <p> on the front page
article = soup.findAll('p')
text = []
for element in article:
    text.append(element.getText().encode('utf8'))

# collect the "more" links to the article pages
pages = soup.findAll('a', class_='more-link')
links = []
for page in pages:
    links.append(page.get('href'))

# paragraphs 3-5 of every linked article
zo = []
for link in links:
    z = get_html(link)
    soup = BeautifulSoup(z, 'html.parser')
    paragraphs = soup.findAll('p')
    w = []
    tex = []
    for p in paragraphs:
        w.append(p.getText().encode('utf8'))
    for idx in range(3, 6):
        tex.append(w[idx])
    zo.append(tex)

# the embedded video of every linked article
video = []
for link in links:
    z = get_html(link)
    soup = BeautifulSoup(z, 'html.parser')
    vid = soup.find("iframe").get('src')
    video.append(vid)

da = {'links': links, 'text': zo, 'video': video}
with open('C:\\2\\123.txt', 'wb') as f:
    writer = csv.writer(f)
    for key in da:
        for row in da[key]:
            writer.writerow([row])
I get the following error in the log file after compilation:
Traceback (most recent call last):
  File "films.py", line 12, in <module>
  File "films.py", line 9, in resource_path
NameError: global name 'os' is not defined

Traceback (most recent call last):
  File "films.py", line 15, in <module>
  File "films.py", line 9, in get_html
  File "requests\api.pyc", line 70, in get
  File "requests\api.pyc", line 56, in request
  File "requests\sessions.pyc", line 488, in request
  File "requests\sessions.pyc", line 609, in send
  File "requests\adapters.pyc", line 497, in send
requests.exceptions.SSLError: [Errno 2] No such file or directory
I built the exe by running python setup.py py2exe in cmd.
I have no idea how to fix it. The problem seems to be related to the SSL certificate, but I have already added the relevant modules to my setup. Please help.
setup.py:
from distutils.core import setup
import py2exe

setup(
    windows=[{"script": "films.py"}],
    options={"py2exe": {"includes": ["bs4", "requests", "csv"]}},
    zipfile=None
)
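The two tracebacks point at two separate problems. The first (NameError: global name 'os' is not defined) just means the resource_path helper in films.py uses os without importing it; adding import os at the top fixes that. The second is the classic py2exe/requests issue: requests locates its CA bundle via a file path (certifi's cacert.pem), and py2exe does not copy that file into the frozen app, so every HTTPS request fails with SSLError: [Errno 2] No such file or directory. A sketch of a common workaround, assuming you ship cacert.pem next to the exe (the resource_path helper here mirrors the one named in the traceback and is otherwise an assumption):

import os
import sys
import requests

def resource_path(relative):
    # When frozen by py2exe, data files sit next to the executable;
    # during development they sit next to the script.
    if hasattr(sys, 'frozen'):
        base = os.path.dirname(sys.executable)
    else:
        base = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(base, relative)

def get_html(url):
    # Point requests at the bundled CA file explicitly.
    r = requests.get(url, verify=resource_path('cacert.pem'))
    return r.text

And in setup.py, copy certifi's bundle into the dist directory (this assumes certifi is installed, which it is for recent requests versions):

import certifi

setup(
    windows=[{"script": "films.py"}],
    options={"py2exe": {"includes": ["bs4", "requests", "csv"]}},
    data_files=[('.', [certifi.where()])],
    zipfile=None
)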
I have a list of tuples, each holding a local path and an image URL. This has worked fine until now: on Python 3.11 and Windows 11 it still runs without error, but on Ubuntu 20.04 with Python 3.11 it throws a FileNotFoundError.
I'm downloading more than 30 images, but I will cut the list off here.
I can't figure out what I'm missing.
The /media/hdd/img/ directory exists with 755 permissions.
from multiprocessing.pool import ThreadPool
import os
import requests

list2 = [('/media/hdd/img/tt24949716.jpg', 'https://www.cdnzone.org/uploads/2023/01/29/Devil-Beneath-2023-Cover.jpg'),
         ('/media/hdd/img/tt11231356.jpg', 'https://www.cdnzone.org/uploads/2023/01/29/Beneath-the-Green-2022-Cover.jpg'),
         ('/media/hdd/img/tt13103732.jpg', 'https://www.cdnzone.org/uploads/2023/01/29/The-Offering-2022-Cover.jpg'),
         ('/media/hdd/img/tt14826022.jpg', 'https://www.cdnzone.org/uploads/2023/01/29/You-People-2023-Cover.jpg'),
         ('/media/hdd/img/tt18252340.jpg', 'https://www.cdnzone.org/uploads/2023/01/29/Carnifex-2022-Cover.jpg')]

def download_images(pairs):
    results = ThreadPool(8).imap_unordered(fetch_url, pairs)
    for path in results:
        print(path)

def fetch_url(entry):
    path, uri = entry
    if not os.path.exists(path):
        r = requests.get(uri, stream=True)
        if r.status_code == 200:
            with open(path, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
    return path
>>> download_images(list2)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "<stdin>", line 2, in download_images
  File "/usr/lib/python3.11/multiprocessing/pool.py", line 930, in __init__
  File "/usr/lib/python3.11/multiprocessing/pool.py", line 196, in __init__
  File "/usr/lib/python3.11/multiprocessing/context.py", line 113, in SimpleQueue
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 341, in __init__
  File "/usr/lib/python3.11/multiprocessing/context.py", line 68, in Lock
  File "/usr/lib/python3.11/multiprocessing/synchronize.py", line 162, in __init__
  File "/usr/lib/python3.11/multiprocessing/synchronize.py", line 57, in __init__
FileNotFoundError: [Errno 2] No such file or directory
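The traceback shows the crash happening while multiprocessing creates a Lock, before any download starts. On Linux that Lock is backed by a POSIX semaphore, which needs /dev/shm to be available; a common cause of this exact FileNotFoundError is /dev/shm not being mounted (typical in minimal containers and chroots), and it hits even ThreadPool, which is built on multiprocessing's synchronization primitives. A hedged workaround sketch that sidesteps multiprocessing entirely by using concurrent.futures, which relies only on ordinary threading locks:

from concurrent.futures import ThreadPoolExecutor

def download_images(pairs):
    # ThreadPoolExecutor never touches multiprocessing's semaphores,
    # so it works even where /dev/shm is unavailable.
    with ThreadPoolExecutor(max_workers=8) as pool:
        for path in pool.map(fetch_url, pairs):
            print(path)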
I am trying to convert an HTML web page to PDF using the wkhtmltopdf tool and the pdfkit library, but when I enter the URL of any web page it shows me this error.
Traceback (most recent call last):
  File "c:\Users\Fai\Desktop\doc\convert.py", line 13, in <module>
    pdfkit.from_url("www.imdb.com", "imdb.pdf", configuration=config)
  File "C:\Python\Python310\lib\site-packages\pdfkit\api.py", line 27, in from_url
    return r.to_pdf(output_path)
  File "C:\Python\Python310\lib\site-packages\pdfkit\pdfkit.py", line 201, in to_pdf
    self.handle_error(exit_code, stderr)
  File "C:\Python\Python310\lib\site-packages\pdfkit\pdfkit.py", line 155, in handle_error
    raise IOError('wkhtmltopdf reported an error:\n' + stderr)
OSError: wkhtmltopdf reported an error:
Error: Failed loading page https://www.google.com (sometimes it will work just to ignore this error with --load-error-handling ignore)
Exit with code 1 due to network error: SslHandshakeFailedError
My code:
import os
from docx2pdf import convert
import pdfkit

path_wkhtmltopdf = r'C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe'
config = pdfkit.configuration(wkhtmltopdf=path_wkhtmltopdf)

PATH = "C:/Users/Fai/Desktop/doc"
os.chdir(PATH)
list_dir = os.listdir()

pdfkit.from_url("www.imdb.com", "imdb.pdf", configuration=config)
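The failing step is wkhtmltopdf's own TLS handshake, not pdfkit. Two things worth trying (a sketch, under the assumption that the handshake rather than the page itself is at fault): pass the URL with an explicit https:// scheme, and forward the flag the error message itself suggests via pdfkit's options dict:

options = {
    # wkhtmltopdf flag suggested by the error output; pdfkit passes
    # each key through to the binary as --load-error-handling ignore
    'load-error-handling': 'ignore',
}
pdfkit.from_url("https://www.imdb.com/chart/top/", "imdb.pdf",
                configuration=config, options=options)

If the handshake still fails, the installed wkhtmltopdf build may lack up-to-date TLS support; upgrading to the latest wkhtmltopdf release is the usual next step.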
I have built a scraper to retrieve concert data from Songkick using their API. However, it takes a lot of time to retrieve all the data for these artists. After scraping for approximately 15 hours the script was still running but the JSON file no longer changed. I interrupted the script and checked whether I could still access my data with TinyDB. Unfortunately I get the following error. Does anybody know why this is happening?
Error:
('cannot fetch url', 'http://api.songkick.com/api/3.0/artists/8689004/gigography.json?apikey=###########&min_date=2015-04-25&max_date=2017-03-01')
8961344
Traceback (most recent call last):
  File "C:\Users\rmlj\Dropbox\Data\concerts.py", line 42, in <module>
    load_events()
  File "C:\Users\rmlj\Dropbox\Data\concerts.py", line 27, in load_events
    print(artist)
  File "C:\Python27\lib\idlelib\PyShell.py", line 1356, in write
    return self.shell.write(s, self.tags)
KeyboardInterrupt
>>> mydat = db.all()
Traceback (most recent call last):
  File "<pyshell#0>", line 1, in <module>
    mydat = db.all()
  File "C:\Python27\lib\site-packages\tinydb\database.py", line 304, in all
    return list(itervalues(self._read()))
  File "C:\Python27\lib\site-packages\tinydb\database.py", line 277, in _read
    return self._storage.read()
  File "C:\Python27\lib\site-packages\tinydb\database.py", line 31, in read
    raw_data = (self._storage.read() or {})[self._table_name]
  File "C:\Python27\lib\site-packages\tinydb\storages.py", line 105, in read
    return json.load(self._handle)
  File "C:\Python27\lib\json\__init__.py", line 287, in load
    return loads(fp.read(),
MemoryError
Below you can find my script:
import requests
from tinydb import TinyDB, Query

db = TinyDB('events.json')

def load_events():
    MIN_DATE = "2015-04-25"
    MAX_DATE = "2017-03-01"
    API_KEY = "###############"

    with open('artistid.txt', 'r') as f:
        for a in f:
            artist = a.strip()
            print(artist)
            url_base = 'http://api.songkick.com/api/3.0/artists/{}/gigography.json?apikey={}&min_date={}&max_date={}'
            url = url_base.format(artist, API_KEY, MIN_DATE, MAX_DATE)
            # url = u'http://api.songkick.com/api/3.0/search/artists.json?query='+artist+'&apikey=WBmvXDarTCEfqq7h'
            try:
                r = requests.get(url)
                resp = r.json()
                if resp['resultsPage']['totalEntries']:
                    results = resp['resultsPage']['results']['event']
                    for x in results:
                        print(x)
                        db.insert(x)
            except Exception:
                # a bare except also swallows KeyboardInterrupt;
                # catch Exception so the script stays interruptible
                print('cannot fetch url', url)

load_events()
db.close()
print("End of script")
MemoryError is a built-in Python exception (https://docs.python.org/3.6/library/exceptions.html#MemoryError), so it looks like the process is out of memory and this isn't really related to Songkick.
This question probably has the information you need to debug this: How to debug a MemoryError in Python? Tools for tracking memory use?
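In this particular script the likely culprit is TinyDB's default storage: every read loads the entire events.json via json.load (visible in the traceback), so once 15 hours of inserts have accumulated, the file no longer fits in memory. One hedged alternative sketch that keeps memory flat is writing each event to SQLite (standard library) instead, storing the raw JSON per row:

import json
import sqlite3

conn = sqlite3.connect('events.db')
conn.execute('CREATE TABLE IF NOT EXISTS events (data TEXT)')

def insert_event(event):
    # Each row is flushed to disk; nothing accumulates in memory,
    # unlike TinyDB's whole-file JSON document.
    conn.execute('INSERT INTO events (data) VALUES (?)', (json.dumps(event),))
    conn.commit()

Replacing db.insert(x) with insert_event(x) in load_events would be enough to test this.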
I have a simple task but cannot make my code work. I want to loop over the URLs listed in my text file and download each one using the wget module in Python. Each URL is placed on a separate line in the text file.
Basically, this is the structure of the list in my text file:
http://e4ftl01.cr.usgs.gov//MODIS_Composites/MOLT/MOD11C3.005/2000.03.01/MOD11C3.A2000061.005.2007177231646.hdf
http://e4ftl01.cr.usgs.gov//MODIS_Composites/MOLT/MOD11C3.005/2014.12.01/MOD11C3.A2014335.005.2015005235231.hdf
There are about 178 URL lines in total. Each file should be saved in the current working directory.
Below is the initial code that I am working with:
import os, fileinput, urllib2 as url, wget

os.chdir("E:/Test/dwnld")
for line in fileinput.FileInput("E:/Test/dwnld/data.txt"):
    print line
    openurl = wget.download(line)
The error message is:
Traceback (most recent call last):
  File "E:\Python_scripts\General_purpose\download_url_from_textfile.py", line 5, in <module>
    openurl = wget.download(line)
  File "C:\Python278\lib\site-packages\wget.py", line 297, in download
    (fd, tmpfile) = tempfile.mkstemp(".tmp", prefix=prefix, dir=".")
  File "C:\Python278\lib\tempfile.py", line 308, in mkstemp
    return _mkstemp_inner(dir, prefix, suffix, flags)
  File "C:\Python278\lib\tempfile.py", line 239, in _mkstemp_inner
    fd = _os.open(file, flags, 0600)
OSError: [Errno 22] Invalid argument: ".\\MOD11C3.A2000061.005.2007177231646.hdf'\n.frbfrp.tmp"
Try urllib.urlretrieve instead. Check the documentation here: https://docs.python.org/2/library/urllib.html#urllib.urlretrieve
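Note the \n inside the invalid file name in the OSError: each line read from the file still carries its trailing newline, and wget builds a temporary file name from it, which Windows rejects. A sketch of the fix (strip each line first; urllib.urlretrieve is the Python 2 API, matching the C:\Python278 paths in the traceback):

import os
import urllib

os.chdir("E:/Test/dwnld")
with open("E:/Test/dwnld/data.txt") as f:
    for line in f:
        link = line.strip()              # drop the trailing newline
        if not link:
            continue                     # skip blank lines
        filename = link.split('/')[-1]   # e.g. MOD11C3.A2000061...hdf
        urllib.urlretrieve(link, filename)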
import requests
from bs4 import BeautifulSoup

start_url = requests.get('http://www.delicious.com/golisoda')
soup = BeautifulSoup(start_url)

This code displays the following error:
Traceback (most recent call last):
  File "test2_requests.py", line 10, in <module>
    soup = BeautifulSoup(start_url)
  File "/usr/local/lib/python2.7/dist-packages/bs4/__init__.py", line 169, in __init__
    self.builder.prepare_markup(markup, from_encoding))
  File "/usr/local/lib/python2.7/dist-packages/bs4/builder/_lxml.py", line 68, in prepare_markup
    dammit = UnicodeDammit(markup, try_encodings, is_html=True)
  File "/usr/local/lib/python2.7/dist-packages/bs4/dammit.py", line 203, in __init__
    self._detectEncoding(markup, is_html)
  File "/usr/local/lib/python2.7/dist-packages/bs4/dammit.py", line 373, in _detectEncoding
    xml_encoding_match = xml_encoding_re.match(xml_data)
TypeError: expected string or buffer
Use the .content of the response:
start_url = requests.get('http://www.delicious.com/golisoda')
soup = BeautifulSoup(start_url.content)
Alternatively, you can use the decoded Unicode text:
start_url = requests.get('http://www.delicious.com/golisoda')
soup = BeautifulSoup(start_url.text)
See the Response content section of the documentation.
You probably need to use
soup = BeautifulSoup(start_url.text)
or
soup = BeautifulSoup(start_url.content)
(a requests Response object has no .read() method). Alternatively, fetch the page with urllib2 and pass the raw bytes:
from bs4 import BeautifulSoup
import urllib2

data = urllib2.urlopen('http://www.delicious.com/golisoda').read()
soup = BeautifulSoup(data)
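For completeness, a minimal end-to-end sketch with requests and bs4 (the key point in all of the above: pass the response body, never the Response object, to BeautifulSoup):

import requests
from bs4 import BeautifulSoup

resp = requests.get('http://www.delicious.com/golisoda')
soup = BeautifulSoup(resp.content, 'html.parser')
print(soup.title)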