I'm trying to take recipe hyperlinks from a pre-existing file and add them to a table in a database. I have already created the database and given the file name to set up a connection and insert the data into the table but whenever I do so I get the error sqlite3.OperationalError: unable to open database file.
Here's my code:
import bs4, os, requests, time
import sqlite3
from flask import current_app, g
def create_connection(db_file):
db = sqlite3.connect(db_file)
c = db.cursor()
return db
def get_html():
'''
Get the BBC Food sitemap and save it to a local file.
'''
page = None
db = create_connection("pantry/instance/flaskr.sqlite")
for attempt in range(1, 4):
print("line 40")
page = requests.get('http://www.bbc.co.uk/food/sitemap.xml')
try:
page.raise_for_status()
break
except requests.RequestException:
time.sleep(attempt * 10)
if not page:
raise Exception('Failed to get sitemap.xml')
sitemap = bs4.BeautifulSoup(page.text, 'html.parser')
# Write the recipe urls to a text file
print("line 53")
for line in sitemap.find_all('loc'):
for string in line.stripped_strings:
if string.startswith('https://www.bbc.co.uk/food/recipes/'):
print("line 57")
recipeUrl = string
if (
db.execute("SELECT recipeID FROM recipe WHERE weblink = ?", (recipeUrl,)).fetchone()
is not None
):
error = "recipe weblink {0} is already inputted.".format(recipeUrl)
if error is None:
db.execute(
'INSERT INTO recipe (weblink) VALUES (?)',
recipeUrl
)
db.commit()
db.close()
And the error message:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "C:\Program Files\JetBrains\PyCharm 2019.2.2\helpers\pydev\_pydev_bundle\pydev_umd.py", line 197, in runfile
pydev_imports.execfile(filename, global_vars, local_vars) # execute the script
File "C:\Program Files\JetBrains\PyCharm 2019.2.2\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/Eva Morris/PycharmProjects/pantry/flaskr/BBCscraper/scraperecipes.py", line 161, in <module>
get_html()
File "C:/Users/Eva Morris/PycharmProjects/pantry/flaskr/BBCscraper/scraperecipes.py", line 34, in get_html
db = create_connection("pantry/instance/flaskr.sqlite")
File "C:/Users/Eva Morris/PycharmProjects/pantry/flaskr/BBCscraper/scraperecipes.py", line 23, in create_connection
db = sqlite3.connect(db_file)
sqlite3.OperationalError: unable to open database file
Based on the consoles error report, your .py file is located in: C:/Users/Eva Morris/PycharmProjects/pantry/flaskr/BBCscraper/scraperecipes.py".
In your code you set your databases directory using the relative path as:pantry/instance/flaskr.sqlite.
This means python is looking for the directory: C:/Users/Eva Morris/PycharmProjects/pantry/flaskr/BBCscraper/pantry/instance/ to create/link your database file in.
If that is not a preexisting directory, sqlite3.connect will not be able to create your database flaskr.sqlite.
You may need to update the code db = create_connection("pantry/instance/flaskr.sqlite") to something like db = create_connection("flaskr.sqlite") which should work without any problem since it will just create the database in your working directory.
Related
I have data formatted in .json file. The end goal is to reformat the data to sqlite table and store into a database for further analysis.
Here is a sample of the data:
{"_id":{"$oid":"60551"},"barcode":"511111019862","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac114be37ce2ead437550"},"$ref":"Cogs"},"name":"test brand #1612366101024","topBrand":false}
{"_id":{"$oid":"601c5460be37ce2ead43755f"},"barcode":"511111519928","brandCode":"STARBUCKS","category":"Beverages","categoryCode":"BEVERAGES","cpg":{"$id":{"$oid":"5332f5fbe4b03c9a25efd0ba"},"$ref":"Cogs"},"name":"Starbucks","topBrand":false}
{"_id":{"$oid":"601ac142be37ce2ead43755d"},"barcode":"511111819905","brandCode":"TEST BRANDCODE #1612366146176","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac142be37ce2ead437559"},"$ref":"Cogs"},"name":"test brand #1612366146176","topBrand":false}
{"_id":{"$oid":"601ac142be37ce2ead43755a"},"barcode":"511111519874","brandCode":"TEST BRANDCODE #1612366146051","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac142be37ce2ead437559"},"$ref":"Cogs"},"name":"test brand #1612366146051","topBrand":false}
Followed by the code:
import pandas as pd
import json
import sqlite3
# Open json file and convert to a list
with open("users.json") as f:
dat = [json.loads(line.strip()) for line in f]
# create a datafrom from json file
df = pd.DataFrame(dat)
#open database connection
con = sqlite3.connect("fetch_rewards.db")
c = con.cursor()
df.to_sql("users", con)
c.close()
The error I am getting:
Traceback (most recent call last):
File "C:\Users\mohammed.alabbas\Desktop\sqlite\import_csv.py", line 16, in <module>
df.to_sql("users", con)
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py", line 2605, in to_sql
sql.to_sql(
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 589, in to_sql
pandas_sql.to_sql(
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 1828, in to_sql
table.insert(chunksize, method)
File "C:\Users\mname\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 830, in insert
exec_insert(conn, keys, chunk_iter)
File "C:\Users\mname\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 1555, in _execute_insert
conn.executemany(self.insert_statement(num_rows=1), data_list)
sqlite3.InterfaceError: Error binding parameter 1 - probably unsupported type.
Thanks in advance
I use pymongo to connect to my databases on a mongodb server. I set everything up and used a simple tutorial to start with basic things in pymongo. I ended up writting this into a python file:
from pymongo import MongoClient
from random import randint
client = MongoClient("localhost", 27017) #Class from PyMongo module
db = client["rothe_plana"]
# Initialize database settings for employers and events collections:
employersCollect = db["employers"]
eventsCollect = db["events"]
#-----------------------------------------------------
#Employer database managment:
#-----------------------------------------------------
#Inserts passed dictionary objects of employer profiles:
def insertNewEmployer(new_employer_profile):
while True:
try:
readyProfile = new_employer_profile.copy()
readyProfile['employer_id'] = randint(100, 999)
employersCollect.insert_one()
except pymongo.errors.DuplicateKeyError:
continue
break
def getListOfEmployerIDs():
pass #get employer ids to identify and render template elements.
# -----------------------------------------------------
# Events database managment:
# -----------------------------------------------------
#Inserts passed dictionary objects of event data:
def insertNewEvent(new_event_data):
while True:
try:
readyEventData = new_employer_profile.copy()
readyEventData['event_id'] = randint(10000000, 99999999)
employersCollect.insert_one()
except pymongo.errors.DuplicateKeyError:
continue
break
But if I run this I get an exception:
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm 2018.1.4\helpers\pydev\pydevd.py", line 1664, in <module>
main()
File "C:\Program Files\JetBrains\PyCharm 2018.1.4\helpers\pydev\pydevd.py", line 1658, in main
globals = debugger.run(setup['file'], None, None, is_module)
File "C:\Program Files\JetBrains\PyCharm 2018.1.4\helpers\pydev\pydevd.py", line 1068, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "C:\Program Files\JetBrains\PyCharm 2018.1.4\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/thoma/OneDrive/Projects_For_The_Web/Fliesen Rothe/PlanA/Pyramid_PlanA/pyramid_plana/datadbhandler.py", line 1, in <module>
from pymongo import MongoClient
File "C:\Users\thoma\OneDrive\Projects_For_The_Web\Fliesen Rothe\PlanA\Pyramid_PlanA\venv\lib\site-packages\pymongo\__init__.py", line 77, in <module>
from pymongo.collection import ReturnDocument
File "C:\Users\thoma\OneDrive\Projects_For_The_Web\Fliesen Rothe\PlanA\Pyramid_PlanA\venv\lib\site-packages\pymongo\collection.py", line 29, in <module>
from pymongo import (common,
File "C:\Users\thoma\OneDrive\Projects_For_The_Web\Fliesen Rothe\PlanA\Pyramid_PlanA\venv\lib\site-packages\pymongo\message.py", line 654, in <module>
_op_msg_uncompressed = _cmessage._op_msg
AttributeError: module 'pymongo._cmessage' has no attribute '_op_msg'
Since I certainly did not touch the Pymongo module code, I am doing something wrong in my code above. Also the web didn't bring up any results so is there a clear explanation for this?
EDIT: I had a closer look into the files that were provided by the above error. And I can see that the attribute in the specified class actually do exist. So that is quite strange. Even if I comment the dependent line out of pymongo, there is another AtrributeError for the same class.
I finally resolved the problem. It turned out that the permissions in my filesystem were not handled right.
I originally installed PyMongo via PyCharm (pip install pymongo). But this just does not work (no idea why) but I finally uninstalled pymongo from the virtual environment and installed it manually again via PowerShell in the virtual environment:
python -m pip install pymongo
Restarting PyCharm and running the project did bring up no errors anymore. Hope this may help others with this problem
I am trying to use functions with mysql and python and I am getting errors:
I am reading a file cnn.cvs and I want to insert in a table noticias but I have errors in the code. Here I share the code:
import csv
import MySQLdb
mydb = MySQLdb.connect(host='localhost',
user='root',
passwd='password',
db='cnn')
cursor = mydb.cursor()
f = open('cnn.csv', 'r')
csv_data = csv.reader(f)
for row in csv_data:
cursor.execute('INSERT INTO noticias(title, \
link, pubDate )' \
'VALUES("%s", "%s", "%s")',
row)
#close the connection to the database.
mydb.commit()
cursor.close()
print ("Done")
and when i execute this is the result:
C:\Users\SoriyAntony\AppData\Local\Programs\Python\Python36-32\python.exe
"C:\Program Files\JetBrains\PyCharm Community Edition
2017.1.4\helpers\pydev\pydevd.py" --multiproc --qt-support --client 127.0.0.1 --port 59726 --file C:/Users/SoriyAntony/PycharmProjects/cnnbd/cnnbd
pydev debugger: process 10368 is connecting
Connected to pydev debugger (build 171.4694.38)
Traceback (most recent call last):
File "C:\Users\SoriyAntony\AppData\Local\Programs\Python\Python36-
32\lib\site-packages\MySQLdb\cursors.py", line 238, in execute
query = query % args
TypeError: not enough arguments for format string
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Program Files\JetBrains\PyCharm Community Edition
2017.1.4\helpers\pydev\pydevd.py", line 1591, in <module>
globals = debugger.run(setup['file'], None, None, is_module)
File "C:\Program Files\JetBrains\PyCharm Community Edition
2017.1.4\helpers\pydev\pydevd.py", line 1018, in run
pydev_imports.execfile(file, globals, locals) # execute the script
File "C:\Program Files\JetBrains\PyCharm Community Edition
2017.1.4\helpers\pydev\_pydev_imps\_pydev_execfile.py", line 18, in execfile
exec(compile(contents+"\n", file, 'exec'), glob, loc)
File "C:/Users/SoriyAntony/PycharmProjects/cnnbd/cnnbd", line 17, in
<module>
row)
File "C:\Users\SoriyAntony\AppData\Local\Programs\Python\Python36-
32\lib\site-packages\MySQLdb\cursors.py", line 240, in execute
self.errorhandler(self, ProgrammingError, str(m))
File "C:\Users\SoriyAntony\AppData\Local\Programs\Python\Python36-
32\lib\site-
packages\MySQLdb\connections.py", line 52, in defaulterrorhandler
raise errorclass(errorvalue)
_mysql_exceptions.ProgrammingError: not enough arguments for format string
Process finished with exit code 1
I am using python 3.6, any idea of the ?
UPDATE:
This issue is resolved:
I use
cursor.executemany()
for this and its work.
The problem is that each row read from the csv file is returned as a list of strings for csv.reader, you have to convert the list row to tuple, change your code to:
cursor.execute('INSERT INTO noticias(title, \
link, pubDate )' \
'VALUES("%s", "%s", "%s")',
tuple(row))
Update:
Make sure there is table noticias in your mysql db, for example:
create table noticias (title Varchar(255),link varchar(255),pubDate varchar(255))
your cnn.csv file contains empty line, when read it, there will be empty list, you have to drop it, change your for loop like this:
for row in csv_data:
if len(row)!=0:
cursor.execute('INSERT INTO noticias(title, link, pubDate ) VALUES("%s", "%s", "%s")',tuple(row))
I have built a scraper to retrieve concert data from songkick by using their api. However, it takes a lot of time to retrieve all the data from these artists. After scraping for approximately 15 hours the script is still running but the JSON file doesn’t change anymore. I interrupted the script and I checked if I could access my data with TinyDB. Unfortunately I get the following error. Does anybody know why this is happening?
Error:
('cannot fetch url', 'http://api.songkick.com/api/3.0/artists/8689004/gigography.json?apikey=###########&min_date=2015-04-25&max_date=2017-03-01')
8961344
Traceback (most recent call last):
File "C:\Users\rmlj\Dropbox\Data\concerts.py", line 42, in <module>
load_events()
File "C:\Users\rmlj\Dropbox\Data\concerts.py", line 27, in load_events
print(artist)
File "C:\Python27\lib\idlelib\PyShell.py", line 1356, in write
return self.shell.write(s, self.tags)
KeyboardInterrupt
>>> mydat = db.all()
Traceback (most recent call last):
File "<pyshell#0>", line 1, in <module>
mydat = db.all()
File "C:\Python27\lib\site-packages\tinydb\database.py", line 304, in all
return list(itervalues(self._read()))
File "C:\Python27\lib\site-packages\tinydb\database.py", line 277, in _read
return self._storage.read()
File "C:\Python27\lib\site-packages\tinydb\database.py", line 31, in read
raw_data = (self._storage.read() or {})[self._table_name]
File "C:\Python27\lib\site-packages\tinydb\storages.py", line 105, in read
return json.load(self._handle)
File "C:\Python27\lib\json\__init__.py", line 287, in load
return loads(fp.read(),
MemoryError
below you can find my script
import urllib2
import requests
import json
import csv
import codecs
from tinydb import TinyDB, Query
db = TinyDB('events.json')
def load_events():
MIN_DATE = "2015-04-25"
MAX_DATE = "2017-03-01"
API_KEY= "###############"
with open('artistid.txt', 'r') as f:
for a in f:
artist = a.strip()
print(artist)
url_base = 'http://api.songkick.com/api/3.0/artists/{}/gigography.json?apikey={}&min_date={}&max_date={}'
url = url_base.format(artist, API_KEY, MIN_DATE, MAX_DATE)
# url = u'http://api.songkick.com/api/3.0/search/artists.json?query='+artist+'&apikey=WBmvXDarTCEfqq7h'
try:
r = requests.get(url)
resp = r.json()
if(resp['resultsPage']['totalEntries']):
results = resp['resultsPage']['results']['event']
for x in results:
print(x)
db.insert(x)
except:
print('cannot fetch url',url);
load_events()
db.close()
print ("End of script")
MemoryError is a built in Python exception (https://docs.python.org/3.6/library/exceptions.html#MemoryError) so it looks like the process is out of memory and this isn't really related to Songkick.
This question probably has the information you need to debug this: How to debug a MemoryError in Python? Tools for tracking memory use?
I have data in a text file which I need to upload into a table. My script in python 3 and uses mysql.connector (https://launchpad.net/myconnpy) to connect to DB and execute commands. I have been able successfully use mysql.connector in past without any problems but I am facing problem in using the command that uploads file to a table. My code is as follows:
def TableUpload(con2):
cur = con2.cursor()##Connect to destination server with table
res_file = 'extend2'
cur.execute("TRUNCATE TABLE data.results")## Clear table before writing
cur.execute("LOAD DATA LOCAL INFILE './extend2' INTO TABLE data.results FIELDS TERMINATED BY ','")
The code clears the table and than try to upload data from text file to table. It successfully clears the table but generated following error while filling table:
Traceback (most recent call last):
File "cl3.py", line 575, in <module>
TableUpload(con2)
File "cl3.py", line 547, in TableUpload
cur.execute("LOAD DATA LOCAL INFILE './extend2' INTO TABLE kakrana_data.mir_page_results FIELDS TERMINATED BY ','")
File "/usr/local/lib/python3.2/site-packages/mysql/connector/cursor.py", line 333, in execute
res = self.db().protocol.cmd_query(stmt)
File "/usr/local/lib/python3.2/site-packages/mysql/connector/protocol.py", line 137, in deco
return func(*args, **kwargs)
File "/usr/local/lib/python3.2/site-packages/mysql/connector/protocol.py", line 495, in cmd_query
return self.handle_cmd_result(self.conn.recv())
File "/usr/local/lib/python3.2/site-packages/mysql/connector/connection.py", line 180, in recv_plain
errors.raise_error(buf)
File "/usr/local/lib/python3.2/site-packages/mysql/connector/errors.py", line 84, in raise_error
raise get_mysql_exception(errno,errmsg)
mysql.connector.errors.NotSupportedError: 1148: The used command is not allowed with this MySQL version
When I use the command for uploading file directly from terminal than it works well. It is just that command is not working from script. The error says that command is not allowed with mysql version though it works from terminal. Please suggest what mistake I am making or alternative way to achieve data upload to a table from local file.