How to import CSV files to SQLite3 with python - python

This is my code.
import sqlite3
import pandas
db = sqlite3.connect('testdb.db')
df = pandas.read_csv('testcsv.csv')
df.to_sql('testTable', 'db', if_exists='append', index=False)
I got the last two lines of code from another post on Stack Overflow, but they don't work for me. This is the error I get, even after installing sqlalchemy (which pandas complained was missing).
Traceback (most recent call last):
File "C:/Users/pitye/PycharmProjects/gradeCalcV2/venv/sqlite.py", line 7, in <module>
df.to_sql('testTable', 'db', if_exists='append', index=False)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\core\generic.py", line 2663, in to_sql
method=method,
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\io\sql.py", line 503, in to_sql
pandas_sql = pandasSQL_builder(con, schema=schema)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\io\sql.py", line 577, in pandasSQL_builder
con = _engine_builder(con)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\pandas\io\sql.py", line 564, in _engine_builder
con = sqlalchemy.create_engine(con)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\__init__.py", line 479, in create_engine
return strategy.create(*args, **kwargs)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\strategies.py", line 54, in create
u = url.make_url(name_or_url)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\url.py", line 229, in make_url
return _parse_rfc1738_args(name_or_url)
File "C:\Users\pitye\PycharmProjects\gradeCalcV2\venv\lib\site-packages\sqlalchemy\engine\url.py", line 291, in _parse_rfc1738_args
"Could not parse rfc1738 URL from string '%s'" % name
sqlalchemy.exc.ArgumentError: Could not parse rfc1738 URL from string 'db'
I just want to create a table from a CSV file in SQLite. Is this even the right way of doing it, or am I waaay off?

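You are passing the string 'db' instead of the connection object db. pandas treats a string as a SQLAlchemy database URL, which is why it asks for sqlalchemy and then fails with "Could not parse rfc1738 URL". I think you just have to replace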
df.to_sql('testTable', 'db', if_exists='append', index=False)
With
df.to_sql('testTable', db, if_exists='append', index=False)

Related

Invalid argument error when using f string in path of DataFrame.to_csv()

I want to write pandas dataframe to a csv file every 10 secs. The csv file name includes the current timestamp. Here is part of the code:
import pandas as pd
import time
while True:
df = pd.read_sql_query('select * from dbo.tbl_tag_values', cnxn)
t = time.localtime()
current_time = time.strftime('%Y-%m-%dT%H:%M:%S',t)
csv_path =f'C:/Users/00_Projects/App/data-{current_time}.csv'
df.to_csv(csv_path)
time.sleep(10)
With a static file name instead of the f-string, the script works fine, but with the f-string I get this error:
Traceback (most recent call last):
File "c:\Users\00_Projects\App\script.py", line 24, in <module>
df.to_csv(csv_path)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py", line 3466, in to_csv
return DataFrameRenderer(formatter).to_csv(
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\format.py", line 1105, in to_csv
csv_formatter.save()
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\formats\csvs.py", line 237, in save
with get_handle(
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\common.py", line 702, in get_handle
handle = open(
OSError: [Errno 22] Invalid argument: 'C:/Users/00_Projects/App/data-2023-01-02T15:33:19.csv'
I read the post "How to use f string in a path location" and tried Path from pathlib, but got the same error.
My OS is Windows.
Thanks for any help!

Writing data into Snowflake table using Python

I am trying to read data from Excel into a pandas dataframe and then write the dataframe to a Snowflake table. The code is below.
The connection is established and the Excel read works fine, but the write to the Snowflake table fails with the error below. I would appreciate help resolving it.
snowflake.connector.errors.MissingDependencyError: Missing optional dependency: pandas Process finished with exit code 1
import pandas as pd
from sqlalchemy import create_engine
from snowflake.sqlalchemy import URL
from snowflake.connector.pandas_tools import pd_writer
url = URL(
account = '',
user = '',
schema = 'TMP',
database = 'TMP',
warehouse= 'DATABRICKS',
role = '',
authenticator='externalbrowser',
)
engine = create_engine(url)
con = engine.connect()
df = pd.read_excel("C:\\Final.xlsx")
df.columns = df.columns.astype(str)
table_name = 'test_connect'
if_exists = 'replace'
df.to_sql(name=table_name.lower(), con=con,index= False, if_exists=if_exists, method=pd_writer)
The detailed error information is below:
Traceback (most recent call last):
File "C:\Users\XYZ\AppData\Roaming\JetBrains\DataSpell2022.2\scratches\scratch.py", line 32, in <module>
df.to_sql(name=table_name.lower(), con=con,index= False, if_exists=if_exists, method=pd_writer)
File "C:\Users\XYZ\AppData\Roaming\Python\Python310\site-packages\pandas\core\generic.py", line 2963, in to_sql
return sql.to_sql(
File "C:\Users\XYZ\AppData\Roaming\Python\Python310\site-packages\pandas\io\sql.py", line 697, in to_sql
return pandas_sql.to_sql(
File "C:\Users\XYZ\AppData\Roaming\Python\Python310\site-packages\pandas\io\sql.py", line 1739, in to_sql
total_inserted = sql_engine.insert_records(
File "C:\Users\XYZ\AppData\Roaming\Python\Python310\site-packages\pandas\io\sql.py", line 1322, in insert_records
return table.insert(chunksize=chunksize, method=method)
File "C:\Users\XYZ\AppData\Roaming\Python\Python310\site-packages\pandas\io\sql.py", line 950, in insert
num_inserted = exec_insert(conn, keys, chunk_iter)
File "C:\Users\XYZ\AppData\Roaming\Python\Python310\site-packages\snowflake\connector\pandas_tools.py", line 320, in pd_writer
df = pandas.DataFrame(data_iter, columns=keys)
File "C:\Users\XYZ\AppData\Roaming\Python\Python310\site-packages\snowflake\connector\options.py", line 36, in __getattr__
raise MissingDependencyError(self._dep_name)
snowflake.connector.errors.MissingDependencyError: Missing optional dependency: pandas
Process finished with exit code 1
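I believe the following dependency install step has not been completed (i.e. installing the connector with its pandas extra: pip install "snowflake-connector-python[pandas]"): https://docs.snowflake.com/en/user-guide/python-connector-pandas.html#installation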

Working with json file to convert to a sqlite table format in python

I have data stored in a .json file, with one JSON object per line. The end goal is to load the data into a SQLite table and store it in a database for further analysis.
Here is a sample of the data:
{"_id":{"$oid":"60551"},"barcode":"511111019862","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac114be37ce2ead437550"},"$ref":"Cogs"},"name":"test brand #1612366101024","topBrand":false}
{"_id":{"$oid":"601c5460be37ce2ead43755f"},"barcode":"511111519928","brandCode":"STARBUCKS","category":"Beverages","categoryCode":"BEVERAGES","cpg":{"$id":{"$oid":"5332f5fbe4b03c9a25efd0ba"},"$ref":"Cogs"},"name":"Starbucks","topBrand":false}
{"_id":{"$oid":"601ac142be37ce2ead43755d"},"barcode":"511111819905","brandCode":"TEST BRANDCODE #1612366146176","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac142be37ce2ead437559"},"$ref":"Cogs"},"name":"test brand #1612366146176","topBrand":false}
{"_id":{"$oid":"601ac142be37ce2ead43755a"},"barcode":"511111519874","brandCode":"TEST BRANDCODE #1612366146051","category":"Baking","categoryCode":"BAKING","cpg":{"$id":{"$oid":"601ac142be37ce2ead437559"},"$ref":"Cogs"},"name":"test brand #1612366146051","topBrand":false}
Followed by the code:
import pandas as pd
import json
import sqlite3
# Open json file and convert to a list
with open("users.json") as f:
dat = [json.loads(line.strip()) for line in f]
# create a dataframe from the json file
df = pd.DataFrame(dat)
#open database connection
con = sqlite3.connect("fetch_rewards.db")
c = con.cursor()
df.to_sql("users", con)
c.close()
The error I am getting:
Traceback (most recent call last):
File "C:\Users\mohammed.alabbas\Desktop\sqlite\import_csv.py", line 16, in <module>
df.to_sql("users", con)
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\core\generic.py", line 2605, in to_sql
sql.to_sql(
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 589, in to_sql
pandas_sql.to_sql(
File "C:\Users\name\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 1828, in to_sql
table.insert(chunksize, method)
File "C:\Users\mname\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 830, in insert
exec_insert(conn, keys, chunk_iter)
File "C:\Users\mname\AppData\Roaming\Python\Python39\site-packages\pandas\io\sql.py", line 1555, in _execute_insert
conn.executemany(self.insert_statement(num_rows=1), data_list)
sqlite3.InterfaceError: Error binding parameter 1 - probably unsupported type.
Thanks in advance

Why can't I access the excel file in python?

I'm trying to use some data that I have in an Excel file. However, I'm getting an error saying that the file cannot be found. I've checked, and both the directory and the file name are correct. What am I doing wrong?
Here is the code:
import os
import pandas as pd
print(os.getcwd())
df = pd.read_excel(r'C:/Users/Eder/Desktop/TFG/Data/Interpolation_sample.xlsx',
index_col =0,parse_dates=True, sheet_name='sheet3')
And the answer from the console:
runcell(0, 'C:/Users/Eder/untitled0.py')
C:\Users\Eder\Desktop\TFG\Data
Traceback (most recent call last):
File "C:\Users\Eder\untitled0.py", line 14, in <module>
index_col =0,parse_dates=True, sheet_name='sheet3')
File "E:\Anaconda3\lib\site-packages\pandas\util\_decorators.py", line 299, in wrapper
return func(*args, **kwargs)
File "E:\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 336, in read_excel
io = ExcelFile(io, storage_options=storage_options, engine=engine)
File "E:\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 1072, in __init__
content=path_or_buffer, storage_options=storage_options
File "E:\Anaconda3\lib\site-packages\pandas\io\excel\_base.py", line 950, in inspect_excel_format
content_or_path, "rb", storage_options=storage_options, is_text=False
File "E:\Anaconda3\lib\site-packages\pandas\io\common.py", line 651, in get_handle
handle = open(handle, ioargs.mode)
FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\Eder\\Desktop\\TFG\\Data\\Interpolation_sample.xlsx'
I've figured out a way to solve the problem. I just changed the name of the file from 'Interpolation_sample' to 'Interpolation sample'. I don't know why, but the underscore in the file name is what was causing this error.

python pandas Access Excel MemoryError

All,
I've been trying to use pandas in Python to load a table from Access and then write the data to an Excel file; see the following code.
When running the code (Python 3.5.2), I receive the output shown after it:
<!-- language: Python -->
import pandas as pd
import pypyodbc
conn = 'DSN=MyDSNTest'
cnxn = pypyodbc.connect(conn)
crsr = cnxn.cursor()
qy = """select * from mytbl;"""
df = pd.read_sql(qy, cnxn)
cnxn.commit()
crsr.close()
cnxn.close()
print ("read into dataframe")
#writer = pd.ExcelWriter('c:/tmp/test.xlsx')
#df.to_excel(writer, 'Data')
df.to_excel('E:/Reports/AnalyticsInput/tblHistoryAC.xlsx', Data',index=False)
# Close the Pandas Excel writer and output the Excel file.
#writer.save()
read into dataframe 199966 Traceback (most recent call last):
File "C:\Users\jeff\test.py", line 23, in
df.to_excel('c:/tmp/MyTest.xlsx', 'Data', index=False) File "C:\Python35-32\lib\site-packages\pandas\core\frame.py", line 1466, in
to_excel
excel_writer.save() File "C:\Python35-32\lib\site-packages\pandas\io\excel.py", line 790, in
save
return self.book.save(self.path) File "C:\Python35-32\lib\site-packages\openpyxl\workbook\workbook.py", line
345, in save
save_workbook(self, filename) File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 266,
in save_workbook
writer.save(filename) File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 248,
in save
self.write_data() File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 81,
in write_data
self._write_worksheets() File "C:\Python35-32\lib\site-packages\openpyxl\writer\excel.py", line 197,
in _write_worksheets
xml = ws._write() File "C:\Python35-32\lib\site-packages\openpyxl\worksheet\worksheet.py",
line 870, in _write
return write_worksheet(self) File "C:\Python35-32\lib\site-packages\openpyxl\writer\worksheet.py", line
107, in write_worksheet
write_rows(xf, ws) MemoryError
Although the table has 200,000 rows, I have to believe that either something else is going wrong or there is another way to produce the xlsx file without getting a memory error.
Any ideas? Thanks!
Jeff

Categories

Resources