openpyxl read out excel and save into database - python

I am trying to read out an excel sheet and save it into my database. This is my excel table:
My code where I am trying to get the data and save it into an data array looks like this (workbook was loaded before with load_workbook):
def getExampleData(workbook):
sheet = workbook.get_sheet_by_name('ExampleData')
titleCell = sheet.cell('D1')
assert titleCell.value = u'Overview'
startRow = (2, 1)
endRow = (6, 1) #this is the first empty row and I have it hardcodened
to this time but it needs to be dynamically found
in the future
data['ExData'] = {}
for i in range(startRow, endRow):
exData = {}
exData['Name'] = sheet.cell(row=I, column=1).value
exData['Param1'] = sheet.cell(row=I, column=2).value
exData['Param2'] = sheet.cell(row=I, column=3).value
exData['Param3'] = sheet.cell(row=I, column=4).value
data['ExData'| = exData
return data['ExData']
and then I want it to load it into my database table named ExampleDB (the whole project is made with Django so I am loading the ExampleDB just with an import) like this:
def saveExampleData():
xData = data['ExData']
ex = ExampleDB.objects.filter(name = xData['Name'], param1 = xData['Param1'],
param2 = xData['Param2'], param3 = xData['Param3])
ex.save()
I just want to say that I KNOW that this functions don't work but I think they show what I want to do. Maybe someone can help me to understand how this works.
I am thankful for any help!

IIUC, here is a solution, using pandas and sqlalchemy:
from sqlalchemy import create_engine
import pandas as pd
db = create_engine('sqlite:///stocks.db')
df = pd.read_excel('excelfile.xlsx')
df.to_sql('required_table', db, if_exists='append')

Related

Saving Edited .xslx Worksheet with openpyxl

I've been trying to edit a .xslx worksheet using python and I have been successfully able to alter the cells value but when I use the save command from openpyxl, close the program, and open the excel spreadsheet, no changes have been saved. I have attached the code below and I would appreciate it if you could help me. I have tried reading other stackoverflow posts but when I try what they suggest it still doesn't work so I've turned to creating my first post here.
def editStock(choice, edit, stockSymbol):
sheet = setup()
stockRow = getStockRow(stockSymbol)
if choice == 7 or choice == 3 or choice == 2 or choice == 1:
print("Before")
print(sheet.cell(row = stockRow, column = choice).value)
sheet.cell(row = stockRow, column = choice).value = edit
print("After")
print(sheet.cell(row = stockRow, column = choice).value)
else:
sheet.cell(row = stockRow, column = choice).value = float(edit)
workbook = getWorkBook()
workbook.save(filename="Stocks.xlsx")
Here's my setup():
def setup():
directory = "C:\\Users\\shrey\\Desktop"
directory = directory.lower()
os.chdir(directory)
spreadname = "Stocks.xlsx"
workbook = openpyxl.load_workbook(spreadname)
sheet = workbook["Sheet1"]
return sheet
Here's my getWorkBook() for reference:
def getWorkBook():
directory = "C:\\Users\\shrey\\Desktop"
directory = directory.lower()
os.chdir(directory)
spreadname = "Stocks.xlsx"
workbook = openpyxl.load_workbook(spreadname)
return workbook
Here's my output when I call editStock():
Before
None
After
Dec-21-2021
And proof that it doesn't work: date is not altered
Sorry, the image is not very clear but the Dec-21-2021 should be right after the 'TSLA'
You should probably actually make a separate test script and share the whole thing, because you probably posted the methods that are working exactly correctly, and people typically will ask for that on stack overflow (I couldn't just grab your code and run it; that should usually be the case)
I wrote this little script to see what the matter was, and for me it worked fine.
But I noticed that there were two sheets called Sheet1. So make sure you are looking at the different sheets (the tabs). Once I figured that out, the data showed up just fine.
This code works when I run it, (including if the file already exists):
import os
import openpyxl
spreadname = "Stocks.xlsx"
sheetname = "THISONE"
if not os.path.exists(spreadname):
workbook = openpyxl.Workbook()
workbook.create_sheet(title=sheetname)
else:
workbook = openpyxl.load_workbook(spreadname)
sheet = workbook[sheetname]
c1 = sheet.cell(row=3, column=6)
c1.value = 123.456
c2 = sheet['B9']
c2.value = 456.321
workbook.save(spreadname)
Specifically, it creates a sheet called "THISONE" and the data is there.

Read and Write multiple excel data into one excel file using openpyxl

I am trying to copy the data from multiple excel into one excel. I am novice to python and openpyxl. So i have opened each file and went row by row and copied them. I want to do this with multiple files. How do i loop through row and columns and copy the data consider the column in all the files are same order?
import openpyxl as xl
from openpyxl import workbook
incident_wb = xl.load_workbook('incident resolved yesterday.xlsx')
incident_sheet = incident_wb['Page 1']
combined_wb = xl.Workbook()
combined_sheet = combined_wb.active
combined_sheet.title = "combined_sheet"
combined_wb.save('combined_sheet.xlsx')
for row in range(1, incident_sheet.max_row+1):
incident_no = incident_sheet.cell(row,1)
opened_date = incident_sheet.cell(row,2)
shrt_desc = incident_sheet.cell(row,3)
requester = incident_sheet.cell(row,4)
incdnt_type = incident_sheet.cell(row,5)
priority = incident_sheet.cell(row,6)
assgn_grp = incident_sheet.cell(row,7)
assgn_to = incident_sheet.cell(row,8)
updated = incident_sheet.cell(row,9)
status = incident_sheet.cell(row,10)
sub_status = incident_sheet.cell(row,11)
##copy the data into the new sheet
incident_no_1 = combined_sheet.cell(row,1)
incident_no_1.value = incident_no.value
opened_date_1 = combined_sheet.cell(row,2)
opened_date_1.value = opened_date.value
shrt_desc_1 = combined_sheet.cell(row,3)
shrt_desc_1.value = shrt_desc.value
requester_1 = combined_sheet.cell(row,4)
requester_1.value = requester.value
incdnt_type_1 = combined_sheet.cell(row,5)
incdnt_type_1.value = incdnt_type.value
priority_1 = combined_sheet.cell(row,6)
priority_1.value = priority.value
assgn_grp_1 = combined_sheet.cell(row,7)
assgn_grp_1.value = assgn_grp.value
assgn_to_1 = combined_sheet.cell(row,8)
assgn_to_1.value = assgn_to.value
updated_1 = combined_sheet.cell(row,9)
updated_1.value = updated.value
status_1 = combined_sheet.cell(row,10)
status_1.value = status.value
sub_status_1 = combined_sheet.cell(row,11)
sub_status_1.value = sub_status.value
##print(f"The incident resolved yesterday {incident_no.value}")
combined_wb.save('combined_sheet.xlsx')
An alternative approach would be to build a list of date from multiple excel files and then write it to another file.
As a proof of concept:
import openpyxl as xl
from openpyxl import workbook
def provide_data(workbookName, sheetName):
wb = xl.load_workbook(workbookName)
sheet = wb[sheetName]
return [[y.value for y in x] for x in sheet.iter_rows()]
# This creates an array of rows, which contain an array of cell values.
# It will be much better to provide mapping for cells and return business object.
def save_data(list_of_sheets):
combined_wb = xl.Workbook()
combined_sheet = combined_wb.active
combined_sheet.title = "combined_sheet"
for sheet in list_of_sheets:
for row in sheet:
combined_sheet.append(row) # combining multiple rows.
combined_wb.save('combined_sheet.xlsx')
workSheetsToCopy = [['incident resolved yesterday.xlsx', 'Page 1'], ['other.xlsx', 'Page 1']]
workSheetsToCopy = [provide_data(x[0], x[1]) for x in workSheetsToCopy]
save_data(workSheetsToCopy)

How can i achieve a vlookup excel like functionality in Python

In Billing Roster - SOW.xlsx I have new column data one is named as SOW and other is named SOW Description (Match value for SOW).
And now when i open ACFC_Resource_Allocation.xlsx excel and for an example if select a value in D2 (SOW) cell from the dropdown i should get a matching value into E2 cell after the selection from dropdown.
I only have an idea than a vlookup from Excel like below should solve my case. Not sure how to achieve in python.
=VLOOKUP(D2,'[Billing Roster - SOW.xlsx]SOW List'!$A$1:$B$14,1,FALSE)
Tried below code
from openpyxl import *
from openpyxl.styles import *
import webbrowser
import pandas
from openpyxl.worksheet.datavalidation import DataValidation
# Read all Excels into pandas dataframes
sowexcel = pandas.read_excel('Billing Roster - SOW.xlsx')
#Load the existing Resource Allocation Excel
wb = load_workbook('ACFC_Resource_Allocation.xlsx')
allocationsheet = wb.active
def load():
maxrow = allocationsheet.max_row
sow_list = sowexcel['SOW #'].tolist()
column_sow = ','.join(sow_list)
validator_sow = DataValidation(type='list', formula1='"{}"'.format(column_sow), allow_blank=True)
allocationsheet.add_data_validation(validator_sow)
validator_sow.add('D2:D%s' %maxrow)
# save the file
wb.save('ACFC_Resource_Allocation.xlsx')
wb.close()
# Driver code
if __name__ == "__main__":
load()
file_open = webbrowser.open('ACFC_Resource_Allocation.xlsx')

Pandas Not Reading Excel Properly

I am trying to use and Add-In for Excel that gets removed when I use win32com.client forcing me to restart my computer. I have found a work around using xlrd, openpyxl, and pandas but I have run into a completely new issue.
I first open Excel with the pandas and read through the file extracting the information that I require.
xl = pandas.ExcelFile(xlsx)
sheets = xl.sheet_names
df = xl.parse(sheets[2])
I then have to go into the same workbook and update the Meter Name and the date.
for i, value in enumerate(dataList):
wb = openpyxl.load_workbook(xlsx)
worksheets = wb.sheetnames
worksheet = wb.get_sheet_by_name(worksheets[0])
rowCoordinate = i
meterName = value[0]
creationDate = value[1]
units = value[2]
worksheet.cell(row=1, column=2).value = meterName
wb.save(copyXlsx)
dateList = []
for k, dateRange in enumerate(value[3]):
sDate = dateRange[0]
eDate = dateRange[1]
wb = openpyxl.load_workbook(copyXlsx)
worksheets = wb.sheetnames
worksheet = wb.get_sheet_by_name(worksheets[0])
worksheet.cell(row=2, column=2).value = sDate
worksheet.cell(row=3, column=2).value = eDate
wb.save(copyXlsx1)
print meterName, dateRange
xl1 = pandas.ExcelFile(copyXlsx1)
sheets = xl1.sheet_names
df = xl.parse(sheets[0])
print df
My issue is that the excel file opens and write the information perfectly. but pandas has all the header information updated but the numbers are the same from the original document. I have gone in and explored the Intermediate Excel Document and it doesn't match the number pandas shows

Why is it so much slower to export my data to .xlsx than to .xls or .csv?

I have a dataframe that I'm exporting to Excel, and people want it in .xlsx. I use to_excel, but when I change the extension from .xls to .xlsx, the exporting step takes about 9 seconds as opposed to 1 second. Exporting to a .csv is even faster, which I believe is due to the fact that it's just a specially formatted text file.
Perhaps the .xlsx files just added a lot more features so it takes longer to write to them, but I'm hoping there is something I can do to prevent this.
Pandas defaults to using OpenPyXL for writing xlsx files which can be slower than than the xlwt module used for writing xls files.
Try it instead with XlsxWriter as the xlsx output engine:
df.to_excel('file.xlsx', sheet_name='Sheet1', engine='xlsxwriter')
It should be as fast as the xls engine.
As per different Python to Excel modules benchmark, pyexcelerate has better performance.
Below code used to take sqlite tables data into xlsx file datasheets. table is not stored in xlsx file unless raw size is less than 1000000 raws. In that case info is stored in csv file.
def passfile(datb, tables):
"""copy to xlsx or csv files tables from query results"""
import sqlite3
import pandas as pd
import timeit
import csv
from pyexcelerate import Workbook
from pathlib import Path
from datetime import date
dat_dir = Path("C:/XML")
db_path = dat_dir / datb
start_time = timeit.default_timer()
conn = sqlite3.connect(db_path) # database connection
c = conn.cursor()
today = date.today()
tablist = []
with open(tables, 'r') as csv_file: # tables to be collected file
csv_reader = csv.DictReader(csv_file)
for line in csv_reader:
tablist.append(line['table']) #column header
xls_file = "Param" + today.strftime("%y%m%d") + ".xlsx"
xls_path = dat_dir / xls_file # xls file path-name
csv_path = dat_dir / "csv" # csv path to store big data
wb = Workbook() # excelerator file init
for line in tablist:
try:
df = pd.read_sql_query("select * from " + line + ";", conn) # pandas dataframe from sqlite
if len(df) > 1000000: # excel not supported
print('save to csv')
csv_loc = line + today.strftime("%y%m%d") + '.csv.gz' # compressed csv file name
df.to_csv(csv_path / csv_loc, compression='gzip')
else:
data = [df.columns.tolist()] + df.values.tolist()
data = [[index] + row for index, row in zip(df.index, data)]
wb.new_sheet(line, data=data)
except sqlite3.Error as error: # sqlite error handling
print('SQLite error: %s' % (' '.join(error.args)))
print("saving workbook")
wb.save(xls_path)
end_time = timeit.default_timer()
delta = round(end_time - start_time, 2)
print("Took " + str(delta) + " secs")
c.close()
conn.close()
passfile("20200522_sqlite.db", "tablesSQL.csv")

Categories

Resources