I am trying to take a dictionary in Python and place it into an Excel worksheet where the keys are displayed in the header row of the sheet and the values are in the columns beneath them. I am close; I am just missing something small and cannot figure it out. Here is my code. Caution: I use way too many imports.
import os
import re
import openpyxl
from openpyxl.utils import get_column_letter, column_index_from_string
import xlsxwriter
import pprint
from openpyxl.workbook import Workbook
from openpyxl.worksheet.copier import WorksheetCopy
# Create the output workbook and a single worksheet to write into.
workbook = xlsxwriter.Workbook('dicExcel.xlsx')
worksheet = workbook.add_worksheet()
# Maps each intended header name to the list of values for that header.
d = {'a':['Alpha','Bata','Gamma'], 'b':['1','2','3'], 'c':['1.0','2.0','3.0']}
# Current write position, passed to write() as (row, col).
row = 0
col = 1
# NOTE(review): the loop indentation was lost in this listing, so the exact
# nesting of the statements below cannot be confirmed.
for key in d.keys():
row += 1
worksheet.write(row, col, key)
for item in d[key]:
# NOTE(review): col is never advanced in this snippet, so every key is written
# to the same column and every item to the column right of it.
worksheet.write(row, col + 1, item)
row += 1
workbook.close()
I think this is what you are trying to do:
import xlsxwriter
# Write a dictionary to a worksheet so that each key heads its own column and
# the key's values are listed in the rows directly beneath it.
d = {'a':['Alpha','Bata','Gamma'], 'b':['1','2','3'], 'c':['1.0','2.0','3.0']}

# The context manager closes (and thereby saves) the workbook even if one of
# the writes raises.
with xlsxwriter.Workbook('dicExcel.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    # One column per key: the key goes in row 0, its values from row 1 down.
    for col, (key, values) in enumerate(d.items()):
        worksheet.write(0, col, key)
        for row, item in enumerate(values, start=1):
            worksheet.write(row, col, item)
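This puts the data in the format below (the column order follows the dictionary's iteration order, which older Python versions did not guarantee to match insertion order):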
a c b
Alpha 1.0 1
Bata 2.0 2
Gamma 3.0 3
Is this what you wanted?
One option is to use the pandas package; then you won't need so many imports.
import pandas as pd
# Column-oriented source data: each key names a column and maps to that
# column's values.
d = {
    'a': ['Alpha', 'Bata', 'Gamma'],
    'b': ['1', '2', '3'],
    'c': ['1.0', '2.0', '3.0'],
}
# Build the frame with one column per dictionary key.
df = pd.DataFrame.from_dict(d, orient='columns')
df will look like this:
a b c
0 Alpha 1 1.0
1 Bata 2 2.0
2 Gamma 3 3.0
To write the DataFrame out to an Excel file:
# The string below is a placeholder: substitute the real destination path and file name.
df.to_excel("Path to write Excel File + File Name")
Related
I have an Excel table that contains:
ID product
03/1/2021
16/1/2022
12/2/2022
14/3/2023
A
4
1
2
5
B
6
1
3
C
7
6
and in the same sheet I have a drop-down list that contains the year and the month.
If I select, for example, year = 2021 and month = 1 in the drop-down list,
it should return something like this:
ID product
03/1/2021
A
4
B
6
C
and then it should calculate the sum of the cells: sum = 10 in this case.
here is my code :
# import load_workbook
import pandas as pd
import numpy as np
from openpyxl import load_workbook
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl import Workbook
from openpyxl.styles import PatternFill
# Add a year drop-down (cell E2) and a month drop-down (cell E3) to an
# existing workbook, backed by helper values stored in columns MK and MN.
# set file path
filepath = r'test.xlsx'
wb = load_workbook(filepath)
ws = wb["sheet1"]

# Ten selectable years, 2021-2030, stored as text in MK1:MK10 so that the
# list range referenced below is filled completely.
for number in range(1, 11):
    ws['MK{}'.format(number)].value = str(2020 + number)
data_val = DataValidation(type="list", formula1='=MK1:MK10')
ws.add_data_validation(data_val)
# drop down list with all the values from the column MK
data_val.add(ws["E2"])

# Month numbers 1-12, stored as text in MN1:MN12.
for numbers in range(1, 13):
    ws['MN{}'.format(numbers)].value = str(numbers)
data_vals = DataValidation(type="list", formula1='=MN1:MN12')
ws.add_data_validation(data_vals)
# drop down list with all the values from the column MN
data_vals.add(ws["E3"])

# Highlight the 'year' and 'month' selector cells with the same solid fill.
highlight = PatternFill(start_color='FFFFFF00', end_color='FFFFFF00', fill_type='solid')
ws['E2'].fill = highlight
ws['E3'].fill = highlight

# save workbook
wb.save(filepath)
Any suggestions?
thank you for your help.
Assuming your excel file looks like below:
Final Code looks like below:
import pandas as pd
import xlrd

# Keep only the table columns whose header date falls in the year and month
# held in cells A1 and A2 of the same sheet.
file = r'C:\path\test_exl.xlsx'
sheetname = 'Sheet1'
n = 2  # number of leading lines above the table that read_excel must skip

df = pd.read_excel(file, skiprows=list(range(n)), index_col=[0])

# NOTE(review): xlrd 2.0 and later open only legacy .xls files; confirm the
# installed xlrd version can read this .xlsx workbook.
workbook = xlrd.open_workbook(file)
worksheet = workbook.sheet_by_name(sheetname)
# Coerce to int so the comparisons below work whether the cell holds a
# number (2021.0) or text ("2021").
year = int(worksheet.cell(0, 0).value)
month = int(worksheet.cell(1, 0).value)

# Headers that cannot be parsed as day-first dates become NaT and never match.
datetime_cols = pd.to_datetime(df.columns, dayfirst=True, errors='coerce')
selected = (datetime_cols.year == year) & (datetime_cols.month == month)
out = df.loc[:, selected].reset_index()
print(out)
ID Product 03-01-2021
0 A 4.0
1 B 6.0
2 C NaN
Breakdown:
you can first read the table in pandas using pd.read_excel:
# Workbook location and the name of the sheet that holds the table.
file = r'C:\path\test_exl.xlsx'
sheetname = 'Sheet1'
# Number of leading lines to skip before the table begins. In the example
# the table starts on line 3, hence 2; change it to suit your sheet.
n = 2
rows_to_skip = list(range(n))
df = pd.read_excel(file, skiprows=rows_to_skip, index_col=[0])
Then access the cell values using xlrd:
import xlrd
# Open the workbook again with xlrd to read the two selector cells directly.
workbook = xlrd.open_workbook(file)
worksheet = workbook.sheet_by_name(sheetname)
# Cell coordinates are (row, column), both zero-based: A1 is (0, 0) and
# A2 is (1, 0). In the example these hold 2021 and 1.
year, month = (worksheet.cell(r, 0).value for r in (0, 1))
#print(year,month) gives 2021 and 1
then convert columns to datetime and filter:
# Parse the column labels as day-first dates; labels that are not dates are
# coerced to NaT instead of raising.
datetime_cols = pd.to_datetime(df.columns, dayfirst=True, errors='coerce')
# Select the columns dated in the chosen year and month, then turn the index
# back into an ordinary column.
in_year = datetime_cols.year == year
in_month = datetime_cols.month == month
out = df.loc[:, in_year & in_month].reset_index()
I am trying to parse a word (.docx) for tables, then copy these tables over to excel using xlsxwriter.
This is my code:
from docx.api import Document
import xlsxwriter
# Open the Word document and collect its tables.
document = Document('/Users/xxx/Documents/xxx/Clauses Sample - Copy v1 - for merge.docx')
tables = document.tables
# Output workbook with one worksheet named "Compliance".
wb = xlsxwriter.Workbook('C:/Users/xxx/Documents/xxx/test clause retrieval.xlsx')
Sheet1 = wb.add_worksheet("Compliance")
# Next worksheet row to write to; initialised once, before any table is read.
index_row = 0
print(len(tables))
# NOTE(review): the loop indentation was lost in this listing, so the exact
# nesting of the statements below cannot be confirmed.
for table in document.tables:
data = []
keys = None
for i, row in enumerate(table.rows):
# Lazy generator over the text of each cell in this row.
text = (cell.text for cell in row.cells)
# The first row of a table supplies the keys used for its later rows.
if i == 0:
keys = tuple(text)
continue
# Pair each cell's text with the matching first-row key.
row_data = dict(zip(keys, text))
data.append(row_data)
#print (data)
#big_data.append(data)
# Writes the dict's string form into column 0 only, so the whole table row
# ends up as a single text cell.
Sheet1.write(index_row,0, str(row_data))
index_row = index_row + 1
print(row_data)
wb.close()
This is my desired output:
However, here is my actual output:
I am aware that my current output produces a list of strings instead.
Is there any way that I can get my desired output using xlsxwriter? Any help is greatly appreciated.
I would go using pandas package, instead of xlsxwriter, as follows:
from docx.api import Document
import pandas as pd
# Copy every row of every table in a Word document into one DataFrame and
# save it as an Excel file.
document = Document("D:/tmp/test.docx")
tables = document.tables

# Collect the rows in a plain list first and build the DataFrame once;
# growing a DataFrame row by row copies it on every step.
rows = []
for table in tables:
    for row in table.rows:
        rows.append([cell.text for cell in row.cells])

df = pd.DataFrame(rows)
# Assumes every table row has exactly two cells, as in the sample document.
df.columns = ["Column1", "Column2"]
df.to_excel("D:/tmp/test.xlsx")
print(df)
Which outputs the following that is inserted in the excel:
>>>
Column1 Column2
0 Hello TEST
1 Est Ting
2 Gg ff
This is the portion of my code update that allowed me to get the output I want:
# Copy one Word table into the worksheet: each table row becomes one
# worksheet row and each cell is written to its own column.
for row in block.rows:
    for x, cell in enumerate(row.cells):
        print(cell.text)
        Sheet1.write(index_row, x, cell.text)
    # Advance once per table row, after all of its cells are written.
    index_row += 1
Output:
for key, value in fetched.keys():
ValueError: too many values to unpack
Here is the program
import xml.etree.cElementTree as etree
import xlsxwriter
import pprint
from csv import DictWriter
# Read the whole XML file into memory and parse it into an element tree.
# NOTE(review): the file handle opened here is never closed in this snippet.
xmlDoc = open('C:/Users/Talha/Documents/abc.xml', 'r')
xmlDocData = xmlDoc.read()
xmlDocTree = etree.XML(xmlDocData)
# Tag names whose text is collected from the document.
sections = ['Srno','State','Statecd','District','IssuedOn','Day','normal_rainfall','normal_temp_max','normal_temp_min']
# Maps each tag name to the list of text values found under that tag.
fetched = dict()
for sec in sections:
fetched[sec] = []
for item in xmlDocTree.iter( sec ):
fetched[sec].append( item.text )
#print fetched['State']
workbook = xlsxwriter.Workbook('data.xlsx')
worksheet = workbook.add_worksheet()
row = 0
col = 0
# NOTE(review): .keys() yields single key strings, so unpacking each one into
# (key, value) is what raises the quoted "too many values to unpack" ValueError.
for key, value in fetched.keys():
worksheet.write(row, col, key)
worksheet.write(row, col + 1, value)
row += 1
workbook.close()
fetched dict contain data like this
fetched = {'Srno': ['1','2','3'....], 'State':['dads','dadada'.....],'District':['dadad','gdgdfg'......]}
Use .items() while traversing the dictionary with key and value at the same time.
Use
# Write one worksheet row per key: the key in the first column and its
# values, joined into one comma-separated string, in the column beside it.
for key, value in fetched.items():
    worksheet.write(row, col, key)
    # str.join accepts only strings, so entries that are None are written
    # as empty strings instead of raising TypeError.
    worksheet.write(row, col + 1, ','.join(v or '' for v in value))
    row += 1
Change keys to items.
You are trying to write a list to a cell. Convert the list to a string.
Use the following concept.
# Demonstrates turning a list of strings into one comma-separated string.
l = ['1','2','3']
joined = ','.join(l)
# print is a function in Python 3, so the argument needs parentheses.
print(joined)
Convert the list to a string using a join with a separator.
Hello all… a question about using pandas to combine Excel spreadsheets.
The problem is that the order of the columns is lost when the files are combined. If there are more files to combine, the format gets even worse.
It also gives an error message if the number of files is large:
ValueError: column index (256) not an int in range(256)
What I am using is below:
import pandas as pd
# Starts empty; the data read from each input file is appended to it below.
df = pd.DataFrame()
# NOTE(review): the loop indentation was lost in this listing, so the exact
# nesting of the statements below cannot be confirmed.
for f in ['c:\\1635.xls', 'c:\\1644.xls']:
data = pd.read_excel(f, 'Sheet1')
# Label every row with the name of the file it was read from.
# NOTE(review): os is used here but is not imported in this listing.
data.index = [os.path.basename(f)] * len(data)
df = df.append(data)
df.to_excel('c:\\CB.xls')
The original files and combined look like:
What's the best way to combine a large number of such similar Excel files?
thanks.
I usually use xlrd and xlwt:
#!/usr/bin/env python
# encoding: utf-8
import xlwt
import xlrd
import os
# Merge the data rows of several workbooks into one sheet under a single
# shared header row.
current_file = xlwt.Workbook()
write_table = current_file.add_sheet('sheet1', cell_overwrite_ok=True)

# Header row, written once at the top of the combined sheet.
key_list = ['City', 'Country', 'Received Date', 'Shipping Date', 'Weight', '1635']
for title_index, text in enumerate(key_list):
    write_table.write(0, title_index, text)

# NOTE(review): xlrd 2.0 and later open only legacy .xls files; confirm the
# installed xlrd version can read these .xlsx inputs.
file_list = ['1635.xlsx', '1644.xlsx']
i = 1  # next output row; row 0 holds the header
for name in file_list:
    data = xlrd.open_workbook(name)
    table = data.sheets()[0]
    # Start at 1 to skip each input file's first row.
    for row in range(1, table.nrows):
        for index, context in enumerate(table.row_values(row)):
            write_table.write(i, index, context)
        # Move to the next output row only after the whole input row is copied.
        i += 1

current_file.save(os.path.join(os.getcwd(), 'result.xls'))
Instead of data.index = [os.path.basename(f)] * len(data) you should use df.reset_index().
For example:
1.xlsx:
a b
1 1
2 2
3 3
2.xlsx:
a b
4 4
5 5
6 6
code:
# Read the same sheet from every input file and stack the rows into a single
# DataFrame with a fresh 0..n-1 index.
frames = []
for f in [r"C:\Users\Adi\Desktop\1.xlsx", r"C:\Users\Adi\Desktop\2.xlsx"]:
    data = pd.read_excel(f, sheet_name='Sheet1')
    frames.append(data)

# A single concat replaces repeated DataFrame.append calls (removed in
# pandas 2.0); ignore_index=True renumbers the rows, which is what
# reset_index(drop=True) did.
df = pd.concat(frames, ignore_index=True)
# NOTE(review): writing the legacy .xls format depends on the installed
# pandas/engine versions; confirm it is supported or switch to .xlsx.
df.to_excel('c:\\CB.xls')
cb.xls:
a b
0 1 1
1 2 2
2 3 3
3 4 4
4 5 5
5 6 6
If you don't want the dataframe's index to be in the output file, you can use df.to_excel('c:\\CB.xls', index=False).
I have a series of tables I would like to write to the same worksheet. The only other post similar to this is here. I also looked here but didn't see a solution.
I was hoping for a similar situation to SAS ODS Output codes that send proc freq results to an excel file. My thought was turning the table results into a new data frame and then stacking the output results to a worksheet.
pd.value_counts(df['name'])
df.groupby('name').aggregate({'Id': lambda x: x.unique()})
If I know the number of rows corresponding to the table, I should ideally know the appropriate range of cells to write to.
I am using:
import xlsxwriter
workbook = xlsxwriter.Workbook('demo.xlsx')
worksheet = workbook.add_worksheet()
# Frequency count of each distinct value in the 'name' column.
tableone = pd.value_counts(df['name'])
# For each 'name' group, the unique values of its 'Id' column.
tabletwo = df.groupby('name').aggregate({'Id': lambda x: x.unique()})
# NOTE(review): a whole pandas object is passed to write() together with a
# cell range here; this is the step the question asks how to do.
worksheet.write('B2:C15', tableone)
worksheet.write('D2:E15', tabletwo)
workbook.close()
EDIT: Include view of tableone
TableOne:
Name | Freq
A 5
B 1
C 6
D 11
import xlsxwriter
def _write_wrapped(sheet, values, first_row, first_col, wrap_col=16):
    """Write values left to right, wrapping to a new row at wrap_col.

    Writing starts at (first_row, first_col), both zero-based. When the
    column reaches wrap_col, the position moves down one row and back to
    first_col, so each table stays within its own column band.
    """
    row, col = first_row, first_col
    for value in values:
        if col == wrap_col:
            row += 1
            col = first_col
        sheet.write(row, col, value)
        col += 1


workbook = xlsxwriter.Workbook('demo.xlsx')
worksheet = workbook.add_worksheet()
tableone = pd.value_counts(df['name'])
tabletwo = df.groupby('name').aggregate({'Id': lambda x: x.unique()})

# Row 1, column 1 is cell B2.
_write_wrapped(worksheet, tableone, first_row=1, first_col=1)
# Row 1, column 3 is cell D2.
# NOTE(review): iterating a DataFrame yields its column labels, not its
# values, and this range overlaps the cells written for tableone above;
# confirm the intended layout.
_write_wrapped(worksheet, tabletwo, first_row=1, first_col=3)

# xlsxwriter only produces the file when the workbook is closed.
workbook.close()