How do I find the total number of rows using xlwt or xlrd in Python? I have an Excel file (accounts.xls) and would like to append rows to it.
I am getting this error: AttributeError: 'Sheet' object has no attribute 'write'
from xlrd import open_workbook
from xlwt import Workbook
def saveWorkSpace(fields, r):
    """Try to append the name/phone/email in *fields* to accounts.xls.

    This is the version from the question and it still fails as reported:
    the sheet is obtained from xlrd, whose Sheet objects have no ``write``
    method, so the first ``write`` call raises AttributeError.  The incoming
    *r* is ignored; the row index is recomputed from the sheet.
    """
    source_book = open_workbook('accounts.xls')
    ws = source_book.sheet_by_index(0)
    # Row index is derived from the existing row count, then bumped by one.
    r = ws.nrows + 1
    # A brand-new, empty xlwt workbook; it is unrelated to ``ws`` above.
    wb = Workbook()
    for col, key in enumerate(('name', 'phone', 'email')):
        # Raises AttributeError: ``ws`` is the xlrd sheet, not an xlwt one.
        ws.write(r, col, fields[key])
    wb.save('accounts.xls')
    print('Wrote accounts.xls')
Here is the solution to the above question:
import xlrd
import xlwt
from xlutils.copy import copy
def saveWorkSpace(fields):
    """Append the name, phone and email from *fields* as a new row of accounts.xls.

    The existing workbook is read with xlrd, copied into a writable workbook
    with ``xlutils.copy.copy``, and the new row is written at index ``nrows``
    of the first sheet before the file is saved over the original.
    """
    existing = xlrd.open_workbook('accounts.xls', formatting_info=True)
    next_row = existing.sheet_by_index(0).nrows
    writable = copy(existing)
    target = writable.get_sheet(0)
    for col, key in enumerate(('name', 'phone', 'email')):
        target.write(next_row, col, fields[key])
    writable.save('accounts.xls')
    print('Wrote accounts.xls')
Python program to add a value after the last data row of an Excel sheet.
from xlwt import Workbook
from xlrd import open_workbook
import openpyxl
# Find the first free row of a worksheet and append a value there.
# Change the worksheet index below to target a different sheet.
def getDataColumn():
    """Compute the first free row of the first worksheet and write to it.

    The workbook is an .xlsx file, which xlrd 2.x no longer reads, so the
    row count is taken with openpyxl instead.  openpyxl rows are 1-based,
    so ``max_row + 1`` is the row directly after the last used one.  The
    computed row is printed and passed, with column 1, to ``writedata``.
    """
    wb = openpyxl.load_workbook('C:\\Temp\\exp\\data.xlsx')
    ws = wb.worksheets[0]
    rowCount = ws.max_row + 1
    columnNumber = 1
    print(rowCount)
    writedata(rowCount, columnNumber)
# Write data to the specified cell.
def writedata(rowNumber, columnNumber):
    """Write 'Appended Data' into one cell of 'Sheet1' and save the workbook.

    Args:
        rowNumber: 1-based row index of the target cell.
        columnNumber: 1-based column index of the target cell.
    """
    path = 'C:\\Temp\\exp\\data.xlsx'
    book = openpyxl.load_workbook(path)
    # Index the workbook by sheet name; get_sheet_by_name() is deprecated.
    sheet = book['Sheet1']
    sheet.cell(row=rowNumber, column=columnNumber).value = 'Appended Data'
    book.save(path)
    print('saved')
# Run the append once when this script is executed.
getDataColumn()
# Terminate the script explicitly.
exit()
Related
I have the next code:
import PyPDF2
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange
# Define the Excel file
file = 'C:\\Users\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx'
wb = openpyxl.load_workbook(file)  # load the workbook
ws = wb['Hoja1']  # select sheet "Hoja1"
# Highest row number holding a value in column A and in column B
max_row_for_a = max((a.row for a in ws['A'] if a.value is not None))
max_row_for_b = max((b.row for b in ws['B'] if b.value is not None))
# Loop over column A (PDF paths), starting below the header row
for row in ws.iter_rows(min_row=2, max_col=1, max_row=max_row_for_a):
pdf= row[0].value
print (pdf)
# Loop over column B, starting below the header row
for row2 in ws.iter_rows(min_row=2, max_col=2, max_row=max_row_for_b, min_col=2):
# NOTE: this value is overwritten by the extracted text a few lines later.
extracto = row2[0].value
pdfselect=open(pdf,"rb")
leer = PyPDF2.PdfFileReader(pdfselect)
pagina = leer.getPage(0)
extracto = pagina.extractText()
# NOTE: the extracted text is only printed; no statement in this snippet
# assigns it to a worksheet cell.
print(extracto)
wb.save("PRUEBA_PDF2.xlsx")
# NOTE: without parentheses this only references the method; it is not called.
wb.close
The idea is to read the PDF names from column A of the Excel file and write the text extracted from each PDF into column B, but when I execute the code it doesn't show anything, not even an error. I tried a minimal case like this and had no problems:
#pdfselect=open("ejemplo.pdf","rb")
#leer = PyPDF2.PdfFileReader(pdfselect)
#pagina = leer.getPage(0)
#sheet = wb.active
#ws['B2'] = pagina.extractText()
#wb.save("PRUEBA_PDF2.xlsx")
#wb.close
what i'm doing wrong?
Greetings!
I found the problem. The correct code should look like this:
import PyPDF2
import openpyxl
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.worksheet.cell_range import CellRange
# Define the Excel file
file = 'C:\\Users\\lorrego\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx'
wb = openpyxl.load_workbook(file)  # load the workbook
ws = wb['Hoja1']  # select sheet "Hoja1"
# Last row of column A that holds a PDF path.
max_row_for_a = max((a.row for a in ws['A'] if a.value is not None))
# Only column A is read, so restrict the iteration to that single column.
for (name_cell,) in ws.iter_rows(min_row=2, max_col=1, max_row=max_row_for_a):
    pdf = name_cell.value
    if pdf is None:
        # Blank cell inside the used range: nothing to open.
        continue
    # The context manager closes each PDF once its text has been extracted.
    with open(pdf, "rb") as pdfselected:
        leer = PyPDF2.PdfFileReader(pdfselected)
        pagina = leer.getPage(0)
        # Write the first page's text next to the path, in column B.
        ws.cell(row=name_cell.row, column=2).value = pagina.extractText()
# NOTE(review): this path differs from `file` above (no "lorrego" folder);
# confirm which location is intended.
wb.save("C:\\Users\\Desktop\\PYTHON_PDF\\PRUEBA_PDF.xlsx")
wb.close()  # actually call close(); a bare `wb.close` does nothing
I want to copy and paste data from a CSV file into an Excel workbook so I can later filter that table. I have done all these steps in VBA, but I've noticed that VBA can be buggy, so I want to migrate to Python.
I have converted the CSV to an xlsx file and I have successfully copied the converted xlsx file into the Excel document.
My question is: how do I copy and paste to a specific starting column? I have other data that I need to copy starting at cell AN1.
I have tried the code below. I am able to write to one specific cell, but I want to paste all of the data...
# Walk every cell of the source sheet.
for row in ws1:
for cell in row:
# NOTE: this expression only reads K1's value and discards it; nothing is written.
ws2['K1'].value
#ws2[cell.coordinate].value = cell.value
wb2.save(path2)
Entirety...
## csv to xlsx
from openpyxl import Workbook
import csv

wb = Workbook()
ws = wb.active
# The csv module expects its file to be opened with newline='' so that
# newlines embedded in quoted fields are parsed correctly.
with open('C:/B.csv', 'r', newline='') as f:
    # Each parsed CSV record becomes the next worksheet row.
    for row in csv.reader(f):
        ws.append(row)
wb.save('C:/B.xlsx')
###### COPY FROM B to existing E workbook
import openpyxl as xl
# Source (converted CSV) and destination workbook paths.
path1 = 'C:/B.xlsx'
path2 = 'C:/E.xlsx'
wb1 = xl.load_workbook(filename=path1)
ws1 = wb1.worksheets[0]
wb2 = xl.load_workbook(filename=path2)
ws2 = wb2.worksheets[0]
#ws2 = wb2.create_sheet(ws1.title)
#cell.value = ['A2']
for row in ws1:
for cell in row:
# NOTE: every source value is assigned to the same destination cell
# (row 1, column 1), so each assignment overwrites the previous one.
ws2.cell(row=1, column=1).value = cell.value
wb2.save(path2)
Copying columns between two different workbooks using openpyxl could be done as follows:
import openpyxl
from openpyxl.utils import column_index_from_string

wb1 = openpyxl.load_workbook('B.xlsx')
ws1 = wb1.active
wb2 = openpyxl.load_workbook('E.xlsx')
ws2 = wb2.active

# Address destination cells by row/column index.  Zipping against
# ws2['AN:AN'] would stop at the destination sheet's current last row and
# silently drop any source rows beyond it.
dst_col = column_index_from_string('AN')
for src in ws1['B:B']:
    ws2.cell(row=src.row, column=dst_col).value = src.value

wb2.save('E.xlsx')
For a range of columns, the following would work:
import openpyxl
from openpyxl.utils import column_index_from_string

wb1 = openpyxl.load_workbook('B.xlsx')
ws1 = wb1.active
wb2 = openpyxl.load_workbook('E.xlsx')
ws2 = wb2.active

# Copy source columns A..I to destination columns AN..AV.  Destination
# cells are addressed by index so that every source row is copied even when
# the destination sheet currently has fewer rows than the source.
first_src_col = column_index_from_string('A')
last_src_col = column_index_from_string('I')
first_dst_col = column_index_from_string('AN')
for src_row in ws1.iter_rows(min_col=first_src_col, max_col=last_src_col):
    for offset, cell_src in enumerate(src_row):
        cell_dst = ws2.cell(row=cell_src.row, column=first_dst_col + offset)
        cell_dst.value = cell_src.value

wb2.save('E.xlsx')
# NOTE(review): incomplete fragment; `sheet`, `i`, `k` and `val` are not
# defined anywhere in this snippet.
for row in range(1, ws1.max_row + 1):
#for cell in row:
# Groups columns A through D and marks the group hidden.
ws1.column_dimensions.group('A', 'D', hidden=True)
sheet.cell(row=i + 2, column=k + 1).value = val
wb2.save(path2)
Should do it
Unfortunately, the solutions provided were unacceptable because they did not work. VBA is also off the table. I am using openpyxl and the code above raised an error. Ideally I would like to copy to a new column, but that is beyond my skill. Instead, use the code below and use Excel formulas to get the data where you want it. I will have to spend about 4 hours redesigning my Excel workbook, but it is worth it, I suppose, as I am unable to find a workaround.
## csv to xlsx
from openpyxl import Workbook
import csv

wb = Workbook()
ws = wb.active
# Path fixed from 'C/B.csv' (missing drive colon) to match the 'C:/B.xlsx'
# output written beside it.  newline='' is what the csv module expects so
# that newlines embedded in quoted fields are parsed correctly.
with open('C:/B.csv', 'r', newline='') as f:
    # Each parsed CSV record becomes the next worksheet row.
    for row in csv.reader(f):
        ws.append(row)
wb.save('C:/B.xlsx')
###### COPY FROM B to existing E workbook
import openpyxl as xl

path1 = 'C:/B.xlsx'
path2 = 'C:/E.xlsx'

# Open both workbooks and take the first worksheet of each.
wb1 = xl.load_workbook(filename=path1)
wb2 = xl.load_workbook(filename=path2)
ws1, ws2 = wb1.worksheets[0], wb2.worksheets[0]

# Mirror every source cell into the destination cell with the same
# coordinate (A1 -> A1, B7 -> B7, ...).
for row in ws1:
    for cell in row:
        ws2[cell.coordinate].value = cell.value

wb2.save(path2)
The code to actually write each file runs great. The problem I'm having is that the data validation piece doesn't appear to be doing anything. No drop downs are being created in the range I'm referencing.
Thanks in advance for any and all assistance!
%%time
import pandas as pd
import xlsxwriter as ew
import csv as csv
import os
import glob
import openpyxl
# NOTE(review): `#filename` appears to be a redacted placeholder for a real
# path; as written those lines are not valid Python.
#remove existing files from directory
files = glob.glob(#filename)
for f in files:
os.remove(f)
pendpath = #filename
df = pd.read_sas(pendpath)
allusers = df.UserID_NB.unique()
listuserpath = #filename
listusers = pd.read_csv(listuserpath)
# Normalize the user IDs to stripped strings.
listusers = listusers['USER_ID'].apply(lambda x: str(x).strip())
# Write one workbook per user, containing only that user's rows.
for id in listusers:
x = df.loc[df['UserID_NB']==id]
path = #filename
x.to_excel(path, sheet_name = str(id), index = False)
# Re-open the file just written so a validation rule can be added.
from openpyxl import load_workbook
wb = openpyxl.load_workbook(filename = path)
sheet = wb.get_sheet_by_name(str(id))
maxrow = sheet.max_row
from openpyxl.worksheet.datavalidation import DataValidation
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
# Target range: columns R through T, from row 1 to the last used row.
rangevar = 'R1:T'+ str(maxrow)
dv.ranges.append(rangevar)
# NOTE: `dv` is configured above but is never attached to `sheet` in this
# snippet before the workbook is saved.
wb.save(path)
print str(id), rangevar
Code for Basic Sheet
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active
# NOTE: `sheet` is not defined in this snippet; the worksheet is bound to `ws`.
sheet.title = 'testsheet'
path = '#filepath'
from openpyxl.worksheet.datavalidation import DataValidation
dv = DataValidation(type="list", formula1='"Yes,No"', allow_blank=False, showDropDown = True)
dv.ranges.append('A1')
# NOTE: `dv` is never attached to `ws` before the workbook is saved.
wb.save(path)
You forgot to add the DataValidation object (dv) to the worksheet.
>>> # Add the data-validation object to the worksheet
>>> ws.add_data_validation(dv)
Read the docs about validation
I am trying to read in multiple excel files and append the data from each file into one master file. Each file will have the same headers (So I can skip the import of the first row after the initial file).
I am pretty new to both Python and the OpenPyXL module. I am able to import the first workbook without problem. My problem comes in when I need to open the subsequent file and copy the data to paste into the original worksheet.
Here is my code so far:
# Creating blank workbook
from openpyxl import Workbook
wb = Workbook()
# grab active worksheet
ws = wb.active
# Read in excel data
from openpyxl import load_workbook
# NOTE: rebinding `wb` here discards the blank workbook created above.
wb = load_workbook('first_file.xlsx') #explicitly loading workbook, will automate later
# grab active worksheet in current workbook
ws = wb.active
#get max columns and rows
sheet = wb.get_sheet_by_name('Sheet1')
print ("Rows: ", sheet.max_row) # for debugging purposes
print ("Columns: ", sheet.max_column) # for debugging purposes
last_data_point = ws.cell(row = sheet.max_row, column = sheet.max_column).coordinate
print ("Last data point in current worksheet:", last_data_point) #for debugging purposes
#import next file and add to master
append_point = ws.cell(row = sheet.max_row + 1, column = 1).coordinate
print ("Start new data at:", append_point)
# NOTE: `wb` now refers to second_file.xlsx, while `ws` and `sheet` still
# belong to the workbook loaded from first_file.xlsx.
wb = load_workbook('second_file.xlsx')
sheet2 = wb.get_sheet_by_name('Sheet1')
start = ws.cell(coordinate='A2').coordinate
print("New data start: ", start)
end = ws.cell(row = sheet2.max_row, column = sheet2.max_column).coordinate
print ("New data end: ", end)
# write a value to selected cell
#sheet[append_point] = 311
#print (ws.cell(append_point).value)
#save file
# NOTE: this saves whatever `wb` is currently bound to, i.e. the workbook
# loaded from second_file.xlsx; no data has been copied between workbooks.
wb.save('master_file.xlsx')
Thanks!
I don't really understand your code. It looks too complicated. When copying between worksheets you probably want to use ws.rows.
# Append the data rows of every workbook in `files` to the master sheet.
wb1 = load_workbook('master.xlsx')
ws1 = wb1.active  # destination sheet (was mistakenly bound to ws2)
for f in files:
    wb2 = load_workbook(f)
    ws2 = wb2['Sheet1']
    # Start at row 2 to skip each file's header row.  iter_rows() is used
    # because ws.rows cannot be sliced when it is a generator.
    for row in ws2.iter_rows(min_row=2):
        ws1.append([cell.value for cell in row])
# Persist the appended rows; without saving, the merge is lost.
wb1.save('master.xlsx')
I just started working with openpyxl a couple of days ago and its a great library. However, the documentation seems to be sparse for advanced features. I have a couple of issues.
openpyxl seems to change the formula that I insert to lower case, which results in an unknown reference in Excel.
Furthermore, I changed the name of the sheet to accommodate the lowercase and still got a #NAME? error in the cell that contains the reference.
Can someone please show me how or where to find out how to reference a cell from another sheet in openpyxl
# NOTE: `Workbook` is imported here as a dotted module path, yet the next
# line uses the bare name `Workbook`, which this statement does not bind.
import openpyxl.Workbook
wb = Workbook()
ws = wb.get_active_sheet()
# NOTE(review): the assigned strings do not begin with '=' - presumably
# required for Excel to treat them as formulas; confirm.
#shows up lowercase with name error in excel
# NOTE: the string literal 'A1 on the next line is missing its closing quote.
ws.cell('A1).value = "$'Sheet'.E7 + 123"
#still shows a name error in excel
ws.cell('A2').value = "$'sheet'.E7 + 123"
Try this:
from openpyxl import Workbook

wb = Workbook()

# Sheet that holds the referenced value.
ws = wb.create_sheet()
ws.title = 'NewSheet'
# Address cells by coordinate through indexing; cell() takes row/column
# numbers rather than a coordinate string.
ws['E7'] = 7

# Second sheet whose A1 holds a formula pointing at NewSheet!E7.
ws = wb.create_sheet()
ws['A1'] = "=NewSheet!E7 + 123"

wb.save(filename='temp2.xlsx')
from openpyxl import Workbook, utils

wb = Workbook()

# Sheet that holds the referenced value.  It keeps its own name so that its
# title is still reachable after the second sheet is created.
source_ws = wb.create_sheet()
source_ws.title = 'NewSheet'
# Address cells by coordinate through indexing; cell() takes row/column
# numbers rather than a coordinate string.
source_ws['E7'] = 7

# Second sheet whose A1 references E7 on the source sheet.  The quoted title
# must come from source_ws; reading ws.title here would name this second
# sheet instead of the one holding the value.
ws = wb.create_sheet()
ws['A1'] = f"={utils.quote_sheetname(source_ws.title)}!E7 + 123"

wb.save(filename='temp2.xlsx')
The problem with the previous answer is that it depends on the title of the sheet being 'NewSheet'. Using quote_sheetname() handles that.