Well I've a row which i want to copy complete.
I dont know how i can do that correctly in python.
i try to copy the code from another stackoverflow page. But if i test the code i get errors
my own code is this.
# Copy input to output
output_table = input_table.copy()
# conda install -c anaconda openpyxl
import os
import openpyxl
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows
from copy import copy
from copy import deepcopy
from openpyxl.styles import Font, colors, Color, PatternFill, Border, Side, Alignment, Protection
def move_cell(source_cell, dest_row, dest_col, preserve_original=False):
#param source_cell: cell to be moved to new coordinates
#param dest_row: 1-indexed destination row
#param dest_col: 1-indexed destination column
#param preserve_original: if True, does a copy instead of a move
if preserve_original:
cell_to_move = copy.copy(source_cell)
else:
cell_to_move = source_cell
worksheet = cell_to_move.parent
source_address = (cell_to_move.row, cell_to_move.col_idx)
dest_address = (dest_row, dest_col)
cell_to_move.row = dest_row
cell_to_move.col_idx = dest_col
worksheet._cells[dest_address] = cell_to_move
if not preserve_original:
del worksheet._cells[source_address]
return cell_to_move
var_data_path = "C:/"
#var_path_excel_file = var_data_path + os.path.sep + "data.xlsx"
wb = load_workbook(var_path_excel_file)
ws = wb["test"]
# activate the ws 'data'
for s in range(len(wb.sheetnames)):
if wb.sheetnames[s] == ws:
break
wb.active = s
mr = ws.max_row
mc = ws.max_column
# copying the cell values from source
# excel file to destination excel file
for j in range (1, mc + 1):
c = ws.cell(row = 10, column = j)
#ws.cells is my idea. But it don't copy the style and dont manipulate the formulas. i.e. =D10 to =D12 . Dependent from the row. Move cell function is the function which i copied from another great user. But i get errors for that. i test this all with the row 10. I want to copy the row 10 to 15. My error is function' object has no attribute 'copy' Traceback (most recent call #last): File "<string>", line 85, in <module>
# File "<string>", line 19, in move_cell
#AttributeError: 'function' object has no attribute 'copy'
ws.cell(15, column = j).value = c.value
move_cell(c, 15, j, preserve_original=True)
#ws[15] = ws[10]
wb.save(var_path_excel_file)
In openpyxl, how to move or copy a cell range with formatting, merged cells, formulas and hyperlinks
Related
I need to copy cells like hyperlinks from one Excel file to another. I can't find anything relating to this problem. I can copy cells values but it's not what I need.
I tried to modify some examples of coping cells from one book to another, but it wasn't success
To copy the value in each cell from a source workbook (in this example 'foo1.xlsx') to a new workbook (destination workbook) and have the destination cells link back to the source cells
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.hyperlink import Hyperlink
source_path = "foo1.xlsx"
source_sheet = 'Sheet1'
source_wb = load_workbook(source_path)
source_ws = source_wb[source_sheet]
### Create a new workbook and worksheet to copy data to and rename the
### sheet to 'Sheet1'
destination_wb = Workbook()
destination_ws = destination_wb.active
destination_ws.title = 'Sheet1'
### Loop thru the rows and cells in the source sheet
for row in source_ws.iter_rows():
for source_cell in row:
cell_coord = source_cell.coordinate
# Skipping empty cells.
# Otherwise these cells in the destination workbook will be
# filled with the source filename.
if source_cell.value is None:
continue
### Create hyperlink to source cell
hyperlink = Hyperlink(target=source_path,
ref=cell_coord,
location = f'{source_sheet}!{cell_coord}')
### Copy source cell value to the destination sheet
destination_ws.cell(source_cell.row, source_cell.column).value = source_cell.value
### Update destination cell with hyperlink to source cell
destination_ws.cell(source_cell.row, source_cell.column).hyperlink = hyperlink
### Save new workbook specifying file name
destination_wb.save('foo2.xlsx')
###################################################
Change code to put full path to cell...
Instead of adding origin cell and hyperlink to it, set the cell value to the link path. Change the 8 lines from and including
### Create hyperlink to source cell
to
### Set full path to the original cell
destination_ws.cell(source_cell.row, source_cell.column).value = \
f'{source_path}#{source_sheet}!{cell_coord}'
moken's solution is more convenient and reliable.
This is the code to store hyperlink to a cell from a different file:
import os
from openpyxl import load_workbook, Workbook
def main():
input_workbook_path = r"c:\excel_books\input book.xlsx"
output_workbook_path = r"c:\excel_books\output book.xlsx"
input_wb = load_workbook(input_workbook_path)
output_wb = Workbook()
sheet_in = input_wb["Sheet1"]
sheet_out = output_wb["Sheet"]
cell_index = "B12"
anchor = "CLICK HERE"
# =HYPERLINK("[c:\excel_books\input book.xlsx]Sheet1!B12","CLICK HERE")
external_cell_link = f'=HYPERLINK("[{input_workbook_path}]{sheet_in.title}!{cell_index}", "{anchor}")'
sheet_out["A2"].value = external_cell_link
output_wb.save(output_workbook_path)
if __name__ == '__main__':
main()
This code is for getting the value from a cell from a different file
import os
from openpyxl import load_workbook, utils, Workbook
def construct_link(workbook_absolute_path, sheet_name, cell_index):
"""
The function onstructs full path to the cell in the external
book, e.g. - ='c:\excel_books\[input book.xlsx]Sheet1'!C1
"""
# Adding square brackets arround filename in the path.
# Before - c:\excel_books\input book.xlsx
# After - c:\excel_books\[input book.xlsx]
filename = os.path.basename(workbook_absolute_path)
dirname = os.path.dirname(workbook_absolute_path)
full_path = os.path.join(dirname, f"[{filename}]")
return f"={utils.quote_sheetname(full_path + sheet_name)}!{cell_index}"
def main():
input_workbook_path = r"c:\excel_books\input book.xlsx"
output_workbook_path = r"c:\excel_books\output book.xlsx"
input_wb = load_workbook(input_workbook_path)
output_wb = Workbook()
sheet_in = input_wb["Sheet1"]
sheet_out = output_wb["Sheet"]
external_cell_link = construct_link(
input_workbook_path,
sheet_in.title,
"C1")
sheet_out["A2"].value = external_cell_link
output_wb.save(output_workbook_path)
if __name__ == '__main__':
main()
This link might be helpful - Control when external references (links) are updated
I need to open an xlsx document and color it. But I don't understand why it shows cell error it. My algorithm works like this:
Open xlsx, writer = pd.ExcelWriter(path, engine='xlsxwriter')
worksheet = writer.sheets['Sheet1']
col_style = Font(name = "Reem Kufi", size = 12, color = "DB3B22", italic =True)
for i in range(2,40):
worksheet.cell(row = i, column = 3).font = col_style
Error:- 'Worksheet' object has no attribute 'cell'
you will need to use Openpyxl's loadworkbook instead of ExcelWriter to achieve what you are looking for. Updated code here. Note that I have only changed the initial open file and sheet using the new code and required libraries and not changed rest of your code.
from openpyxl.styles import Font
from openpyxl import load_workbook
writer = load_workbook(filename='YourFile.xlsx')
worksheet = writer['Sheet1']
col_style = Font(name = "Reem Kufi", size = 12, color = "DB3B22", italic =True)
for i in range(2,40):
worksheet.cell(row = i, column = 3).font = col_style
writer.save('YourFile.xlsx')
First of all, I am new to python (practically I have learned only from Sololearn, that too only up to half course). So I request you to give me a little bit detailed answer.
My task has following broad steps:-
Delete old .xlsx file(if any)
Convert two .xls files into .xlsx file using win32, delete the first row and then delete .xls file [weird xls files already downloaded into source directory + xlrd,pyexcel show error (unsupported format or corrupt) file in opening .xls file (online analysis of file predicts it to be html/htm) ]
Get data from xlsx file
First, delete old worksheet on google spreadsheet to remove old data. Create a new worksheet with the same name. Insert data into new worksheet on the google spreadsheet.
Open second sheet(which imports data from the first sheet) and update one cell in Dummy Sheet to make sure google spreadsheet is synchronised in the background.
Now, I wrote a code by combining many codes and by using a lot of google.
The code is working fine but it takes on an avg about 65 seconds to complete the whole process.
My question has 3 parts:-
Is there any way to directly access data from .xls file?
Is there any way this code's performance can be improved.
Any other more efficient method for completing the above-said task?
My Code:-
import time
import win32com.client as win32
import os
import openpyxl
from openpyxl.utils import get_column_letter
import gspread
from oauth2client.service_account import ServiceAccountCredentials
start = time.time()
# set input-output file locations
source_dir = "C:\\Users\\XYZ\\Downloads"
output_dir = "C:\\Users\\XYZ\\Excels"
# use creds to create a client to interact with the Google Drive API
# make sure to share files with email contained in json file
scope = ['https://spreadsheets.google.com/feeds']
# code will not work without json file
creds = ServiceAccountCredentials.from_json_keyfile_name("C:\\Users\\XYZ\\your.json", scope)
gc = gspread.authorize(creds)
# following code is to open any spreadsheet by name
sh = gc.open("First Sheet")
def save_as_xlsx(input_file,output_dir,output_file_name) :
# call excel using win32, then open .xls file
# delete first row and then save as .xlsx
excel = win32.gencache.EnsureDispatch('Excel.Application')
wb = excel.Workbooks.Open(input_file)
wbk = excel.ActiveWorkbook
sheet = wbk.Sheets(1)
sheet.Rows(1).Delete()
wb.SaveAs(output_dir + '\\' + output_file_name, FileFormat = 51)
#FileFormat = 51 is for .xlsx extension. FileFormat = 56 is for .xls extension
wb.Close()
excel.Application.Quit()
return True
def get_the_data_from_xlsx(output_dir,output_file_name) :
# use openpyxl.load to find out last cell of file
# store cell values in list called data
wb = openpyxl.load_workbook(output_dir + '\\' + output_file_name)
sheet = wb.active
max_row_no = sheet.max_row
max_column_no = sheet.max_column
max_column = get_column_letter(max_column_no)
last_cell = str(max_column) + str(max_row_no)
cell_addresses = sheet['A1' : last_cell]
data = []
for i in cell_addresses :
for e in i :
data.append(e.value)
return (data,last_cell)
def insert_data_into_spreadsheet(name_of_worksheet,data,last_cell) :
# Find a workbook by name in already opened spreadsheet
# delete the worksheet to clear old data
# create worksheet with same name to maintain import connections in sheets.
worksheet = sh.worksheet(name_of_worksheet)
sh.del_worksheet(worksheet)
worksheet = sh.add_worksheet(title=name_of_worksheet, rows="500", cols="30")
# store range of cells for spreadsheet in list named cell_list
cell_list = worksheet.range('A1' + ':' + str(last_cell))
# attach all the values from data list as per the cell_list
a = 0
for cell in cell_list :
cell.value = data[a]
a = a + 1
# update all cells stored in cell_list in one go
worksheet.update_cells(cell_list)
def delete_file(directory,file_initials) :
for filename in os.listdir(directory) :
if filename.startswith(file_initials) :
os.unlink(directory +"\\" + filename)
# check if files are in source_dir
for filename in os.listdir(source_dir) :
# check for file1.xls and set input_file name if any file exists.
if filename.startswith("file1"):
input_file = source_dir + "\\file1.xls"
output_file1 = "output_file1.xlsx"
# detect and delete any old file in output directory
delete_file(output_dir,"output_file1")
if save_as_xlsx(input_file,output_dir,output_file1) == True :
# delete the file from source directory after work is done
delete_file(source_dir,'file1')
# get data from new xlsx file
data_from_xlsx = get_the_data_from_xlsx(output_dir,output_file1)
data_to_spreadsheet = data_from_xlsx[0]
last_cell = data_from_xlsx[1]
# insert updated data into spreadsheet
insert_data_into_spreadsheet("file1_data",data_to_spreadsheet,last_cell)
# repeat the same process for 2nd file
if filename.startswith('file2'):
input_file = source_dir + "\\file2.xls"
output_file2 = "output_file2.xlsx"
delete_file(output_dir,"output_file2")
if save_as_xlsx(input_file,output_dir,output_file2) == True :
delete_file(source_dir,'file2')
data_from_xlsx = get_the_data_from_xlsx(output_dir,output_file2)
data_to_spreadsheet = data_from_xlsx[0]
last_cell = data_from_xlsx[1]
insert_data_into_spreadsheet("file2_data",data_to_spreadsheet,last_cell)
# open spreadsheet by name and open Dummy worksheet
# update one cell to sync the sheet with other sheets
sh = gc.open("second sheet")
worksheet = sh.worksheet("Dummy")
worksheet.update_acell('B1', '=Today()')
end = time.time()
print(end-start)
I have a .xlsx file with 11 worksheets and I need to insert the contents of a text file (tab delim, roughly 30 columns with 100 rows) from Row 3 onwards. I tried the code below but I end up with errors. (using bash/Linux)
#!/usr/bin/env python
import csv
from openpyxl.reader.excel import load_workbook
from xlrd import open_workbook
from xlutils import copy as xl_copy
with open('S12_final.txt') as tab_file: #open tab text file
tab_reader = csv.reader(tab_file, delimiter='\t')
xls_readable_book = load_workbook('S12.xlsx') #load workbook
xls_writeable_book = xl_copy.copy(xls_readable_book)
xls_writeable_sheet = xls_writeable_book.get_sheet_by_name('Filtered') #write data on this sheet
for row_index, row in enumerate(tab_reader):
xls_writeable_sheet.write(row_index, 0, row[0])
xls_writeable_sheet.write(row_index, 1, row[1])
xls_writeable_book.save('S12.xlsx') #save excel file
Errors:
> Traceback (most recent call last): File "./tab2excel_a.py", line 23,
> in <module>
> xls_writeable_book = xl_copy.copy(xls_readable_book) File "/usr/local/lib/python2.7/dist-packages/xlutils-1.6.0-py2.7.egg/xlutils/copy.py",
> line 19, in copy
> w File "/usr/local/lib/python2.7/dist-packages/xlutils-1.6.0-py2.7.egg/xlutils/filter.py",
> line 937, in process
> reader(chain[0]) File "/usr/local/lib/python2.7/dist-packages/xlutils-1.6.0-py2.7.egg/xlutils/filter.py",
> line 61, in __call__
> filter.workbook(workbook,filename) File "/usr/local/lib/python2.7/dist-packages/xlutils-1.6.0-py2.7.egg/xlutils/filter.py",
> line 287, in workbook
> self.wtbook.dates_1904 = rdbook.datemode AttributeError: 'Workbook' object has no attribute 'datemode'
Any suggestions ?
It seems (and by no means I'm knowledgeable about those particular libraries) that you are attempting to input an openpyxl object into an xlutils method in these lines:
xls_readable_book = load_workbook('S12.xlsx') #load workbook
xls_writeable_book = xl_copy.copy(xls_readable_book)
So the interpreter complains that this "unknown" object has no attribute datemode. As so try using xlrd open_workbook method since it seems to return a Book object which, according to the documentation, is fully compatible with xlrd.copy methods:
xls_readable_book = open_workbook('S12.xlsx') #load workbook
xls_writeable_book = xl_copy.copy(xls_readable_book)
Consider this openpyxl example:
from openpyxl.workbook.workbook import Workbook # as _Workbook
import csv
wb = Workbook()
wb.create_sheet('Filtered')
ws = wb['Filtered']
with open('test/S12_final.csv') as tab_file:
tab_reader = csv.reader(tab_file, delimiter='\t')
# Skipt first 2 Lines
[next(tab_reader) for skip in range(2)]
# Append list(rowData) after Sheets last accessed Row
for rowData in tab_reader:
ws.append(rowData)
wb.save('test/S12.xlsx')
Tested with Python: 3.4.2 - openpyxl: 2.4.1 - LibreOffice: 4.3.3.2
I am trying to read in multiple excel files and append the data from each file into one master file. Each file will have the same headers (So I can skip the import of the first row after the initial file).
I am pretty new to both Python and the OpenPyXL module. I am able to import the first workbook without problem. My problem comes in when I need to open the subsequent file and copy the data to paste into the original worksheet.
Here is my code so far:
# Creating blank workbook
from openpyxl import Workbook
wb = Workbook()
# grab active worksheet
ws = wb.active
# Read in excel data
from openpyxl import load_workbook
wb = load_workbook('first_file.xlsx') #explicitly loading workbook, will automate later
# grab active worksheet in current workbook
ws = wb.active
#get max columns and rows
sheet = wb.get_sheet_by_name('Sheet1')
print ("Rows: ", sheet.max_row) # for debugging purposes
print ("Columns: ", sheet.max_column) # for debugging purposes
last_data_point = ws.cell(row = sheet.max_row, column = sheet.max_column).coordinate
print ("Last data point in current worksheet:", last_data_point) #for debugging purposes
#import next file and add to master
append_point = ws.cell(row = sheet.max_row + 1, column = 1).coordinate
print ("Start new data at:", append_point)
wb = load_workbook('second_file.xlsx')
sheet2 = wb.get_sheet_by_name('Sheet1')
start = ws.cell(coordinate='A2').coordinate
print("New data start: ", start)
end = ws.cell(row = sheet2.max_row, column = sheet2.max_column).coordinate
print ("New data end: ", end)
# write a value to selected cell
#sheet[append_point] = 311
#print (ws.cell(append_point).value)
#save file
wb.save('master_file.xlsx')
Thanks!
I don't really understand your code. It looks too complicated. When copying between worksheets you probably want to use ws.rows.
wb1 = load_workbook('master.xlsx')
ws2 = wb1.active
for f in files:
wb2 = load_workbook(f)
ws2 = wb2['Sheet1']
for row in ws2.rows[1:]:
ws1.append((cell.value for cell in row))