I'm trying to read a string from a text file and write it into an Excel sheet without overwriting the existing data. I read somewhere that openpyxl is used to update Excel sheets, but my script just overwrites the entire sheet. I want the other data to stay the same.
python script:
from openpyxl import Workbook
# Source text file: one "key:value" pair per line.
file_name = "D:\\a.txt"

content = {}
with open(file_name) as f:
    for line in f:
        # Exactly one ':' per line is expected; the value keeps its trailing newline.
        key, value = line.split(":")
        content[key] = value

# Workbook() builds a brand-new, empty workbook in memory; nothing is read from disk.
wb = Workbook()
ws = wb.active

# Keys go to column 3 and values to column 4, starting at row 2.
for r, (item, text) in enumerate(content.items(), start=2):
    ws.cell(row=r, column=3).value = item
    ws.cell(row=r, column=4).value = text

# Saving under the report's name replaces whatever file was there before.
wb.save("D:\\Reports.xlsx")
Excel sheet before script:
Excel sheet after script :
How do I write the data to Excel without overwriting the other contents? Help.
Overwriting is due to both saving the file with wb.save() and your hard coded starting row number r = 2.
1) If you don't care about overwriting the rows each time you execute your script, you could use something like this:
from openpyxl import Workbook
from openpyxl import load_workbook
# Folder that holds both the input text file and the workbook to update.
# Every backslash is escaped: a bare "\D" is an invalid escape sequence.
path = 'P:\\Desktop\\'
file_name = "input.txt"

# Parse "key:value" lines into a dict (a repeated key keeps its last value).
content = {}
with open(path + file_name) as f:
    for line in f:
        line = line.strip()  # drop the trailing newline so it does not end up in the cell
        if not line:
            continue  # tolerate blank lines, e.g. at the end of the file
        # maxsplit=1 keeps any further ':' characters inside the value.
        key, value = line.split(":", 1)
        content[key] = value

# load_workbook() opens the existing file, so cells that are not written below
# keep their current contents when the workbook is saved again.
wb = load_workbook(path + 'Reports.xlsx')
ws = wb.active

# Keys go to column 3 and values to column 4, always starting at row 2,
# so each run overwrites the rows written by the previous run.
for r, (key, value) in enumerate(content.items(), start=2):
    ws.cell(row=r, column=3).value = key
    ws.cell(row=r, column=4).value = value

wb.save(path + "Reports.xlsx")
2) If you care about overwriting rows and the column numbers (3 & 4) you could try something like this:
from openpyxl import Workbook
from openpyxl import load_workbook
# Folder that holds both the input text file and the workbook to update.
# Every backslash is escaped: a bare "\D" is an invalid escape sequence.
path = 'P:\\Desktop\\'
file_name = "input.txt"

# Each parsed line becomes one worksheet row: [col 1, col 2, key, value].
content = []
with open(path + file_name) as f:
    for line in f:
        line = line.strip()  # drop the trailing newline so it does not end up in the cell
        if not line:
            continue  # tolerate blank lines, e.g. at the end of the file
        # maxsplit=1 keeps any further ':' characters inside the value.
        key, value = line.split(":", 1)
        # None placeholders leave columns 1 and 2 empty so the data lands in columns 3 and 4.
        content.append([None, None, key, value])

# load_workbook() opens the existing file so its current contents are preserved.
wb = load_workbook(path + 'Reports.xlsx')
ws = wb.active

# append() writes below the last used row, so earlier rows are never overwritten.
for row in content:
    ws.append(row)

wb.save(path + "Reports.xlsx")
Related
I would like to write code that extracts certain data, together with its file name, from multiple Excel files into a .txt file.
I wrote the code below, but it writes all the file names in the folder next to each piece of data in the .txt file.
How can I extract only the name of the file each piece of data came from?
My goal is to have the extracted data written as "filename" "data from column 10" in the .txt file.
import pathlib
import openpyxl
import os.path
import glob
# Output text file; closed explicitly once every workbook has been scanned.
f = open('data.txt', 'w')
path = pathlib.Path(r"..\file")

for path_obj in path.glob("*.xlsx"):
    wb = openpyxl.load_workbook(path_obj)
    for sheetname in wb.sheetnames:
        sheet = wb[sheetname]
        last_row = sheet.max_row
        for row in range(2, last_row + 1):
            marker = sheet[f"A{row}"]
            # Only rows whose column-A fill colour index is 'FFFF0000' are reported.
            if marker.fill.start_color.index != 'FFFF0000':
                continue
            # Lists every .xlsx in the folder again, so the written prefix
            # holds the names of all files rather than the current one.
            file_path = glob.glob(r"C:\\python\\file\*.xlsx")
            name_list = []
            for file in file_path:
                stem, _ext = os.path.splitext(os.path.basename(file))
                name_list.append(stem)
            f.write(str(name_list) + " ")
            f.write(str(sheet.cell(row=row, column=10).value))
            f.write("\n")

f.close()
Please see if this is what you are looking for. As mentioned by Vlad, I don't believe you need to use glob twice: path_obj already holds the file name. So update your code as below and see whether it does what you need...
import glob, os
import pathlib
import openpyxl
from openpyxl.styles import PatternFill
path = pathlib.Path(r"SampleDir")  # test directory; change it as needed

# The context manager closes data.txt even if a workbook fails to load part-way through.
with open('data.txt', 'w') as f:
    for path_obj in path.glob("*.xlsx"):
        wb = openpyxl.load_workbook(path_obj)
        for sheetname in wb.sheetnames:
            sheet = wb[sheetname]
            # Row 1 is skipped; scanning starts at row 2.
            for row in range(2, sheet.max_row + 1):
                # Only rows whose column-A fill colour index is 'FFFF0000' are reported.
                if sheet["A" + str(row)].fill.start_color.index == 'FFFF0000':
                    # path_obj.name is the file name without its directory;
                    # write str(path_obj) instead to include the path.
                    f.write(path_obj.name + " ")
                    f.write(str(sheet.cell(row=row, column=10).value))
                    f.write("\n")
I have a series of CSV file like this one. I’m trying to convert and merge them into an xlsx file with python and openpyxl with this code:
import csv
import openpyxl
import glob
# Converts every CSV file in csvpath into its own worksheet of one new workbook.
# Folder scanned for input files; each *.csv found becomes one worksheet.
csvpath = 'C:/Users/Lorenzo/Downloads/CSV/'
csvfiles = glob.glob(csvpath + '*.csv')
# User-supplied label that becomes part of the output file name.
data = input('Inserisci data Simulazione: ')
destinationfilepath = 'C:/Users/Lorenzo/Desktop/Simulazione_' + data + '.xlsx'
wb = openpyxl.Workbook()
for i in range(len(csvfiles)):
filename = csvfiles[i]
# The file object passed to csv.reader is never closed explicitly.
reader = csv.reader(open(filename), delimiter=',')
# Sheet title: the file path with the csvpath-length prefix and the last 4 characters sliced off.
csvname = filename[len(csvpath):-4]
ws1 = wb.create_sheet(csvname)
# k counts the CSV rows read so far; g counts the cells within the current row.
k=0
for row in reader:
if k==0:
# First CSV row is appended unchanged (values stay strings).
ws1.append(row)
else:
g=0
for cell in row:
# NOTE(review): k and g are passed straight through as row/column; g starts at 0 and
# k is 1 for the first data row — confirm this matches the indexing base of the
# installed openpyxl version (original nesting of these lines is not visible here).
c= ws1.cell(row=k, column=g)
c.value = float(cell)
g=g+1
k=k+1
# Fixed header labels written over cells A1..F1.
ws1['A1'] = 'Iteration'
ws1['B1'] = 'CD'
ws1['C1'] = 'CL'
ws1['D1'] = 'CL_F'
ws1['E1'] = 'CL_R'
ws1['F1'] = 'CM'
# Remove the sheet named 'Sheet' (presumably the default one created by Workbook()) before saving.
sheet = wb['Sheet']
wb.remove(sheet)
wb.save(destinationfilepath)
The code runs but in most cells (and strangely enough not in all cells) I get the error “number stored as text” despite using the command float like suggested in this and similar topics.
What is that I'm doing wrong?
I am wanting to copy and paste data from a csv to an excel so I can later filter that table. I have done all these steps in VBA but I've noticed that VBA can be buggy so am wanting to migrate to Python.
I have converted the csv to an excel and I have successfully copied the converted xlsx file to the excel document.
My question is, how do I copy and paste to a specific starting column. As I have other data I need to copy at cell AN1.
I have tried the below.. I am able to write to one specific cell but I am wanting to post the data...
# NOTE(review): fragment — ws1, ws2, wb2 and path2 are defined in the full script, not here.
for row in ws1:
for cell in row:
# This expression only reads the value of K1 and discards it; nothing is assigned.
ws2['K1'].value
#ws2[cell.coordinate].value = cell.value
wb2.save(path2)
Entirety...
## csv to xlsx
from openpyxl import Workbook
import csv
# Build a new workbook whose active sheet mirrors the CSV file row for row.
wb = Workbook()
ws = wb.active

with open('C:/B.csv', 'r') as f:
    records = csv.reader(f)
    for record in records:
        # Each CSV record becomes the next worksheet row; values stay strings.
        ws.append(record)

wb.save('C:/B.xlsx')
###### COPY FROM B to existing E workbook
import openpyxl as xl
# Source workbook (converted CSV) and the existing destination workbook.
path1 = 'C:/B.xlsx'
path2 = 'C:/E.xlsx'

wb1 = xl.load_workbook(filename=path1)
wb2 = xl.load_workbook(filename=path2)
# Only the first worksheet of each workbook is used.
ws1 = wb1.worksheets[0]
ws2 = wb2.worksheets[0]

for cells in ws1:
    for source in cells:
        # Every source value is written to the same destination cell (row 1, column 1),
        # so only the last value visited remains there.
        ws2.cell(row=1, column=1).value = source.value

wb2.save(path2)
Copying columns between two different workbooks using openpyxl could be done as follows:
import openpyxl
# Copy column B of B.xlsx into column AN of E.xlsx, row for row.
wb1 = openpyxl.load_workbook('B.xlsx')
ws1 = wb1.active
wb2 = openpyxl.load_workbook('E.xlsx')
ws2 = wb2.active

# Address the destination by row/column number rather than zipping against ws2['AN:AN']:
# that slice only spans the rows the destination sheet already has, so zip() would
# silently drop source rows whenever the source sheet is longer.
dst_col = openpyxl.utils.column_index_from_string('AN')
for src in ws1['B:B']:
    # ws2.cell() creates the destination cell on demand.
    ws2.cell(row=src.row, column=dst_col).value = src.value

wb2.save('E.xlsx')
For a range of columns, the following would work:
import openpyxl
# Copy columns A..I of B.xlsx into columns AN..AV of E.xlsx, row for row.
wb1 = openpyxl.load_workbook('B.xlsx')
ws1 = wb1.active
wb2 = openpyxl.load_workbook('E.xlsx')
ws2 = wb2.active

# Write by explicit row/column number instead of zipping against ws2['AN:AV']:
# that slice only spans the rows the destination sheet already has, so zip() would
# silently drop source rows whenever the source sheet is longer.
first_dst_col = openpyxl.utils.column_index_from_string('AN')
for row in ws1.iter_rows(min_col=1, max_col=9):  # columns A..I
    for offset, src in enumerate(row):
        # offset 0 -> AN, 1 -> AO, ... 8 -> AV
        ws2.cell(row=src.row, column=first_dst_col + offset).value = src.value

wb2.save('E.xlsx')
# NOTE(review): fragment — sheet, i, k, val, ws1, wb2 and path2 are not defined in this
# snippet, and the original nesting of these lines is not visible; confirm the intent.
for row in range(1, ws1.max_row + 1):
#for cell in row:
# Groups columns A through D and marks the group as hidden.
ws1.column_dimensions.group('A', 'D', hidden=True)
# Writes val at a position offset by 2 rows and 1 column from the counters i and k.
sheet.cell(row=i + 2, column=k + 1).value = val
wb2.save(path2)
Should do it
Unfortunately, the solutions provided were unacceptable because they did not work, and VBA is off the table. I am using openpyxl, and the code above raised an error. Ideally I would like to copy the data to a new column, but that is beyond my skill. Instead, I use the code below and then use Excel formulas to get the data where I want it. I will have to spend about 4 hours redesigning my Excel file, but I suppose it is worth it, as I am unable to find another workaround.
## csv to xlsx
from openpyxl import Workbook
import csv
# Build a new workbook whose active sheet mirrors the CSV file row for row.
wb = Workbook()
ws = wb.active

# 'C:/B.csv' (with the drive colon) matches the 'C:/B.xlsx' output below; without the
# colon the path would be resolved relative to a folder named "C".
# newline='' is what the csv module expects so that quoted embedded newlines parse correctly.
with open('C:/B.csv', 'r', newline='') as f:
    for row in csv.reader(f):
        # Each CSV record becomes the next worksheet row; values stay strings.
        ws.append(row)

wb.save('C:/B.xlsx')
###### COPY FROM B to existing E workbook
import openpyxl as xl
# Source workbook (converted CSV) and the existing destination workbook.
path1 = 'C:/B.xlsx'
path2 = 'C:/E.xlsx'

wb1 = xl.load_workbook(filename=path1)
wb2 = xl.load_workbook(filename=path2)
# Only the first worksheet of each workbook is used.
ws1 = wb1.worksheets[0]
ws2 = wb2.worksheets[0]

for cells in ws1:
    for source in cells:
        # Same coordinate in both sheets: the value lands in the identical cell of ws2.
        target = ws2[source.coordinate]
        target.value = source.value

wb2.save(path2)
I wrote a simple program for testing openpyxl: it opens the .xlsx file, writes data into a certain cell, and exits. I then run it again and write data into a different cell, but when I open the .xlsx file after running the program for the second time, the data from the first run is gone.
My assumption is that openpyxl clears the entire .xlsx file everytime you open it again, is there a way to avoid this?
Here is my code:
from openpyxl import Workbook
dest_filename = 'teste.xlsx'

# A fresh, empty workbook is created on every run; the file on disk is never read.
wb = Workbook()
ws = wb.active
ws.title = "2017"

# Target position and content are read interactively.
Row = int(input('row: '))
Column = int(input('column: '))
data = input('data: ')

target = ws.cell(row=Row, column=Column)
target.value = data

wb.save(filename=dest_filename)
Here is the .xlsx file after running the program for the first time
Here is the .xlsx file after running the program for the second time
You have not read the excel file at all:
Use this to read the existing workbook:
from openpyxl import Workbook,load_workbook
import os
dest_filename = 'teste.xlsx'

if os.path.isfile(dest_filename):
    # Re-open the existing file so cells written by earlier runs are kept.
    wb = load_workbook(filename=dest_filename)
    ws = wb.active
else:
    # First run: start from an empty workbook and name its sheet.
    wb = Workbook()
    ws = wb.active
    ws.title = "2017"

# Target position and content are read interactively.
Row = int(input('row: '))
Column = int(input('column: '))
data = input('data: ')

ws.cell(row=Row, column=Column).value = data
wb.save(filename=dest_filename)
Output:
I am trying to read in multiple excel files and append the data from each file into one master file. Each file will have the same headers (So I can skip the import of the first row after the initial file).
I am pretty new to both Python and the OpenPyXL module. I am able to import the first workbook without problem. My problem comes in when I need to open the subsequent file and copy the data to paste into the original worksheet.
Here is my code so far:
# Attempts to combine 'first_file.xlsx' and 'second_file.xlsx' into 'master_file.xlsx'.
# Creating blank workbook
from openpyxl import Workbook
wb = Workbook()
# grab active worksheet
ws = wb.active
# Read in excel data
from openpyxl import load_workbook
# Rebinding wb here discards the blank workbook created above.
wb = load_workbook('first_file.xlsx') #explicitly loading workbook, will automate later
# grab active worksheet in current workbook
ws = wb.active
#get max columns and rows
sheet = wb.get_sheet_by_name('Sheet1')
print ("Rows: ", sheet.max_row) # for debugging purposes
print ("Columns: ", sheet.max_column) # for debugging purposes
# Coordinate of the cell at the last used row and column of the first file's 'Sheet1'.
last_data_point = ws.cell(row = sheet.max_row, column = sheet.max_column).coordinate
print ("Last data point in current worksheet:", last_data_point) #for debugging purposes
#import next file and add to master
# First cell (column 1) of the row directly below the existing data.
append_point = ws.cell(row = sheet.max_row + 1, column = 1).coordinate
print ("Start new data at:", append_point)
# wb now refers to the second file; ws and sheet still refer to sheets of the first file.
wb = load_workbook('second_file.xlsx')
sheet2 = wb.get_sheet_by_name('Sheet1')
# NOTE(review): the coordinate= keyword of ws.cell() depends on the installed openpyxl version — confirm it is supported.
start = ws.cell(coordinate='A2').coordinate
print("New data start: ", start)
end = ws.cell(row = sheet2.max_row, column = sheet2.max_column).coordinate
print ("New data end: ", end)
# write a value to selected cell
#sheet[append_point] = 311
#print (ws.cell(append_point).value)
#save file
# No cell values are copied between the two workbooks above; wb is the workbook
# loaded from 'second_file.xlsx' at this point, and that is what gets saved.
wb.save('master_file.xlsx')
Thanks!
I don't really understand your code. It looks too complicated. When copying between worksheets you probably want to use ws.rows.
# Append the data rows of every workbook in `files` to the active sheet of master.xlsx.
wb1 = load_workbook('master.xlsx')
ws1 = wb1.active  # destination sheet (was mistakenly bound to the name ws2)

for f in files:
    wb2 = load_workbook(f)
    ws2 = wb2['Sheet1']
    # min_row=2 skips the header row of each source file; iter_rows() is used because
    # ws.rows is a generator and cannot be sliced with [1:].
    for row in ws2.iter_rows(min_row=2):
        ws1.append([cell.value for cell in row])

# Persist the appended rows; without saving, the changes exist only in memory.
wb1.save('master.xlsx')