I have a series of CSV files like this one. I’m trying to convert and merge them into a single xlsx file with Python and openpyxl, using this code:
import csv
import openpyxl
import glob
# Convert every CSV file found in `csvpath` into its own worksheet of a single
# workbook, storing the data cells as numbers instead of text.
csvpath = 'C:/Users/Lorenzo/Downloads/CSV/'
csvfiles = glob.glob(csvpath + '*.csv')
data = input('Inserisci data Simulazione: ')
destinationfilepath = 'C:/Users/Lorenzo/Desktop/Simulazione_' + data + '.xlsx'
# Column titles written over the first row of every sheet.
headers = ['Iteration', 'CD', 'CL', 'CL_F', 'CL_R', 'CM']


def to_number(text):
    """Return `text` converted to float, or unchanged when it is not numeric."""
    try:
        return float(text)
    except ValueError:
        # Leave non-numeric cells (e.g. empty strings) as text instead of
        # aborting the whole conversion.
        return text


wb = openpyxl.Workbook()
# Keep a handle on the default sheet so it can be removed once real sheets exist.
sheet = wb.active
for filename in csvfiles:
    # Sheet title = file name without the directory prefix and '.csv' suffix.
    csvname = filename[len(csvpath):-4]
    ws1 = wb.create_sheet(csvname)
    # newline='' is what the csv module expects; `with` closes the file.
    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for k, row in enumerate(reader):
            if k == 0:
                # Header row: kept as text (its first cells are replaced below).
                ws1.append(row)
            else:
                # append() writes to the next free row starting at column 1,
                # which avoids the row/column off-by-one of manual indexing
                # (openpyxl rows and columns are 1-based).
                ws1.append([to_number(cell) for cell in row])
    for column, title in enumerate(headers, start=1):
        ws1.cell(row=1, column=column, value=title)
# A workbook must keep at least one sheet, so only drop the default one when
# at least one CSV sheet was created.
if csvfiles:
    wb.remove(sheet)
wb.save(destinationfilepath)
The code runs, but in most cells (strangely enough, not in all of them) I get the error “number stored as text”, despite using float() as suggested in this and similar topics.
What am I doing wrong?
Related
I would like to write code that extracts certain data, together with the name of the file it came from, from multiple Excel files into a .txt file.
I wrote the code below, but it writes the names of all the files in the folder next to each piece of data in the .txt file.
How can I get only the name of the file each value came from?
My goal is to have the extracted data written as "filename" "data from column 10" in the .txt file.
import pathlib
import openpyxl
import os.path
import glob
# Scan every .xlsx file in the folder and, for each row whose column-A cell has
# the fill colour index 'FFFF0000', write one line to data.txt.
f = open('data.txt', 'w')
path = pathlib.Path(r"..\file")
for path_obj in path.glob("*.xlsx"):
    wb = openpyxl.load_workbook(path_obj)
    for title in wb.sheetnames:
        sheet = wb[title]
        last_row = sheet.max_row
        for row in range(2, last_row + 1):
            marker = sheet[f"A{row}"]
            if marker.fill.start_color.index != 'FFFF0000':
                continue
            # NOTE: this glob is evaluated again for every matching row and
            # returns every workbook in the folder, so the line written below
            # carries the names of all files, not just the one being read.
            file_path = glob.glob(r"C:\\python\\file\*.xlsx")
            name_list = []
            for file in file_path:
                base = os.path.basename(file)
                name_list.append(os.path.splitext(base)[0])
            value = sheet.cell(row=row, column=10).value
            f.write(f"{name_list} {value}\n")
f.close()
Please see if this is what you are looking for. As mentioned by Vlad, I don't believe you need to use glob twice. The path_obj already has the file name. So, update your code as shown below and see if it does what you need...
import glob, os
import pathlib
import openpyxl
from openpyxl.styles import PatternFill
# For every .xlsx file in `path`, write "<file name> <column-10 value>" to
# data.txt for each row whose column-A cell has fill colour index 'FFFF0000'.
path = pathlib.Path(r"SampleDir")  ##My test dir, you can change it
# `with` closes data.txt even when a workbook fails to load part-way through.
with open('data.txt', 'w') as f:
    for path_obj in path.glob("*.xlsx"):
        wb = openpyxl.load_workbook(path_obj)
        for sheetname in wb.sheetnames:
            sheet = wb[sheetname]
            for row in range(2, sheet.max_row + 1):
                if sheet["A" + str(row)].fill.start_color.index == 'FFFF0000':
                    # path_obj.name is the file name without its directory;
                    # use str(path_obj) instead to include the path.
                    f.write(path_obj.name + " ")
                    f.write(str(sheet.cell(row=row, column=10).value))
                    f.write("\n")
Hey, I want to quickly slice my source XLSX file and collect the data from the cell in column index 11, but my script seems to run very slowly.
The expected output is the items collected from the column-index-11 cell of every row whose column-index-16 cell is None. The script checks every row from the beginning to see whether the value in column index 16 is None, but my file has thousands of rows before the point where the script starts collecting data.
Can I speed up this process or find a faster way?
XLUtils.py:
import openpyxl
def getRowCount(file,sheetName):
    """Return `max_row` of the sheet named `sheetName` in workbook `file`.

    The workbook is loaded from disk on every call, so calling this inside a
    loop re-reads the whole file each time.

    Raises:
        KeyError: if the workbook has no sheet called `sheetName`.
    """
    workbook = openpyxl.load_workbook(file)
    # Index access replaces the deprecated Workbook.get_sheet_by_name().
    sheet = workbook[sheetName]
    return sheet.max_row
def getColumnCount(file,sheetName):
    """Return `max_column` of the sheet named `sheetName` in workbook `file`.

    The workbook is loaded from disk on every call, so calling this inside a
    loop re-reads the whole file each time.

    Raises:
        KeyError: if the workbook has no sheet called `sheetName`.
    """
    workbook = openpyxl.load_workbook(file)
    # Index access replaces the deprecated Workbook.get_sheet_by_name().
    sheet = workbook[sheetName]
    return sheet.max_column
def readData(file,sheetName,rownum,columnno):
    """Return the value of cell (`rownum`, `columnno`) of sheet `sheetName`.

    `rownum` and `columnno` are passed straight to `Worksheet.cell`, which
    uses 1-based indices. The workbook is loaded from disk on every call, so
    reading many cells through this helper re-reads the file once per cell.

    Raises:
        KeyError: if the workbook has no sheet called `sheetName`.
    """
    workbook = openpyxl.load_workbook(file)
    # Index access replaces the deprecated Workbook.get_sheet_by_name().
    sheet = workbook[sheetName]
    return sheet.cell(row=rownum, column=columnno).value
def writeData(file,sheetName,rownum,columno,data):
    """Store `data` in cell (`rownum`, `columno`) of sheet `sheetName` and save.

    `rownum` and `columno` are passed straight to `Worksheet.cell`, which uses
    1-based indices. The workbook is loaded from `file` and saved back to the
    same path on every call, so each written cell costs a full load and save.

    Raises:
        KeyError: if the workbook has no sheet called `sheetName`.
    """
    workbook = openpyxl.load_workbook(file)
    # Index access replaces the deprecated Workbook.get_sheet_by_name().
    sheet = workbook[sheetName]
    sheet.cell(row=rownum, column=columno).value = data
    workbook.save(file)
My Script:
import pandas as pd
import XLUtils
from openpyxl import Workbook
from datetime import datetime
def LISTING_GENERATOR():
# Create the product list file for the current day.
#
# Reads the module-level names PRODUCT_RESEARCH, FILE, TODAY_DATE,
# TODAY_DATE_XLSX, PRODUCT_NAME, LIST and wb (assigned in the __main__ block).
# Takes no arguments and has no return statement.
#
# NOTE(review): the indentation of this listing was lost, so the nesting
# described in the comments below is inferred from the statements themselves
# and should be confirmed against the original script.
#
# Performance: every XLUtils.readData call loads the whole workbook, and every
# XLUtils.writeData call loads and saves it (see XLUtils.py), so each cell
# touched below costs at least one full workbook load.
# i counts collected ASINs (starting at 1); x is the next output row in FILE.
i = 1
i_range = 50
r = 1
x = 1
# Only rows 2..rows of the research sheet are scanned by the loop below.
rows = 50
# Row 1 is inspected separately before the main loop (r is still 1 here).
LISTED_DATE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,16)
if LISTED_DATE == None:
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(ASIN)
# Saves the still-empty module-level workbook so the output file exists before
# XLUtils.writeData tries to load it.
wb.save('Product_'+TODAY_DATE + '.xlsx')
print('File has been created: ',FILE)
for r in range(2,rows+1):
# Column 11 holds the ASIN; an empty cell ends the scan.
CHECK_ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 11)
if CHECK_ASIN == None:
print('No more ASIN avaiable')
break
# Column 16 holds the date written by a previous run (None = not listed yet).
FE = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r, 16)
if FE != None:
print('Product last added: ',FE)
if FE == None:
# Column 11 is read a second time here; CHECK_ASIN already holds this value.
ASIN = XLUtils.readData(PRODUCT_RESEARCH,'Sheet1',r,11)
print(f'ASIN nr. {i}: {ASIN}')
LIST.append(ASIN)
XLUtils.writeData(FILE,'Sheet',x,1,ASIN)
# Mark the source row as listed today.
XLUtils.writeData(PRODUCT_RESEARCH,'Sheet1',r,16,TODAY_DATE_XLSX)
x+=1
i+=1
# NOTE(review): i starts at 1, so this triggers once i reaches i_range, i.e.
# after i_range - 1 ASINs have been collected — confirm the intended count.
if i >= i_range:
print(f'List of {i_range} items, has been Created.')
break
# NOTE(review): with the indentation lost it is unclear which statement this
# else belongs to (the FE check, the i_range check, or the for loop) — confirm.
else:
print('Error: product on the list')
XLUtils.writeData(FILE,'Sheet',x,1,' ')
print('Created list:\n',LIST)
print('__________________________________')
# ALL_ITEMS is only used in the message below; the CSV itself is named from
# PRODUCT_NAME.
ALL_ITEMS = (TODAY_DATE + '.xlsx')
print('CSV file has been named: ', ALL_ITEMS)
# Re-read the generated workbook as strings and export it as CSV.
DATA_XLS = pd.read_excel(FILE, 'Sheet', dtype=str, index_col=None)
DATA_XLS.to_csv(PRODUCT_NAME+'.csv', encoding='utf-8', index=False)
if __name__ == '__main__':
    # Module-level settings read by LISTING_GENERATOR().
    # Today's date in two formats: one safe for file names, one written into
    # the research sheet.
    TODAY_DATE = datetime.today().strftime('%d_%m_%Y')
    TODAY_DATE_XLSX = datetime.today().strftime('%d/%m/%Y')
    # Source workbook that is scanned for products.
    PRODUCT_RESEARCH = 'Product_Research_copy2.xlsx'
    # Base name of today's output; FILE is the same name with the extension.
    PRODUCT_NAME = 'Product_' + TODAY_DATE
    FILE = PRODUCT_NAME + '.xlsx'
    # Collected ASINs and the empty workbook used to create FILE.
    LIST = []
    wb = Workbook()
    LISTING_GENERATOR()
I'm working on a program to split Excel files into sections of 1000 rows. I can't seem to get it to create a second Excel file; xlsxwriter doesn't create the second file.
from os.path import join, dirname, abspath
from xlrd.sheet import ctype_text
import csv
import os
import sys
import xlrd
import xlsxwriter
import xlwt
# Split the first column of a spreadsheet into several .xlsx files, one per
# block of rows. Written for Python 2 (raw_input, csv file opened in 'rb').
#
# NOTE(review): the indentation of this listing was lost; the nesting described
# in the comments below is inferred and should be confirmed against the
# original script.
# Input file: taken from the command line (e.g. drag and drop), otherwise
# asked for interactively.
file_paths = sys.argv[1:]
draganddrop = ''.join(file_paths)
beginGrab = 0
counting = 0
endGrab = 1000
thousands = 0
if draganddrop == "":
fileName = raw_input("\nInput the file with extension\n>")
else:
fileName = draganddrop
# Split the name at the first '.', so a name with several dots is cut early.
stopPoint = fileName.index('.')
prepRev = fileName[stopPoint:]
preName = fileName[:stopPoint]
# A .csv input is first converted cell by cell into a temporary .xlsx.
if prepRev == ".csv":
excelFile = xlsxwriter.Workbook(preName + '.xlsx')
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
content = csv.reader(f)
# Despite the names, index_col is the row index and index_row the column index.
for index_col, data_in_col in enumerate(content):
for index_row, data_in_cell in enumerate(data_in_col):
worksheet.write(index_col,index_row,data_in_cell)
excelFile.close()
fileName = (preName + '.xlsx')
# delMe is assigned here but not read anywhere in this listing.
delMe = 1
print("Temporary Convert to xlsx done.\n")
# From here on prepRev holds the part of the name before the first '.'.
stopPoint = fileName.index('.')
prepRev = fileName[0:stopPoint]
# Resolve the file next to this script and open its first sheet with xlrd.
fname = join(dirname(abspath(__file__)), fileName)
xl_workbook = xlrd.open_workbook(fname)
sheet_names = xl_workbook.sheet_names()
xl_sheet = xl_workbook.sheet_by_name(sheet_names[0])
# This xlwt workbook is created but never saved in this listing, and
# `worksheet` is rebound to an xlsxwriter sheet further down.
book = xlwt.Workbook(encoding="utf-8")
worksheet = book.add_sheet("Results", cell_overwrite_ok=True)
# The same file is opened a second time (by name, not via fname).
workbook = xlrd.open_workbook(fileName)
for sheet in workbook.sheets():
for row in range(sheet.nrows):
row = int(row)
if(int(row)>1000):
# Python 2 integer division: number of complete blocks of 1000 rows.
subDivide = int(row) / 1000
while(thousands != subDivide + 1):
thousands = thousands + 1
counting = 0
# Output files are named <preName>_1.xlsx, <preName>_2.xlsx, ...
totalName = preName + "_" + str(thousands) + ".xlsx"
print(totalName)
excelFile = xlsxwriter.Workbook(str(totalName))
worksheet = excelFile.add_worksheet()
# NOTE(review): this opens the output file for reading before
# excelFile.close() has been called for it; f is not used afterwards.
# Presumably this is where the script fails when the file does not exist
# yet — confirm.
with open(totalName,'rb') as f:
# Column 0 of the source sheet, from row index 1 onward.
col = xl_sheet.col_slice(0,1,10101010)
# NOTE(review): start=beginGrab only offsets idx; enumerate still walks col
# from its first cell, so every pass reads the same leading cells — confirm
# whether skipping beginGrab cells was intended.
for idx, cell_obj in enumerate(col, start=beginGrab):
counting = counting + 1
if(counting == 1000):
break
# cell_type_str is computed but not used in this listing.
cell_type_str = ctype_text.get(cell_obj.ctype, 'unknown type')
# Takes characters 7..18 of the cell's string form and prefixes them with "1".
cell_obj_str = str(cell_obj)
telePhone = (cell_obj_str[7:19])
worksheet.write(idx+1, 0, "1" + telePhone)
worksheet.write(0,0, "Telephone Number")
beginGrab = thousands * 1000
# endGrab is updated here but not read anywhere in this listing.
endGrab = beginGrab + 1000
# xlsxwriter only produces the file when close() is called.
excelFile.close()
excelFile = None
else:
print("Mate, this is Tiny!")
print ("Ding! Job Done!")
I've been rubber ducking this and I can't find where I'm at fault.
EDIT:
SOLVED!!
By creating a sheet and then closing it, the program can then grasp it. I will probably make a git issue about this.
if prepRev == ".csv":
totalName = preName + '.xlsx'
excelFile = xlsxwriter.Workbook(totalName)
excelFile.close()
Closing the workbook writes the file to disk, so open can see it; the workbook is then created again under the same name and filled as before.
excelFile = xlsxwriter.Workbook(totalName)
worksheet = excelFile.add_worksheet()
with open(fileName,'rb') as f:
Doesn't the save/close line need to be within the while loop? Otherwise it looks like it will only save either the first/last item:
while(thousands != subDivide + 1):
# write file
excelFile.close()
This line is probably the reason why you cannot read your file back and your script crashes:
fname = join(dirname(abspath('__file__')), '%s' % fileName)
'__file__' shouldn't have quotes. I'd do:
fname = join(dirname(abspath(__file__)), fileName)
I wrote a simple program for testing with openpyxl where I simply open the .xlsx file, input data into a certain cell, then close the program and run it again, inputting data in a different cell. But when I open the .xlsx after running the program for the second time, the data from the first run is gone.
My assumption is that openpyxl clears the entire .xlsx file every time you open it again. Is there a way to avoid this?
Here is my code:
from openpyxl import Workbook
# Ask for a cell position and a value, then save them to teste.xlsx.
# The workbook is always created new and empty here; nothing is read from an
# existing teste.xlsx before saving over it.
dest_filename = 'teste.xlsx'
wb = Workbook()
ws = wb.active
ws.title = "2017"
# Prompt order (row, column, data) matters for interactive use.
target_row = int(input('row: '))
target_column = int(input('column: '))
entered = input('data: ')
ws.cell(row=target_row, column=target_column, value=entered)
wb.save(dest_filename)
Here is the .xlsx file after running the program for the first time
Here is the .xlsx file after running the program for the second time
You have not read the excel file at all:
Use this to read the existing workbook:
from openpyxl import Workbook,load_workbook
import os
# Ask for a cell position and a value, and store them in teste.xlsx while
# keeping whatever the file already contains.
dest_filename = 'teste.xlsx'
# Load the existing workbook when there is one; otherwise start a new one.
already_saved = os.path.isfile(dest_filename)
wb = load_workbook(filename=dest_filename) if already_saved else Workbook()
ws = wb.active
ws.title = "2017"
# Prompt order (row, column, data) matters for interactive use.
target_row = int(input('row: '))
target_column = int(input('column: '))
entered = input('data: ')
ws.cell(row=target_row, column=target_column, value=entered)
wb.save(dest_filename)
Output:
I'm trying to read a string from a text file and write it into an Excel sheet without overwriting. I found somewhere that openpyxl is used to update Excel sheets. But my script just overwrites the entire sheet. I want the other data to stay the same.
python script:
from openpyxl import Workbook
# Read "key:value" lines from a text file and write them to columns 3 and 4
# of a workbook, starting at row 2.
file_name = "D:\\a.txt"
content = {}
with open(file_name) as source:
    for line in source:
        # Each line must contain exactly one ':'; the value keeps its line ending.
        key, value = line.split(":")
        content[key] = value
# A new Workbook() holds none of the data already stored in Reports.xlsx, so
# saving it below replaces the existing file.
wb = Workbook()
ws = wb.active
for r, (key, value) in enumerate(content.items(), start=2):
    ws.cell(row=r, column=3, value=key)
    ws.cell(row=r, column=4, value=value)
wb.save("D:\\Reports.xlsx")
Excel sheet before script:
Excel sheet after script :
How do I write the data to Excel without overwriting other things? Help.
Overwriting is due to both saving the file with wb.save() and your hard coded starting row number r = 2.
1) If you don't mind overwriting the same rows each time you execute your script, you could use something like this:
from openpyxl import Workbook
from openpyxl import load_workbook
# Load the existing Reports.xlsx and write the "key:value" pairs from
# input.txt into columns 3 and 4, starting at row 2 on every run.
# Backslashes are escaped explicitly; '\D' is not a valid escape sequence.
path = 'P:\\Desktop\\'
file_name = "input.txt"
content = {}
with open(path + file_name) as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines carry no key/value pair.
            continue
        # Split on the first ':' only, so values may contain ':' themselves.
        # A line without any ':' still raises ValueError.
        key, value = line.split(":", 1)
        content[key.strip()] = value.strip()
wb = load_workbook(path + 'Reports.xlsx')
ws = wb.active
for r, (key, value) in enumerate(content.items(), start=2):
    ws.cell(row=r, column=3).value = key
    ws.cell(row=r, column=4).value = value
wb.save(path + "Reports.xlsx")
2) If you want to avoid overwriting existing rows while still writing to columns 3 & 4, you could try something like this:
from openpyxl import Workbook
from openpyxl import load_workbook
# Load the existing Reports.xlsx and append the "key:value" pairs from
# input.txt below the data already in the sheet, in columns 3 and 4.
# Backslashes are escaped explicitly; '\D' is not a valid escape sequence.
path = 'P:\\Desktop\\'
file_name = "input.txt"
content = []
with open(path + file_name) as f:
    for line in f:
        line = line.strip()
        if not line:
            # Blank lines carry no key/value pair.
            continue
        # Split on the first ':' only, so values may contain ':' themselves.
        # A line without any ':' still raises ValueError.
        key, value = line.split(":", 1)
        # Two empty leading cells keep columns 1 and 2 free.
        content.append(['', '', key.strip(), value.strip()])
wb = load_workbook(path + 'Reports.xlsx')
ws = wb.active
for row in content:
    # append() writes to the first row below the existing data.
    ws.append(row)
wb.save(path + "Reports.xlsx")