Populate Excel from Python list - python

I want to populate a excel sheet with data read from a text file.
My script opens a text file and then puts certain attributes into a list. I then want to populate my excel sheet with data in the list.
I have many txt documents in the same location so my script loops though the files.
What is wrong with my code ? It only populates one row.
import xlwt
import os
output = 'D:\Holding_Area\\test.xls'
file_path='Y:\\Testing\\Crashes'
pathappend=[]
r=1
for a in os.listdir(file_path):
pathappend.append(file_path+'\\'+a)
def main():
for x in pathappend:
appendlist=[]
wbk = xlwt.Workbook()
sheet = wbk.add_sheet('python', cell_overwrite_ok=True)
file = open(x)
lines = file.readlines()
appendlist.append(lines[1][2:10])
appendlist.append(lines[1][13:21])
appendlist.append(lines[4][15:30])
appendlist.append(lines[10][13:22])
appendlist.append(lines[11][9:28])
appendlist.append(lines[22])
appendlist.append(lines[31][84:113])
appendlist.append(lines[27:29])
file.close()
for i,e in enumerate(appendlist):
sheet.write(r,i,e)
r+1
wbk.save(output)
main()

Issue was with the 'r+1' it should be 'r+=1'; as shown below:
import xlwt
import os
output = 'D:\Holding_Area\\test.xls'
file_path='Y:\\Testing\\Crashes'
pathappend=[]
r=1
for a in os.listdir(file_path):
pathappend.append(file_path+'\\'+a)
def main():
for x in pathappend:
appendlist=[]
wbk = xlwt.Workbook()
sheet = wbk.add_sheet('python', cell_overwrite_ok=True)
file = open(x)
lines = file.readlines()
appendlist.append(lines[1][2:10])
appendlist.append(lines[1][13:21])
appendlist.append(lines[4][15:30])
appendlist.append(lines[10][13:22])
appendlist.append(lines[11][9:28])
appendlist.append(lines[22])
appendlist.append(lines[31][84:113])
appendlist.append(lines[27:29])
file.close()
for i,e in enumerate(appendlist):
sheet.write(r,i,e)
r+=1
wbk.save(output)
main()

Related

avoid opening an unnecessary tkinder window

I'm trying to save a csv file as a .json file. But when the following code runs, always opens a small tkinter window, along with the load window. How this can be avoided?
import csv
import json
import pandas as pd
from tkinter import filedialog
# which is used to save file in any extension
from tkinter.filedialog import asksaveasfile
class Manipulate_data:
def convert_csv_to_json(self):
csv_file_path = filedialog.askopenfilename(initialdir = '/Desktop',
title = 'Select a CSV file',
filetypes = (('csv file','*.csv'),
('csv file','*.csv')))
print(csv_file_path)
#create a dictionary
data_dict = {}
#Step 2
#open a csv file handler
with open(csv_file_path, encoding = 'utf-8') as csv_file_handler:
csv_reader = csv.DictReader(csv_file_handler)
#convert each row into a dictionary
#and add the converted data to the data_variable
for rows in csv_reader:
#assuming a column named 'No'
#to be the primary key
key = rows['id']
data_dict[key] = rows
#open a json file handler and use json.dumps
#method to dump the data
#Step 3
files = [('JSON Files', '*.json')]
json_file_path = asksaveasfile(filetypes = files, defaultextension = files)
with open(json_file_path.name, 'w', encoding = 'utf-8') as json_file_handler:
#Step 4
json_file_handler.write(json.dumps(data_dict, indent = 4))
#driver code
#be careful while providing the path of the csv file
#provide the file path relative to your machine
Manipulate_data().convert_csv_to_json()

Run a python script on all files in a directory

First time posting a question here, hopefully, someone who experienced/tried this please share your insights... I've been working to get this far in the last few days and nights... now I am getting nowhere to loop this script on every file in a directory.
Bascially, these two scripts work perfectly fine it brings a pdf file and changes it to an excel workbook. Now what I need to do is going through all files from a selected directory and do the same job.
I am keep getting stuck at the opening the file stage - is this saying that the data (the pdf page - data[0]) cant be called in? or should i add more stages in to bring the dataset in...?
Do I have to create a list for the dataset so I can call in the data as you would have more than a data to call in.. is this why python can read the data[0] ???
Revised Script
# import
import os
import glob
import pdftotext
import openpyxl
from pathlib import Path
from string import ascii_uppercase
# open a pdf file
def to_excel(pdf_file):
with open(pdf_file,'rb') as f:
data = pdftotext.PDF(f)
# operate data to get titles, values
datas = data[0].split('\r\n')
finalData = list()
for item in datas:
if item != '':
finalData.append(item)
finalDataRefined = list()
for item in finalData:
if item != ' BCA Scheduled Maintenance Questions' and item != ' Do you suspect there is Asbestos at the property?' and item != ' Yes' and item != ' No' and item != '\x0c':
finalDataRefined.append(item.strip())
titles = list()
values = list()
for num, item in enumerate(finalDataRefined):
if num % 2 == 0:
titles.append(item)
else:
values.append(item)
# get an output file name
OPRAST = values[1]
filename = work_dir / f"{OPRAST}.xlxs"
# create an excel workbook
excel_file = openpyxl.Workbook()
excel_sheet = excel_file.active
excel_sheet.append([])
alphaList = list(ascii_uppercase)
for alphabet in alphaList:
excel_sheet.column_dimensions[alphabet].width = 20
excel_sheet.append(titles)
excel_sheet.append(values)
# save the excel workbook
excel_file.save(filename)
excel_file.close
# run a python script every file in a directory
alphaList = list(ascii_uppercase)
work_dir = Path(r"C:\Users\Sunny Kim\Downloads\Do Forms")
for pdf_file in work_dir.glob("*.pdf"):
to_excel(pdf_file)
I basically know what you want to do, but your code's indent is not so readable... especially it's python.
Your goal is to create a excel for each pdf file in you prefix dir? or aggregate all the pdf files together to a single excel file?
The follow coding is for the first goal.
Code logic.
get all the pdf file
loop over all the pdf file, for each:
open pdf file
some operation
export to excel file
You full code maybe like this(just guess):
# ----------------import part-------------------
import os
import glob
import pdftotext
import openpyxl
from string import ascii_uppercase
from pathlib import Path
def to_excel(pdf_file):
with open(pdf_file, 'rb') as f: # this open the pdf file
data = pdftotext.PDF(f)
# ---------------operate the data, get title and value-----------
datas = data[0].split('\r\n')
finalData = list()
for item in datas:
if item != '':
finalData.append(item)
finalDataRefined = list()
for item in finalData:
if item != ' BCA Scheduled Maintenance Questions' and item != ' Do you suspect there is Asbestos at the property?' and item != ' Yes' and item != ' No' and item != '\x0c':
finalDataRefined.append(item.strip())
titles = list()
values = list()
for num, item in enumerate(finalDataRefined):
if num % 2 == 0:
titles.append(item)
else:
values.append(item)
# ------------------get output file name---------------------
OPRAST = values[1]
filename = work_dir / f"{OPRAST}.xlxs"
# ------------------create excel file sheet------------------
excel_file = openpyxl.Workbook()
excel_sheet = excel_file.active
excel_sheet.append([])
alphaList = list(ascii_uppercase)
for alphabet in alphaList:
excel_sheet.column_dimensions[alphabet].width = 20
excel_sheet.append(titles)
excel_sheet.append(values)
# --------------------save----------------
excel_file.save(filename)
excel_file.close
# -------------------main program---------------
alphaList = list(ascii_uppercase)
work_dir = Path(r"C:\Users\Sunny Kim\Downloads\Do Forms")
for pdf_file in work_dir.glob("*.pdf"):
to_excel(pdf_file)

Openpyxl coverts numbers in CSV as text in XLSX file

I have a series of CSV file like this one. I’m trying to convert and merge them into an xlsx file with python and openpyxl with this code:
import csv
import openpyxl
import glob
csvpath = 'C:/Users/Lorenzo/Downloads/CSV/'
csvfiles = glob.glob(csvpath + '*.csv')
data = input('Inserisci data Simulazione: ')
destinationfilepath = 'C:/Users/Lorenzo/Desktop/Simulazione_' + data + '.xlsx'
wb = openpyxl.Workbook()
for i in range(len(csvfiles)):
filename = csvfiles[i]
reader = csv.reader(open(filename), delimiter=',')
csvname = filename[len(csvpath):-4]
ws1 = wb.create_sheet(csvname)
k=0
for row in reader:
if k==0:
ws1.append(row)
else:
g=0
for cell in row:
c= ws1.cell(row=k, column=g)
c.value = float(cell)
g=g+1
k=k+1
ws1['A1'] = 'Iteration'
ws1['B1'] = 'CD'
ws1['C1'] = 'CL'
ws1['D1'] = 'CL_F'
ws1['E1'] = 'CL_R'
ws1['F1'] = 'CM'
sheet = wb['Sheet']
wb.remove(sheet)
wb.save(destinationfilepath)
The code runs but in most cells (and strangely enough not in all cells) I get the error “number stored as text” despite using the command float like suggested in this and similar topics.
What is that I'm doing wrong?

Print Excel workbook using python

Suppose I have an excel file excel_file.xlsx and i want to send it to my printer using Python so I use:
import os
os.startfile('path/to/file','print')
My problem is that this only prints the first sheet of the excel workbook but i want all the sheets printed. Is there any way to print the entire workbook?
Also, I used Openpyxl to create the file, but it doesn't seem to have any option to select the number of sheets for printing.
Any help would be greatly appreciated.
from xlrd import open_workbook
from openpyxl.reader.excel import load_workbook
import os
import shutil
path_to_workbook = "/Users/username/path/sheet.xlsx"
worksheets_folder = "/Users/username/path/worksheets/"
workbook = open_workbook(path_to_workbook)
def main():
all_sheet_names = []
for s in workbook.sheets():
all_sheet_names.append(s.name)
for sheet in workbook.sheets():
if not os.path.exists("worksheets"):
os.makedirs("worksheets")
working_sheet = sheet.name
path_to_new_workbook = worksheets_folder + '{}.xlsx'.format(sheet.name)
shutil.copyfile(path_to_workbook, path_to_new_workbook)
nwb = load_workbook(path_to_new_workbook)
print "working_sheet = " + working_sheet
for name in all_sheet_names:
if name != working_sheet:
nwb.remove_sheet(nwb.get_sheet_by_name(name))
nwb.save(path_to_new_workbook)
ws_files = get_file_names(worksheets_folder, ".xlsx")
# Uncomment print command
for f in xrange(0, len(ws_files)):
path_to_file = worksheets_folder + ws_files[f]
# os.startfile(path_to_file, 'print')
print 'PRINT: ' + path_to_file
# remove worksheets folder
shutil.rmtree(worksheets_folder)
def get_file_names(folder, extension):
names = []
for file_name in os.listdir(folder):
if file_name.endswith(extension):
names.append(file_name)
return names
if __name__ == '__main__':
main()
probably not the best approach, but it should work.
As a workaround you can create separate .xlsx files where each has only one spreadsheet and then print them with os.startfile(path_to_file, 'print')
I have had this issue(on windows) and it was solved by using pywin32 module and this code block(in line 5 you can specify the sheets you want to print.)
import win32com.client
o = win32com.client.Dispatch('Excel.Application')
o.visible = True
wb = o.Workbooks.Open('/Users/1/Desktop/Sample.xlsx')
ws = wb.Worksheets([1 ,2 ,3])
ws.printout()
you could embed vBa on open() command to print the excel file to a default printer using xlsxwriter's utility mentioned in this article:
PBPYthon's Embed vBA in Excel
Turns out, the problem was with Microsoft Excel,
os.startfile just sends the file to the system's default app used to open those file types. I just had to change the default to another app (WPS Office in my case) and the problem was solved.
Seems like you should be able to just loop through and change which page is active. I tried this and it did print out every sheet, BUT for whatever reason on the first print it grouped together two sheets, so it gave me one duplicate page for each workbook.
wb = op.load_workbook(filepath)
for sheet in wb.sheetnames:
sel_sheet = wb[sheet]
# find the max row and max column in the sheet
max_row = sel_sheet.max_row
max_column = sel_sheet.max_column
# identify the sheets that have some data in them
if (max_row > 1) & (max_column > 1):
# Creating new file for each sheet
sheet_names = wb.sheetnames
wb.active = sheet_names.index(sheet)
wb.save(filepath)
os.startfile(filepath, "print")

openpyxl overwrites all data instead of update the excel sheet

I'm trying to read a string from a text file and write it into an excel sheet without overwriting. I found somewhere that to update excel sheets, openpyxl in used. But my script just overwrites the entire sheet. I want other data to be the same.
python script:
from openpyxl import Workbook
file_name="D:\\a.txt"
content={}
with open(file_name) as f:
for line in f:
(key,value)=line.split(":")
content[key]=value
wb=Workbook()
ws=wb.active
r = 2
for item in content:
ws.cell(row=r, column=3).value = item
ws.cell(row=r, column=4).value = content[item]
r += 1
wb.save("D:\\Reports.xlsx")
Excel sheet before script:
Excel sheet after script :
How do I write the data to excel with overwriting other things ? Help.
Overwriting is due to both saving the file with wb.save() and your hard coded starting row number r = 2.
1) If you don't care of overwriting the rows each time you execute your script you could use something like this:
from openpyxl import Workbook
from openpyxl import load_workbook
path = 'P:\Desktop\\'
file_name = "input.txt"
content= {}
with open(path + file_name) as f:
for line in f:
(key,value)=line.split(":")
content[key]=value
wb = load_workbook(path + 'Reports.xlsx')
ws = wb.active
r = 2
for item in content:
ws.cell(row=r, column=3).value = item
ws.cell(row=r, column=4).value = content[item]
r += 1
wb.save(path + "Reports.xlsx")
2) If you care about overwriting rows and the column numbers (3 & 4) you could try something like this:
from openpyxl import Workbook
from openpyxl import load_workbook
path = 'P:\Desktop\\'
file_name = "input.txt"
content= []
with open(path + file_name) as f:
for line in f:
key, value = line.split(":")
content.append(['','', key, value]) # adding empty cells in col 1 + 2
wb = load_workbook(path + 'Reports.xlsx')
ws = wb.active
for row in content:
ws.append(row)
wb.save(path + "Reports.xlsx")

Categories

Resources