Need help with setting PageBreak excel file with xlwings package in python.
According to Microsoft website: https://learn.microsoft.com/zh-tw/office/vba/api/excel.range.pagebreak
I've tried
app = xw.App(visible=True, add_book=False)
wb = app.books.open("raw_data/" + raw_file_name, update_links=False)
sht = wb.sheets['sheet1']
sht.api.Rows(24).PageBreak = 'xlPageBreakManual' # I would like to set on row 24
and the program stuck forever. Does anyone know how to solve the problem?
Thanks
xlwings does not know what xlPageBreakManual is unless its referenced. Nevertheless in this case it's not necessary. The default is to add a manual Page Break therefore syntax is only; sht.api.Rows(24).PageBreak = True. If you then check the PageBreak status on row 24; print(sht.api.Rows(24).PageBreak) it should return -4135 which is the excel constant for xlPageBreakManual.
You can use the constants to set the type of PageBreak, e.g. the line sht.api.Rows(24).PageBreak = True could also be written as sht.api.Rows(24).PageBreak = -4135 or sht.api.Rows(24).PageBreak = PageBreak.xlPageBreakManual
The page break values can be referenced from the xlwings constants using the syntax from xlwings.constants import PageBreak. My example shown below sets the page break at row 24 then shows the status of the previous, actual and ensuing rows. The previous and ensuing rows have a status of -4142 i.e. no page break while row 24 is -4135.
The code then removes the page break using xlPageBreakNone and the three rows 23-25 are all -4142 again.
For this test I used xlwings v0.26.1
import xlwings as xw
from xlwings.constants import PageBreak
### Constant values for reference
# xlPageBreakAutomatic = -4105
# xlPageBreakManual = -4135
# xlPageBreakNone = -4142
app = xw.App(visible=True, add_book=False)
sht = xw.Book("book.xlsx").sheets['Sheet1']
print("Add Page break at row 24")
sht.api.Rows(24).PageBreak = True
# sht.api.Rows(24).PageBreak = -4135 # This does the same as the line above
# sht.api.Rows(24).PageBreak = PageBreak.xlPageBreakManual # As does this line
print('Row23: ' + str(sht.api.Rows(23).PageBreak))
print('Row24: ' + str(sht.api.Rows(24).PageBreak))
print('Row25: ' + str(sht.api.Rows(25).PageBreak))
print("\nDelete Page break at row 24")
sht.api.Rows(24).PageBreak = PageBreak.xlPageBreakNone
print('Row23: ' + str(sht.api.Rows(23).PageBreak))
print('Row24: ' + str(sht.api.Rows(24).PageBreak))
print('Row25: ' + str(sht.api.Rows(25).PageBreak))
Note attempting to manually set a page break to -4105 (xlPageBreakAutomatic) fails which I would expect.
Related
I have tried this code, but this is for validation. I want a code to create a new drop down menu.
app = xw.App(visible=True)
wb = app.books.open('Test.xlsx')
sht = wb.sheets['Sheet1']
Formula1='"Dog,Cat,Bat"'
dv = sht.range('A1').api.Validation.Formula1
I have tried using openpyxl it is working but it doesn't save a file when the file is open.
xlwings is a wrapper of win32com, which is similar to VBA. With recording VBA macro for reference, the following code should work.
import xlwings as xw
app = xw.App(visible=True)
wb = app.books.open('Test.xlsx')
sht = wb.sheets['Sheet1']
Formula1='Dog,Cat,Bat' # remove the redundant "
# set up validation
sht.range('A1').api.Validation.Add(Type=3, Formula1=Formula1)
The exploring steps:
Record a macro for the default steps to create a list type data validation:
Sub Macro1()
'
' Macro1 Macro
'
'
Range("A1").Select
Application.WindowState = xlMaximized
With Selection.Validation
.Delete
.Add Type:=xlValidateList, AlertStyle:=xlValidAlertStop, Operator:= _
xlBetween, Formula1:="Dog,Cat,Bat"
.IgnoreBlank = True
.InCellDropdown = True
.InputTitle = ""
.ErrorTitle = ""
.InputMessage = ""
.ErrorMessage = ""
.ShowInput = True
.ShowError = True
End With
End Sub
Indeed, we did only two key steps and kept the rest default:
select List from validation criteria dropdown list
type the source for list
So, the above code should be simplified as
' VBA
Range("A1").Validation.Add Type:=xlValidateList, Formula1:="Dog,Cat,Bat"
Hi everyone this is my first time here, and I am a beginner in Python. I am in the middle of writing a program that returns a txt document containing information about a stock (Watchlist Info.txt), based on the input of another txt document containing the company names (Watchlist).
To achieve this, I have written 3 functions, of which 2 functions reuters_ticker() and stock_price() are completed as shown below:
def reuters_ticker(desired_search):
#from company name execute google search for and return reuters stock ticker
try:
from googlesearch import search
except ImportError:
print('No module named google found')
query = desired_search + ' reuters'
for j in search(query, tld="com.sg", num=1, stop=1, pause=2):
result = j
ticker = re.search(r'\w+\.\w+$', result)
return ticker.group()
Stock Price:
def stock_price(company, doc=None):
ticker = reuters_ticker(company)
request = 'https://www.reuters.com/companies/' + ticker
raw_main = pd.read_html(request)
data1 = raw_main[0]
data1.set_index(0, inplace=True)
data1 = data1.transpose()
data2 = raw_main[1]
data2.set_index(0, inplace=True)
data2 = data2.transpose()
stock_info = pd.concat([data1,data2], axis=1)
if doc == None:
print(company + '\n')
print('Previous Close: ' + str(stock_info['Previous Close'][1]))
print('Forward PE: ' + str(stock_info['Forward P/E'][1]))
print('Div Yield(%): ' + str(stock_info['Dividend (Yield %)'][1]))
else:
from datetime import date
with open(doc, 'a') as output:
output.write(date.today().strftime('%d/%m/%y') + '\t' + str(stock_info['Previous Close'][1]) + '\t' + str(stock_info['Forward P/E'][1]) + '\t' + '\t' + str(stock_info['Dividend (Yield %)'][1]) + '\n')
output.close()
The 3rd function, watchlist_report(), is where I am getting problems with writing the information in the format as desired.
def watchlist_report(watchlist):
with open(watchlist, 'r') as companies, open('Watchlist Info.txt', 'a') as output:
searches = companies.read()
x = searches.split('\n')
for i in x:
output.write(i + ':\n')
stock_price(i, doc='Watchlist Info.txt')
output.write('\n')
When I run watchlist_report('Watchlist.txt'), where Watchlist.txt contains 'Apple' and 'Facebook' each on new lines, my output is this:
26/04/20 275.03 22.26 1.12
26/04/20 185.13 24.72 --
Apple:
Facebook:
Instead of what I want and would expect based on the code I have written in watchlist_report():
Apple:
26/04/20 275.03 22.26 1.12
Facebook:
26/04/20 185.13 24.72 --
Therefore, my questions are:
1) Why is my output formatted this way?
2) Which part of my code do I have to change to make the written output in my desired format?
Any other suggestions about how I can clean my code and any libraries I can use to make my code nicer are also appreciated!
You handle two different file-handles - the file-handle inside your watchlist_report gets closed earlier so its being written first, before the outer functions file-handle gets closed, flushed and written.
Instead of creating a new open(..) in your function, pass the current file handle:
def watchlist_report(watchlist):
with open(watchlist, 'r') as companies, open('Watchlist Info.txt', 'a') as output:
searches = companies.read()
x = searches.split('\n')
for i in x:
output.write(i + ':\n')
stock_price(i, doc = output) # pass the file handle
output.write('\n')
Inside def stock_price(company, doc=None): use the provided filehandle:
def stock_price(company, output = None): # changed name here
# [snip] - removed unrelated code for this answer for brevity sake
if output is None: # check for None using IS
print( ... ) # print whatever you like here
else:
from datetime import date
output.write( .... ) # write whatever you want it to write
# output.close() # do not close, the outer function does this
Do not close the file handle in the inner function, the context handling with(..) of the outer function does that for you.
The main takeaway for file handling is that things you write(..) to your file are not neccessarily placed there immediately. The filehandler chooses when to actually persist data to your disk, the latests it does that is when it goes out of scope (of the context handler) or when its internal buffer reaches some threshold so it "thinks" it is now prudent to alter to data on your disc. See How often does python flush to a file? for more infos.
I want to format an Excel report automatically. I generate the raw data using Python for reports, but they need pretty formatting (colors, bold font, borders) on them before they go to a Project Manager.
My current approach is using the pywin32 package, copying the header from a Template file, and pasting it into my Output report.
I am having trouble with the .Paste() methods, for instance, if I try to specify the destination with .Paste(Destination=Range('A1:A100') it will throw an error. If I attempt output_worksheet.used_range.Paste() it will not recognize used_range as valid either.
Additionally my current code doesn't exit out of Excel.
Finally my code pastes the header into row 16 instead of starting from the top:
template_path = tests.path + r"\Box Tracking Report Regents Template.xlsx"
timestamp = datetime.datetime.now().strftime("%m-%d-%Y %I%M%p")
output_path = tests.path + r"\Box Tracking Report " + timestamp + ".xlsx"
# (... write my pandas dataframe in ...)
def paste_formatting(tab_name):
excel_instance = win32com.client.gencache.EnsureDispatch("Excel.Application")
template = excel_instance.Workbooks.Open(template_path)
template_workbook = excel_instance.Workbooks.Item(1)
template_worksheet = template_workbook.Worksheets(tab_name)
used_range = template_worksheet.UsedRange
used_range.Copy()
output_excel = excel_instance.Workbooks.Open(output_path)
output_workbook = excel_instance.Workbooks.Item(2)
output_worksheet=output_workbook.Worksheets(tab_name)
output_worksheet.Paste()
output_workbook.Close()
template_workbook.Close()
paste_formatting('Regents Box Tracking Report')
This is what I get:
With #chucklukowski's comment:
def paste_formatting(tab_name):
excel_instance = win32com.client.gencache.EnsureDispatch("Excel.Application")
template = excel_instance.Workbooks.Open(template_path)
template_workbook = excel_instance.Workbooks.Item(1)
template_worksheet = template_workbook.Worksheets(tab_name)
used_range = template_worksheet.UsedRange
used_range.Copy()
output_excel = excel_instance.Workbooks.Open(output_path)
output_workbook = excel_instance.Workbooks.Item(2)
output_worksheet=output_workbook.Worksheets(tab_name)
output_worksheet.Paste(output_worksheet.Range('A1'))
output_workbook.Save()
output_workbook.Close()
template_workbook.Close()
excel_instance.Quit()
del excel_instance
paste_formatting('Regents Box Tracking Report')
Still messy, but probably better than calling an Excel VBA macro from the template itself (which was going to be my last ditch approach).
I've done some simple .csv parsing in python but have a new file structure that's giving me trouble. The input file is from a spreadsheet converted into a .CSV file. Here is an example of the input:
Layout
Each set can have many layouts, and each layout can have many layers. Each layer has only one layer and name.
Here is the code I am using to parse it in. I suspect it's a logic/flow control problem because I've parsed things in before, just not this deep. The first header row is skipped via code. Any help appreciated!
import csv
import pprint
def import_layouts_schema(layouts_schema_file_name = 'C:\\layouts\\LAYOUT1.csv'):
class set_template:
def __init__(self):
self.set_name =''
self.layout_name =''
self.layer_name =''
self.obj_name =''
def check_layout(st, row, layouts_schema):
c=0
if st.layout_name == '':
st.layer_name = row[c+2]
st.obj_name = row[c+3]
layer = {st.layer_name : st.obj_name}
layout = {st.layout_name : layer}
layouts_schema.update({st.set_name : layout})
else:
st.layout_name = row[c+1]
st.layer_name = row[c+2]
st.obj_name = row[c+3]
layer = {st.layer_name : st.obj_name}
layout = {st.layout_name : layer}
layouts_schema.update({st.set_name : layout})
return layouts_schema
def layouts_schema_parsing(obj_list_raw1): #, location_categories, image_schema, set_location):
#------ init -----------------------------------
skipfirst = True
c = 0
firstrow = True
layouts_schema = {}
end_flag = ''
st = set_template()
#---------- start parsing here -----------------
print('Now parsing layouts schema list')
for row in obj_list_raw1:
#print ('This is the row: ', row)
if skipfirst==True:
skipfirst=False
continue
if row[c] != '':
st.set_name = row[c]
st.layout_name = row[c+1]
st.layer_name = row[c+2]
st.obj_name = row[c+3]
print('FOUND A NEW SET. SET details below:')
print('Set name:', st.set_name, 'Layout name:', st.layout_name, 'Layer name:', st.layer_name, 'Object name:', st.obj_name)
if firstrow == True:
print('First row of layouts import!')
layer = {st.layer_name : st.obj_name}
layout = {st.layout_name : layer}
layouts_schema = {st.set_name : layout}
firstrow = False
check_layout(st, row, layouts_schema)
continue
elif firstrow == False:
print('Not the first row of layout import')
layer = {st.layer_name : st.obj_name}
layout = {st.layout_name : layer}
layouts_schema.update({st.set_name : layout})
check_layout(st, row, layouts_schema)
return layouts_schema
#begin subroutine main
layouts_schema_file_name ='C:\\Users\\jason\\Documents\\RAY\\layout_schemas\\ANIBOT_LAYOUTS_SCHEMA.csv'
full_path_to_file = layouts_schema_file_name
print('============ Importing LAYOUTS schema from: ', full_path_to_file , ' ==============')
openfile = open(full_path_to_file)
reader_ob = csv.reader(openfile)
layout_list_raw1 = list(reader_ob)
layouts_schema = layouts_schema_parsing(layout_list_raw1)
print('=========== End of layouts schema import =========')
return layouts_schema
layouts_schema = import_layouts_schema()
Feel free to throw any part away that doesn't work. I suspect I've inside my head a little bit here. A for loop or another while loop may do the trick. Ultimately I just want to parse the file into a dict with the same key structure shown. i.e. the final dict's first line would look like:
{'RESTAURANT': {'RR_FACING1': {'BACKDROP': 'restaurant1'}}}
And the rest on from there. Ultimately I am goign to use this key structure and the dict for other purposes. Just can't get the parsing down!
Wouaw, that's a lot of code !
Maybe try something simpler :
with open('file.csv') as f:
keys = f.readline().split(';') # assuming ";" is your csv fields separator
for line in f:
vals = line.split(';')
d = dict(zip(keys, vals))
print(d)
Then either make a better data file (without blanks), or have the parser remembering the previous values.
While I agree with #AK47 that the code review site may be the better approach, I received so many help from SO that I'll try to give back a little: IMHO you are overthinking the problem. Please find below an approach that should get you in the right direction and doesn't even require converting from Excel to CSV (I like the xlrd module, it's very easy to use). If you already have a CSV, just exchange the loop in the process_sheet() function. Basically, I just store the last value seen for "SET" and "LAYOUT" and if they are different (and not empty), I set the new value. Hope that helps. And yes, you should think about a better data structure (redundancy is not always bad, if you can avoid empty cells :-) ).
import xlrd
def process_sheet(sheet : xlrd.sheet.Sheet):
curr_set = ''
curr_layout = ''
for rownum in range(1, sheet.nrows):
row = sheet.row(rownum)
set_val = row[0].value.strip()
layout_val = row[1].value.strip()
if set_val != '' and set_val != curr_set:
curr_set = set_val
if layout_val != '' and layout_val != curr_layout:
curr_layout = layout_val
result = {curr_set: {curr_layout: {row[2].value: row[3].value}}}
print(repr(result))
def main():
# open a workbook (adapt your filename)
# then get the first sheet (index 0)
# and call the process function
wbook = xlrd.open_workbook('/tmp/test.xlsx')
sheet = wbook.sheet_by_index(0)
process_sheet(sheet)
if __name__ == '__main__':
main()
I'm new here to StackOverflow, but I have found a LOT of answers on this site. I'm also a programming newbie, so i figured i'd join and finally become part of this community - starting with a question about a problem that's been plaguing me for hours.
I login to a website and scrape a big body of text within the b tag to be converted into a proper table. The layout of the resulting Output.txt looks like this:
BIN STATUS
8FHA9D8H 82HG9F RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
INVENTORY CODE: FPBC *SOUP CANS LENTILS
BIN STATUS
HA8DHW2H HD0138 RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
8SHDNADU 00A123 #2956- INVALID STOCK COUPON CODE (MISSING).
93827548 096DBR RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
There are a bunch of pages with the exact same blocks, but i need them to be combined into an ACTUAL table that looks like this:
BIN INV CODE STATUS
HA8DHW2HHD0138 FPBC-*SOUP CANS LENTILS RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
8SHDNADU00A123 FPBC-*SOUP CANS LENTILS #2956- INVALID STOCK COUPON CODE (MISSING).
93827548096DBR FPBC-*SOUP CANS LENTILS RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
8FHA9D8H82HG9F SSXR-98-20LM NM CORN CREAM RECEIVED SUCCESSFULLY AWAITING STOCKING PROCESS
Essentially, all separate text blocks in this example would become part of this table, with the inv code repeating with its Bin values. I would post my attempts at parsing this data(have tried Pandas/bs/openpyxl/csv writer), but ill admit they are a little embarrassing, as i cannot find any information on this specific problem. Is there any benevolent soul out there that can help me out? :)
(Also, i am using Python 2.7)
A simple custom parser like the following should do the trick.
from __future__ import print_function
def parse_body(s):
line_sep = '\n'
getting_bins = False
inv_code = ''
for l in s.split(line_sep):
if l.startswith('INVENTORY CODE:') and not getting_bins:
inv_data = l.split()
inv_code = inv_data[2] + '-' + ' '.join(inv_data[3:])
elif l.startswith('INVENTORY CODE:') and getting_bins:
print("unexpected inventory code while reading bins:", l)
elif l.startswith('BIN') and l.endswith('MESSAGE'):
getting_bins = True
elif getting_bins == True and l:
bin_data = l.split()
# need to add exception handling here to make sure:
# 1) we have an inv_code
# 2) bin_data is at least 3 items big (assuming two for
# bin_id and at least one for message)
# 3) maybe some constraint checking to ensure that we have
# a valid instance of an inventory code and bin id
bin_id = ''.join(bin_data[0:2])
message = ' '.join(bin_data[2:])
# we now have a bin, an inv_code, and a message to add to our table
print(bin_id.ljust(20), inv_code.ljust(30), message, sep='\t')
elif getting_bins == True and not l:
# done getting bins for current inventory code
getting_bins = False
inv_code = ''
A rather complex one, but this might get you started:
import re, pandas as pd
from pandas import DataFrame
rx = re.compile(r'''
(?:INVENTORY\ CODE:)\s*
(?P<inv>.+\S)
[\s\S]+?
^BIN.+[\n\r]
(?P<bin_msg>(?:(?!^\ ).+[\n\r])+)
''', re.MULTILINE | re.VERBOSE)
string = your_string_here
# set up the dataframe
df = DataFrame(columns = ['BIN', 'INV', 'MESSAGE'])
for match in rx.finditer(string):
inv = match.group('inv')
bin_msg_raw = match.group('bin_msg').split("\n")
rxbinmsg = re.compile(r'^(?P<bin>(?:(?!\ {2}).)+)\s+(?P<message>.+\S)\s*$', re.MULTILINE)
for item in bin_msg_raw:
for m in rxbinmsg.finditer(item):
# append it to the dataframe
df.loc[len(df.index)] = [m.group('bin'), inv, m.group('message')]
print(df)
Explanation
It looks for INVENTORY CODE and sets up the groups (inv and bin_msg) for further processing in afterwork() (note: it would be easier if you had only one line of bin/msg as you need to split the group here afterwards).
Afterwards, it splits the bin and msg part and appends all to the df object.
I had a code written for a website scrapping which may help you.
Basically what you need to do is write click on the web page go to html and try to find the tag for the table you are looking for and using the module (i am using beautiful soup) extract the information. I am creating a json as I need to store it into mongodb you can create table.
#! /usr/bin/python
import sys
import requests
import re
from BeautifulSoup import BeautifulSoup
import pymongo
def req_and_parsing():
url2 = 'http://businfo.dimts.in/businfo/Bus_info/EtaByRoute.aspx?ID='
list1 = ['534UP','534DOWN']
for Route in list1:
final_url = url2 + Route
#r = requests.get(final_url)
#parsing_file(r.text,Route)
outdict = []
outdict = [parsing_file( requests.get(url2+Route).text,Route) for Route in list1 ]
print outdict
conn = f_connection()
for i in range(len(outdict)):
insert_records(conn,outdict[i])
def parsing_file(txt,Route):
soup = BeautifulSoup(txt)
table = soup.findAll("table",{"id" : "ctl00_ContentPlaceHolder1_GridView2"})
#trtags = table[0].findAll('tr')
tdlist = []
trtddict = {}
"""
for trtag in trtags:
print 'print trtag- ' , trtag.text
tdtags = trtag.findAll('td')
for tdtag in tdtags:
print tdtag.text
"""
divtags = soup.findAll("span",{"id":"ctl00_ContentPlaceHolder1_ErrorLabel"})
for divtag in divtags:
for divtag in divtags:
print "div tag - " , divtag.text
if divtag.text == "Currently no bus is running on this route" or "This is not a cluster (orange bus) route":
print "Page not displayed Errored with below meeeage for Route-", Route," , " , divtag.text
sys.exit()
trtags = table[0].findAll('tr')
for trtag in trtags:
tdtags = trtag.findAll('td')
if len(tdtags) == 2:
trtddict[tdtags[0].text] = sub_colon(tdtags[1].text)
return trtddict
def sub_colon(tag_str):
return re.sub(';',',',tag_str)
def f_connection():
try:
conn=pymongo.MongoClient()
print "Connected successfully!!!"
except pymongo.errors.ConnectionFailure, e:
print "Could not connect to MongoDB: %s" % e
return conn
def insert_records(conn,stop_dict):
db = conn.test
print db.collection_names()
mycoll = db.stopsETA
mycoll.insert(stop_dict)
if __name__ == "__main__":
req_and_parsing()