How can i achieve a vlookup excel like functionality in Python - python

In Billing Roster - SOW.xlsx I have two new columns of data: one is named SOW and the other is named SOW Description (the matching value for each SOW).
Now, when I open the ACFC_Resource_Allocation.xlsx workbook and, for example, select a value from the dropdown in cell D2 (SOW), I should get the matching value in cell E2 after the selection.
My only idea is that an Excel VLOOKUP like the one below should solve my case, but I am not sure how to achieve this in Python.
=VLOOKUP(D2,'[Billing Roster - SOW.xlsx]SOW List'!$A$1:$B$14,1,FALSE)
Tried below code
from openpyxl import *
from openpyxl.styles import *
import webbrowser
import pandas
from openpyxl.worksheet.datavalidation import DataValidation
# Read the SOW list workbook into a pandas DataFrame
sowexcel = pandas.read_excel('Billing Roster - SOW.xlsx')
# Load the existing Resource Allocation Excel
wb = load_workbook('ACFC_Resource_Allocation.xlsx')
allocationsheet = wb.active


def load():
    """Add a drop-down list of SOW numbers to column D and save the workbook.

    The choices come from the 'SOW #' column of ``sowexcel``. The list
    validation is applied to D2 down to the last used row of
    ``allocationsheet``, then the workbook is saved back to
    'ACFC_Resource_Allocation.xlsx' and closed.
    """
    maxrow = allocationsheet.max_row
    # str.join() raises TypeError on non-string items, so blank cells (NaN)
    # are dropped and every remaining value is converted to text first.
    sow_list = [str(sow) for sow in sowexcel['SOW #'].dropna()]
    column_sow = ','.join(sow_list)
    # An inline list validation takes its choices as one quoted,
    # comma-separated string.
    validator_sow = DataValidation(type='list', formula1='"{}"'.format(column_sow), allow_blank=True)
    allocationsheet.add_data_validation(validator_sow)
    validator_sow.add('D2:D%s' % maxrow)
    # save the file
    wb.save('ACFC_Resource_Allocation.xlsx')
    wb.close()


# Driver code
if __name__ == "__main__":
    load()
    file_open = webbrowser.open('ACFC_Resource_Allocation.xlsx')

Related

How to copy cells in Excel like hyperlinks from one Book to another using Python?

I need to copy cells as hyperlinks from one Excel file to another. I can't find anything relating to this problem. I can copy cell values, but that is not what I need.
I tried to modify some examples of copying cells from one book to another, but without success.
To copy the value in each cell from a source workbook (in this example 'foo1.xlsx') to a new workbook (destination workbook) and have the destination cells link back to the source cells
from openpyxl import load_workbook, Workbook
from openpyxl.worksheet.hyperlink import Hyperlink
# Copy every non-empty cell of 'Sheet1' in foo1.xlsx into a new workbook
# (foo2.xlsx) and attach to each copied cell a hyperlink that points back
# to the cell it came from.
source_path = "foo1.xlsx"
source_sheet = 'Sheet1'
source_wb = load_workbook(source_path)
source_ws = source_wb[source_sheet]
### Create a new workbook and worksheet to copy data to and rename the
### sheet to 'Sheet1'
destination_wb = Workbook()
destination_ws = destination_wb.active
destination_ws.title = 'Sheet1'
### Loop thru the rows and cells in the source sheet
for row in source_ws.iter_rows():
for source_cell in row:
cell_coord = source_cell.coordinate
# Skipping empty cells.
# Otherwise these cells in the destination workbook will be
# filled with the source filename.
if source_cell.value is None:
continue
### Create hyperlink to source cell
hyperlink = Hyperlink(target=source_path,
ref=cell_coord,
location = f'{source_sheet}!{cell_coord}')
### Copy source cell value to the destination sheet
destination_ws.cell(source_cell.row, source_cell.column).value = source_cell.value
### Update destination cell with hyperlink to source cell
destination_ws.cell(source_cell.row, source_cell.column).hyperlink = hyperlink
### Save new workbook specifying file name
destination_wb.save('foo2.xlsx')
###################################################
Change code to put full path to cell...
Instead of adding origin cell and hyperlink to it, set the cell value to the link path. Change the 8 lines from and including
### Create hyperlink to source cell
to
### Set full path to the original cell
destination_ws.cell(source_cell.row, source_cell.column).value = \
f'{source_path}#{source_sheet}!{cell_coord}'
moken's solution is more convenient and reliable.
This is the code to store hyperlink to a cell from a different file:
import os
from openpyxl import load_workbook, Workbook
def main():
    """Write a HYPERLINK formula that targets a cell of another workbook.

    The formula is stored in cell A2 of a newly created workbook, which is
    then saved to the output path.
    """
    input_workbook_path = r"c:\excel_books\input book.xlsx"
    output_workbook_path = r"c:\excel_books\output book.xlsx"
    cell_index = "B12"
    anchor = "CLICK HERE"

    # The input workbook is opened only to read the title of its sheet.
    sheet_in = load_workbook(input_workbook_path)["Sheet1"]

    # Resulting formula:
    # =HYPERLINK("[c:\excel_books\input book.xlsx]Sheet1!B12","CLICK HERE")
    external_cell_link = f'=HYPERLINK("[{input_workbook_path}]{sheet_in.title}!{cell_index}", "{anchor}")'

    output_wb = Workbook()
    output_wb["Sheet"]["A2"].value = external_cell_link
    output_wb.save(output_workbook_path)


if __name__ == '__main__':
    main()
This code is for getting the value from a cell from a different file
import os
from openpyxl import load_workbook, utils, Workbook
def construct_link(workbook_absolute_path, sheet_name, cell_index):
    r"""Construct a formula that references a cell in an external workbook.

    Example result: ='c:\excel_books\[input book.xlsx]Sheet1'!C1

    Args:
        workbook_absolute_path: Full path to the external workbook file.
        sheet_name: Name of the sheet that holds the target cell.
        cell_index: Cell address inside that sheet, e.g. "C1".

    Returns:
        The formula string, starting with "=".
    """
    # Adding square brackets around the filename in the path.
    # Before - c:\excel_books\input book.xlsx
    # After  - c:\excel_books\[input book.xlsx]
    # NOTE(review): os.path follows the host OS path conventions, so the
    # Windows-style example paths are presumably only split correctly when
    # this runs on Windows - confirm before using elsewhere.
    filename = os.path.basename(workbook_absolute_path)
    dirname = os.path.dirname(workbook_absolute_path)
    full_path = os.path.join(dirname, f"[{filename}]")
    # quote_sheetname() receives path + sheet name together, so the quoting
    # covers the whole "path[file]sheet" part of the reference.
    return f"={utils.quote_sheetname(full_path + sheet_name)}!{cell_index}"
def main():
    """Store an external-cell reference formula in a new workbook.

    Cell A2 of the new workbook receives a formula, built by
    ``construct_link``, that refers to cell C1 of 'Sheet1' in the input
    workbook. The new workbook is then saved to the output path.
    """
    input_workbook_path = r"c:\excel_books\input book.xlsx"
    output_workbook_path = r"c:\excel_books\output book.xlsx"

    # The input workbook is opened only to read the title of its sheet.
    source_sheet = load_workbook(input_workbook_path)["Sheet1"]
    link_formula = construct_link(input_workbook_path, source_sheet.title, "C1")

    output_wb = Workbook()
    output_wb["Sheet"]["A2"].value = link_formula
    output_wb.save(output_workbook_path)


if __name__ == '__main__':
    main()
This link might be helpful - Control when external references (links) are updated

Saving a pivot table as picture?

The following code selects the range of a pivot table and saves down as picture. How would it go to select the pivot table by its name (e.g., "Pivot1") and not by the range?
import win32com.client as win32
import sys
from pathlib import Path
import win32com.client as win32
from PIL import ImageGrab
# Copy a fixed worksheet range as a bitmap picture and save it as a PNG file.
excel_path = "C:/Prueba/GOOG-copia.xlsm"
# Excel is driven over COM, kept invisible and with alert dialogs switched off.
excel = win32.DispatchEx('Excel.Application')
excel.Visible = False
excel.DisplayAlerts = False
wb = excel.Workbooks.Open(Filename=excel_path)
ws = wb.Worksheets('Cacaca')
win32c = win32.constants
# The range is hard-coded; CopyPicture is asked for a bitmap, and the image is
# then read back through ImageGrab.grabclipboard().
ws.Range("A3:B8").CopyPicture(Format=win32c.xlBitmap)
img = ImageGrab.grabclipboard()
image_path = 'C:/Prueba/test.png'
img.save(image_path)
# NOTE(review): the workbook is never closed explicitly and Quit() is not in a
# try/finally, so an exception above would skip it.
excel.Quit()
You can select named ranges by using the range method:
wb.Worksheets('Cacaca').Range("Pivot1").Select()
When using the Win32 library, you can often try the process with VBA first as the Win32 calls tend to map to VBA.
Update - Here is the code to list all pivot tables in a Workbook:
# Map every sheet name to the list of pivot table names found on that sheet
dd = {}
ShtCnt = excel.Sheets.Count
# Sheets and PivotTables are addressed with 1-based indices
for sheet_no in range(1, ShtCnt + 1):
    sheet = wb.Sheets(sheet_no)
    dd[sheet.Name] = [
        sheet.PivotTables(pivot_no).Name
        for pivot_no in range(1, sheet.PivotTables().Count + 1)
    ]
print(dd)

Pandas and Tkinter , opening and saving files

I am trying to save the excel file as a .xlsx file then I want to import that new file back into python using pandas and numpy to allow for statistical analysis.
I want the USER to have the option of where to save the file with the 'asksaveasfile' dialog box, and another dialog box (e.g. 'askopenfilename') when opening the file for pandas and numpy.
Also, if someone can show how to convert specific columns from bytes to str. See below...
TypeError: write() argument must be str, not bytes.
Here's the end of the code:
import csv
import pandas as pd
import os
import tkinter as tk
from tkinter import filedialog
# Ask where to save, write the converted lines into a workbook, then read the
# result back with pandas for analysis.
# NOTE(review): asksaveasfile(mode='w') hands back an opened file object, not a
# path string; every later use of fn below receives that object.
fn = filedialog.asksaveasfile(mode='w', defaultextension=".xlsx")
# NOTE(review): pdf_to_csv is not defined in this snippet.
result = pdf_to_csv(fn)
lines = result.split('\n')
import openpyxl as pxl
wb = pxl.Workbook(fn)
ws = wb.active
for line in lines:
ws.append(line.split(';'))
# appending a list gives a complete row in xlsx
# NOTE(review): wb is never saved anywhere in this snippet, although the
# message below reports success.
print("Successfully Saved! ")
# The Tk root window is created and hidden only after the dialog above was shown.
root = tk.Tk()
root.withdraw()
dataFile=pd.read_excel(fn)#,usecols=['Last Name','First Name','Assignment Title','Department Code','Calendar Year', 'Compensation'])
dataFile.to_excel(fn)
print(fn)
# NOTE(review): df, DataFrame and df1 are all aliases of fn (the file object),
# not of dataFile, so the column assignment and drop() below do not act on the
# DataFrame that was read above.
df = fn
DataFrame = df
df1 = DataFrame
df1.columns = ['Last Name','First Name','Assignment Title','Department Code','Calendar Year', 'Compensation']
df1.drop(df1.index[0], inplace=True)
print(df1.head(11))

Importing Multiple HTML Files Into Excel as Separate Worksheets

I have a number of HTML files that I need to open up or import into a single Excel Workbook and simply save the Workbook. Each HTML file should be on its own Worksheet inside the Workbook.
My existing code does not work and it crashes on the workbook.Open(html) line and probably will on following lines. I can't find anything searching the web specific to this topic.
import win32com.client as win32
import pathlib as path
# Intended to load every *.html file of read_directory into its own worksheet
# of one new workbook and save that workbook inside the same directory.
# NOTE(review): `workbook` is the object returned by Workbooks.Add(); the author
# reports that the workbook.Open(html) call below crashes.
def save_html_files_to_worksheets(read_directory):
read_path = path.Path(read_directory)
save_path = read_path.joinpath('Single_Workbook_Containing_HTML_Files.xlsx')
excel_app = win32.gencache.EnsureDispatch('Excel.Application')
workbook = excel_app.Workbooks.Add() # create a new excel workbook
indx = 1 # used to add new worksheets dependent on number of html files
for html in read_path.glob('*.html'): # loop through directory getting html files
workbook.Open(html) # open the html in the newly created workbook - this doesn't work though
worksheet = workbook.Worksheets(indx) # each iteration in loop add new worksheet
worksheet.Name = 'Test' + str(indx) # name added worksheets
indx += 1
workbook.SaveAs(str(save_path), 51) # win32com requires string like path, 51 is xlsx extension
excel_app.Application.Quit()
# Example invocation; <UserName> is a placeholder in the path.
save_html_files_to_worksheets(r'C:\Users\<UserName>\Desktop\HTML_FOLDER')
The following code does half of what I want, if this helps. It will convert each HTML file into a separate Excel file. I need each HTML file in one Excel file with multiple WorkSheets.
import win32com.client as win32
import pathlib as path
def save_as_xlsx(read_directory):
    """Save every ``*.html`` file in *read_directory* as a separate workbook.

    Each HTML file is opened in Excel through win32com and saved next to the
    original under the same stem with an ``.xlsx`` extension. Excel is quit
    once all files have been processed.
    """
    source_dir = path.Path(read_directory)
    excel_app = win32.gencache.EnsureDispatch('Excel.Application')
    for html_file in source_dir.glob('*.html'):
        target = source_dir.joinpath(html_file.stem + '.xlsx')
        # SaveAs is given a string path; 51 selects the xlsx file format
        excel_app.Workbooks.Open(html_file).SaveAs(str(target), 51)
    excel_app.Application.Quit()


save_as_xlsx(r'C:\Users\<UserName>\Desktop\HTML_FOLDER')
Here is a link to a sample HTML file you can use, the data in the file is not real: HTML Download Link
One solution would be to open the HTML file into a temporary workbook, and copy the sheet from there into the workbook containing all of them:
# Collect the first sheet of every HTML file into one workbook.
workbook = excel_app.Application.Workbooks.Add()
# Placeholder sheet of the new workbook; the copied sheets are inserted before it.
sheet = workbook.Sheets(1)
# The loop variable is not called `path`, so it cannot shadow a
# `pathlib as path` import alias.
for html_path in read_path.glob('*.html'):
    # Hand Excel a plain string path, as is done for SaveAs
    workbook_tmp = excel_app.Application.Workbooks.Open(str(html_path))
    workbook_tmp.Sheets(1).Copy(Before=sheet)
    workbook_tmp.Close()
# Remove the redundant 'Sheet1'
# DisplayAlerts is the Excel property that suppresses the delete confirmation.
excel_app.Application.DisplayAlerts = False
sheet.Delete()
excel_app.Application.DisplayAlerts = True
I believe pandas will make your job much easier.
pip install pandas
Here's an example on how to get multiple tables from a wikipedia html and input it into a Pandas DataFrame and save it to disk.
import pandas as pd
url = "https://en.wikipedia.org/wiki/List_of_American_films_of_2017"
# Only tables carrying the "wikitable" class are parsed; row 0 is the header
wikitables = pd.read_html(url, header=0, attrs={"class": "wikitable"})
# Each table goes to its own CSV file named after its position: 0.csv, 1.csv, ...
for table_no, table in enumerate(wikitables):
    table.to_csv(f'{table_no}.csv', index=False)
For your use case, something like this should work:
import pathlib as path
import pandas as pd
def save_as_xlsx(read_directory):
    """Convert every ``*.html`` file in *read_directory* into a workbook.

    Each HTML file produces one ``.xlsx`` file with the same stem in the same
    directory. Every table that ``pd.read_html`` finds in the file becomes a
    separate worksheet named ``Table0``, ``Table1``, ...
    """
    read_path = path.Path(read_directory)
    for html in read_path.glob('*.html'):
        save_path = read_path.joinpath(html.stem + '.xlsx')
        dfs_from_html = pd.read_html(html, header=0,)
        # Write to save_path so the output of one HTML file is not overwritten
        # by the next; a single writer keeps all tables in the same workbook.
        with pd.ExcelWriter(save_path) as writer:
            for idx, df in enumerate(dfs_from_html):
                df.to_excel(writer, sheet_name='Table{}'.format(idx), index=False)
** Make sure to set the correct html attribute in the pd.read_html function.
How about this?
' Lets the user pick a folder, opens every *.xml file in it and copies the used
' range of each file's first sheet into the first sheet of this workbook,
' stacking the files below one another, then saves this workbook.
Sub From_XML_To_XL()
'UpdatebyKutoolsforExcel20151214
Dim xWb As Workbook
Dim xSWb As Workbook
Dim xStrPath As String
Dim xFileDialog As FileDialog
Dim xFile As String
Dim xCount As Long
' Any runtime error jumps to ErrHandler at the bottom
On Error GoTo ErrHandler
Set xFileDialog = Application.FileDialog(msoFileDialogFolderPicker)
xFileDialog.AllowMultiSelect = False
xFileDialog.Title = "Select a folder [Kutools for Excel]"
If xFileDialog.Show = -1 Then
xStrPath = xFileDialog.SelectedItems(1)
End If
' No folder chosen: nothing to do
If xStrPath = "" Then Exit Sub
Application.ScreenUpdating = False
Set xSWb = ThisWorkbook
' xCount is the destination row for the next pasted block
xCount = 1
' Only files matching *.xml are processed
xFile = Dir(xStrPath & "\*.xml")
Do While xFile <> ""
Set xWb = Workbooks.OpenXML(xStrPath & "\" & xFile)
xWb.Sheets(1).UsedRange.Copy xSWb.Sheets(1).Cells(xCount, 1)
' Close the source file without saving it
xWb.Close False
' Next block starts below the data pasted so far, leaving a gap
xCount = xSWb.Sheets(1).UsedRange.Rows.Count + 2
' Dir() without arguments returns the next matching file name
xFile = Dir()
Loop
Application.ScreenUpdating = True
xSWb.Save
Exit Sub
ErrHandler:
' NOTE(review): ScreenUpdating is not switched back on in this error path
MsgBox "no files xml", , "Kutools for Excel"
End Sub

Why am i only picking the first cell out of an excel sheet | Python 3

The point of the program is to feed a list of urls and get emails from the whois data
import whois
import xlrd
import tkinter
from tkinter import filedialog
from tkinter import *
import pandas as pd
#excel sheet pull
# Ask the user for a spreadsheet file and open its first sheet with xlrd.
root = Tk()
root.filename = filedialog.askopenfilename(initialdir = "/",title = "Select file",filetypes = (("xls files","*.xls"),("all files","*.*")))
workbookName = root.filename
workbook = xlrd.open_workbook(workbookName)
sheet = workbook.sheet_by_index(0)
# The triple-quoted block below is a bare string, i.e. disabled code.
'''
for site in sheet.cell:
whoisdataset = whois.whois(site)
for key in whoisdataset:
if "emails" in key:
enter code here`if "abuse" not in whoisdataset[key]:
print (key, whoisdataset[key])
'''
#sheet as matrix
df = pd.read_excel(workbookName)
# NOTE(review): the result of as_matrix() is not assigned, so df is still the
# DataFrame when the loop below runs.
df.as_matrix()
#Whois email pull
#if you replace "df" with a matrix, it will iterate and work
# NOTE(review): iterating a DataFrame directly yields its column labels rather
# than the cell values, which matches the reported symptom of getting only the
# first cell.
for website in df:
whoisdata = whois.whois(website)
for line in whoisdata:
if "emails" in line:
if "abuse" not in whoisdata[line]:
print (website, ":", line, whoisdata[line])
The "#excel sheet pull" part works, and the "#Whois email pull" part works if I feed it a one-dimensional matrix, but "#sheet as matrix" only gives me the first cell to work with. Does anyone know how to make it give me the whole first column as a matrix?

Categories

Resources