The following code requests the user to select an excel file they'd like to import as a pandas data frame; however, it doesn't provide the ability to select which sheet (if multiple exist):
import pandas as pd
import tkinter as tk
from tkinter import filedialog
# Create a Tk root only so the file dialog has a parent, and hide its empty window.
root = tk.Tk()
root.withdraw()
# Let the user pick the file to load.
path = filedialog.askopenfilename()
# An integer sheet_name is a zero-based position, so 1 always reads the
# second sheet; the user is never asked which sheet to load.
x = pd.read_excel(path, sheet_name = 1)
x
Conditions to include in new solution:
If only one sheet exists, automatically select and upload to pandas data frame
If multiple sheets exist, allow the user to choose, through a dialog box, which sheet they'd like to import
The solution offered by @GordonAitchJay, and its implementation with Tkinter, is excellent. It is definitely the way to go if you're running the script directly with Python or in an IDE like Spyder.
However, the OP is working in Jupyter and it turns out that Jupyter and Tkinter do not get along very well. The OP expressed difficulties, and while I do get it to work at first, if I push the code for performance, I'm also noticing serious lags and hiccups. This being the case, I thought I would just add a way to make the interaction work smoothly in Jupyter by using the ipywidgets framework.
# Jupyter notebook
import pandas as pd
import ipywidgets as widgets
from IPython.display import clear_output
from ipyfilechooser import FileChooser
from ipywidgets import interact
from pathlib import Path
# Home directory of the current user, as a plain string.
home = str(Path.home())
# Holds the currently opened workbook under the key 'file'; using a dict
# lets the callbacks below share it without `global` statements.
dict_file = {}
# Start the chooser in ~/excel; change to simply `home` if you want users to
# navigate through different directories.
fc = FileChooser(f'{home}/excel')
# Same directory again, this time as the chooser's sandbox path.
fc.sandbox_path = f'{home}/excel'
# Limit the listed files to Excel extensions (.xls, .xlsb, .xlsm, .xlsx).
fc.filter_pattern = ['*.xls*']
fc.title = '<b>Select Excel file</b>'
display(fc)
# Dropdown for the sheet names; it starts with a single blank option.
dropdown = widgets.Dropdown(options=[''], value='', description='Sheets:', disabled=False)
# Output area in which the selected sheet's DataFrame is rendered.
out = widgets.Output(layout=widgets.Layout(display='flex', flex_flow='column', align_items='flex-start', width='100%'))
# callback func for FileChooser
def get_sheets(chooser):
    """Open the workbook picked in the FileChooser and refresh the dropdown.

    The opened ``pd.ExcelFile`` is stored in ``dict_file['file']``. With a
    single sheet the dropdown is set to that sheet and disabled; otherwise a
    blank entry is added and selected so the user has to choose deliberately.

    Args:
        chooser: The FileChooser instance that fired the callback.
    """
    # Close the workbook opened by an earlier selection so its file handle is
    # released before the reference is dropped.
    previous = dict_file.pop('file', None)
    if previous is not None:
        previous.close()
    dict_file.clear()

    dict_file['file'] = pd.ExcelFile(chooser.value)
    # Copy the names so the blank placeholder is never appended to the list
    # handed out by the workbook object.
    sheet_names = list(dict_file['file'].sheet_names)

    if len(sheet_names) == 1:
        # Only one sheet: select it and just show the selection to the user.
        dropdown.options = sheet_names
        dropdown.value = sheet_names[0]
        dropdown.disabled = True
    else:
        # Blank default forces a deliberate choice by the user.
        sheet_names.append('')
        dropdown.options = sheet_names
        dropdown.value = ''
        dropdown.disabled = False
# Have the FileChooser call get_sheets once the user has selected a file.
fc.register_callback(get_sheets)
# prompt on selection sheet
def show_df(sheet):
    """Render the chosen sheet inside the ``out`` output widget.

    Args:
        sheet: Sheet name coming from the dropdown; ``''`` means that no
            sheet has been chosen (yet), in which case the area is only cleared.
    """
    # Always clear first: either a new workbook was selected (blank choice)
    # or a new sheet is about to be shown; otherwise outputs would stack.
    out.clear_output()
    if sheet == '':
        return

    with out:
        df = dict_file['file'].parse(sheet_name=sheet)
        if len(df) == 0:
            # Tell the user explicitly instead of rendering an empty frame.
            display('empty sheet')
        else:
            display(df)
# Call show_df with the dropdown's current value every time the selection changes.
interact(show_df, sheet=dropdown)
# Show the output area that show_df writes the DataFrame into.
display(out)
Snippet of interaction in notebook:
If that's all you need tkinter for, this will do.
It shows a simple combobox with the sheetnames. In this case, the first sheetname is named Orders.
As soon as you select an item, the window closes and it parses that sheet.
import pandas as pd
import tkinter as tk
from tkinter import ttk, filedialog
root = tk.Tk()
root.withdraw()

# Limit the dialog to Excel files; an empty result means the user cancelled.
path = filedialog.askopenfilename(filetypes=[('Excel files', '*.xls*')])

# Defined up front so that code after this block can rely on both names:
# `df` stays None when the user cancels or closes the sheet picker.
sheet_name = None
df = None

if not path:
    # Cancelled: nothing to load, just dispose of the hidden root window.
    root.destroy()
else:
    # Get the sheet names first without parsing all the sheets.
    excel_file = pd.ExcelFile(path)
    sheet_names = excel_file.sheet_names

    if len(sheet_names) == 1:
        sheet_name = sheet_names[0]
        root.destroy()
    elif len(sheet_names) > 1:
        # Show the window again and turn it into the sheet picker.
        root.deiconify()
        root.minsize(280, 30)
        root.title('Select sheet to open')

        # Read-only combobox listing the sheet names.
        combotext = tk.StringVar(value=sheet_names[0])
        box = ttk.Combobox(root,
                           textvariable=combotext,
                           values=sheet_names,
                           state='readonly')
        box.pack()

        def callback_function(event):
            """Store the chosen sheet and close the window."""
            # Assign to the module-level name instead of creating a local.
            global sheet_name
            sheet_name = combotext.get()
            # Close tkinter so execution continues after root.mainloop().
            root.destroy()

        root.bind('<<ComboboxSelected>>', callback_function)
        root.mainloop()
    else:
        root.destroy()

    # Parse only when a sheet was actually chosen: parse(sheet_name=None)
    # would return a dict with every sheet instead of one DataFrame.
    if sheet_name is not None:
        df = excel_file.parse(sheet_name=sheet_name)
Related
I have created a Dash app to read data from a .csv file and represent it, where the user has the option to choose which variable he wants to represent.
The problem I'm facing is that the Dash app keeps freezing or is very slow, most likely due to the sheer amount of data I'm reading (the .csv files I need to read have more than 2 million lines).
Is there any way I can make it faster? Maybe optimizing my code in some way?
Any help is appreciated, thanks in advance.
import pandas as pd
import numpy as np
from matplotlib import lines
import plotly.express as px
from dash import Dash, html, dcc,Input, Output
from tkinter import Tk
from tkinter.filedialog import askopenfilename
import webbrowser
print("Checkpoint 1")
def open_file():
    """Ask for a CSV file and load it into the module-level ``df``.

    Also rebuilds the module-level ``drop_list`` with the distinct values of
    the "Prozesstext" column in sorted order (missing values sort last).

    Raises:
        FileNotFoundError: If the dialog is closed without choosing a file.
    """
    global df, drop_list

    root = Tk()
    root.withdraw()  # we don't want a full GUI, so keep the root window from appearing
    filename = askopenfilename()  # show an "Open" dialog box and return the selected path
    root.destroy()
    print(filename)
    if not filename:
        raise FileNotFoundError("No CSV file was selected")

    # The path is used as returned by the dialog: forward slashes are valid
    # on every platform, so no backslash rewriting is needed.
    # skipfooter is only supported by the (slower) python engine.
    df = pd.read_csv(filename, sep=";", skiprows=4, skipfooter=2, engine='python')

    # Distinct signals in the CSV; sort_values returns a new object, so its
    # result has to be kept for the sorting to take effect.
    signals = pd.Series(df["Prozesstext"].unique()).sort_values()
    drop_list = signals.tolist()  # list used for the dropdown menu
app = Dash(__name__)
fig = px.line([]) #figure starts with an empty chart
# Load the CSV before the layout is built: the dropdown options below are
# taken from drop_list, which open_file() fills.
open_file()
print("Checkpoint 2")
app.layout = html.Div([
html.H1(id = 'H1', children = 'Reading Data from CSV', style = {'textAlign':'center','marginTop':40,'marginBottom':40}),
# drop_list[:-1] offers every entry except the last one.
dcc.Dropdown(drop_list[:-1],id='selection_box'),
html.Div(id='dd-output-container'),
dcc.Graph(
id='trend1',
figure=fig
)
])
# Open the app's URL in the browser; note that this runs before the server
# is started at the bottom of the script.
webbrowser.open("http://127.0.0.1:8050", new=2, autoraise=True)
# FIRST CALLBACK
@app.callback(
    Output(component_id='trend1', component_property='figure'),
    Input('selection_box', 'value'),
    prevent_initial_call=True,
)
def update_trend1(value):
    """Return a line chart of the signal selected in the dropdown.

    Args:
        value: The "Prozesstext" entry chosen in 'selection_box'.

    Returns:
        A plotly line figure of "Daten" over "Zeitstempel" for that signal.
    """
    # Keep only the rows that belong to the selected signal.
    df2 = df[df['Prozesstext'].isin([value])]
    return px.line(df2, x="Zeitstempel", y="Daten", title=value, markers=True)
# Start the Dash server only when this file is executed as a script.
if __name__ == '__main__':
    app.run_server()
    # Alternative with hot reload and error overlay:
    # app.run_server(debug=True)
I suggest the following:
Build a Multi-value dropdown menu with the names of all columns in the CSV file. Look at this here.
Based on the selected columns by a user, the corresponding data will be imported from the CSV file.
It is not clear in your question how you represented the csv data on Dash. I recommend dbc.Table.
By doing this, you will minimize the cost of reading the entire data from the CSV file.
I try to find a solution for the following issue.
I would like to upload an excel sheet, consisting of multiple sheets (use case here 2). Afterwards I added tabs via Streamlit and used the aggrid component to be able to change some cells. However if I change cells in Sheet 1 and jump to tab 2 and back, changes are gone. This is not the desired output, meaning that any changes done in the cell should remain.
I tried via st.cache and st.experimental_memo however without success.
My code is below
import numpy as np
import streamlit as st
import pandas as pd
from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode, JsCode,GridOptionsBuilder
excelfile = st.sidebar.file_uploader("Select Excel-File for cleansing", key="Raw_Data")
if excelfile is None:
    st.balloons()
    # Nothing has been uploaded yet: end this script run here instead of
    # letting pd.read_excel fail on None further down.
    st.stop()

tab1, tab2 = st.tabs(["Sheet 1", "Sheet 2"])


# NOTE(review): "#st.cache()" looks like a decorator whose "@" was lost — confirm.
#st.cache()
def load_sheet1():
    """Read "Sheet1" of the uploaded workbook into a DataFrame."""
    return pd.read_excel(excelfile, sheet_name="Sheet1")


#st.cache()
def load_sheet2():
    """Read "Sheet2" of the uploaded workbook into a DataFrame."""
    return pd.read_excel(excelfile, sheet_name="Sheet2")


# Both sheets are re-read from the upload on every script run.
df = load_sheet1()
with tab1:
    gd = GridOptionsBuilder.from_dataframe(df)
    gd.configure_pagination(enabled=True)
    gd.configure_default_column(editable=True, groupable=True)
    gd.configure_selection(selection_mode="multiple", use_checkbox=True)
    gridoptions = gd.build()
    grid_table = AgGrid(
        df,
        gridOptions=gridoptions,
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        theme="material",
    )

df1 = load_sheet2()
with tab2:
    gd = GridOptionsBuilder.from_dataframe(df1)
    gd.configure_pagination(enabled=True)
    gd.configure_default_column(editable=True, groupable=True)
    gd.configure_selection(selection_mode="multiple", use_checkbox=True)
    gridoptions = gd.build()
    grid_table = AgGrid(
        df1,
        gridOptions=gridoptions,
        update_mode=GridUpdateMode.SELECTION_CHANGED,
        theme="material",
    )
I also can share with you my test excel file:
Sheet 1
Col1
Col2
A
C
B
D
Sheet 2
Col3
Col4
E
G
F
H
Any kind of support how to eliminate this issue would be more than awesome
EDIT: Here is a solution without the load button.
I couldn't find a way to do it without adding a button that reloads the page to apply the changes. Since Streamlit reruns the whole code every time you interact with it, it is a bit tricky to render elements the right way. Here is your code refactored. Hope this helps!
import streamlit as st
import pandas as pd
from st_aggrid import AgGrid, GridUpdateMode, GridOptionsBuilder
# Seed st.session_state once so the (edited) frames and the last seen upload
# are still available on the next script run.
for _key, _default in (
    ('df', pd.DataFrame()),
    ('df1', pd.DataFrame()),
    ('excelfile', None),
):
    if _key not in st.session_state:
        st.session_state[_key] = _default
# NOTE(review): "#st.cache()" looks like a decorator whose "@" was lost — confirm.
#st.cache()
def load_sheet1():
    """Return "Sheet1" of the uploaded workbook as a DataFrame."""
    return pd.read_excel(excelfile, sheet_name="Sheet1")
# NOTE(review): "#st.cache()" looks like a decorator whose "@" was lost — confirm.
#st.cache()
def load_sheet2():
    """Return "Sheet2" of the uploaded workbook as a DataFrame."""
    return pd.read_excel(excelfile, sheet_name="Sheet2")
def show_table(data):
    """Show ``data`` in an editable AgGrid and return the grid's current data.

    Args:
        data: DataFrame to display.

    Returns:
        The 'data' entry of the grid response, or a new empty DataFrame when
        ``data`` is empty (no grid is drawn in that case).
    """
    if data.empty:
        return pd.DataFrame()

    builder = GridOptionsBuilder.from_dataframe(data)
    builder.configure_pagination(enabled=True)
    builder.configure_default_column(editable=True, groupable=True)
    builder.configure_selection(selection_mode="multiple", use_checkbox=True)

    grid_response = AgGrid(
        data,
        gridOptions=builder.build(),
        # Use MODEL_CHANGED instead of SELECTION_CHANGED
        update_mode=GridUpdateMode.MODEL_CHANGED,
        theme="material",
    )
    # The response carries the table as it looks after the user's edits.
    return grid_response['data']
excelfile = st.sidebar.file_uploader("Select Excel-File for cleansing", key="Raw_Data")

# Re-read the sheets only when the upload differs from the one seen on the
# previous run; otherwise the frames kept in session_state are used.
if st.session_state.excelfile != excelfile:
    st.session_state.excelfile = excelfile
    try:
        st.session_state.df = load_sheet1()
        st.session_state.df1 = load_sheet2()
    except Exception:
        # Best effort: if the sheets cannot be read (for example because no
        # file is uploaded), fall back to two empty tables.
        st.session_state.df = pd.DataFrame()
        st.session_state.df1 = pd.DataFrame()

tab1, tab2 = st.tabs(["Sheet 1", "Sheet 2"])
with tab1:
    # Get the edited DataFrame from the ag grid object
    df = show_table(st.session_state.df)
with tab2:
    # Same thing here...
    df1 = show_table(st.session_state.df1)

# Then you need to click on a button to apply the changes and
# reload the page before you go to the next tab
if st.button('Apply changes'):
    # Store new edited DataFrames in session state
    st.session_state.df = df
    st.session_state.df1 = df1
    # Rerun the page so that changes apply and new DataFrames are rendered
    st.experimental_rerun()
After loading your file and making your changes in the first tab hit the "apply changes" button to reload the page before moving to the second tab.
I'm trying to write code that gets user input via a pop-up and uses it in a different program.
below is the code which gets user input.
Excel_connection.py
import openpyxl
import tkinter as tk
class App(tk.Frame):
    """Frame with one entry field asking for the input workbook path."""

    def __init__(self, master=None, **kw):
        # Filled by collectAnswers once the button is pressed.
        self.answers = {}
        tk.Frame.__init__(self, master=master, **kw)

        prompt = tk.Label(self, text="Give Input Sheet Path")
        prompt.grid(row=0, column=0)

        self.Input_From_User1 = tk.Entry(self)
        self.Input_From_User1.grid(row=0, column=1)

        submit = tk.Button(self, text="Feed into Program", command=self.collectAnswers)
        submit.grid(row=2, column=1)

    def collectAnswers(self):
        """Store the entered path, hand the answers on and close the window."""
        global Input_Path
        entered = self.Input_From_User1.get()
        self.answers['Input_Path'] = entered
        # Publish the path as a module-level name as well.
        Input_Path = entered
        # NOTE(review): functionThatUsesAnswers is not defined in this
        # snippet — confirm that it exists elsewhere.
        functionThatUsesAnswers(self.answers)
        quit()
def quit():
    """Destroy the module-level ``root`` window.

    NOTE: this name shadows the built-in ``quit``.
    """
    root.destroy()
if __name__ == '__main__':
    root = tk.Tk()
    App(root).grid()
    root.mainloop()

# These two statements sit outside the guard above, so they also run when the
# module is imported, where Input_Path has not been set by the GUI.
wb = openpyxl.load_workbook(Input_Path)  # trying to open the input sheet from the below path
ws = wb["Sheet1"]
Below is the code where i'm importing the above program which does some operation
Execution.py
import pandas
from Excel_Connection import *
from Snowflake_Connection import *
all_rows = list(ws.rows)
cur = ctx.cursor()

# Pull information from specific cells: the first row is skipped and at most
# rows 2..400 are processed.
for row in all_rows[1:400]:
    scenario = row[1].value
    query = row[2].value
    if_execute = row[3].value

    # Only rows flagged with 'Y' are executed.
    if if_execute != 'Y':
        continue

    try:
        cur.execute(query)
        df = cur.fetch_pandas_all()
    except Exception:
        # Report the failing scenario and carry on with the next row.
        # Exception (not a bare except) lets Ctrl+C and SystemExit through.
        print(scenario, " Failed")
    else:
        print("CREATED", scenario, ".csv successfully")

print("ALL INDIVDUAL REPORT GENERATED")
When I'm executing Execution.py, the program does not produce pop up window and instead the code throws below error,
wb = openpyxl.load_workbook(Input_Path) # trying to open the open the input sheet from the below path
NameError: name 'Input_Path' is not defined
I tried to -
executing Excel_connection.py separately and it just worked fine.
place the code directly instead of importing the program in the Execution.py and again it worked fine as expected.
The only time I'm facing the issue is when I try to import Excel_connection.py into Execution.py.
Could somebody kindly help me out here.
When you import Excel_connection.py, it'll run the code in it.
So as you run Execution.py:
import pandas -> "run" the pandas stuff to define functions, etc...
from Excel_connection import * -> You import everything from Excel_connection. So the interpreter will open this file, parse, and run:
2.1 Class App is defined
2.2 runs: wb = openpyxl.load_workbook(Input_Path), which makes no sense at this point, because Input_Path is only defined in App.collectAnswers(), which has not been executed yet. So there is no Input_Path to use, and your program terminates here and tells you so.
If you run your Excel_connection.py directly, it'll work, because the if __name__ == '__main__' is True in this case and that section runs too. But if you import the file, it is false so you skip that part of the code.
You should move this to the Execution.py file before the all_rows = list(ws.rows) line
wb = openpyxl.load_workbook(Input_Path)  # trying to open the input sheet from the below path
ws = wb["Sheet1"]
And it'll still break since we have no Input_Path, so you must define it somehow, but it is up to you how. You can create an App like how you do it in the Excel_connection.py.
But I wouldn't do that, since it is a little ugly. I would do something like:
Excel_Connection.py
import openpyxl
import tkinter as tk
class App(tk.Frame):
    """Frame with one entry field asking for the input workbook path."""

    def __init__(self, master=None, **kw):
        # Filled by collectAnswers once the button is pressed.
        self.answers = {}
        tk.Frame.__init__(self, master=master, **kw)

        prompt = tk.Label(self, text="Give Input Sheet Path")
        prompt.grid(row=0, column=0)

        self.Input_From_User1 = tk.Entry(self)
        self.Input_From_User1.grid(row=0, column=1)

        submit = tk.Button(self, text="Feed into Program", command=self.collectAnswers)
        submit.grid(row=2, column=1)

    def collectAnswers(self):
        """Store the entered path, hand the answers on and leave the main loop."""
        global Input_Path
        entered = self.Input_From_User1.get()
        self.answers['Input_Path'] = entered
        # Publish the path as a module-level name as well.
        Input_Path = entered
        # NOTE(review): functionThatUsesAnswers is not defined in this
        # snippet — confirm that it exists elsewhere.
        functionThatUsesAnswers(self.answers)
        # The frame's own quit() replaces the former module-level quit()
        # helper that called root.destroy().
        self.quit()
def main():
    """Ask the user for the workbook path and return its "Sheet1" worksheet.

    Returns:
        The worksheet named "Sheet1" of the workbook at the entered path.

    Raises:
        ValueError: If the window was closed without submitting a path.
    """
    root = tk.Tk()
    app = App(root)
    app.grid()
    root.mainloop()

    # Leaving the main loop does not remove the window, so dispose of it
    # here; it may already be gone if the user closed it.
    try:
        root.destroy()
    except tk.TclError:
        pass

    # Read the answer from this App instance rather than from a global that
    # may be unset, or left over from an earlier call.
    input_path = app.answers.get('Input_Path')
    if not input_path:
        raise ValueError("No input sheet path was provided")

    wb = openpyxl.load_workbook(input_path)
    return wb["Sheet1"]


if __name__ == '__main__':
    main()
and then
Execution.py
import pandas
import Excel_Connection
from Snowflake_Connection import *
# Now we call the main() function from Excel_Connection, which will return the worksheet for us.
ws = Excel_Connection.main()
all_rows = list(ws.rows)
cur = ctx.cursor()

# Pull information from specific cells: the first row is skipped and at most
# rows 2..400 are processed.
for row in all_rows[1:400]:
    scenario = row[1].value
    query = row[2].value
    if_execute = row[3].value

    # Only rows flagged with 'Y' are executed.
    if if_execute != 'Y':
        continue

    try:
        cur.execute(query)
        df = cur.fetch_pandas_all()
    except Exception:
        # Report the failing scenario and carry on with the next row.
        # Exception (not a bare except) lets Ctrl+C and SystemExit through.
        print(scenario, " Failed")
    else:
        print("CREATED", scenario, ".csv successfully")

print("ALL INDIVDUAL REPORT GENERATED")
you can simply use
csv files
or some file format like that to store and transfer data between files.
I created a python script (NoShowCalc.py) that automates data cleaning and analysis with 3 selected excel files (booked_file_path, arrived_file_path, and vlookup_file_path). However, I want this all to be executed through a GUI, so I started a separate (GUI.py) script to create an interface with browse buttons that will get those file path names, then that will get me what I need to execute the NoShowCalc.py script. Once those excel files are selected, another button is there to execute the NoShowCalc.py script. However, I did it and it worked! But I have no idea what I changed and now the two different py files are not connecting.
Here's the script in the NoShowGUI.py script:
def open_file():
    """Let the user pick the booked CSV file and update the browse label."""
    browse_text.set('Loading...')
    booked_file_path = askopenfile(
        parent=root,
        mode='rb',
        title='Choose a file',
        filetype=[('CSV file', '*.csv')],
    )
    if not booked_file_path:
        return
    # Both names are local to this function; nothing is stored for later use.
    read_csv = (booked_file_path)
    browse_text.set('Loaded')
def run():
    """Launch NoShow_Calc.py through the shell, then update the button label."""
    # os.system only returns once the command has finished.
    os.system('NoShow_Calc.py')
    calculate_text.set("Calculating...")
# Calculate button: its label is driven by calculate_text, and a click calls run().
calculate_text = tk.StringVar()
calculate_btn = tk.Button(root, textvariable=calculate_text, command=lambda:run(), font='Calibri', fg='black', height=1, width=15)
calculate_text.set("Calculate No Show")
calculate_btn.grid(column=2, row=9)
These are the first lines in the NoShowCalc.py script:
import pandas as pd
# The three *_file_path names are not assigned anywhere in the lines shown;
# presumably they are expected to come from the GUI script — verify.
booked = pd.read_csv(booked_file_path, parse_dates=['Appointment Date'])
arrived = pd.read_csv(arrived_file_path, parse_dates=['Appointment Date'])
vlookup = pd.read_excel(vlookup_file_path)
The error that keeps popping up is NameError: name 'booked_file_path' is not defined. I don't get how it ran before and now this error is popping up as it can't speak to the other py file anymore. What did I do wrong?
If you run a script using os.system() or the subprocess module, then you can't use variables from the other script. They run as separate processes and can't share variables (or data in memory).
You can only send some text values as arguments
os.system('NoShow_Calc.py ' + booked_file_path)
and then you can get it inside NoShow_Calc using sys.argv
import pandas as pd
import sys
# First command-line argument passed by the calling script.
booked_file_path = sys.argv[1]
booked = pd.read_csv(booked_file_path, parse_dates=['Appointment Date'])
# arrived_file_path and vlookup_file_path are not read from sys.argv in this
# fragment; they have to be supplied the same way.
arrived = pd.read_csv(arrived_file_path, parse_dates=['Appointment Date'])
vlookup = pd.read_excel(vlookup_file_path)
If you need other variables then you have to send other values in the same way
os.system('NoShow_Calc.py ' + booked_file_path + ' ' + other_filename)
and
booked_file_path = sys.argv[1]
other_filename = sys.argv[2]
# etc.
But using os.system() you can't send result booked, arrived, vlookup from NoShow_Calc to NoShowGUI.
You could do it with subprocess but it can send it only as text - so NoShow_Calc would have to use print() to display all result and NoShowGUI would have to parse this text to expected structure - ie. list, dictionary, DataFrame
Better you should use import to load code from NoShow_Calc.py and then all code runs in the same process so all code has access to the same variables - and it doesn't need to convert to text and back from text.
To make it better I put code in function
import pandas as pd
def my_function(booked_file_path, arrived_file_path, vlookup_file_path):
    """Load the three input files.

    Args:
        booked_file_path: CSV file with an 'Appointment Date' column.
        arrived_file_path: CSV file with an 'Appointment Date' column.
        vlookup_file_path: Excel file.

    Returns:
        Tuple ``(booked, arrived, vlookup)`` of DataFrames.
    """
    date_columns = ['Appointment Date']
    csv_frames = [
        pd.read_csv(csv_path, parse_dates=date_columns)
        for csv_path in (booked_file_path, arrived_file_path)
    ]
    lookup = pd.read_excel(vlookup_file_path)
    return csv_frames[0], csv_frames[1], lookup
and then in NoShowGUI you can import it and use like any other function
from NoShow_Calc import my_function
booked, arrived, vlookup = my_function(booked_file_path, arrived_file_path, vlookup_file_path)
EDIT:
I made minimal working code. I reduced it to only one filename.
NoShow_Calc.py
import pandas as pd
def calc(booked_file_path):
    """Read the booked CSV file and return it as a DataFrame.

    The 'Appointment Date' column is parsed as dates. The arrived and vlookup
    files were left out of this reduced example; they would be added as
    further parameters and return values.
    """
    return pd.read_csv(booked_file_path, parse_dates=['Appointment Date'])
NoShowGUI.py
import tkinter as tk
from tkinter.filedialog import askopenfilename # instead of `askopenfile`
# adding directory with this script to `sys.path` before `import NoShow_Calc`
# to make sure that `import` will search `NoShow_Calc.py` in correct folder even when GUI will be run from different folder
import os
import sys
# Absolute path of the folder that contains this script.
HOME_DIR = os.path.dirname(os.path.abspath(__file__))
# Must happen before the import below so NoShow_Calc.py is found there.
sys.path.append(HOME_DIR)
import NoShow_Calc
print('HOME_DIR:', HOME_DIR)
def select_filename():
    """Ask for the booked CSV file and remember its path.

    The chosen path is stored in the module-level ``booked_file_path``; if the
    dialog is cancelled the variable is reset to ``None`` so that "nothing
    selected" has a single representation.
    """
    global booked_file_path  # assign to the module-level variable

    text_log.insert('end', 'Selecting ...\n')

    # askopenfilename instead of askopenfile: only the name is needed,
    # pandas opens the file on its own.
    selected = askopenfilename(parent=root,
                               title='Choose a file',
                               # initialdir='/home/furas',
                               filetypes=[('CSV file', '*.csv')])

    if selected:
        booked_file_path = selected
        text_log.insert('end', f'Selected: {booked_file_path}\n')
    else:
        # A cancelled dialog yields an empty value; store None instead.
        booked_file_path = None
        text_log.insert('end', 'Not selected\n')
def run():
    """Run the calculation on the selected file and log the first rows.

    Writes a message to ``text_log`` and returns early when no file has been
    selected. The arrived and vlookup files are not handled in this reduced
    example; they would need the same check.
    """
    text_log.insert('end', "Calculating...\n")

    # Falsy covers both the initial None and the empty value that a
    # cancelled file dialog produces.
    if not booked_file_path:
        text_log.insert('end', "File booked_file_path not selected !!!\n")
        return

    # Force tkinter to show the log text now, not only when this function returns.
    root.update()

    result = NoShow_Calc.calc(booked_file_path)

    text_log.insert('end', "Result:\n")
    text_log.insert('end', str(result.head()) + "\n")
# --- main ---
booked_file_path = None # default value at start (so `run` can check for `None` to see whether a filename was selected)
#arrived_file_path = None
#vlookup_file_path = None
root = tk.Tk()
# Text widget used as a simple log area.
text_log = tk.Text(root)
text_log.grid(column=0, row=0)
select_btn = tk.Button(root, text="Select File Name", command=select_filename)
select_btn.grid(column=0, row=1)
calculate_btn = tk.Button(root, text="Calculate", command=run)
calculate_btn.grid(column=0, row=2)
root.mainloop()
I want to run a Tk() window and enter some values in it. These values are then used by a function that replaces the value of a particular cell in an Excel file, namely the cell whose content matches one of the entered values. The function works and does the replacement if I put the values into the program manually, but how can I make it read the values that I enter in the Tk() window? I added a button so that the function runs after I have typed the values into the entry fields, but it still does not work. The "sntake" and "sngive" variables in the replace() function do not seem to work... What am I missing?
Code:
from tkinter import *
import pandas as pd
from xlutils.copy import copy
from openpyxl import *
from openpyxl.utils.cell import get_column_letter
import openpyxl
# Main window.
app = Tk()
app.geometry("500x500")
app.title("S/N Management")
heading = Label(text="S/N Management",fg="black",bg="green",width="500",height="3",font="10")
heading.pack()
# Captions for the two entry fields.
sngive_text = Label(text="S/N of the delivered ")
sntake_text = Label(text="S/N of the recieved ")
sngive_text.place(x=15,y=80)
sntake_text.place(x=15,y=160)
# StringVar objects bound to the entry fields; the typed text is read from
# them with .get().
sngive = StringVar()
sntake = StringVar()
sngive_entry = Entry(textvariable=sngive,width="30")
sntake_entry = Entry(textvariable=sntake,width="30")
sngive_entry.place(x=15,y=100)
sntake_entry.place(x=15,y=180)
def replace():
    """Replace the received serial number with the delivered one.

    Every cell of the "serials" sheet in 'Tracker.xlsx' whose text equals the
    value typed into the "received" field is overwritten with the value typed
    into the "delivered" field. The result is saved as 'tracker_updated.xlsx'.
    """
    # sntake / sngive are StringVar objects and never compare equal to a str;
    # the text typed by the user has to be read with .get().
    received = sntake.get()
    delivered = sngive.get()

    wb = openpyxl.load_workbook('Tracker.xlsx')
    sheet = wb["serials"]

    for row in sheet.iter_rows():
        for cell in row:
            # Skip empty cells so that their str() form "None" can never match.
            if cell.value is not None and str(cell.value) == received:
                cell.value = delivered

    wb.save('tracker_updated.xlsx')
# Clicking the button runs replace().
button = Button(app,text="Submit Data",command=replace,width="30",height="2",bg="grey")
button.place(x=140,y=420)
# Start the Tk event loop.
mainloop()