Python: Looping a given number of times to process different files - python

I made a script which is supposed to use Tkinter to let the user choose and load files, store their content in different objects and then process each of these documents.
I would like the script to process only a certain number of documents, determined by a question (the value is stored in "File_number").
For example: if, at the question "How many files do you want to compare?",
the user enters 3,
I would like the Tkinter open-file window to ask for only 3 files and then keep going.
I am using if/else statements like below,
but it doesn't seem to work well and the code is really not Pythonic.
Is there a better/shorter way to do the same thing?
Thanks
My script looks like this:
import pandas as pd
from pandas import *
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import pylab
import pandas.io.data
import os
import Tkinter
from Tkinter import *
import tkFileDialog
import tkSimpleDialog
from tkFileDialog import askopenfilename
import sys
# Set up GUI
root = Tkinter.Tk(); root.withdraw()
# Prompt for user info
File_number = tkSimpleDialog.askinteger("File number", "How many files do you want to compare?")
# Prompt for file explorer
# Also extract the file_name
process_a = 0
if process_a = File_number:
break
else:
process_a = process_a + 1
fileloc1 = tkFileDialog.askopenfilename(parent=root, title='Choose file 1')
fileloc1_name_clean = os.path.splitext(fileloc1)[0]
fileloc1_name = os.path.basename(fileloc1_name_clean)
if process_a = File_number:
break
else:
process_a = process_a + 1
fileloc2 = tkFileDialog.askopenfilename(parent=root, title='Choose file 2')
fileloc2_name_clean = os.path.splitext(fileloc2)[0]
fileloc2_name = os.path.basename(fileloc2_name_clean)
if process_a = File_number:
break
else:
process_a = process_a + 1
fileloc3 = tkFileDialog.askopenfilename(parent=root, title='Choose file 3')
fileloc3_name_clean = os.path.splitext(fileloc3)[0]
fileloc3_name = os.path.basename(fileloc3_name_clean)
EDIT 1
The next part of my script is:
dfa_1 = pd.read_csv(fileloc1, delimiter='\t')
dfa_nodupli = dfa_1.drop_duplicates(cols='N', take_last=False)
dfa_nodu_2pep = dfa_nodupli[(dfa_nodupli['Peptides(95%)'] > 1)]
dfa_nodu_2pep = dfa_nodu_2pep[~dfa_nodu_2pep['Name'].str.contains('Keratin')]
dfa_nodu_2pep.to_csv(fileloc1_name + ".csv")
dfb_1 = pd.read_csv(fileloc2, delimiter='\t')
dfb_nodupli = dfb_1.drop_duplicates(cols='N', take_last=False)
dfb_nodu_2pep = dfb_nodupli[(dfb_nodupli['Peptides(95%)'] > 1)]
dfb_nodu_2pep = dfb_nodu_2pep[~dfb_nodu_2pep['Name'].str.contains('Keratin')]
dfb_nodu_2pep.to_csv(fileloc2_name + ".csv")

I modified your code so that it works the way you want it to (I hope).
import Tkinter
import tkFileDialog
import tkSimpleDialog
from tkFileDialog import askopenfilename
import os
# Set up GUI
def main():
    """Ask how many files to compare, then prompt for that many file paths.

    Each chosen file is recorded as
    ``[path, path_without_extension, base_name_without_extension]``.
    The collected list is printed and also returned.

    Cancelling a file dialog stops the prompting early (the files chosen
    so far are kept) instead of re-opening the same dialog forever.

    Returns:
        The list of collected entries, or ``None`` when no (or a zero)
        file count was given.
    """
    root = Tkinter.Tk()
    root.withdraw()  # hide the empty root window; only the dialogs are shown

    try:
        # Prompt for user info
        file_count = tkSimpleDialog.askinteger(
            "File number", "How many files do you want to compare?")
        if not file_count:
            return None

        user_files = []
        while len(user_files) < file_count:
            fileloc = tkFileDialog.askopenfilename(
                parent=root,
                title='Choose file {}'.format(len(user_files) + 1))
            if not fileloc:
                # The dialog returned nothing (cancelled): give the user a
                # way out rather than asking for the same file endlessly.
                break
            fileloc_name_clean = os.path.splitext(fileloc)[0]
            fileloc_name = os.path.basename(fileloc_name_clean)
            user_files.append([fileloc, fileloc_name_clean, fileloc_name])

        print(user_files)
        return user_files
    finally:
        # Release the hidden root window once all dialogs are done.
        root.destroy()
main()
I use a while loop to ask for a file path as many times as you want.

Related

How can I use tqdm to add a progress bar in this function?

I received a Python script from a colleague, but because of the large amount of data and the long processing time, I would like to add a progress bar so that I can check its progress at any time.
from Bio import SeqIO
from tqdm import tqdm
import csv
import pandas as pd
import re
import time
# Find code in "metadata_silva_simplified.txt" file
path_to_file = "metadata_silva_simplified.txt"
df = pd.read_csv("Name.csv")
counter = 0
Code=[]
Names=[]
Missing=[]
t = time.time()
for index in df.index:
#print("------------------------------------------------------------------------")
#print(str(counter) + "- " + df["0"][index])
name=str(df["0"][index])
with open(path_to_file,"r") as file:
for line in file:
coincident=0
ref=line[(line.find("|")+1):]
ref=ref[:(ref.find("|")-1)]
ref=ref.strip()
if name == ref:
#if ref.find(name) != -1:
coincident=1
position = line.find("|")-1
Code.append("kraken:taxid|" + line[:position])
Names.append(name)
#print("kraken:taxid|" + line[:position])
break
if coincident==0:
Missing.append(name)
counter += 1
if (counter%1000) == 0:
print(str(round(counter/5105.08))+"% completed")
Code = {'Code':Code,'Name':Names}
dfcodes = pd.DataFrame(Code)
dfcodes.to_csv("Codes_secondpart.csv", index=False)
missing = pd.DataFrame(Missing)
missing.to_csv("Missing_secondpart.csv", index=False)
elapsed = time.time() - t
print("Mean time per sample=" + str(elapsed/counter))
I thought of adding the progress bar with the tqdm Python tool, but I don't know how to incorporate it into the script above.
You already imported tqdm. Wrap your loop in a tqdm call and it should work:
for index in tqdm(df.index):

read and Record multiple audios using streamlit python

I am using streamlit
I want to prepare audio dataset, so for that I have a .csv file which contains n numbers of lines which should be spoken by a user and voice of that user should be saved in a folder
Structure of CSV FILE
Sr#
Word
1
Hello
2
World
this file should be loaded and one-by-one all the words should be shown and below that word user should record sound and then press save button to save that audio and then next word appear. I have written the code but it is not working properly
import re
from turtle import onclick
from zlib import DEFLATED
from requests import session
from streamlit_option_menu import option_menu
import streamlit as st
import pandas as pd
import os
import sounddevice as sd
import wavio
def record(duration=5, fs=48000):
    """Record ``duration`` seconds of single-channel audio at ``fs`` Hz.

    Args:
        duration: Length of the recording in seconds.
        fs: Sampling rate in Hz.

    Returns:
        Whatever ``sd.rec`` returns for the finished recording.
    """
    # These are set as sounddevice-wide defaults, as before, so later
    # sounddevice calls in the same process see them too.
    sd.default.samplerate = fs
    sd.default.channels = 1
    myrecording = sd.rec(int(duration * fs))
    # sd.wait() blocks until the recording is finished. Its only parameter
    # is ``ignore_errors``, not a timeout, so the duration must not be
    # passed to it.
    sd.wait()
    return myrecording
def save_record(path_myrecording, myrecording, fs):
    """Write a recording to ``path_myrecording`` using ``wavio.write``.

    Args:
        path_myrecording: Destination file path.
        myrecording: The recorded samples to store.
        fs: Sampling rate passed through to ``wavio.write``.

    The file is written with ``sampwidth=2``. Nothing is returned.
    """
    wavio.write(path_myrecording, myrecording, fs, sampwidth=2)
def read_audio(file):
    """Return the complete contents of ``file`` as bytes.

    Args:
        file: Path of the file to read; it is opened in binary mode.
    """
    with open(file, "rb") as audio_file:
        return audio_file.read()
st.header("Data Pipeline")
with st.sidebar:
choose = option_menu("Modules", ["1. Recording","2. Preprocess Data","3. Create
Dataset"],
icons=['headset', 'mic', 'clipboard'],
default_index=0)
my_list=['a','b']
if choose=="1. Recording":
st.header("Voice Recording")
st.text("Follow the following steps")
s_name=''
s_name= st.text_input("1. Enter Your Name")
if os.path.isdir("./Recordings/"+s_name):
pass
else:
os.mkdir("./Recordings/"+s_name)
file =st.file_uploader("2. Upload an csv file", type=
['csv'],accept_multiple_files=False,key='file')
# if file:
# df= pd.read_csv(file)
# for i in df['Urdu']:
# my_list.append(i)
show_next = st.button("next", disabled = False)
if "current_index" not in st.session_state:
st.session_state.current_index = 0
st.session_state.load_state=True
# Whenever someone clicks on the button
if show_next and st.session_state.load_state :
# Show next element in list
st.write(my_list[st.session_state.current_index])
Record=st.button('Record', key='rec')
if st.session_state.get('rec')!=True:
st.session_state['rec']=True
if st.session_state['rec']==True:
print('i am here')
record_state = st.text("Recording...")
duration = 1 # seconds
fs = 48000
myrecording = record(duration, fs)
path_myrecording = f"./temp/AMAD.wav"
save_record(path_myrecording, myrecording, fs)
st.audio(read_audio(path_myrecording))
path="./Recordings/"+s_name+"/1.wav"
os.remove(path_myrecording)
save_record(path, myrecording, fs)
st.success("Recording Saved")
# Update and store the index
st.session_state.current_index += 1
if len(my_list) == st.session_state.current_index :
st.session_state.load_state = False
st.session_state

Python code doesn't call class from another program when Import statement is used

I'm trying to write code that gets user input via a pop-up and uses it in a different program.
below is the code which gets user input.
Excel_connection.py
import openpyxl
import tkinter as tk
class App(tk.Frame):
def __init__(self,master=None,**kw):
#Create a blank dictionary
self.answers = {}
tk.Frame.__init__(self,master=master,**kw)
tk.Label(self,text="Give Input Sheet Path").grid(row=0,column=0)
self.Input_From_User1 = tk.Entry(self)
self.Input_From_User1.grid(row=0,column=1)
tk.Button(self,text="Feed into Program",command =
self.collectAnswers).grid(row=2,column=1)
def collectAnswers(self):
self.answers['Input_Path'] = self.Input_From_User1.get()
global Input_Path
Input_Path = self.answers['Input_Path']
functionThatUsesAnswers(self.answers)
quit()
def quit():
root.destroy()
if __name__ == '__main__':
root = tk.Tk()
App(root).grid()
root.mainloop()
wb = openpyxl.load_workbook(Input_Path) # trying to open the open the input sheet from the
below path
ws = wb["Sheet1"]
Below is the code where i'm importing the above program which does some operation
Execution.py
import pandas
from Excel_Connection import *
from Snowflake_Connection import *
all_rows = list(ws.rows)
cur = ctx.cursor()
# Pull information from specific cells.
for row in all_rows[1:400]:
scenario = row[1].value
query = row[2].value
if_execute = row[3].value
if if_execute == 'Y':
try:
cur.execute(query)
df = cur.fetch_pandas_all()
except:
print(scenario," Failed")
else:
print("CREATED",scenario,".csv successfully")
print("ALL INDIVDUAL REPORT GENERATED")
When I'm executing Execution.py, the program does not produce pop up window and instead the code throws below error,
wb = openpyxl.load_workbook(Input_Path) # trying to open the open the input sheet from the below path
NameError: name 'Input_Path' is not defined
I tried to -
executing Excel_connection.py separately and it just worked fine.
place the code directly instead of importing the program in the Execution.py and again it worked fine as expected.
The only time I'm facing the issue is when I try to import Excel_connection.py into Execution.py.
Could somebody kindly help me out here.
When you import Excel_connection.py, it'll run the code in it.
So as you run Execution.py:
import pandas -> "run" the pandas stuff to define functions, etc...
from Excel_connection import * -> You import everything from Excel_connection. So the interpreter will open this file, parse, and run:
2.1 Class App is defined
2.2 runs: wb = openpyxl.load_workbook(Input_Path), which makes no sense at this point, because, as far as I can see, Input_Path is only defined in App.collectAnswers(), which has not been executed yet. So there is no Input_Path to use, and your program terminates here and tells you so.
If you run your Excel_connection.py directly, it'll work, because the if __name__ == '__main__' is True in this case and that section runs too. But if you import the file, it is false so you skip that part of the code.
You should move this to the Execution.py file before the all_rows = list(ws.rows) line
wb = openpyxl.load_workbook(Input_Path) # trying to open the open the input sheet from the
below path
ws = wb["Sheet1"]
And it'll still break since we have no Input_Path, so you must define it somehow, but it is up to you how. You can create an App like how you do it in the Excel_connection.py.
But I wouldn't do that, since it is a little ugly. I would do something like:
Excel_Connection.py
import openpyxl
import tkinter as tk
class App(tk.Frame):
    """Small form that asks the user for the input sheet path.

    When the button is pressed, the entered text is stored in
    ``self.answers['Input_Path']`` and in the module-level global
    ``Input_Path``, and the Tk main loop is stopped.
    """

    def __init__(self, master=None, **kw):
        # Answers collected from the form, keyed by field name.
        self.answers = {}
        tk.Frame.__init__(self, master=master, **kw)
        tk.Label(self, text="Give Input Sheet Path").grid(row=0, column=0)
        self.Input_From_User1 = tk.Entry(self)
        self.Input_From_User1.grid(row=0, column=1)
        tk.Button(self, text="Feed into Program",
                  command=self.collectAnswers).grid(row=2, column=1)

    def collectAnswers(self):
        """Store the entered path, hand the answers on, and stop the main loop."""
        global Input_Path
        self.answers['Input_Path'] = self.Input_From_User1.get()
        Input_Path = self.answers['Input_Path']
        try:
            # NOTE(review): functionThatUsesAnswers is not defined in this
            # listing; it has to be provided elsewhere in the module.
            functionThatUsesAnswers(self.answers)
        finally:
            # Always leave the main loop, even if the call above fails;
            # otherwise the window would stay open and block the caller.
            self.quit()
# def quit():
# root.destroy()
def main():
    """Show the path form, then open the workbook the user entered.

    Returns:
        The worksheet named "Sheet1" of the workbook at the entered path.

    Raises:
        ValueError: If the window was closed without a path being submitted.
    """
    root = tk.Tk()
    app = App(root)
    app.grid()
    root.mainloop()

    # App.collectAnswers() only stops the main loop; the window itself still
    # has to be destroyed. If the user closed it by hand, it is already gone.
    try:
        root.destroy()
    except tk.TclError:
        pass

    # Read the answer from the form instance instead of relying on the global
    # Input_Path, which does not exist when the button was never pressed.
    input_path = app.answers.get('Input_Path')
    if not input_path:
        raise ValueError("No input sheet path was provided")

    wb = openpyxl.load_workbook(input_path)
    return wb["Sheet1"]
if __name__ == '__main__':
main()
and then
Execution.py
import pandas
import Excel_Connection
from Snowflake_Connection import *
# Now we call the main() function from Excel_Connection, which will return the worksheet for us.
ws = Excel_Connection.main()
all_rows = list(ws.rows)
cur = ctx.cursor()
# Pull information from specific cells.
for row in all_rows[1:400]:
scenario = row[1].value
query = row[2].value
if_execute = row[3].value
if if_execute == 'Y':
try:
cur.execute(query)
df = cur.fetch_pandas_all()
except:
print(scenario," Failed")
else:
print("CREATED",scenario,".csv successfully")
print("ALL INDIVDUAL REPORT GENERATED")
you can simply use
csv files
or some file format like that to store and transfer data between files.

Tkinter askopenfilename path

I am trying to build a Tkinter UI with a button where the user chooses a main file and based on information in that file, the script would get some information about some secondary files. Upon choosing the main file, I am trying to use the file path of the main file to get the file path of the secondary files. However, because I am calling a function "open_main_file()" twice, I get two dialog boxes where the user will have to choose the main file twice instead of once. How can I circumvent that?
My code looks like this:
import pandas as pd
from tkinter import *
from os.path import dirname
from tkinter import filedialog
root = Tk()
def open_main_file():
return filedialog.askopenfilename()
def parse_main_file():
return pd.read_parquet(open_main_file(), engine='pyarrow')
def get_some_ids():
return parse_main_file()['some_ids'].iloc[0]
def get_list_of_other_files():
other_files_path = dirname(dirname(dirname(open_main_file())))
list_of_other_files = []
for f in get_some_ids():
list_of_other_files.append(glob.glob(other_files_path + '/identifier=' + str(f) + '/*'))
return map(''.join, list_of_other_files)
myButton = Button(root, text='Open File...', command=get_list_of_other_files).pack()
root.mainloop()
Try not to call the file-dialog function from inside several other functions. In this case, get_list_of_other_files() calls open_main_file() once directly and once more indirectly (through get_some_ids() and parse_main_file()). That is why you get the file dialog twice. Instead, call it once, keep the returned path, and pass it to the next function. You can write it like this:
import pandas as pd
from tkinter import *
from os.path import dirname
from tkinter import filedialog
root = Tk()
def open_main_file(): # This function is not necessary
return filedialog.askopenfilename()
def parse_main_file(main_file_path):
return pd.read_parquet(main_file_path, engine='pyarrow')
def get_some_ids(main_file_path):
return parse_main_file(main_file_path)['some_ids'].iloc[0]
def get_list_of_other_files():
    """Ask for the main file once and look up its secondary files.

    The main file's path is requested with a single file dialog and is then
    reused both to read the ids and to derive the search directory (three
    levels above the main file).

    Returns:
        An iterator that yields, for each id, the paths matched by
        ``<search dir>/identifier=<id>/*`` joined into one string. The
        iterator is empty when the dialog is cancelled.
    """
    import glob  # used below but not imported at the top of this listing

    main_file_path = filedialog.askopenfilename()
    if not main_file_path:
        # Dialog cancelled: there is nothing to parse.
        return map(''.join, [])

    other_files_path = dirname(dirname(dirname(main_file_path)))
    list_of_other_files = [
        glob.glob(other_files_path + '/identifier=' + str(f) + '/*')
        for f in get_some_ids(main_file_path)
    ]
    # The original returned the undefined name ``list_of_constituent_files``.
    return map(''.join, list_of_other_files)
myButton = Button(root, text='Open File...', command=get_list_of_other_files).pack()
root.mainloop()

how to connect input and function module in main module and can we return list from a function?

I have three modules: GetInput, Main and Converter. In the GetInput file there are all the inputs values and excel data in the form of list. In the Converter file I am using those input values from Getinput file and in the main file I am connecting both these files here. I am doing this so that my code can look more organized.
GetInput.py:
import pandas as pd
import numpy as np
import time
def getInputs():
df = pd.read_excel('input.xlsx')
actual = df['actual'].values.tolist()
schedule = df['schedule'].values.tolist()
freq = df['frequency'].values.tolist()
ACP = df['acp'].values.tolist()
modelInput = {
'actual': actual, 'schedule': schedule, 'freq': freq, 'ACP': ACP,'df' : df
}
return modelInput
Converter.py
import pandas as pd
def fun(modelInput):
underdraw = []
overdraw = []
for i,j, in zip(schedule, actual):
dev = j - i
if dev < 0:
underdraw.append(dev)
else:
underdraw.append(0)
if dev > 0:
overdraw.append(dev)
else:
overdraw.append(0)
df['underdraw'] = pd.Series(underdraw)
df['overdraw'] = pd.Series(overdraw)
df.to_excel('mainfile.xlsx')
Main.py
import pandas as pd
import numpy as np
from convert import *
from GetInputs import *
def fun1():
inpu = getInputs()
con = fun(inpu)
fun1()
This whole program works when I run it in a single module but it throw errors when I try divide my code into separate modules. Basically it throw error in GetInput.py and in Converter.py (df is not defined) file. I know its a very basic thing but I don't know how to make it work. There is no desired output for this program, I am already getting an output when I run it in a single file. I just want to divide my code in this format as I mentioned above: GetIput File, Converter File and Main File.
Keep all the files in same directory or else mention the file paths at the top of main code using os module.
You have misspelled the following in the main code:
from convert import *
from GetInputs import *
It should be:
from Converter import *
from GetInput import *
I have tested this using the following:
MainModule.py
from Converter import *
from GetInputs import *
def fun1():
inpu = getInputs()
con = fun(inpu)
fun1()
Converter.py
import pandas as pd
def fun(modelInput):
print("HIE" + modelInput)
GetInputs.py
def getInputs():
return "modelInput"

Categories

Resources