Adding simple menu in python - python

Hi all, I wrote a small Python script to parse data out of a log file, and it extracts everything I need. Now I am trying to create a menu so that the user can choose which data to parse out, rather than getting all of the log content. I am struggling to figure out how to do it; could someone please help me get started on making a menu? I am a newbie to Python.
This is what I have so far:
import re
# Parse temp.log into rows of [date, time, ids, agent, errID, clear] and print them.
with open('temp.log') as f:
    lines = f.readlines()

data = []
for line in lines:
    # The line must start with "<2 digits> <word> <2 digits>"; a line that
    # does not will raise AttributeError here.
    date = re.match(r'\d{2} \w+ \d{2}', line).group()
    fields = line.split()  # split once instead of once per column
    time = fields[3]
    ids = fields[4]
    # search() returns None when a field is absent, so .group() raises
    # AttributeError; fall back to the bare label in that case.
    try:
        agent = re.search(r'agent:\s(.*?),', line).group()
    except AttributeError:
        agent = 'agent:'
    try:
        errID = re.search(r'ErrIdText:\s(.*?),', line).group()
    except AttributeError:
        errID = 'ErrIdText:'
    try:
        clear = re.search(r'clearedID:\s(.*?)\)', line).group()
    except AttributeError:
        clear = 'clearedID:'
    row = [date, time, ids, agent, errID, clear]
    data.append(row)

for row in data:
    print(row)
So I want to make a menu so user can choose if they only want to parse out the date and the agent name for example.

You can use click to implement your menu through the command line. It will parse the arguments and you will be able to filter out the operations. It is also easy to understand and implement for simple stuff. For example:
import re
import click
date_pattern = re.compile(r'\d{2} \w+ \d{2}')
agent_pattern = re.compile(r'agent:\s(.*?),')
err_pattern = re.compile(r'ErrIdText:\s(.*?),')
clear_pattern = re.compile(r'clearedID:\s(.*?)\)')


def _search_or_label(pattern, line, label):
    # Return the text matched by `pattern` anywhere in `line`, or `label`
    # when the pattern does not occur.
    found = pattern.search(line)
    return found.group() if found else label


# Command-line entry point: every row holds date, time and ids; each
# --filter-* flag appends the corresponding column.
# These must be real decorators: click parses the command line and supplies
# the four arguments when get_valid_rows() is called with none.
# A docstring is deliberately avoided here because click would print it in
# the --help output.
@click.command()
@click.option('--filter-agent', is_flag=True, default=False, help='Filter agent')
@click.option('--filter-err-id', is_flag=True, default=False, help='Filter Error ID')
@click.option('--filter-cleared-id', is_flag=True, default=False, help='Filter Cleared ID')
@click.argument('filename')
def get_valid_rows(filter_agent, filter_err_id, filter_cleared_id, filename):
    data = []
    with open(filename) as f:
        for line in f:
            # A line that does not start with the date pattern raises
            # AttributeError here.
            date = date_pattern.match(line).group()
            fields = line.split()
            row = [date, fields[3], fields[4]]
            if filter_agent:
                row.append(_search_or_label(agent_pattern, line, 'agent:'))
            if filter_err_id:
                row.append(_search_or_label(err_pattern, line, 'ErrIdText:'))
            if filter_cleared_id:
                row.append(_search_or_label(clear_pattern, line, 'clearedID:'))
            data.append(row)
    # Do everything else


if __name__ == "__main__":
    get_valid_rows()
It'll even generate a well-formatted help message for you
Usage: parselog.py [OPTIONS] FILENAME
Options:
--filter-agent Filter agent
--filter-err-id Filter Error ID
--filter-cleared-id Filter Cleared ID
--help Show this message and exit.
You could edit it to your liking to achieve exactly what you want.

That's a very large question, but what you need is either a UI (like Tkinter or Pyqt) or a command line interface (which you could implement yourself, or build using a library like docopt).
However, the command-line option will be a lot simpler to implement.

Related

Modify a function from another function in Python

I hope everyone's having a good day!
So I have this code that loads a text file, reads all the data, assigns each line to a different variable. I want to be able to change (for example) the current_user.config(text=User1) in FileRead function to current_user.config(text=User2) whenever I call the function NextAccount so I can sort of print each set of user and pass on screen (or do something with them).
Edit: Should've mentioned I'm a beginner so I'm probably not doing this the best way. My program is basically supposed to read around 30 combinations of user/pass and I want to display the first one first and then use a button to navigate through (Next account, previous account). I wanted to assign each to a different variable just because I want to use pyautogui to copy paste these combinations to a field in another program
from tkinter import *
from tkinter import filedialog as fd
file_path = ''
datalist = []
# Callback of the 'Select File' button: ask the user to pick a file, store the
# chosen path in the module-level file_path, then load it with FileRead().
def OpenFile():
global file_path
file_path = fd.askopenfilename()
FileRead()
# Read every line of the file at the module-level file_path and show the first
# two lines (User1 / Pass1) in the current_user and current_pass labels.
def FileRead():
data = open(file_path)
# NOTE(review): there is no `global datalist` here, so this binds a local name
# and the module-level datalist stays empty - confirm that is intended.
datalist = data.readlines()
# Lines alternate user, password, user, password, ...
User1 = datalist[0]
Pass1 = datalist[1]
User2 = datalist[2]
Pass2 = datalist[3]
User3 = datalist[4]
Pass3 = datalist[5]
#.....so on
current_user.config(text=User1) #<<<THESE TWO VALUES WHEN function NextAccount is called
current_pass.config(text=Pass1) #<<<
data.close()
def NextAccount():
    #I want THIS func to be able to change the FileRead function...
    # Placeholder body so the module parses (a def needs at least one
    # statement); the navigation logic is still to be written.
    pass
# Build the main window: a file-picker button, one label each for the current
# user and password, and a button that triggers NextAccount.
window = Tk()
window.geometry('600x600')
window.config(bg='black')
file_button = Button(window,text='Select File', command=OpenFile)
file_button.pack()
current_user = Label(window)
current_user.pack()
current_pass = Label(window)
current_pass.pack()
# The callback is NextAccount (NextAcc is not defined anywhere), and the
# button has to be packed before it appears in the window.
next_acc_button = Button(window, text='Next Account', command=NextAccount)
next_acc_button.pack()
window.mainloop()
One way of accomplishing what you're after might be for NextAccount to pop the first user/password from the list. This is easier IMO if your OpenFile function gives you a list of [(user1, pass1), ...] rather than [user1, pass1, ...].
I might structure it something like this:
datalist = []
def FileRead(file_path: str) -> list[tuple[str, str]]:
    """Read file_path and return its lines paired up as (user, passwd) tuples.

    Lines alternate user, password, user, password, ...  The trailing newline
    of each line is removed so the values can be displayed or pasted as-is.
    A final line without a partner is ignored.
    """
    with open(file_path) as data:
        lines = [line.rstrip('\n') for line in data]
    # Even-indexed lines are users, odd-indexed lines are their passwords.
    return list(zip(lines[::2], lines[1::2]))
def OpenFile() -> None:
    """Ask the user for a filename, read user/password data, and
    add all data from the file into datalist.

    Does nothing when the dialog is dismissed without choosing a file
    (askopenfilename then returns an empty value).
    """
    file_path = fd.askopenfilename()
    if not file_path:
        return
    datalist.extend(FileRead(file_path))
def NextAccount() -> None:
    """Print the current user/password and pop it from datalist.

    Does nothing when datalist is empty (no file loaded yet, or every
    account already shown).
    """
    if not datalist:
        return
    print(datalist.pop(0))
I'm not sure I fully understand what you are asking for.
First of all, if you read a config file, maybe you should have a look on configparser, your code will be more readable as it is a json like way to get config.
If I understand well, you want to go through all the users you get with your config file and change which one you call ?
If yes, put your users into a list and create an iterator on that list.
user1 = {"username": "user1", "password": "1234"}
user2 = {"username": "user2", "password": "4567"}
users = [user1, user2]
itr_users = iter(users)
Then, whenever you call your function, call next(itr_users) to get the next item of the users list and do your stuff. You can access each user's information this way:
def next_item():
    """Advance itr_users and return the username of the user it yields.

    Raises StopIteration once every user has been consumed.
    """
    curr_user = next(itr_users)
    return curr_user["username"]
# First call
# > user1
# Second call
# > user2
In this scenario, I would rather try to:
Give the FileRead function a parameter that indicates which User and Pass to use, like:
def FileRead(n):
    """Show the n-th (user, pass) pair of the file in the two labels.

    n is a zero-based index into the list of pairs built from the file at
    the module-level file_path; an out-of-range n raises IndexError.
    """
    with open(file_path) as data:
        datalist = data.readlines()
    # Pair consecutive lines: (line 0, line 1), (line 2, line 3), ...
    # Stopping at len - 1 ignores a final line that has no partner.
    user_pass_list = [(datalist[i], datalist[i+1]) for i in range(0, len(datalist) - 1, 2)]
    current_user.config(text=user_pass_list[n][0]) #<<<THESE TWO VALUES WHEN function NextAccount is called
    current_pass.config(text=user_pass_list[n][1]) #<<<
Or set a global variable that the FileRead function will use:
n_user_pass = 0
def FileRead():
    """Show the (user, pass) pair selected by the global n_user_pass.

    n_user_pass is a zero-based index into the list of pairs built from the
    file at the module-level file_path; an out-of-range value raises
    IndexError.
    """
    with open(file_path) as data:
        datalist = data.readlines()
    # Pair consecutive lines: (line 0, line 1), (line 2, line 3), ...
    # Stopping at len - 1 ignores a final line that has no partner.
    user_pass_list = [(datalist[i], datalist[i+1]) for i in range(0, len(datalist) - 1, 2)]
    # Index with the global n_user_pass; this variant has no `n` in scope.
    current_user.config(text=user_pass_list[n_user_pass][0]) #<<<THESE TWO VALUES WHEN function NextAccount is called
    current_pass.config(text=user_pass_list[n_user_pass][1]) #<<<
# Rebind the module-level n_user_pass, the variable FileRead() is meant to
# use to pick the pair to display. The new value is left as a placeholder
# (...) in this sketch.
def NextAccount():
global n_user_pass
n_user_pass = ...
I changed the way you stored your user and passes, to make it into a list [(user1, pass1), ... ] that you can access through indices

How to scrape a link from a multipart email in python

I have a program which logs on to a specified gmail account and gets all the emails in a selected inbox that were sent from an email that you input at runtime.
I would like to be able to grab all the links from each email and append them to a list so that i can then filter out the ones i don't need before outputting them to another file. I was using a regex to do this which requires me to convert the payload to a string. The problem is that the regex i am using doesn't work for findall(), it only works when i use search() (I am not too familiar with regexes). I was wondering if there was a better way to extract all links from an email that doesn't involve me messing around with regexes?
My code currently looks like this:
# Fragment of a method (its header is outside this excerpt): search the
# mailbox, report how many messages matched, then print the first URL found
# in each part of every multipart message.
print(f'[{Mail.timestamp}] Scanning inbox')
sys.stdout.write(Style.RESET)
self.search_mail_status, self.amount_matching_criteria = self.login_session.search(Mail.CHARSET,search_criteria)
if self.amount_matching_criteria == 0 or self.amount_matching_criteria == '0':
print(f'[{Mail.timestamp}] No mails from that email address could be found...')
Mail.enter_to_continue()
import main
main.main_wrapper()
else:
# NOTE(review): '\s' sits in a non-raw string literal here; a raw string
# (r'...') would avoid the invalid-escape warning - confirm.
pattern = '(?P<url>https?://[^\s]+)'
prog = re.compile(pattern)
self.amount_matching_criteria = self.amount_matching_criteria[0]
self.amount_matching_criteria_str = str(self.amount_matching_criteria)
# num_mails is first derived from the string form (used only for the count
# printed below) and is re-assigned from .split() before the fetch loop.
num_mails = re.search(r"\d.+",self.amount_matching_criteria_str)
num_mails = ((num_mails.group())[:-1]).split(' ')
sys.stdout.write(Style.GREEN)
print(f'[{Mail.timestamp}] Status code of {self.search_mail_status}')
sys.stdout.write(Style.RESET)
sys.stdout.write(Style.YELLOW)
print(f'[{Mail.timestamp}] Found {len(num_mails)} emails')
sys.stdout.write(Style.RESET)
num_mails = self.amount_matching_criteria.split()
for message_num in num_mails:
individual_response_code, individual_response_data = self.login_session.fetch(message_num, '(RFC822)')
message = email.message_from_bytes(individual_response_data[0][1])
# Only multipart messages are handled in this excerpt.
if message.is_multipart():
print('multipart')
multipart_payload = message.get_payload()
for sub_message in multipart_payload:
string_payload = str(sub_message.get_payload())
# search() yields only the first match and returns None when the part has no
# URL, in which case .group() raises AttributeError.
print(prog.search(string_payload).group("url"))
I ended up using this for loop with a recursive function and a regex to get the links. I then dropped every link that does not contain the substring you input earlier in the program, and added the remaining links to a set:
# Fragment of a method (its header is outside this excerpt): fetch each
# matching message, collect every URL in its text, and keep the ones that
# contain self.substring_filter.
# NOTE(review): counter, prog and num_mails are not defined in this excerpt;
# presumably they are set earlier in the same method - confirm.
for message_num in self.amount_matching_criteria.split():
counter += 1
_, self.individual_response_data = self.login_session.fetch(message_num, '(RFC822)')
self.raw = email.message_from_bytes(self.individual_response_data[0][1])
raw = self.raw
# Mail.scrape_email returns a decoded payload, which is parsed again and then
# turned into a plain string for the regex.
self.scraped_email_value = email.message_from_bytes(Mail.scrape_email(raw))
self.scraped_email_value = str(self.scraped_email_value)
self.returned_links = prog.findall(self.scraped_email_value)
for i in self.returned_links:
if self.substring_filter in i:
# link_set is used with .add(), so duplicate links are stored once if it is
# a set as the surrounding text says.
self.link_set.add(i)
self.timestamp = time.strftime('%H:%M:%S')
print(f'[{self.timestamp}] Links scraped: [{counter}/{len(num_mails)}]')
The function used:
def scrape_email(raw):
    """Return the decoded payload of the first non-multipart part of raw.

    While the message is multipart, only its first sub-part (index 0) is
    followed; the remaining sub-parts are not inspected.  The result is
    whatever get_payload(None, True) returns for the part reached.
    """
    part = raw
    while part.is_multipart():
        part = part.get_payload(0)
    return part.get_payload(None, True)

monitoring a text site (json) using python

IM working on a program to grab variant ID from this website
https://www.deadstock.ca/collections/new-arrivals/products/nike-air-max-1-cool-grey.json
Im using the code
import json
import requests
import time
# Download the product JSON and try to print the product title.
endpoint = "https://www.deadstock.ca/collections/new-arrivals/products/nike-air-max-1-cool-grey.json"
req = requests.get(endpoint)
reqJson = json.loads(req.text)
# NOTE(review): the asker reports "TypeError: string indices must be integers"
# from this loop, which is consistent with reqJson['product'] being a dict
# (iterating a dict yields its string keys) - confirm against the response.
for id in reqJson['product']:
name = (id['title'])
print (name)
I dont know what to do here in order to grab the Name of the items. If you visit the link you will see that the name is under 'title'. If you could help me with this that would be awesome.
I get the error message "TypeError: string indices must be integers" so im not too sure what to do.
Your biggest problem right now is that you are adding items to the list before you're checking if they're in it, so everything is coming back as in the list.
Looking at your code right now, I think what you want to do is combine things into a single for loop.
Also as a heads up you shouldn't use a variable name like list as it is shadowing the built-in Python function list().
list = [] # You really should change this to something else
def check_endpoint():
    """Poll the endpoint once and send() every product not seen before.

    A product is identified by its globalPid.  PIDs already present in the
    module-level `list` are skipped; new ones are passed to send() and then
    recorded so they are reported only once.
    """
    endpoint = ""
    req = requests.get(endpoint)
    reqJson = json.loads(req.text)
    for thread in reqJson['threads']:  # For each entry in threads list
        product = thread['product']
        PID = product['globalPid']  # Get current PID
        if PID in list:
            print('checking for new products')
            continue
        title = product['title']
        Image = product['imageUrl']
        ReleaseType = product['selectionEngine']
        Time = product['effectiveInStockStartSellDate']
        send(title, PID, Image, ReleaseType, Time)
        # The message needs a placeholder; without one .format(PID)
        # silently drops the PID.
        print('added {} to database'.format(PID))
        list.append(PID)  # Add PID to the list
def main():
    """Call check_endpoint() every 20 seconds, forever."""
    while True:
        check_endpoint()
        time.sleep(20)


if __name__ == "__main__":
    main()

Creating loop for __main__

I am new to Python, and I want your advice on something.
I have a script that runs one input value at a time, and I want it to be able to run a whole list of such values without me typing the values one at a time. I have a hunch that a "for loop" is needed for the main method listed below. The value is "gene_name", so effectively, i want to feed in a list of "gene_names" that the script can run through nicely.
Hope I phrased the question correctly, thanks! The chunk in question seems to be
def get_probes_from_genes(gene_names)
import json
import urllib2
import os
import pandas as pd
api_url = "http://api.brain-map.org/api/v2/data/query.json"
def get_probes_from_genes(gene_names):
    """Return a dict mapping probe id to probe name for the given gene(s).

    gene_names may be a single gene acronym or a list of them.  The probes
    are looked up with one query against api_url.

    Raises Exception when the query returns no probes.
    """
    if not isinstance(gene_names, list):
        gene_names = [gene_names]
    #in case there are white spaces in gene names
    gene_names = ["'%s'" % gene_name for gene_name in gene_names]

    # The query string is built up piece by piece; each part must be appended
    # (+=), not assigned.
    api_query = "?criteria=model::Probe"
    api_query += ",rma::criteria,[probe_type$eq'DNA']"
    api_query += ",products[abbreviation$eq'HumanMA']"
    api_query += ",gene[acronym$eq%s]" % (','.join(gene_names))
    api_query += ",rma::options[only$eq'probes.id','name']"

    data = json.load(urllib2.urlopen(api_url + api_query))
    d = {probe['id']: probe['name'] for probe in data['msg']}
    if not d:
        # Report every requested gene; the comprehension variable gene_name
        # is not reliably available at this point.
        raise Exception("Could not find any probes for %s gene. Check "
                        "http://help.brain-map.org/download/attachments/2818165/HBA_ISH_GeneList.pdf?version=1&modificationDate=1348783035873 "
                        "for list of available genes." % ', '.join(gene_names))
    return d
def get_expression_values_from_probe_ids(probe_ids):
    """Fetch expression data for the given probe id(s) from api_url.

    probe_ids may be a single id or a list of ids.

    Returns a 4-tuple (expression_values, well_ids, well_coordinates,
    donor_names): expression_values holds one list of floats per probe, and
    the other three hold one entry per sample in the response.
    """
    if not isinstance(probe_ids, list):
        probe_ids = [probe_ids]
    # quote each id before it is placed in the query string
    probe_ids = ["'%s'" % probe_id for probe_id in probe_ids]

    api_query = "?criteria=service::human_microarray_expression[probes$in%s]" % (','.join(probe_ids))
    data = json.load(urllib2.urlopen(api_url + api_query))

    probes = data["msg"]["probes"]
    samples = data["msg"]["samples"]
    # One row of floats per requested probe, in response order.
    expression_values = [[float(expression_value) for expression_value in probes[i]["expression_level"]]
                         for i in range(len(probe_ids))]
    well_ids = [sample["sample"]["well"] for sample in samples]
    donor_names = [sample["donor"]["name"] for sample in samples]
    well_coordinates = [sample["sample"]["mri"] for sample in samples]
    return expression_values, well_ids, well_coordinates, donor_names
def get_mni_coordinates_from_wells(well_ids):
    """Return the rows of corrected_mni_coordinates.csv for the given wells.

    The CSV is read from the "data" directory next to this file, with its
    first column as the index.  Rows are selected by index label and returned
    as a list of tuples (index excluded), in the order of well_ids.
    """
    package_directory = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(package_directory, "data", "corrected_mni_coordinates.csv")
    frame = pd.read_csv(csv_path, header=0, index_col=0)
    # .loc is the label-based indexer; the .ix indexer is no longer available
    # in current pandas.
    return list(frame.loc[well_ids].itertuples(index=False))
# Script entry point: look up the probes for one hard-coded gene ("SLC6A2"),
# fetch their expression values, and print the MNI coordinates of the wells.
# The print statement below is Python 2 syntax.
if __name__ == '__main__':
probes_dict = get_probes_from_genes("SLC6A2")
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
whoa, first things first. Python ain't Java, so do yourself a favor and use a nice """xxx\nyyy""" string, with triple quotes to multiline.
# One triple-quoted literal instead of repeated concatenation.  Line breaks
# typed inside the quotes become part of the string, and no closing quote
# belongs at the end of the individual lines.
api_query = """?criteria=model::Probe
,rma::criteria,[probe_type$eq'DNA']
...
"""
or something like that. you will get white spaces as typed, so you may need to adjust.
If, like suggested, you opt to loop on the call to your function through a file, you will need to either try/except your data-not-found exception or you will need to handle missing data without throwing an exception. I would opt for returning an empty result myself and letting the caller worry about what to do with it.
If you do opt for raise-ing an Exception, create your own, rather than using a generic exception. That way your code can catch your expected Exception first.
class MyNoDataFoundException(Exception):
    """Raised when a lookup returns no data, so callers can catch this case specifically."""
#replace your current raise code with...
if not d:
raise MyNoDataFoundException(your message here)
clarification about catching exceptions, using the accepted answer as a starting point:
if __name__ == '__main__':
    # One gene name per line in /tmp/genes.txt; genes without probes are
    # reported and skipped.
    with open(r"/tmp/genes.txt","r") as f:
        for line in f:
            #keep track of your input data
            search_data = line.strip()
            try:
                probes_dict = get_probes_from_genes(search_data)
            except MyNoDataFoundException as e:
                #and do whatever you feel you need to do here...
                print("bummer about search_data:%s:\nexception:%s" % (search_data, e))
                # Move on to the next gene: probes_dict is unset (or left
                # over from the previous gene) at this point.
                continue
            # list(...) hands over a real list of probe ids on Python 2 and 3.
            expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(list(probes_dict))
            print(get_mni_coordinates_from_wells(well_ids))
You may want to create a file with Gene names, then read content of the file and call your function in the loop. Here is an example below
if __name__ == '__main__':
    # One gene name per line in /tmp/genes.txt; run the whole lookup for each.
    with open(r"/tmp/genes.txt","r") as f:
        for line in f:
            probes_dict = get_probes_from_genes(line.strip())
            # list(...) hands over a real list of probe ids on Python 2 and 3.
            expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(list(probes_dict))
            print(get_mni_coordinates_from_wells(well_ids))

Python refresh file from disk

I have a python script that calls a system program and reads the output from a file out.txt, acts on that output, and loops. However, it doesn't work, and a close investigation showed that the python script just opens out.txt once and then keeps on reading from that old copy. How can I make the python script reread the file on each iteration? I saw a similar question here on SO but it was about a python script running alongside a program, not calling it, and the solution doesn't work. I tried closing the file before looping back but it didn't do anything.
EDIT:
I already tried closing and opening, it didn't work. Here's the code:
import subprocess, os, sys
# Count the solutions of the problem in the file named by sys.argv[1]: copy it
# to 'foo', run minisat on 'foo' repeatedly, and after each "SAT" result
# append the negated solution to 'foo' so that solution is excluded next time.
# The print statements are Python 2 syntax.
filename = sys.argv[1]
file = open(filename,'r')
foo = open('foo','w')
foo.write(file.read().rstrip())
# NOTE(review): foo is rebound to a second handle here without an explicit
# close() of the first one - confirm the initial write reaches the file.
foo = open('foo','a')
crap = open(os.devnull,'wb')
numSolutions = 0
while True:
subprocess.call(["minisat", "foo", "out"], stdout=crap,stderr=crap)
out = open('out','r')
if out.readline().rstrip() == "SAT":
numSolutions += 1
clause = out.readline().rstrip()
clause = clause.split(" ")
print clause
clause = map(int,clause)
# Negate every literal of the solution just found.
clause = map(lambda x: -x,clause)
output = ' '.join(map(lambda x: str(x),clause))
print output
# NOTE(review): nothing flushes foo before the next minisat run, so this
# appended line may still sit in the write buffer - confirm.
foo.write('\n'+output)
out.close()
else:
break
print "There are ", numSolutions, " solutions."
You need to flush foo so that the external program can see its latest changes. When you write to a file, the data is buffered in the local process and sent to the system in larger blocks. This is done because updating the system file is relatively expensive. In your case, you need to force a flush of the data so that minisat can see it.
foo.write('\n'+output)
foo.flush()
I rewrote it to hopefully be a bit easier to understand:
import os
from shutil import copyfile
import subprocess
import sys
TEMP_CNF = "tmp.in"
TEMP_SOL = "tmp.out"
NULL = open(os.devnull, "wb")
def all_solutions(cnf_fname):
    """
    Given a file containing a set of constraints,
    generate all possible solutions.

    Each solution is yielded as a list of ints read from the second line of
    the minisat output file.  After a solution is yielded, its negation is
    appended to the working copy of the constraints so the next run cannot
    return it again.  Generation stops when the first output line does not
    start with "SAT".
    """
    # make a copy of original input file
    copyfile(cnf_fname, TEMP_CNF)
    while True:
        # run minisat to solve the constraint problem
        subprocess.call(["minisat", TEMP_CNF, TEMP_SOL], stdout=NULL, stderr=NULL)
        # look at the result
        with open(TEMP_SOL) as result:
            line = next(result)
            if not line.startswith("SAT"):
                # Failure - no more solutions possible
                return
            # Success - return solution
            line = next(result)
            solution = [int(i) for i in line.split()]
        yield solution
        # disqualify found solution
        with open(TEMP_CNF, "a") as constraints:
            new_constraint = " ".join(str(-i) for i in solution)
            constraints.write("\n")
            constraints.write(new_constraint)
def main(cnf_fname):
    """
    Given a file containing a set of constraints,
    count the possible solutions.
    """
    count = sum(1 for _ in all_solutions(cnf_fname))
    print("There are {} solutions.".format(count))


if __name__=="__main__":
    # Exactly one argument is expected: the constraints file.
    if len(sys.argv) == 2:
        main(sys.argv[1])
    else:
        print("Usage: {} cnf.in".format(sys.argv[0]))
You take your file_var and end the loop with file_var.close().
for ... :
ga_file = open(out.txt, 'r')
... do stuff
ga_file.close()
Demo of an implementation below (as simple as possible, this is all of the Jython code needed)...
__author__ = ''
import time
var = 'false'
# var is never changed, so this loop polls out.txt until the process is
# stopped.
while var == 'false':
    # Re-open the file on every pass so each read reflects what is on disk.
    with open('out.txt', 'r') as out:
        content = out.read()
    time.sleep(3)
    print(content)
generates this output:
2015-01-09, 'stuff added'
2015-01-09, 'stuff added' # <-- this is when i just saved my update
2015-01-10, 'stuff added again :)' # <-- my new output from file reads
I strongly recommend reading the error messages. They hold quite a lot of information.
I think the full file name should be written for debug purposes.

Categories

Resources