For loop function call file parsing - python

I recognize that this code is wildly inefficient.
I'm at a complete loss here, and I'm planning to remove the function and just make the code procedural in main. But I'm hoping someone can explain what I'm seeing here. The loop in main() runs and calls matchName(). matchName() executes it's loop then, when it should return for the next "vtRow", instead it just stops executing. So the output is the first record of vtData and every record from adData.
import csv, re
def main():
#1st word
oneWord = re.compile( '\A([\w]+)' )
#1st 3
first3 = re.compile( '\A([\w]{3})' )
#last 3
last3 = re.compile( '(?=([\w]{3})$)' )
mArray = [ oneWord, first3, last3 ]
adFile = open('adData.csv', 'rb')
adFields = ('lName','fName','cNum','addy','city','state','zip','phone','sex')
adData = csv.reader(adFile, dialect='excel')
vtFile = open('data360.csv','rb')
vtFields = ('ref','fName','lName')
vtData = csv.reader(vtFile, dialect='excel')
for vtRow in vtData:
matchName(vtRow, adData, mArray) # appears that this runs once and exits
def matchName(curVtRow, adData, mArr):
lName = curVtRow[4].lower()
fName = curVtRow[3].lower()
Posib = []
for row in adData:
cName = row[0].lower()
print "vt " + lName + " ; ad " + cName
return 1
if __name__ == "__main__":
main()

The issue is that looping with adData causes adFile to be read, and so after the first call to matchName() the file will have been read all the way and thus adData won't be looped over as adData.next() won't result in anything (and thus the print statement will not be executed). I suggest placing adFile.seek(0) after the call to matchName(). Note that just recreating adData won't work; I discovered recently that a csv reader updates its underlying object's file position rather than keeping track of it on its own.

Related

Weird sequence when writing into txt document

Hi everyone this is my first time here, and I am a beginner in Python. I am in the middle of writing a program that returns a txt document containing information about a stock (Watchlist Info.txt), based on the input of another txt document containing the company names (Watchlist).
To achieve this, I have written 3 functions, of which 2 functions reuters_ticker() and stock_price() are completed as shown below:
def reuters_ticker(desired_search):
#from company name execute google search for and return reuters stock ticker
try:
from googlesearch import search
except ImportError:
print('No module named google found')
query = desired_search + ' reuters'
for j in search(query, tld="com.sg", num=1, stop=1, pause=2):
result = j
ticker = re.search(r'\w+\.\w+$', result)
return ticker.group()
Stock Price:
def stock_price(company, doc=None):
ticker = reuters_ticker(company)
request = 'https://www.reuters.com/companies/' + ticker
raw_main = pd.read_html(request)
data1 = raw_main[0]
data1.set_index(0, inplace=True)
data1 = data1.transpose()
data2 = raw_main[1]
data2.set_index(0, inplace=True)
data2 = data2.transpose()
stock_info = pd.concat([data1,data2], axis=1)
if doc == None:
print(company + '\n')
print('Previous Close: ' + str(stock_info['Previous Close'][1]))
print('Forward PE: ' + str(stock_info['Forward P/E'][1]))
print('Div Yield(%): ' + str(stock_info['Dividend (Yield %)'][1]))
else:
from datetime import date
with open(doc, 'a') as output:
output.write(date.today().strftime('%d/%m/%y') + '\t' + str(stock_info['Previous Close'][1]) + '\t' + str(stock_info['Forward P/E'][1]) + '\t' + '\t' + str(stock_info['Dividend (Yield %)'][1]) + '\n')
output.close()
The 3rd function, watchlist_report(), is where I am getting problems with writing the information in the format as desired.
def watchlist_report(watchlist):
with open(watchlist, 'r') as companies, open('Watchlist Info.txt', 'a') as output:
searches = companies.read()
x = searches.split('\n')
for i in x:
output.write(i + ':\n')
stock_price(i, doc='Watchlist Info.txt')
output.write('\n')
When I run watchlist_report('Watchlist.txt'), where Watchlist.txt contains 'Apple' and 'Facebook' each on new lines, my output is this:
26/04/20 275.03 22.26 1.12
26/04/20 185.13 24.72 --
Apple:
Facebook:
Instead of what I want and would expect based on the code I have written in watchlist_report():
Apple:
26/04/20 275.03 22.26 1.12
Facebook:
26/04/20 185.13 24.72 --
Therefore, my questions are:
1) Why is my output formatted this way?
2) Which part of my code do I have to change to make the written output in my desired format?
Any other suggestions about how I can clean my code and any libraries I can use to make my code nicer are also appreciated!
You handle two different file-handles - the file-handle inside your watchlist_report gets closed earlier so its being written first, before the outer functions file-handle gets closed, flushed and written.
Instead of creating a new open(..) in your function, pass the current file handle:
def watchlist_report(watchlist):
with open(watchlist, 'r') as companies, open('Watchlist Info.txt', 'a') as output:
searches = companies.read()
x = searches.split('\n')
for i in x:
output.write(i + ':\n')
stock_price(i, doc = output) # pass the file handle
output.write('\n')
Inside def stock_price(company, doc=None): use the provided filehandle:
def stock_price(company, output = None): # changed name here
# [snip] - removed unrelated code for this answer for brevity sake
if output is None: # check for None using IS
print( ... ) # print whatever you like here
else:
from datetime import date
output.write( .... ) # write whatever you want it to write
# output.close() # do not close, the outer function does this
Do not close the file handle in the inner function, the context handling with(..) of the outer function does that for you.
The main takeaway for file handling is that things you write(..) to your file are not neccessarily placed there immediately. The filehandler chooses when to actually persist data to your disk, the latests it does that is when it goes out of scope (of the context handler) or when its internal buffer reaches some threshold so it "thinks" it is now prudent to alter to data on your disc. See How often does python flush to a file? for more infos.

python Multiple if statements in while loop only first if statement runs

I'm trying to get if statements to run and it seems only the 1st one will run the others are just skipped. I have tried adding else and dummy action and still skips over the other. I have been playing with the for a couple days and can't seem to get it to check all the if statements.
#!/usr/bin/env python
#version 1.1 Beta
import maidenhead as mh
from math import radians, sin, cos, acos
import re
import telnetlib
import time
import sys
HOST = "xxxxx" #set your cluster host-name here
PORT = 7300 #cluster port
user = "xxxx" #cluster user
mygrid = "fn10"
sys.stdout.write('\33]0;KM4OUS Cluster Connector\a')
sys.stdout.flush()
tn = telnetlib.Telnet(HOST,PORT)
tn.read_until(b"login: ")
tn.write(user.encode('ascii') + b"\n")
count = 0
timer = 0
tsb = "y"
fsb = "y"
tcw = "y"
fcw = "y"
print("Welcome to the KM4OUS Cluster Connector")
while (count < 1):
fw = open("cluster.csv", "w")
push = tn.read_very_eager()
fw.write(push)
fw.close()
f = open('cluster.csv','r')
#
if fcw == "y":
regexd = re.compile(r'(DX de .+\s70[2-9].+Z\s\w\w\w\w)')
for z in f:
ftcw = regexd.findall(z)
for ftc in ftcw:
ftc = re.sub('\'|\,|\(|\)','', str(ftc))
print(ftc)," \n40M CW DIGI"
if tsb == "y":
regexa = re.compile(r'(DX de .+\s142[2-9].+Z\s\w\w\w\w)|(DX de .+\s143[0-5].+Z\s\w\w\w\w)')
for x in f:
twssb = regexa.findall(x)
for tws in twssb:
tws = re.sub('\'|\,|\(|\)|\"','', str(tws))
print(tws),"\n20M SSB"
if tcw == "y":
regexb = re.compile(r'(DX de .+\s140[2-9].Z\s\w\w\w\w)')
for xc in f:
twcw = regexb.findall(xc)
for twc in twcw:
twc = re.sub('\'|\,|\(|\)|\"','', str(twc))
print(twc)," \n20M CW DIGI"
if fsb == "y":
regexc = re.compile(r'(DX de .+\s717[8-9].Z\s\w\w\w\w)|(DX de .+\s72[0-9].Z\s\w\w\w\w)|(DX de .+\s718.Z\s\w\w\w\w)')
for y in f:
forty = regexc.findall(y)
for fts in forty:
fts = re.sub('\'|\,|\(|\|\")','', str(fts))
print(fts)," \n40M SSB"
f.close()
time.sleep(5)
if timer == 30:
print ("\n" + time.ctime() + "\nKM4OUS CLuster Connector\n")
#cw.close()
count = 0
timer = timer+1
if anyone has a simple solution I would like to hear it. This seems like something that should be simple but it's not working as expected. I looked up all the info on if elif and else and can't find anything that's close to this.
The issue with your code is not with the if statements, but with the for loop you have over the file f. That kind of loop will consume the contents of the file the first time you run it. If you try looping over the file again in one of the later if blocks, there will be nothing to loop over.
There are a few ways you can fix the issue.
One is to open the file in the if blocks, rather than once at the top of the while. This way, each of the inner loops gets its own file to iterate over:
# don't open f up here
if fcw == "y":
regexd = re.compile(r'(DX de .+\s70[2-9].+Z\s\w\w\w\w)')
with open('cluster.csv','r') as f:
for z in f:
...
if tsb == "y":
regexa = re.compile(r'(DX de .+\s142[2-9].+Z\s\w\w\w\w)|(DX de .+\s143[0-5].+Z\s\w\w\w\w)')
for x in f:
twssb = regexa.findall(x)
for tws in twssb:
...
if tcw == "y":
regexb = re.compile(r'(DX de .+\s140[2-9].Z\s\w\w\w\w)')
with open('cluster.csv','r') as f:
for xc in f:
...
In that code I'm using a with statement in each place I open f, as that will close the file automatically when the block ends. I would recommend using with for your other files as well.
Another approach would be to rewind the file object to the start before you loop over it. You can do this with f.seek(0). I'd put one before each of your loops, as it doesn't hurt to seek to the start of the file if you're already there. Note that the ability to seek forwards and backwards is an idiosyncratic feature of files in Python. Most iterators cannot do that. They are one-shot and once you've consumed them, there's no way to go back.
A final option is to read the lines from your file into a list or other data structure and iterate over that instead of over the file. You can either do f.readlines() or just list(f) to get a list of lines, and you can iterate over the list as many times as you want. Or, since you're writing the contents of the file based on the push variable at the top of the while loop, maybe you can skip the file all together and just use something like push.splitlines().
Put print statements under each if statement and you will see that they are in fact running.
The problem is that once you iterate over the file object once, it reaches the end. The next time you iterate, it's already at the end and does nothing.
You either need to reset the file location, or close the file and read it again each time. Read the I/O section of the Python tutorial. https://docs.python.org/3/tutorial/inputoutput.html

Creating loop for __main__

I am new to Python, and I want your advice on something.
I have a script that runs one input value at a time, and I want it to be able to run a whole list of such values without me typing the values one at a time. I have a hunch that a "for loop" is needed for the main method listed below. The value is "gene_name", so effectively, i want to feed in a list of "gene_names" that the script can run through nicely.
Hope I phrased the question correctly, thanks! The chunk in question seems to be
def get_probes_from_genes(gene_names)
import json
import urllib2
import os
import pandas as pd
api_url = "http://api.brain-map.org/api/v2/data/query.json"
def get_probes_from_genes(gene_names):
if not isinstance(gene_names,list):
gene_names = [gene_names]
#in case there are white spaces in gene names
gene_names = ["'%s'"%gene_name for gene_name in gene_names]**
api_query = "?criteria=model::Probe"
api_query= ",rma::criteria,[probe_type$eq'DNA']"
api_query= ",products[abbreviation$eq'HumanMA']"
api_query= ",gene[acronym$eq%s]"%(','.join(gene_names))
api_query= ",rma::options[only$eq'probes.id','name']"
data = json.load(urllib2.urlopen(api_url api_query))
d = {probe['id']: probe['name'] for probe in data['msg']}
if not d:
raise Exception("Could not find any probes for %s gene. Check " \
"http://help.brain- map.org/download/attachments/2818165/HBA_ISH_GeneList.pdf? version=1&modificationDate=1348783035873 " \
"for list of available genes."%gene_name)
return d
def get_expression_values_from_probe_ids(probe_ids):
if not isinstance(probe_ids,list):
probe_ids = [probe_ids]
#in case there are white spaces in gene names
probe_ids = ["'%s'"%probe_id for probe_id in probe_ids]
api_query = "? criteria=service::human_microarray_expression[probes$in%s]"% (','.join(probe_ids))
data = json.load(urllib2.urlopen(api_url api_query))
expression_values = [[float(expression_value) for expression_value in data["msg"]["probes"][i]["expression_level"]] for i in range(len(probe_ids))]
well_ids = [sample["sample"]["well"] for sample in data["msg"] ["samples"]]
donor_names = [sample["donor"]["name"] for sample in data["msg"] ["samples"]]
well_coordinates = [sample["sample"]["mri"] for sample in data["msg"] ["samples"]]
return expression_values, well_ids, well_coordinates, donor_names
def get_mni_coordinates_from_wells(well_ids):
package_directory = os.path.dirname(os.path.abspath(__file__))
frame = pd.read_csv(os.path.join(package_directory, "data", "corrected_mni_coordinates.csv"), header=0, index_col=0)
return list(frame.ix[well_ids].itertuples(index=False))
if __name__ == '__main__':
probes_dict = get_probes_from_genes("SLC6A2")
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
whoa, first things first. Python ain't Java, so do yourself a favor and use a nice """xxx\nyyy""" string, with triple quotes to multiline.
api_query = """?criteria=model::Probe"
,rma::criteria,[probe_type$eq'DNA']
...
"""
or something like that. you will get white spaces as typed, so you may need to adjust.
If, like suggested, you opt to loop on the call to your function through a file, you will need to either try/except your data-not-found exception or you will need to handle missing data without throwing an exception. I would opt for returning an empty result myself and letting the caller worry about what to do with it.
If you do opt for raise-ing an Exception, create your own, rather than using a generic exception. That way your code can catch your expected Exception first.
class MyNoDataFoundException(Exception):
pass
#replace your current raise code with...
if not d:
raise MyNoDataFoundException(your message here)
clarification about catching exceptions, using the accepted answer as a starting point:
if __name__ == '__main__':
with open(r"/tmp/genes.txt","r") as f:
for line in f.readlines():
#keep track of your input data
search_data = line.strip()
try:
probes_dict = get_probes_from_genes(search_data)
except MyNoDataFoundException, e:
#and do whatever you feel you need to do here...
print "bummer about search_data:%s:\nexception:%s" % (search_data, e)
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)
You may want to create a file with Gene names, then read content of the file and call your function in the loop. Here is an example below
if __name__ == '__main__':
with open(r"/tmp/genes.txt","r") as f:
for line in f.readlines():
probes_dict = get_probes_from_genes(line.strip())
expression_values, well_ids, well_coordinates, donor_names = get_expression_values_from_probe_ids(probes_dict.keys())
print get_mni_coordinates_from_wells(well_ids)

Python refresh file from disk

I have a python script that calls a system program and reads the output from a file out.txt, acts on that output, and loops. However, it doesn't work, and a close investigation showed that the python script just opens out.txt once and then keeps on reading from that old copy. How can I make the python script reread the file on each iteration? I saw a similar question here on SO but it was about a python script running alongside a program, not calling it, and the solution doesn't work. I tried closing the file before looping back but it didn't do anything.
EDIT:
I already tried closing and opening, it didn't work. Here's the code:
import subprocess, os, sys
filename = sys.argv[1]
file = open(filename,'r')
foo = open('foo','w')
foo.write(file.read().rstrip())
foo = open('foo','a')
crap = open(os.devnull,'wb')
numSolutions = 0
while True:
subprocess.call(["minisat", "foo", "out"], stdout=crap,stderr=crap)
out = open('out','r')
if out.readline().rstrip() == "SAT":
numSolutions += 1
clause = out.readline().rstrip()
clause = clause.split(" ")
print clause
clause = map(int,clause)
clause = map(lambda x: -x,clause)
output = ' '.join(map(lambda x: str(x),clause))
print output
foo.write('\n'+output)
out.close()
else:
break
print "There are ", numSolutions, " solutions."
You need to flush foo so that the external program can see its latest changes. When you write to a file, the data is buffered in the local process and sent to the system in larger blocks. This is done because updating the system file is relatively expensive. In your case, you need to force a flush of the data so that minisat can see it.
foo.write('\n'+output)
foo.flush()
I rewrote it to hopefully be a bit easier to understand:
import os
from shutil import copyfile
import subprocess
import sys
TEMP_CNF = "tmp.in"
TEMP_SOL = "tmp.out"
NULL = open(os.devnull, "wb")
def all_solutions(cnf_fname):
"""
Given a file containing a set of constraints,
generate all possible solutions.
"""
# make a copy of original input file
copyfile(cnf_fname, TEMP_CNF)
while True:
# run minisat to solve the constraint problem
subprocess.call(["minisat", TEMP_CNF, TEMP_SOL], stdout=NULL,stderr=NULL)
# look at the result
with open(TEMP_SOL) as result:
line = next(result)
if line.startswith("SAT"):
# Success - return solution
line = next(result)
solution = [int(i) for i in line.split()]
yield solution
else:
# Failure - no more solutions possible
break
# disqualify found solution
with open(TEMP_CNF, "a") as constraints:
new_constraint = " ".join(str(-i) for i in sol)
constraints.write("\n")
constraints.write(new_constraint)
def main(cnf_fname):
"""
Given a file containing a set of constraints,
count the possible solutions.
"""
count = sum(1 for i in all_solutions(cnf_fname))
print("There are {} solutions.".format(count))
if __name__=="__main__":
if len(sys.argv) == 2:
main(sys.argv[1])
else:
print("Usage: {} cnf.in".format(sys.argv[0]))
You take your file_var and end the loop with file_var.close().
for ... :
ga_file = open(out.txt, 'r')
... do stuff
ga_file.close()
Demo of an implementation below (as simple as possible, this is all of the Jython code needed)...
__author__ = ''
import time
var = 'false'
while var == 'false':
out = open('out.txt', 'r')
content = out.read()
time.sleep(3)
print content
out.close()
generates this output:
2015-01-09, 'stuff added'
2015-01-09, 'stuff added' # <-- this is when i just saved my update
2015-01-10, 'stuff added again :)' # <-- my new output from file reads
I strongly recommend reading the error messages. They hold quite a lot of information.
I think the full file name should be written for debug purposes.

Python - Changing File Object within function

Sorry - My questions is how can I change a file object within a function from a different function?
I've been trying to work out this error in my first python script for too long now, Dr Google and the forums aren't helping me too much, but I'm hoping you can.
I have a looping function that generates alot of data and I would like to output it to a text file, and create a new text file after the third loop.
I have 2 functions defined, one to create the data hashes, the other to create the new files.
The new files are being created as expected (aaa.txt, baa.txt...etc) but the "hashit" function only ever writes to the first file (aaa.txt) even though the others are being created.
I have tried fo.close() fo.flush(), as well as referencing fo in the functions but can't seem to make it work. Also I've moved the fo.write from the function to the main body.
I have included a cut down version of the code that I've been using to troubleshoot this issue, the real one has several more loops increasing the string length.
Thanks in advance
import smbpasswd, hashlib
base = '''abcdefghijklmnopqrstuvwxyz '''
# base length 95
print(base)
baselen = len(base)
name = 'aaa.txt'
fo = open(name, "w")
print "Name of the file: ", fo.name
print "Closed or not : ", fo.closed
print "Opening mode : ", fo.mode
print "Softspace flag : ", fo.softspace
pw01 = 0
pw02 = 0
pw03 = 0
def hashit(passwd):
#2
# Need to install module
# sudo apt-get install python-smbpasswd
hex_dig_lm = smbpasswd.lmhash(passwd)
hex_dig_ntlm = smbpasswd.nthash(passwd)
#print '%s:%s' % smbpasswd.hash(passwd)
hash_md5 = hashlib.md5(passwd)
hex_dig_md5 = hash_md5.hexdigest()
print(passwd)
print(hex_dig_lm)
print(hex_dig_ntlm)
print(hex_dig_md5)
hashstring = passwd +","+ hex_dig_lm +","+ hex_dig_md5 + '\n'
fo.write(hashstring);
def newfile(name):
fo.flush()
fo = open(name, "a")
print("-------newfile------")
print "Name of the file: ", fo.name
print "Closed or not : ", fo.closed
print('NewFile : ' + name)
raw_input("\n\nPress the enter key to exit.")
# add 3rd digit
while (pw03 < baselen):
pwc03 = base[pw03]
name = pwc03 + 'aa.txt'
fo.close
newfile(name);
pw03 += 1
while (pw02 < baselen):
pwc02 = base[pw02]
pw02 += 1
while (pw01 < baselen):
pwc01 = base[pw01]
pw01 += 1
passwd = pwc03 + pwc02 + pwc01
hashit(passwd);
else:
pw01 = 0
else:
pw02 = 0
else:
pw03 = 0
In your newfile() function, add this line first:
global fo

Categories

Resources