Here is the line of code where I am getting an error
def Data('C:/Users/username/Desktop/d.txt'):
fp = open('C:/Users/username/Desktop/d.txt')
I get the following error:
File "read.py", line 17
def DataExtract('C:/Users/username/Desktop/d.txt'):
^
SyntaxError: invalid syntax
Not sure what the problem is and what is causing the invalid syntax
Thank you!
Here is the code I am working on:
def DataExtract('C:/Users/username/Desktop/d.txt'):
fp = open('C:/Users/username/Desktop/d.txt')
data = {}
line = fp.read().split("\n")[2:7:5]:
while line:
name, _, cont = line.partition(":")
keys, _, values = cont.partition("=")
keys = keys.split(",")
values = values.split(",")
temp_d = {}
for i in range(len(keys)):
key = key[i].strip()
val = values[i].strip()
temp_d[key] = float(val)
data[name] = temp_d
line = fp.readline()
fp.close()
return data
x = DataExtract('C:/Users/username/Desktop/d.txt')
mul_p = x['d1']['p'] * x['d2']['p']
print(mul_p)
As mentioned earlier in the comments by @LhasaDad.
I am observing two errors in your attached code.
First Error:
The first is the "def" statement. A function definition needs a parameter name (a placeholder variable), not the value you intend to pass in. For example:
>>> def fun('hello'):
File "<stdin>", line 1
def fun('hello'):
^
SyntaxError: invalid syntax
>>> def fun(5):
File "<stdin>", line 1
def fun(5):
^
SyntaxError: invalid syntax
The correct way is
>>> def fun(var):
... print(var)
...
>>> #Then call the function using below method
...
>>> fun('hello')
hello
>>> fun(5)
5
Second Error:
There should always be an indentation (a tab or a gap of 4 spaces) on the lines after a def statement.
So in your case hopefully this will work
def DataExtract(file_path):
    """Parse selected lines of a text file into a nested dict of floats.

    Each parsed line is expected to look like ``name: k1, k2 = v1, v2`` and
    is stored as ``data[name] = {k1: float(v1), k2: float(v2)}``.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict mapping each record name to a dict of its float values.

    Raises:
        ValueError: if a value on a selected line cannot be converted to
            float.
    """
    data = {}
    # ``with`` closes the file even when parsing raises.
    with open(file_path) as fp:
        # Same line selection as before. The slice yields a *list* of lines,
        # so it must be iterated; string methods cannot be called on it.
        # NOTE(review): 2:7:5 selects only the line at index 2 -- widen the
        # slice if more than one record is expected.
        selected = fp.read().split("\n")[2:7:5]

    for line in selected:
        if not line.strip():
            continue  # nothing to parse on a blank line
        name, _, cont = line.partition(":")
        keys, _, values = cont.partition("=")
        data[name.strip()] = {
            key.strip(): float(val)
            for key, val in zip(keys.split(","), values.split(","))
        }
    return data
# Now call the function as
DataExtract('C:/Users/username/Desktop/d.txt')
You can also read more about how to define Python functions with def here:
https://www.w3schools.com/python/python_functions.asp
Related
I'm trying to run the below python script (vcf2treemix.py) with the command
<./vcf2treemix.py -vcf allsamples14_filtered_1_autosomes38_bisnps.vcf.gz -pop allsamples14.clust.pop>
I got this error with both python 2 and 3
######### error ###
Traceback (most recent call last):
File "./vcf2treemix.py", line 99, in <module>
main()
File "./vcf2treemix.py", line 95, in main
pop_obj = get_pops(pop_file)
File "./vcf2treemix.py", line 34, in get_pops
pops[fields[0]] = fields[1].split()
IndexError: list index out of range
######### vcf2treemix.py ###
#!/usr/bin/python
# vcf2treemix.py
# Converts a vcf file into TreeMix input
import argparse
from collections import OrderedDict
parser = argparse.ArgumentParser(description="Parsing statistical output of"
" VCFtools")
parser.add_argument("-vcf", dest="vcf_file", help="/mnt/ursus/GROUP-sbifh3/c1845371/whole_genome/data_dog/align_out/treemix/allsamples14_filtered_1_autosomes38_bisnps_main.vcf.gz",
required=True)
parser.add_argument("-pop", dest="pop_file", help="/mnt/ursus/GROUP-sbifh3/c1845371/whole_genome/data_dog/align_out/treemix/allsamples14.clust.pop",
required=True)
arg = parser.parse_args()
def get_pops(pop_file):
    """
    Returns a dictionary with pop identifier as key and taxa as a list of
    strings. In the pop file, each population should be on one line, starting
    with the pop name, a colon and the corresponding taxa separated by
    whitespace. Blank lines are ignored.
    E.g.:
        pop1: taxon1 taxon2 taxon3

    Raises ValueError, naming the file and line number, when a non-blank
    line contains no colon.
    """
    pops = OrderedDict()
    with open(pop_file) as fh:
        for line_no, line in enumerate(fh, 1):
            line = line.strip()
            # A blank line (e.g. a trailing newline at the end of the file)
            # is not a population entry; indexing its split result is what
            # used to raise "IndexError: list index out of range".
            if not line:
                continue
            name, sep, taxa = line.partition(":")
            if not sep:
                raise ValueError(
                    "{}: line {}: expected 'pop: taxon1 taxon2 ...', "
                    "got {!r}".format(pop_file, line_no, line))
            pops[name.strip()] = taxa.split()
    return pops
def vcf2treemix(vcf_file, pop_obj):
    """
    Converts a vcf file into treemix format.

    vcf_file: path of the VCF file; a name ending in ".gz" is read through
        gzip, anything else as plain text.
    pop_obj: mapping of population name -> list of sample names (as built by
        get_pops). Its key order is the column order of the output.

    The output is written next to the input, with the ".vcf" / ".vcf.gz"
    suffix replaced by ".tmix". The first output line lists the population
    names; every following line holds one "count0,count1" pair per
    population for one retained locus.

    Raises ValueError if a sample name is absent from the #CHROM header line
    or if a data line appears before that header.
    """
    import gzip  # local import: only this function needs it

    is_gzipped = vcf_file.endswith(".gz")
    # str.strip(".vcf") removes any of the characters ".", "v", "c", "f" from
    # BOTH ends of the name, so the suffix is removed explicitly instead.
    base = vcf_file[:-len(".gz")] if is_gzipped else vcf_file
    if base.endswith(".vcf"):
        base = base[:-len(".vcf")]
    output_name = base + ".tmix"

    pop_names = list(pop_obj.keys())
    missing = ("./.", ".|.", ".")
    columns = None  # population -> column indexes, known after #CHROM
    opener = gzip.open if is_gzipped else open

    # Both handles are closed even if a malformed line raises half-way.
    with opener(vcf_file, "rt") as vcf_fh, open(output_name, "w") as output_fh:
        # Write header for tmix file
        output_fh.write("{}\n".format(" ".join(pop_names)))
        for line in vcf_fh:
            # Skip header
            if line.startswith("##"):
                continue
            # Get taxon positions (looked up once, not once per genotype)
            if line.startswith("#CHROM"):
                header = line.strip().split()
                columns = {
                    pop: [header.index(taxon) for taxon in taxa]
                    for pop, taxa in pop_obj.items()
                }
                continue
            fields = line.strip().split()
            # Ignore empty lines
            if not fields:
                continue
            if columns is None:
                raise ValueError(
                    "data line found before the #CHROM header in "
                    + vcf_file)
            # Ignore loci whose ALT column is longer than one character
            if len(fields[4]) > 1:
                continue
            # Get allele counts for each population
            counts = []
            for pop in pop_names:
                ref = alt = 0
                for col in columns[pop]:
                    # Only the part before the first ":" is the genotype;
                    # digits in the remaining sub-fields must not be counted.
                    gen = fields[col].split(":")[0]
                    # Skip if gen is missing data
                    if gen in missing:
                        continue
                    ref += gen.count("0")
                    alt += gen.count("1")
                counts.append("{},{}".format(ref, alt))
            # Write current locus to file
            output_fh.write("{}\n".format(" ".join(counts)))
def main():
    """Run the conversion using the paths from the parsed command line."""
    # Parse the population file first, then feed the result to the converter
    # together with the VCF path.
    populations = get_pops(arg.pop_file)
    vcf2treemix(arg.vcf_file, populations)
main()
I have zero experience with python and I just run the script to manipulate genetic data.
Any help will be highly appreciated.
Thanks
Ali
I tried python 2 and 3 and I expect the script to work straightforward. I think there is no problem with the input data.
I am trying to run calculations between values in a .csv file. However, when I try to execute the code, it gives an error that the while loop is not iterable. My code is:
import csv
import math
counter_chime = 1
chime_cra = 0
chime_cdec = 0
fermi_cra = 0
fermi_cdec = 0
grbfrb_overlap = []
frbcode = ""
grbcode = ""
dis = 0
counter_fermi = 0
class adata():
    """In-memory copy of a CSV file, addressable by column and row index."""

    def __init__(self, filename):
        """Read every row of the CSV file *filename* into ``self.details``.

        ``self.details`` is a list of rows, each row a list of strings.
        """
        # newline="" hands newline handling to the csv module, as its
        # documentation requires; without it, line breaks embedded in quoted
        # fields are altered by the file object's newline translation.
        with open(filename, "r", newline="") as f_input:
            self.details = list(csv.reader(f_input))

    def get_col_row(self, col, row):
        """Return the cell at column *col* of row *row* (both 0-based).

        Raises IndexError when either index is out of range.
        """
        return self.details[row][col]
chimed = adata('chime2.csv')
fermid = adata('fermi.csv')
while counter_chime < 600:
frbcode = chimed.get_col_row(0, counter_chime)
chime_cra = chimed.get_col_row(1, counter_chime)
chime_cdec = chimed.get_col_row(2, counter_chime)
chime_cra = float(chime_cra)
chime_cdec = float(chime_cdec)
sum = sum(chime_cra + chime_cdec)
print (sum)
counter_chime +=1
The error this returns is:
Traceback (most recent call last):
File "C:\Users\kaoet\Desktop\CODING\trial1.py", line 32, in <module>
sum = sum(chime_cra + chime_cdec)
TypeError: 'float' object is not iterable
I am not sure what the problem is.
As chime_cra and chime_cdec are float, you do not need to use sum to have their sum.
You could do sum([chime_cra, chime_cdec]), that is an iterable for sum and it would not complain at the first loop.
It is not necessary, though, you should just use chime_cra + chime_cdec.
Furthermore, that line of code would later lead to another error because you are re-defining sum when you use it as a variable name. Instead, you should find a meaningful name for your sum result.
I have a .txt file formatted like this
60
4
20
YF
X : YF+XF+Y
Y : XF-YF-X
I need each line to be a separate variable and the last two to be broken into a key and value in a dictionary. I currently have this:
class LSystem:
def __init__(self,filename):
#complete this method
self.rules = {}
file = open(filename)
for i, line in enumerate(filename):
if i == 0:
self.angle = line
elif i == 1:
self.iteration = line
elif i == 2:
self.distance = line
elif i == 3:
self.axiom = line
elif i >= 4:
(key,val)= line.split
self.rules[key] = val
file.close()
This gives me this error:
Traceback (most recent call last):
File "lab10.py", line 65, in <module>
main()
File "lab10.py", line 10, in main
sys = lsystem.LSystem("arrowheadcurve.txt")
File "/Users/alongo/Dropbox/Freshman Fall Semester/CS 110/Labs/lab-10-fall18-antmelon/lsystem.py", line 17, in __init__
(key,val)= line.split
TypeError: cannot unpack non-iterable builtin_function_or_method object
How do you go about fixing this?
As @Carcigenicate commented, line.split does not actually call the split() function. You need to invoke it by including the parentheses:
(key,val) = line.split()
But note that split() will split on white space characters. For your input this will result in a list containing three items, and unpacking that into only two variables will also fail.
I assume that you should be splitting on the : (further assuming that : cannot be present elsewhere in the expression). Try this:
(key, val) = line.split(' : ')
I have included the surrounding spaces in the delimiter so that the leading and trailing spaces are not present in the result. If the white space is inconsistent you can handle it like this:
key, val = [s.strip() for s in line.split(':')]
Also, fix the file iteration by using the file object, not the filename string, and open it in a with statement (so that it will be guaranteed to be properly closed):
with open(filename) as f:
for i, line in enumerate(f):
line = line.strip() # remove leading and trailing white space
if i == 0:
self.angle = line
You've missed a few small things, which I'm commenting on in the following code:
class LSystem:
    """L-system description loaded from a text file.

    The file layout is positional: line 1 is the angle, line 2 the iteration
    count, line 3 the distance, line 4 the axiom, and every later non-blank
    line is a rewrite rule written as ``KEY : REPLACEMENT``.

    All values are stored as strings with surrounding whitespace removed:
    ``angle``, ``iteration``, ``distance``, ``axiom`` and the ``rules`` dict.
    """

    def __init__(self, filename):
        """Load the L-system stored in *filename*.

        Raises ValueError if a rule line contains no ":".
        """
        self.rules = {}
        # Iterate over the file object (not the filename string); the with
        # block guarantees the file is closed.
        with open(filename) as source:
            for i, line in enumerate(source):
                line = line.strip()  # drop the trailing newline
                if i == 0:
                    self.angle = line
                elif i == 1:
                    self.iteration = line
                elif i == 2:
                    self.distance = line
                elif i == 3:
                    self.axiom = line
                elif line:
                    key, sep, val = line.partition(":")
                    if not sep:
                        raise ValueError(
                            "rule line without ':': {!r}".format(line))
                    # Store the rule; stripping tolerates uneven spacing
                    # around the colon.
                    self.rules[key.strip()] = val.strip()
I am new to python and trying to submit my HW on Coursera Data Science course. The environment there is VM running Python 2.7.3, the file tweet_sentiment.py I am trying to run has the following script within it:
import sys
import json
def hw():
print 'Hello, world!'
def lines(fp):
print str(len(fp.readlines()))
def main():
sent_file = open(sys.argv[1])
tweet_file = open(sys.argv[2])
# hw()
# lines(sent_file)
# lines(tweet_file)
myfile = open(sys.argv[1], 'r')
lines = myfile.readlines()
mydict = {}
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
twit_file = open(sys.argv[2], 'r')
twit_lines = twit_file.readlines()
mylist = []
for line in twit_lines:
mylist.append(json.loads(line))
for listik in mylist:
twit_value = 0
twit_text = listik["text"]
twit_words = twit_text.split()
for word in twit_words:
if word in mydict:
twit_value = twit_value + 1
print float(twit_value)
if __name__ == '__main__':
main()
When running $ python tweet_sentiment.py I am getting the following error:
File "tweet_sentiment.py", line 25
key, value = line.split("\t")
^
IndentationError: expected an indented block
Thanks for any hints!
Sergey
Be careful! You're mixing tabs and spaces for indenting.
Often a tab is displayed as the equivalent of 8 spaces. So, when using the common practice of 4 spaces it looks like 2 levels of indentation, but is really only one.
When I examine your code in the editor, I can see that you've got tabs in at least two places. Replace those tabs with 4 spaces.
As it says, you have an indentation error. Line 25 should be corrected to this:
def main():
...
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
You have to indent the line after a for block. Your code should look like:
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
Your code must be like this:
for line in lines:
key, value = line.split("\t")
mydict[key] = int(value)
It's the same for all other for's.
Try:
import sys
import json
def hw():
    """Print the greeting line."""
    # Call form with a single argument: valid on Python 2.7 and Python 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print('Hello, world!')
def lines(fp):
    """Print how many lines remain to be read from the open file *fp*."""
    # Call form with a single argument: valid on Python 2.7 and Python 3.
    print(str(len(fp.readlines())))
def main():
    """Print one float per tweet: how many of its words are known terms.

    Command-line arguments:
        sys.argv[1]: sentiment file, one ``term<TAB>score`` entry per line.
        sys.argv[2]: tweet file, one JSON object per line; the words are
            taken from the object's "text" value.

    Blank lines in either file are skipped.

    Raises:
        ValueError: if a sentiment line is not ``term<TAB>integer`` or a
            tweet line is not valid JSON.
    """
    scores = {}
    # Each file is opened exactly once and closed by its with block.
    with open(sys.argv[1], 'r') as sent_file:
        for line in sent_file:
            if not line.strip():
                continue
            key, value = line.split("\t")
            scores[key] = int(value)

    with open(sys.argv[2], 'r') as tweet_file:
        for line in tweet_file:
            if not line.strip():
                continue
            tweet = json.loads(line)
            # An object without a "text" key is scored 0.0 rather than
            # raising KeyError (presumably non-tweet entries -- confirm).
            text = tweet.get("text", "")
            # NOTE(review): this counts matching words; the scores read from
            # the sentiment file are not added up -- confirm that is intended.
            twit_value = sum(1 for word in text.split() if word in scores)
            print(float(twit_value))
if __name__ == '__main__':
main()
The indentation for the for statements is messed up. The body of a for block has to be indented. Also, as stated before, Python will throw an error if you mix tabs and spaces. Use either all spaces (replace each tab with 4 spaces) or all tabs (replace every 4 spaces with a tab).
Use vim to open it, and then type the command :retab
It then shows the line not correctly indented.
I have a plain text file with the following data:
id=1
name=Scott
occupation=Truck driver
age=23
id=2
name=Dave
occupation=Waiter
age=16
id=3
name=Susan
occupation=Computer programmer
age=29
I'm trying to work out the best way to get to any point in the file given an id string, then grab the rows underneath to extract the data for use in my program. I can do something like:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
But I'm not sure how I can now go through the next bunch of lines and do line.split("=") or similar to extract the info (put into a list or dict or whatever) that I can use my program. Any pointers?
One option would be to load the entire thing into memory, which would save you from reading the file every time:
def _load_people(path):
    """Parse blank-line separated ``key=value`` records into a dict by id.

    Each record is stored under its ``id`` value, with the ``id`` entry
    itself removed from the record. Raises KeyError if a record has no
    ``id`` row.
    """
    with open(path) as f:
        chunks = f.read().split('\n\n')

    people = {}
    for chunk in chunks:
        # Drop empty rows: a trailing newline or an extra blank line would
        # otherwise hand dict() a row that has no "=" to split on.
        rows = [row for row in chunk.split('\n') if row.strip()]
        if not rows:
            continue
        data = dict(row.split('=', 1) for row in rows)
        people[data.pop('id')] = data
    return people


# Loaded once at import time so that lookups never re-read the file.
people_by_id = _load_people('rooms')


def get_person_by_id(id):
    """Return the record dict stored for *id* (a string), or None."""
    return people_by_id.get(id)
How about exiting from a for loop after finding the correct line:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
break
#now you can continue processing your file:
next_line = file.readline()
Maybe:
# Build d as {id value: {key: value, ...}} from "key=value" lines.
d = dict()
current = None  # record opened by the most recent "id=" line
with open(filename) as f:
    for line in f:
        line = line.strip()  # also removes the newline from the value
        if not line:
            continue  # blank separator between records
        k, v = line.split('=', 1)
        if k == 'id':
            # Keep a direct reference to the new record: d.keys()[-1] is not
            # indexable on Python 3 and depends on dict ordering on Python 2.
            current = d[v] = {}
        if current is not None:
            current[k] = v
And here is an iterative solution.
# Collect one dict per blank-line separated record of "key=value" lines.
objects = []
current_object = None
# Text mode: every line is a str, so the str argument to strip() below works
# on Python 3 as well (in "rb" mode the lines would be bytes there).
with open("info.txt", "r") as f:
    for line in f:
        line = line.strip("\r\n")
        if not line:
            # A blank line closes the current record.
            current_object = None
            continue
        if current_object is None:
            current_object = {}
            objects.append(current_object)
        key, _, value = line.partition('=')
        current_object[key] = value
# Call form with a single argument: valid on Python 2.7 and Python 3.
print(objects)
Another example of an iterative parser:
from itertools import takewhile
def entries(f):
    """Group ``key=value`` lines into records keyed by their ``id`` value.

    Args:
        f: Iterable of text lines (an open file, a list, ...). Records are
            separated by one or more lines that contain no "=".

    Returns:
        dict mapping each record's ``id`` value to a dict of its remaining
        fields (the ``id`` entry itself is removed from the record).

    Raises:
        KeyError: if a record has no ``id`` line.
    """
    e = {}
    one = {}
    for line in f:
        if '=' in line:
            # Split on the first "=" only, so values may contain "=".
            key, val = line.strip().split('=', 1)
            one[key] = val
        elif one:
            # A separator line closes the record being built; further
            # separator lines in a row are simply skipped.
            e[one.pop('id')] = one
            one = {}
    if one:
        # Last record when the input does not end with a separator line.
        e[one.pop('id')] = one
    return e
Example:
>>> with open('data.txt') as f:
..: print entries(f)['2']
{'age': '16', 'occupation': 'Waiter', 'name': 'Dave'}
Get all of the person's attributes and values (i.e. id, name, occupation, age, etc.) until you find
an empty line.
def get_person_by_id(id):
    """Return the record whose id line is exactly ``id=<id>`` in 'rooms'.

    Args:
        id: Identifier to look for, as a string.

    Returns:
        dict of the record's attributes (including "id"), read up to the
        next blank line or the end of the file; an empty dict when no record
        has that id.
    """
    person = {}
    found = False  # becomes True once the matching "id=" line has been seen
    with open('rooms', 'r') as source:
        for line in source:
            line = line.strip()
            if found:
                if not line:
                    break  # a blank line ends the record
                attr, value = line.split("=", 1)
                person[attr] = value
            # Compare the whole line: a substring test would let "id=1"
            # match "id=10" as well.
            elif line == "id=" + id:
                print(id + " found")
                found = True
                person["id"] = id
    return person
This solution is a bit more forgiving of empty lines within records.
def read_persons(it):
    """Yield one dict per record from an iterable of ``key=value`` lines.

    A line whose key is ``id`` starts a new record; lines without "=" are
    ignored, so blank lines inside a record do not end it. Values keep
    everything after the first "=".
    """
    record = {}
    for raw in it:
        key, sep, value = raw.strip('\n').partition('=')
        if not sep:
            continue  # no "=" on this line: nothing to store
        if key == 'id' and record:
            # The previous record is complete once the next id shows up.
            yield record
            record = {}
        record[key] = value
    if record:
        yield record