My Exif code failed in my project so the photos came out without a "date taken" field. Luckly ive saved them with the date taken as the tittle eg. "-Forward14-07-2021-08-23-25.jpg". I have thousands of photos like this. I can add that individually using this python code.
from datetime import datetime
import piexif
s = "13"
m = "17"
h = "09"
dd = "14"
mm = "07"
yyyy = "2021"
name = "-Forward"
filename = str(name) + str(dd) + "-" + str(mm) + "-" + str(yyyy) + "-" + str(h) + "-" + str(m) + "-" + str(s) + ".jpg"
exif_dict = piexif.load(filename)
new_date = datetime(int(yyyy), int(mm), int(dd), int(h), int(m),
int(s)).strftime("%Y:%m:%d %H:%M:%S")
exif_dict['0th'][piexif.ImageIFD.DateTime] = new_date
exifif_dict['Exif'][piexif.ExifIFD.DateTimeOriginal] = new_date
exif_dict['Exif'][piexif.ExifIFD.DateTimeDigitized] = new_date
exif_bytes = piexif.dump(exif_dict)
piexif.insert(exif_bytes, filename)
As soon as I try it as a loop it does not work.
import PIL.Image
import piexif
from datetime import datetime
from PIL import Image
#import os
#do not change these:
s = "0"
m = "0"
h = "07"
#change these:
dd = "14"
mm = "07"
yyyy = "2021"
Dir = "D:\Python codes\test" #insert file directory here##
#run code twice, once for pavement and once for forward photos
name = "-Forward" #"-Pavement"
#for filename in os.listdir(dir):
for h in range (7,17):
for m in range (1,61):
for s in range (1,61):
if (int(s) < 10):
s = "0" + str(s)
if (int(m) < 10):
m = "0" + str(m)
if (int(h) < 10):
h = "0" + str(h)
else:
s = int(s)
m = int(m)
h = int(h)
try:
filename = str(name) + str(dd) + "-" + str(mm) + "-" + str(yyyy) + "-" + str(h) + "-" + str(m) + "-" + str(s) + ".jpg"
exif_dict = piexif.load(filename)
new_date = datetime(int(yyyy), int(mm), int(dd), int(h), int(m), int(s)).strftime("%Y:%m:%d %H:%M:%S")
exif_dict['0th'][piexif.ImageIFD.DateTime] = new_date
exif_dict['Exif'][piexif.ExifIFD.DateTimeOriginal] = new_date
exif_dict['Exif'][piexif.ExifIFD.DateTimeDigitized] = new_date
exif_bytes = piexif.dump(exif_dict)
piexif.insert(exif_bytes, filename)
##insert right directory
# im = Image.open(Dir + name + str(dd) + "-" + str(mm) + "-" + str(yyyy) + "-" + str(h) + "-" +str(m) + "-" +str(s))
#im.save(Dir + name + str(dd) + "-" + str(mm) + "-" + str(yyyy) + "-" + str(h) + "-" +str(m) + "-" +str(s), exif=exif_bytes, quality="keep", optimize=True)
s = int(s) + 1
except:
s = int(s) + 1
m = int(m) + 1
h = int(h) + 1
I've done some more research and found out that the try: and except: were the problem. The working code can be seen below for those who are interested. Thanks for the help!
import PIL.Image
from os import path
import piexif
from datetime import datetime
from PIL import Image
import os
# do not change these:
s = 0
M = 0
h = 7
# change these:
dd = "14"
mm = "07"
yyyy = "2021"
# run code twice, once for pavement and once for forward photos
name = "-Forward" # "-Pavement"
# for filename in os.listdir(dir):
for a in range(7, 17):
M = 0
for b in range(1, 61):
s = 0
for c in range(1, 61):
ss = str(s)
MM = str(M)
hh = str(h)
if s < 10:
ss = "0" + str(s)
if M < 10:
MM = "0" + str(M)
if h < 10:
hh = "0" + str(h)
filename = str(name) + str(dd) + "-" + str(mm) + "-" + str(yyyy) + "-" + str(hh) + "-" + str(
MM) + "-" + str(ss) + ".jpg"
#try:
if path.exists(filename):
# filename = str(name) + str(dd) + "-" + str(mm) + "-" + str(yyyy) + "-" + str(h) + "-" + str(m) +
# "-" + str(s) + ".jpg"
exif_dict = piexif.load(filename)
new_date = datetime(int(yyyy), int(mm), int(dd), h, M, s).strftime("%Y:%m:%d %H:%M:%S")
exif_dict['0th'][piexif.ImageIFD.DateTime] = new_date
exif_dict['Exif'][piexif.ExifIFD.DateTimeOriginal] = new_date
exif_dict['Exif'][piexif.ExifIFD.DateTimeDigitized] = new_date
exif_bytes = piexif.dump(exif_dict)
piexif.insert(exif_bytes, filename)
# #insert right directory im = Image.open(Dir + name + str(dd) + "-" + str(mm) + "-" + str(yyyy) +
# "-" + str(h) + "-" +str(m) + "-" +str(s)) im.save(Dir + name + str(dd) + "-" + str(mm) + "-" + str(
# yyyy) + "-" + str(h) + "-" +str(m) + "-" +str(s), exif=exif_bytes, quality="keep", optimize=True)
s += 1
#except:
s += 1
M += 1
h += 1
Related
My code throws error I really can't understand why. There it is:
File "alon.py", line 152
fig.write_image("files/table_" + product_name + ".pdf")
^
IndentationError: unindent does not match any outer indentation level
If I remove this line, it works. Can't see how it is unintended. It is under if type(product_data) is dict: statement. On the same level like the last line of code before it. What can cause such a behaviour ?
import MySQLdb
from plotly import graph_objs as go
import numpy as np
import os
from plotly.subplots import make_subplots
from PyPDF2 import PdfFileMerger
from datetime import datetime, timedelta
# Database connect
db = MySQLdb.connect(host="localhost",
user="root",
passwd="abc9110033969",
db="alon")
today = datetime.today().strftime('%Y-%m-%d')
one_week = (datetime.today() - timedelta(days=7)).strftime('%Y-%m-%d')
two_week = (datetime.today() - timedelta(days=14)).strftime('%Y-%m-%d')
three_week = (datetime.today() - timedelta(days=21)).strftime('%Y-%m-%d')
four_week = (datetime.today() - timedelta(days=28)).strftime('%Y-%m-%d')
# Functions
def load_post_views(table, today, one_week, two_week, three_week, four_week):
product_views_dict = dict()
cursor = db.cursor()
cursor.execute(
"SELECT client_id, product_id, referrer, `date`" +
" FROM " + table +
" WHERE `date`>='"+four_week+"'")
social_dict = {
"facebook": 0,
"twitter": 0,
"instagram": 0,
"linkedin": 0,
"pinterest": 0,
"website": 0,
}
for x in range(0, cursor.rowcount):
row = cursor.fetchone()
network = ""
period = ""
client_id = row[0]
product_id = row[1]
referrer = row[2]
date = str(row[3])
email_cursor = db.cursor()
email_cursor.execute("SELECT address FROM c8ty_connections_email WHERE entry_id=" + str(client_id))
email = email_cursor.fetchone()
product_cursor = db.cursor()
product_cursor.execute("SELECT post_title FROM c8ty_posts WHERE id=" + str(product_id))
product_name = product_cursor.fetchone()
# Add client ID key
if client_id not in product_views_dict:
product_views_dict[client_id] = dict()
# Add product ID key to client ID parent key
if product_id not in product_views_dict[client_id]:
product_views_dict[client_id][product_id] = {
today + " - " + one_week: social_dict,
one_week + " - " + two_week: social_dict,
two_week + " - " + three_week: social_dict,
three_week + " - " + four_week: social_dict
}
# Find referrer
if "facebook" in referrer:
network = "facebook"
elif "twitter" in referrer:
network = "twitter"
elif "instagram" in referrer:
network = "instagram"
elif "linkedin" in referrer:
network = "linkedin"
elif "pinterest" in referrer:
network = "pinterest"
else:
network = "website"
# Check view period
if date <= today and date > one_week:
period = today + " - " + one_week
if date <= one_week and date > two_week:
period = one_week + " - " + two_week
if date <= two_week and date > three_week:
period = two_week + " - " + three_week
if date <= three_week and date > four_week:
period = three_week + " - " + four_week
product_views_dict[client_id][product_id][period][network] += 1
product_views_dict[client_id]["email"] = email[0]
product_views_dict[client_id][product_id]["product"] = product_name[0]
return product_views_dict
# Init
product_views_dict = load_post_views("an_product_view", today, one_week, two_week, three_week, four_week)
brochure_views_dict = load_post_views("an_brochure_view", today, one_week, two_week, three_week, four_week)
for clinetID, product_info in product_views_dict.items():
client_email = product_info["email"]
for productID, product_data in product_info.items():
if type(product_data) is dict:
product_name = product_data['product']
table_data = [
[
today + " - " + one_week,
one_week + " - " + two_week,
two_week + " - " + three_week,
three_week + " - " + four_week,
today + " - " + four_week
]
]
for network in ["website", "facebook", "twitter", "instagram", "linkedin", "pinterest"]:
table_data.append([
product_data[today + " - " + one_week][network],
product_data[one_week + " - " + two_week][network],
product_data[two_week + " - " + three_week][network],
product_data[three_week + " - " + four_week][network],
sum([
int(product_data[today + " - " + one_week][network]),
int(product_data[one_week + " - " + two_week][network]),
int(product_data[two_week + " - " + three_week][network]),
int(product_data[three_week + " - " + four_week][network])
])
])
fig = make_subplots(rows=5, cols=2)
# Create product table
fig.add_trace(
go.Table(
header=dict(values=["Period", "Website", "Facebook", "Twitter", "Instagram", "LinkedIn", "Pinterest", "Total"]),
cells=dict(values=table_data)
)
)
# Craete folder if doesn't exist
if not os.path.exists("files"):
os.mkdir("files")
# Write pdf
fig.write_image("files/table_" + product_name + ".pdf")
db.close()
exit()
Check in your source file that you aren't mixing spaces and tabs? It should be consistent, and only one or the other. I recommend spaces to comply with PEP8.
I am trying to scrape data from soccerway.com and checking whether the page is a completed game/game to be played with each instance being written to a seperate csv file. I am running through 10,000 pages and so have written it using Pools. However, I am getting empty lists from the append function and cannot write anything to the csv files.
I tried writing straight to the file instead of list appending however this gave incomplete files
import requests
from bs4 import BeautifulSoup
import time
import numpy as np
import uuid
import time
from multiprocessing import Pool
import sys, os
fixturesA = []
linksA = []
statsA = []
def parse(url):
try:
#print(url)
delays = [0.25,0.5,0.75,1]
delay = np.random.choice(delays)
#time.sleep(delay)
#r = requests.get(url)
r = requests.get(url, timeout = 10)
soup = BeautifulSoup(r.content, "html.parser")
teams = soup.findAll('h3', attrs = {'class' : 'thick'})
homeTeam = teams[0].text.strip()
awayTeam = teams[2].text.strip()
middle = teams[1].text.strip()
dds = soup.findAll('dd')
date = dds[1].text.strip()
gameWeek = dds[2].text.strip()
if ':' not in middle:
middle = middle.split(" - ")
homeGoals = 0
awayGoals = 0
homeGoals = middle[0]
try:
awayGoals = middle[1]
except Exception as e:
homeGoals = "-1"
awayGoals = "-1"
matchGoals = int(homeGoals) + int(awayGoals)
if(matchGoals >= 0):
if(int(homeGoals) > 0 and int(awayGoals) > 0):
btts = "y"
else:
btts = "n"
halfTimeScore = dds[4].text.strip().split(" - ")
firstHalfHomeGoals = halfTimeScore[0]
firstHalfAwayConc = halfTimeScore[0]
firstHalfAwayGoals = halfTimeScore[1]
firstHalfHomeConc = halfTimeScore[1]
firstHalfTotalGoals = int(firstHalfHomeGoals) + int(firstHalfAwayGoals)
secondHalfHomeGoals = int(homeGoals) - int(firstHalfHomeGoals)
secondHalfAwayConc = int(homeGoals) - int(firstHalfHomeGoals)
secondHalfAwayGoals = int(awayGoals) - int(firstHalfAwayGoals)
secondHalfHomeConc = int(awayGoals) - int(firstHalfAwayGoals)
secondHalfTotalGoals = matchGoals - firstHalfTotalGoals
homeTeamContainers = soup.findAll('div', attrs = {'class' : 'container left'})
homeTeamStarting = homeTeamContainers[2]
homeTeamBench = homeTeamContainers[3]
homeTeamYellows = len(homeTeamStarting.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/700/img/events/YC.png' })) + len(homeTeamBench.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/699/img/events/YC.png' }))
homeTeamReds = len(homeTeamStarting.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/700/img/events/RC.png' })) + len(homeTeamBench.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/699/img/events/RC.png' }))
homeTeamCards = homeTeamYellows + homeTeamReds
awayTeamContainers = soup.findAll('div', attrs = {'class' : 'container right'})
awayTeamStarting = awayTeamContainers[2]
awayTeamBench = awayTeamContainers[3]
awayTeamYellows = len(awayTeamStarting.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/700/img/events/YC.png' })) + len(awayTeamBench.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/699/img/events/YC.png' }))
awayTeamReds = len(awayTeamStarting.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/700/img/events/RC.png' })) + len(awayTeamBench.findAll('img', attrs = {'src' : 'https://s1.swimg.net/gsmf/699/img/events/RC.png' }))
awayTeamCards = awayTeamYellows + awayTeamReds
matchCards = homeTeamCards + awayTeamCards
try:
iframe = soup.findAll('iframe')
iframeSrc = iframe[1]['src']
url = 'https://us.soccerway.com/' + iframeSrc
c = requests.get(url,timeout = 10)
soupC = BeautifulSoup(c.content, "html.parser")
cornerContainer = soupC.findAll('td', attrs = {'class' : 'legend left value'})
homeCorners = cornerContainer[0].text.strip()
awayCornersConc = homeCorners
cornerContainer = soupC.findAll('td', attrs = {'class' : 'legend right value'})
awayCorners = cornerContainer[0].text.strip()
homeCornersConc = awayCorners
matchCorners = int(homeCorners) + int(awayCorners)
print("Got Score . " + homeTeam + " vs " + awayTeam+" . " + gameWeek )
statsA.append(homeTeam + "," + awayTeam + "," + gameWeek + "," + homeGoals + "," + awayGoals + "," + str(matchGoals) + "," + btts + "," + firstHalfHomeGoals + "," + firstHalfHomeConc + "," + firstHalfAwayGoals + "," + firstHalfAwayConc + "," + str(firstHalfTotalGoals) + "," + str(secondHalfHomeGoals) + "," + str(secondHalfHomeConc) + "," + str(secondHalfAwayGoals) + "," + str(secondHalfAwayConc) + "," + str(secondHalfTotalGoals) + "," + str(homeTeamCards) + "," + str(awayTeamCards) + "," + str(matchCards) + "," + homeCorners + "," + awayCorners + "," + homeCornersConc + "," + awayCornersConc + "," + str(matchCorners)+","+dds[0].text.strip() + "\n")
return None
except Exception as e:
print("Got Score no corners. " + homeTeam + " vs " + awayTeam+" . " + gameWeek + " NO FRAME")
statsA.append(homeTeam + "," + awayTeam + "," + gameWeek + "," + homeGoals + "," + awayGoals + "," + str(matchGoals) + "," + btts + "," + firstHalfHomeGoals + "," + firstHalfHomeConc + "," + firstHalfAwayGoals + "," + firstHalfAwayConc + "," + str(firstHalfTotalGoals) + "," + str(secondHalfHomeGoals) + "," + str(secondHalfHomeConc) + "," + str(secondHalfAwayGoals) + "," + str(secondHalfAwayConc) + "," + str(secondHalfTotalGoals) + "," + str(homeTeamCards) + "," + str(awayTeamCards) + "," + str(matchCards) + "," + "" + "," + "" + "," + "" + "," + "" + "," + ""+","+dds[0].text.strip() + "\n")
return None
else:
fixturesA.append(homeTeam + "," + awayTeam + "," + gameWeek + "," + date + "\n")
linksA.append(url + "\n")
print(homeTeam + " vs " + awayTeam + " at " + middle + " GW:" + gameWeek)
return None
except Exception as e:
exc_type, exc_obj, exc_tb = sys.exc_info()
fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
print(exc_type, fname, exc_tb.tb_lineno)
linksA.append(url + "\n")
print(url)
return None
stats = open('Statsv2.csv','a',encoding='utf-8')
fixtures = open('fixturesv2.csv','w',encoding='utf-8')
with open('links.txt') as f:
content = f.readlines()
content = [x.strip() for x in content]
links = open('links.txt','w')
if __name__ == '__main__':
start_time = time.time()
p = Pool(20) # Pool tells how many at a time
records = p.map(parse, content)
p.terminate()
p.join()
print("--- %s seconds ---" % (time.time() - start_time))
I assume you are running Windows? Then the answer is that multi-processing in Windows creates copies instead of forks. So you have your main process with the lists and you get your working processes (from pool) with their own separate set of lists.
The workers most likely fill their list correctly, but the lists in the main-process don't get any data and so are staying empty. And the workers do not return anything. So, as you write your files in the main-process, you get empty files.
An easy way to solve this is creating pipes or queues between the main process and the workers to allow communication between the threads. You could also use shared arrays like they are provided by the multiprocessing class, but than you would need to know the length during creation.
see documentation: Multiprocessing
as pointed out by #RaJa you're not actually doing anything the parent/controlling process can see. the easiest is just to return values from the mapped function
for example, parse() could return tuple at the end like:
def parse(url):
# do work
return url, homeTeam, awayTeam, gameWeek, homeGoals, awayGoals # ...
then the parent process can receive the values and do useful things like saving them to a CSV file:
import csv
with Pool(20) as pool:
records = pool.map(parse, content)
with open('stats.csv', 'w') as fd:
out = csv.writer(fd)
out.writerow([
'url', 'hometeam', 'awayteam',
# and the remaining column names for the header
])
out.writerows(records)
I have a function which create list of url. Function works fine. But I do not like what this function looks like:
def prepareadres(city,y,m,d):
month = [31,28,31,30,31,30,31,31,30,31,30,31]
d = d + 1
d0 = 0
m0 = 0
if (d > month[0]):
d = d - month[0]
m = m + 1
if (d > month[1]):
d = d - month[1]
m = m + 1
if (d > month[2]):
d = d - month[2]
m = m + 1
if (d > month[3]):
d = d - month[3]
m = m + 1
if (d > month[4]):
d = d - month[4]
m = m + 1
if (d > month[5]):
d = d - month[5]
m = m + 1
if (d > month[6]):
d = d - month[6]
m = m + 1
if (d > month[7]):
d = d - month[7]
m = m + 1
if (d > month[8]):
d = d - month[8]
m = m + 1
if (d > month[9]):
d = d - month[9]
m = m + 1
if (d > month[10]):
d = d - month[10]
m = m + 1
if (d > month[11]):
d = d - month[11]
m = m + 1
if d == sum(month):
print("year complete")
#print(df.iloc[0, :-2])
if (m < 10) and (d < 10):
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-" + str(m0) + str(m) + "-" + str(d0) + str(d)
elif (m < 10) and (d >= 10):
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-"+ str(m0) + str(m) + "-" + str(d)
elif (m >= 10) and (d < 10):
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-" + str(m) + "-" + str(d0) + str(d)
else:
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-" + str(m) + "-" + str(d)
#print(adres)
return adres
Code looks awful as I mention.I used many if statements and want to simplify this. Can you suggest more elegant ways of writing this definition, avoiding the use of if statements?
Use a for loop to iterate over the months:
def prepareadres(city,y,m,d):
months = [31,28,31,30,31,30,31,31,30,31,30,31]
d = d + 1
d0 = 0
m0 = 0
for month in months:
if d > month:
d = d - month
m += 1
if d == sum(months):
print("year complete")
#print(df.iloc[0, :-2])
if (m < 10) and (d < 10):
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-" + str(m0) + str(m) + "-" + str(d0) + str(d)
elif (m < 10) and (d >= 10):
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-"+ str(m0) + str(m) + "-" + str(d)
elif (m >= 10) and (d < 10):
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-" + str(m) + "-" + str(d0) + str(d)
else:
adres = "https://sinoptik.ua/погода-" + city + "/" + str(y) + "-" + str(m) + "-" + str(d)
#print(adres)
return adres
I have a complex equation which is generated into a .txt file. I would like to import this equation (which is all the text in the .txt file) and make a function from it, which can be subsequently fit.
Does anybody know how I might go about this? The equation to be fitted is at the the very bottom. My feeble attempt to import is below...
myfile1= open("dummyfile.txt", 'r')
def fcn(J1,J2,T,k,g):
return myfile1.read()
"dummyfile.txt" contents:
B**2*N*(12.0*g**2*sp.exp(2.0*J2/(T*k)) + 60.0*g**2*sp.exp(6.0*J2/(T*k)) + 168.0*g**2*sp.exp(12.0*J2/(T*k)) + 360.0*g**2*sp.exp(20.0*J2/(T*k)) + 30.0*g**2*sp.exp((2.0*J1 + 4.0*J2)/(T*k)) + 168.0*g**2*sp.exp((4.0*J1 + 8.0*J2)/(T*k)) + 360.0*g**2*sp.exp((6.0*J1 + 14.0*J2)/(T*k)) + 180.0*g**2*sp.exp((8.0*J1 + 12.0*J2)/(T*k)) + 660.0*g**2*sp.exp((8.0*J1 + 22.0*J2)/(T*k)) + 660.0*g**2*sp.exp((12.0*J1 + 18.0*J2)/(T*k)) + 1092.0*g**2*sp.exp((16.0*J1 + 26.0*J2)/(T*k)) + 546.0*g**2*sp.exp((18.0*J1 + 24.0*J2)/(T*k)) + 1680.0*g**2*sp.exp((24.0*J1 + 32.0*J2)/(T*k)) + 1224.0*g**2*sp.exp((32.0*J1 + 40.0*J2)/(T*k)))/(3*T*k*(6*sp.exp(2.0*J2/(T*k)) + 10*sp.exp(6.0*J2/(T*k)) + 14*sp.exp(12.0*J2/(T*k)) + 18*sp.exp(20.0*J2/(T*k)) + 5*sp.exp((2.0*J1 + 4.0*J2)/(T*k)) + 14*sp.exp((4.0*J1 + 8.0*J2)/(T*k)) + 18*sp.exp((6.0*J1 + 14.0*J2)/(T*k)) + 9*sp.exp((8.0*J1 + 12.0*J2)/(T*k)) + 22*sp.exp((8.0*J1 + 22.0*J2)/(T*k)) + 22*sp.exp((12.0*J1 + 18.0*J2)/(T*k)) + 26*sp.exp((16.0*J1 + 26.0*J2)/(T*k)) + 13*sp.exp((18.0*J1 + 24.0*J2)/(T*k)) + 30*sp.exp((24.0*J1 + 32.0*J2)/(T*k)) + 17*sp.exp((32.0*J1 + 40.0*J2)/(T*k)) + 1))
You can do that with exec().
Code:
def build_function(filename):
with open(filename, 'rU') as f:
eqn = f.read().strip()
exec("def fcn(J1, J2, T, k, g):\n return ({})".format(eqn))
return locals()['fcn']
Test Code:
fcn = build_function('file1')
print(fcn(1, 2, 3, 4, 5))
File1:
J2 + T*k
Results:
14
I am receiving the following error when running a script to parse contents of an XML file.
if iteration.findtext("Iteration_query-def") in ecdict:
KeyError: 'XLOC_000434'
I was under the impression that using "if in dict" would mean that if the key is not found in the dictionary, the script will continue past the if statement and proceed with the rest of the code. Below is the problematic section of the code I am using. I realise this is quite a basic question, but I am unsure what else I can say, and I don't understand why I am receiving this error.
import xml.etree.ElementTree as ET
tree = ET.parse('507.FINAL_14.2.14_2_nr.out_fmt5.out')
blast_iteration = tree.find("BlastOutput_iterations")
for iteration in blast_iteration.findall("Iteration"):
query = iteration.findtext("Iteration_query-def").strip().strip("\n")
if query in score:
continue
if iteration.findtext("Iteration_message") == "No hits found":
if iteration.findtext("Iteration_query-def") in tair:
tairid = tair[iteration.findtext("Iteration_query-def")][0]
tairdes = tair[iteration.findtext("Iteration_query-def")][1]
else:
tairid = "-"
tairdes = "-"
goterms = ""
ecterms = ""
if iteration.findtext("Iteration_query-def") in godict:
for x in godict[iteration.findtext("Iteration_query-def")][:-1]:
goterms = goterms + x + ";"
goterms = goterms + godict[iteration.findtext("Iteration_query-def")][-1]
else:
goterms = "-"
if iteration.findtext("Iteration_query-def") in ecdict:
for x in ecdict[iteration.findtext("Iteration_query-def")][:-1]:
ecterms = ecterms + x + ";"
ecterms = ecterms + ecdict[iteration.findtext("Iteration_query-def")][-1]
else:
ecterms = "-"
if iteration.findtext("Iteration_query-def") in godescr:
desc = godescr[iteration.findtext("Iteration_query-def")]
else:
desc = "-"
n += 1
p = "PvOAK_up"+str(n) + "\t" + tranlen[iteration.findtext("Iteration_query-def")] + "\t" + orflen[iteration.findtext("Iteration_query-def")] + "\t" + "-" + "\t" + "-" + "\t" + tairid + "\t" + tairdes + "\t" + goterms + "\t" + ecterms + "\t" + desc + "\t" + str(flower[query][2]) + "\t" + str('{0:.2e}'.format(float(flower[query][1]))) + "\t" + str('{0:.2f}'.format(float(flower[query][0]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][2]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][1]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][0])))
print p
Hope you can help,
Thanks.
edit: I should say that godict and ecdict were previously created as follows - I can submit the entire code if needs be:
godict = {}
ecdict = {}
godescr = {}
f = open("507.FINAL_14.2.14_2_nr.out_fmt5.out.annot")
for line in f:
line = line.split("\t")
if len(line) > 2:
godescr[line[0]] = line[2]
line[1] = line[1].strip("\n")
if line[1].startswith("EC"):
if line[0] in ecdict:
a = ecdict[line[0]]
a.append(line[1])
ecdict[line[0]] = a
else:
ecdict[line[0]] = [line[1]]
else:
if line[0] in godict:
a = godict[line[0]]
a.append(line[1])
godict[line[0]] = a
else:
godict[line[0]] = [line[1]]
Traceback:
Traceback (most recent call last):
File "2d.test.py", line 170, in <module>
p = "PvOAK_up"+str(n) + "\t" + tranlen[iteration.findtext("Iteration_query-def")] + "\t" + orflen[iteration.findtext("Iteration_query-def")] + "\t" + "-" + "\t" + "-" + "\t" + tairid + "\t" + tairdes + "\t" + goterms + "\t" + ecterms + "\t" + desc + "\t" + str(flower[query][2]) + "\t" + str('{0:.2e}'.format(float(flower[query][1]))) + "\t" + str('{0:.2f}'.format(float(flower[query][0]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][2]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][1]))) + "\t" + str('{0:.2f}'.format(float(leaf[query][0])))
KeyError: 'XLOC_000434'