import urllib.request
import json
from collections import Counter
def count_coauthors(author_id):
    """Count how many papers each co-author shares with *author_id*.

    The author's paper list is fetched from the Semantic Scholar Graph API
    (``fields=name,papers.authors``) and every author entry on every paper
    is tallied by name.

    Args:
        author_id: Semantic Scholar author ID, e.g. ``'47490276'``.

    Returns:
        dict mapping co-author name -> number of co-authored papers. The
        author identified by *author_id* is not included.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    author_id = str(author_id)  # the API reports authorId values as strings
    url_str = (
        'https://api.semanticscholar.org/graph/v1/author/'
        f'{author_id}?fields=name,papers.authors'
    )
    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(url_str) as response:
        data = json.loads(response.read().decode())

    # One Counter pass replaces the manual "if name not in count" bookkeeping.
    coauthors_dict = Counter(
        author.get("name")
        for paper in data["papers"]
        for author in paper["authors"]
        if author.get("authorId") != author_id
    )
    return dict(coauthors_dict)
author_id = '47490276'
cc = count_coauthors(author_id)
top_coauthors = sorted(cc.items(), key=lambda item: item[1], reverse=True)
for co_author in top_coauthors[:10]:
print(co_author)
This is how my code looks so far; it runs without errors. I need to get rid of the rest of the printed text when I run it, so that the output looks like this:
('Diego Calvanese', 47)
('D. Lanti', 28)
('Martín Rezk', 21)
('Elem Güzel Kalayci', 18)
('B. Cogrel', 17)
('E. Botoeva', 16)
('E. Kharlamov', 16)
('I. Horrocks', 12)
('S. Brandt', 11)
('V. Ryzhikov', 11)
I have tried using rstrip and split on my 'c' variable, but it doesn't work. I'm only allowed to import what I have already imported, and I must use the link that is included.
Tips on simplifying or bettering the code is also appreciated!
("Extend the program below so that it prints the names of the top-10 coauthors together with the numbers of the coauthored publications")
From what I understand you are not quite sure where your successful output originates from. It is not the 5 lines at the end.
Your result is printed by the print(top) call near the end of the function. This top variable is what you want to return from the function, as the coauthors_dict you are currently returning never actually gets any data written to it.
You will also have to slightly adjust your sorted(...) as you now have a list and not a dictionary, but you should then get the correct result.
If I understand correctly you are wanting this function to return a count of each distinct co-author (excluding the author), which it seems like you already have in your count variable, which you don't return. The variable you DO return is empty.
Instead consider:
import urllib.request
import json
from collections import Counter
def count_coauthors(author_id):
    """Return ``(name, paper_count)`` pairs for every co-author of *author_id*.

    Args:
        author_id: Semantic Scholar author ID, e.g. ``'47490276'``.

    Returns:
        list of ``(co-author name, number of shared papers)`` tuples, in
        first-seen order (unsorted). The author themself is excluded.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    author_id = str(author_id)  # authorId values in the response are strings
    url_str = (f'https://api.semanticscholar.org/graph/v1/author/{author_id}?fields=name,papers.authors')
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url_str) as response:
        data = json.loads(response.read().decode())

    names = [
        author.get("name")
        for paper in data["papers"]
        for author in paper["authors"]
        if author['authorId'] != author_id
    ]
    # The comprehension above can be written long-hand like:
    # names = []
    # for paper in data["papers"]:
    #     for author in paper["authors"]:
    #         if author['authorId'] != author_id:
    #             names.append(author.get("name"))
    return list(Counter(names).items())
author_id = '47490276'
cc = count_coauthors(author_id)
top_coauthors = sorted(cc, key=lambda item: item[1], reverse=True)
for co_author in top_coauthors[:10]:
print(co_author)
('Diego Calvanese', 47)
('D. Lanti', 28)
('Martín Rezk', 21)
('Elem Güzel Kalayci', 18)
('B. Cogrel', 17)
('E. Botoeva', 16)
('E. Kharlamov', 16)
('I. Horrocks', 12)
('S. Brandt', 11)
('V. Ryzhikov', 11)
You might also consider moving the top-N logic into the function as an optional parameter:
import urllib.request
import json
from collections import Counter
def count_coauthors(author_id, top=0):
    """Return the co-authors of *author_id*, most frequent first.

    Args:
        author_id: Semantic Scholar author ID, e.g. ``'47490276'``.
        top: maximum number of entries to return; ``0`` (the default)
            returns every co-author.

    Returns:
        list of ``(co-author name, number of shared papers)`` tuples sorted
        by descending paper count. The author themself is excluded.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    author_id = str(author_id)  # authorId values in the response are strings
    url_str = (f'https://api.semanticscholar.org/graph/v1/author/{author_id}?fields=name,papers.authors')
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url_str) as response:
        data = json.loads(response.read().decode())

    names = [
        author.get("name")
        for paper in data["papers"]
        for author in paper["authors"]
        if author['authorId'] != author_id
    ]
    # most_common() with no argument yields all pairs, highest count first
    # (a stable sort, so ties keep first-seen order).
    ranked = Counter(names).most_common()
    return ranked if top == 0 else ranked[:top]
author_id = '47490276'
for auth in count_coauthors(author_id, top=10):
print(auth)
I'm making a thing for school, and I have no idea how to loop through the user keys. My test JSON is:
{
    "Users":{
        "admins":[{"Username":"Showierdata9978","id":4}],
        "Mods":[{"Username":"sssss","id":5}],
        "normal":[{"username":"ssaaa","id":7},{"username":"wwdaw","id":78},{"username":"wadwass","id":9}]
    },
    "Data":{
    }
}
This code is for a JSON data-saving structure, but I have no idea what keys someone testing it in the future will put into the file.
current actual code is
def read(self,io,itemid = None):
    """Load the JSON document at path *io* and visit its nested mappings.

    Nothing happens (and ``None`` is returned) when *itemid* is given; the
    visible code only implements the "read everything" case.

    Raises:
        FileDoesNotExist: *io* does not exist.
        NotJsonFile: *io* does not have a ``.json`` suffix.
        NotJsonFormat: the file does not start with ``{``.
    """
    if itemid is not None:
        return
    if not exists(io):
        raise FileDoesNotExist
    if pathlib.Path(io).suffix != ".json":
        raise NotJsonFile
    with open(io) as f:
        if f.read(1) != '{':
            raise NotJsonFormat
        # Rewind: the peeked '{' is part of the document, and json.load
        # would fail on the remainder if it stayed consumed.
        f.seek(0)
        dict1 = j.load(f)
    for key in dict1:
        # "has keys" means the value is itself a mapping.
        if isinstance(dict1[key], dict):
            # NOTE(review): loop_again is the asker's placeholder and is not
            # defined in the visible source; it presumably should descend
            # into dict1[key]. Confirm before relying on it.
            loop_again()
On the file-writing side, I have this odd code that writes Python source to a file, thanks to another Stack Overflow post:
self.picode = self.GeneratePyCode(Data, io)
open(
"DataSaver/DataSaver/CodeGenerator/CustumWrite.py",
'w').write("")
with open(
"DataSaver/DataSaver/CodeGenerator/CustumWrite.py",
'a') as f:
for NL in self.picode:
f.write(f"{NL}")
from .CodeGenerator.CustumWrite import Write
Write()
def GeneratePyCode(self, data, fp):
    """Build the source of a ``Write()`` function that updates JSON file *fp*.

    The generated function loads *fp*, calls ``.update(data)`` on the entry
    addressed by ``self.KeyStructure``, and dumps the result back to *fp*.

    Args:
        data: value interpolated verbatim into the generated ``update(...)``
            call.
        fp: path of the JSON file the generated code reads and rewrites.

    Returns:
        The accumulator holding the generated source (the caller iterates
        over it and writes each piece to a file).
    """
    # NOTE(review): gen is not defined in the visible source; presumably a
    # string-like accumulator supporting += -- confirm.
    g = gen()
    key = ""
    # Each entry is prepended, so the subscript chain ends up in reverse
    # KeyStructure order.
    for a in self.KeyStructure:
        key = f'{a}{key}'
    # repr() quotes and escapes the path, so backslashes or quote characters
    # in it cannot break the generated source.
    path_literal = repr(str(fp))
    g += 'import json\n'
    g += 'def Write():\n'
    g += f'\tdict1 = dict(json.load(open({path_literal},"r")))'
    # SECURITY: data is spliced in as Python source and later executed on
    # import; never pass untrusted input here.
    g += f'\n\tdict1{key}.update(\n\t\t{data}\n\t\t)\n'
    g += f"\tf = open({path_literal},'w')\n"
    g += '\tjson.dump(dict1,f)\n'
    # The caller assigns this result to self.picode and iterates over it, so
    # it must be returned rather than dropped.
    return g
I have a list of data points that contains a measurement every 5 minutes for 24 hours. I need to create a new list with the average of that measurement for each hour in the list. What's the best way to accomplish that?
Date Amount
2015-03-14T00:00:00.000-04:00 12545.869
2015-03-14T00:05:00.000-04:00 12467.326
2015-03-14T00:10:00.000-04:00 12416.948
2015-03-14T00:15:00.000-04:00 12315.698
2015-03-14T00:20:00.000-04:00 12276.38
2015-03-14T00:25:00.000-04:00 12498.696
2015-03-14T00:30:00.000-04:00 12426.145
2015-03-14T00:35:00.000-04:00 12368.659
2015-03-14T00:40:00.000-04:00 12322.785
2015-03-14T00:45:00.000-04:00 12292.719
2015-03-14T00:50:00.000-04:00 12257.965
2015-03-14T00:55:00.000-04:00 12221.375
2015-03-14T01:00:00.000-04:00 12393.725
2015-03-14T01:05:00.000-04:00 12366.674
2015-03-14T01:10:00.000-04:00 12378.578
2015-03-14T01:15:00.000-04:00 12340.754
2015-03-14T01:20:00.000-04:00 12288.511
2015-03-14T01:25:00.000-04:00 12266.136
2015-03-14T01:30:00.000-04:00 12236.639
2015-03-14T01:35:00.000-04:00 12181.668
2015-03-14T01:40:00.000-04:00 12171.992
2015-03-14T01:45:00.000-04:00 12164.298
2015-03-14T01:50:00.000-04:00 12137.282
2015-03-14T01:55:00.000-04:00 12116.486
2015-03-14T02:00:02.000-04:00 12090.439
2015-03-14T02:05:00.000-04:00 12085.924
2015-03-14T02:10:00.000-04:00 12034.78
2015-03-14T02:15:00.000-04:00 12037.367
2015-03-14T02:20:00.000-04:00 12006.649
2015-03-14T02:25:00.000-04:00 11985.588
2015-03-14T02:30:00.000-04:00 11999.41
2015-03-14T02:35:00.000-04:00 11943.121
2015-03-14T02:40:00.000-04:00 11934.346
2015-03-14T02:45:00.000-04:00 11928.568
2015-03-14T02:50:00.000-04:00 11918.63
2015-03-14T02:55:00.000-04:00 11885.698
2015-03-14T03:00:00.000-04:00 11863.065
2015-03-14T03:05:00.000-04:00 11883.256
2015-03-14T03:10:00.000-04:00 11870.095
2015-03-14T03:15:00.000-04:00 11849.104
2015-03-14T03:20:00.000-04:00 11849.18
2015-03-14T03:25:00.000-04:00 11834.229
2015-03-14T03:30:00.000-04:00 11826.603
2015-03-14T03:35:00.000-04:00 11823.516
2015-03-14T03:40:00.000-04:00 11849.386
2015-03-14T03:45:00.000-04:00 11832.385
2015-03-14T03:50:00.000-04:00 11847.059
2015-03-14T03:55:00.000-04:00 11831.807
2015-03-14T04:00:00.000-04:00 11844.027
2015-03-14T04:05:00.000-04:00 11873.114
2015-03-14T04:10:00.000-04:00 11904.105
2015-03-14T04:15:00.000-04:00 11879.018
2015-03-14T04:20:00.000-04:00 11899.658
2015-03-14T04:25:00.000-04:00 11887.808
2015-03-14T04:30:00.000-04:00 11879.875
2015-03-14T04:35:00.000-04:00 11924.149
2015-03-14T04:40:00.000-04:00 11929.499
2015-03-14T04:45:00.000-04:00 11932.086
2015-03-14T04:50:00.000-04:00 11989.847
2015-03-14T04:55:00.000-04:00 12000.971
This is a beautiful use of itertools.groupby because you can actually take advantage of the generators it returns instead of instantly making them lists or something:
import itertools, pprint
# Total the amounts per hour. Characters 11-12 of each ISO timestamp hold the
# hour, and groupby relies on lst already being in chronological order.
d = {
    hour: sum(amount for (_, amount) in readings)
    for (hour, readings) in itertools.groupby(lst, key=lambda l: int(l[0][11:13]))
}
pprint.pprint(d)
And for average instead of sum:
import itertools, pprint
def avg(gf):
    """Return the arithmetic mean of the numbers produced by iterable *gf*.

    The iterable is consumed in a single pass, so generators work.

    Raises:
        ValueError: if *gf* yields no values.
    """
    total = 0
    count = 0
    for value in gf:
        total += value
        count += 1
    if count == 0:
        # Previously this surfaced as an opaque UnboundLocalError.
        raise ValueError("avg() requires at least one value")
    return float(total) / count
# Average the amounts per hour. Characters 11-12 of each ISO timestamp hold
# the hour, and groupby relies on lst already being in chronological order.
d = {
    hour: avg(amount for (_, amount) in readings)
    for (hour, readings) in itertools.groupby(lst, key=lambda l: int(l[0][11:13]))
}
pprint.pprint(d)
Output (from the sum version above; with avg each value is divided by the 12 readings in that hour):
{0: 148410.565,
1: 147042.743,
2: 143850.52000000002,
3: 142159.685,
4: 142944.15699999998}
Where the key of the dictionary ([0,1,2,3,4]) corresponds to the hour of the timestamp.
Input:
lst = [
['2015-03-14T00:00:00.000-04:00', 12545.869 ],
['2015-03-14T00:05:00.000-04:00', 12467.326],
['2015-03-14T00:10:00.000-04:00', 12416.948],
['2015-03-14T00:15:00.000-04:00', 12315.698],
['2015-03-14T00:20:00.000-04:00', 12276.38],
['2015-03-14T00:25:00.000-04:00', 12498.696],
['2015-03-14T00:30:00.000-04:00', 12426.145],
['2015-03-14T00:35:00.000-04:00', 12368.659],
['2015-03-14T00:40:00.000-04:00', 12322.785],
['2015-03-14T00:45:00.000-04:00', 12292.719],
['2015-03-14T00:50:00.000-04:00', 12257.965],
['2015-03-14T00:55:00.000-04:00', 12221.375],
['2015-03-14T01:00:00.000-04:00', 12393.725],
['2015-03-14T01:05:00.000-04:00', 12366.674],
['2015-03-14T01:10:00.000-04:00', 12378.578],
['2015-03-14T01:15:00.000-04:00', 12340.754],
['2015-03-14T01:20:00.000-04:00', 12288.511],
['2015-03-14T01:25:00.000-04:00', 12266.136],
['2015-03-14T01:30:00.000-04:00', 12236.639],
['2015-03-14T01:35:00.000-04:00', 12181.668],
['2015-03-14T01:40:00.000-04:00', 12171.992],
['2015-03-14T01:45:00.000-04:00', 12164.298],
['2015-03-14T01:50:00.000-04:00', 12137.282],
['2015-03-14T01:55:00.000-04:00', 12116.486],
['2015-03-14T02:00:02.000-04:00', 12090.439],
['2015-03-14T02:05:00.000-04:00', 12085.924],
['2015-03-14T02:10:00.000-04:00', 12034.78],
['2015-03-14T02:15:00.000-04:00', 12037.367],
['2015-03-14T02:20:00.000-04:00', 12006.649],
['2015-03-14T02:25:00.000-04:00', 11985.588],
['2015-03-14T02:30:00.000-04:00', 11999.41],
['2015-03-14T02:35:00.000-04:00', 11943.121],
['2015-03-14T02:40:00.000-04:00', 11934.346],
['2015-03-14T02:45:00.000-04:00', 11928.568],
['2015-03-14T02:50:00.000-04:00', 11918.63],
['2015-03-14T02:55:00.000-04:00', 11885.698],
['2015-03-14T03:00:00.000-04:00', 11863.065],
['2015-03-14T03:05:00.000-04:00', 11883.256],
['2015-03-14T03:10:00.000-04:00', 11870.095],
['2015-03-14T03:15:00.000-04:00', 11849.104],
['2015-03-14T03:20:00.000-04:00', 11849.18],
['2015-03-14T03:25:00.000-04:00', 11834.229],
['2015-03-14T03:30:00.000-04:00', 11826.603],
['2015-03-14T03:35:00.000-04:00', 11823.516],
['2015-03-14T03:40:00.000-04:00', 11849.386],
['2015-03-14T03:45:00.000-04:00', 11832.385],
['2015-03-14T03:50:00.000-04:00', 11847.059],
['2015-03-14T03:55:00.000-04:00', 11831.807],
['2015-03-14T04:00:00.000-04:00', 11844.027],
['2015-03-14T04:05:00.000-04:00', 11873.114],
['2015-03-14T04:10:00.000-04:00', 11904.105],
['2015-03-14T04:15:00.000-04:00', 11879.018],
['2015-03-14T04:20:00.000-04:00', 11899.658],
['2015-03-14T04:25:00.000-04:00', 11887.808],
['2015-03-14T04:30:00.000-04:00', 11879.875],
['2015-03-14T04:35:00.000-04:00', 11924.149],
['2015-03-14T04:40:00.000-04:00', 11929.499],
['2015-03-14T04:45:00.000-04:00', 11932.086],
['2015-03-14T04:50:00.000-04:00', 11989.847],
['2015-03-14T04:55:00.000-04:00', 12000.971],
]
Edit: per discussion in comments, what about:
import itertools, pprint
def avg(gf):
    """Return the arithmetic mean of the numbers produced by iterable *gf*.

    The iterable is consumed in a single pass, so generators work.

    Raises:
        ValueError: if *gf* yields no values.
    """
    total = 0
    count = 0
    for value in gf:
        total += value
        count += 1
    if count == 0:
        # Previously this surfaced as an opaque UnboundLocalError.
        raise ValueError("avg() requires at least one value")
    return float(total) / count
d = {}
for (_, readings) in itertools.groupby(lst, key=lambda l: int(l[0][11:13])):
    rows = list(readings)  # materialise: the group is read twice below
    label = rows[0][0][:13]  # date plus hour, e.g. '2015-03-14T00'
    d[label] = avg(amount for (_, amount) in rows)
pprint.pprint(d)
You can do this pretty easily using a variety of tools, but I'll use a simple loop for simplicity sake:
with open("listfile.txt", "r") as e:
    list_ = e.read().splitlines()
list_ = list_[1:]  # Grab all but the first line (the column header)

# Maps "date + hour" (e.g. '2015-03-14T00') to that hour's average amount.
dateValue = dict()
for row in list_:
    date, value = row.split()
    if ":00:" in date:
        # Start new value (first reading of the hour)
        amount = float(value)

    elif ":55:" in date:
        # End new value: the last reading of the hour counts too
        amount += float(value)
        hour = date.split(':')[0]  # Grab only date and hour info
        dateValue[hour] = amount / 12.  # 12 five-minute readings per hour
        del amount  # Just in case the data isn't uniform, so it raises an error

    else:
        amount += float(value)
If you want to export it to lists, just do:
# Export the hourly averages as two parallel lists, ordered by date/hour key.
listDate = sorted(dateValue.keys())
listAmount = [dateValue.get(day_hour) for day_hour in listDate]
quick and dirty way
reads= [
'2015-03-14T00:00:00.000-04:00 12545.869',
'2015-03-14T00:05:00.000-04:00 12467.326',
'2015-03-14T00:10:00.000-04:00 12416.948',
'2015-03-14T00:15:00.000-04:00 12315.698',
'2015-03-14T00:20:00.000-04:00 12276.38',
'2015-03-14T00:25:00.000-04:00 12498.696',
'2015-03-14T00:30:00.000-04:00 12426.145',
'2015-03-14T00:35:00.000-04:00 12368.659',
'2015-03-14T00:40:00.000-04:00 12322.785',
'2015-03-14T00:45:00.000-04:00 12292.719',
'2015-03-14T00:50:00.000-04:00 12257.965',
'2015-03-14T00:55:00.000-04:00 12221.375',
'2015-03-14T01:00:00.000-04:00 12393.725',
'2015-03-14T01:05:00.000-04:00 12366.674',
'2015-03-14T01:10:00.000-04:00 12378.578',
'2015-03-14T01:15:00.000-04:00 12340.754',
'2015-03-14T01:20:00.000-04:00 12288.511',
'2015-03-14T01:25:00.000-04:00 12266.136',
'2015-03-14T01:30:00.000-04:00 12236.639',
'2015-03-14T01:35:00.000-04:00 12181.668',
'2015-03-14T01:40:00.000-04:00 12171.992',
'2015-03-14T01:45:00.000-04:00 12164.298',
'2015-03-14T01:50:00.000-04:00 12137.282',
'2015-03-14T01:55:00.000-04:00 12116.486'
]
# Accumulate a running total and a reading count per hour, so the average is
# right even when an hour has more or fewer than 12 readings.
sums = {}
counts = {}
for read in reads:
    hour = read.split(':')[0]  # date plus hour, e.g. '2015-03-14T00'
    value = float(read.split()[-1])
    sums[hour] = sums.get(hour, 0.0) + value
    counts[hour] = counts.get(hour, 0) + 1
avg = {hour: sums[hour] / counts[hour] for hour in sums}
print(avg)
I have been trying to use a list in Python. I call it once to store a value
inside of one loop. I then call it again inside of another loop, but by the
time I add something to the list, it has overwritten my old entries with the
new ones, before I ever actually add the new one. It may be that I do not fully
understand Python, but I will post a simple version of the code below, then the
whole version:
ret=[]
plan=argument
for i in range(x):
plan.changeY
ret.append(plan)
plan=argument
for i in range(Z):
plan.changeD
ret.append(plan)
plan=argument
The problem arises before I get to the second append: all the values of the
first append are altered. The code for it is below.
global PLANCOUNTER
global VARNUM
ret=[]
ret=[]
plan = node.state
keep = plan
if printPlans:
print "fringe[0].state:",plan.id, "---"
printstate(plan)
if node.parent:
print "parent: plan",node.parent.state.id
if len(plan.openconds)>0:
print plan.openconds[0],"is the condition being resolved\n"
openStep = plan.openconds[0][1]#access the step
openStep1=openStep
openCond = plan.openconds[0][0]
plan.openconds = plan.openconds[1:]
keep=plan
if(len(plan.steps)>1):#has init and goal!
########################
#NOT GETTING RID OF THE OPENCOND, SO ASTAR NEVER TAKING IT
#######################
if openStep!="init" and openStep!="goal":
val = openStep.index("*")
openStep=openStep[:val]
numPreConds=len(preconds[openStep])
numStep=len(plan.steps)
for i in plan.steps:
i2=i
plan = keep
if i!="init" and i!="goal":
i=i[:i.index("*")]
if i !="goal" and i!=openStep:
for j in adds[i]:
bool=0
if j==openCond:
plan.causallinks.append((i2,openCond,openStep1))#problem
plan.ordercons.append((i2,openStep1))
PLANCOUNTER+=1
plan.id=PLANCOUNTER
#threats
bol=0
for t in plan.steps:#all steps
t2=t
if t!="init" and t!="goal":
val = t.index("*")
t=t[:val]
for k in deletes[t]:
if k == openCond:
for b in plan.ordercons:
if b ==(t,i):
bol=1
if bol==0 and t!=i:
for v in plan.threats:
if v[0]==(i2,openCond,openStep1) and v[1]==t2:
bol=1
if bol==0 and t!=i and i2!="init":
plan.threats.append(((i2,openCond,openStep1),t2))
else:
bol=0
ret.append(plan)
print len(plan.openconds)+len(plan.threats)," upper\n"
plan=keep
meh=ret
counter=0
arr={}
for k in ret:
print len(k.openconds)+len(k.threats)," ", k.id,"middle"
key = counter
arr[counter]=k
print arr[counter].id
counter+=1
for i in adds:#too many conditions
stepCons = i
plan2 = keep
if i!="goal" and i!="init" and i!=openStep:
for j in adds[i]:
if j==openCond:
nextStep=i
st = str(i)+"*"+str(VARNUM)
VARNUM+=1
plan2.steps.append(st)
plan2.ordercons.append(("init",st))
plan2.ordercons.append((st, "goal"))
plan2.ordercons.append((st,openStep1))
plan2.causallinks.append((st,openCond,openStep1))
##################################################
for k in preconds[i]:#issue is htereeeeeeeee
plan2.openconds.append((k,st))
for k in meh:
print len(k.openconds)+len(k.threats)," ", k.id,"middle2s"
PLANCOUNTER+=1
plan2.id=PLANCOUNTER
#threats
cnt=0
for tr in range(len(arr)):
print len(arr[cnt].openconds)+len(arr[cnt].threats)," ", arr[cnt].id,"middlearr"
cnt+=1
bol=0
for t in plan2.steps:#all steps
t2=t
if t!="init" and t!="goal":
val = t.index("*")
t=t[:val]
for k in deletes[t]:#check their delete list
if k == openCond:#if our condition is on our delete lise
for b in plan2.ordercons:
if b ==(t,i):# and it is not ordered before it
bol=1
if bol==0 and t!=i:
for v in plan2.threats:
if v[0]==(i2,openCond,openStep1) and v[1]==t2:
bol=1
if bol==0 and t!=i and st!="init":
plan2.threats.append(((st,openCond,openStep1),t2))
else:
bol=0
#and y is not before C
#causal link, threatening step
for k in ret:
print len(k.openconds)+len(k.threats)," ", k.id,"middle3"
ret.append(plan2)
print len(plan2.openconds)+len(plan2.threats)," ",plan2.id," lower\n"
elif len(plan.threats)>0:
#keep=plan
openThreatProducer = plan.threats[0][0][0]#access the step
openThreat=plan.threats[0][1]
plan.threats=plan.threats[1:]
print openThreatProducer, " ", openThreat
i=0
while i<2:
plan = keep
if i==0:
bool=0
for k in plan.ordercons:
if (k[0]==openThreat and k[1]==openThreatProducer) or (k[1]==openThreat and k[0]==openThreatProducer):
bool=1
if bool==0:
plan.ordercons.append((openThreatProducer,openThreat))
elif i==1:
bool=0
for k in plan.ordercons:
if (k[0]==openThreat and k[1]==openThreatProducer) or (k[1]==openThreat and k[0]==openThreatProducer):
bool=1
if bool==0:
plan.ordercons.append((openThreat,openThreatProducer))
ret.append(plan)
i+=1
t=len(ret)
for k in ret:
print len(k.openconds)+len(k.threats)," ", k.id,"lowest"
print t
return ret
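It seems that after doing plan=argument, plan and argument refer to the same object, so every change made through plan also shows up in argument and in everything already appended to the list. You should do something like this: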
import copy
plan = copy.deepcopy(argument)
This would create an exact, independent copy of argument.
I have a for loop which references a dictionary and prints out the value associated with the key. Code is below:
for i in data:
if i in dict:
print dict[i],
How would i format the output so a new line is created every 60 characters? and with the character count along the side for example:
0001
MRQLLLISDLDNTWVGDQQALEHLQEYLGDRRGNFYLAYATGRSYHSARELQKQVGLMEP
0061
DYWLTAVGSEIYHPEGLDQHWADYLSEHWQRDILQAIADGFEALKPQSPLEQNPWKISYH
0121 LDPQACPTVIDQLTEMLKETGIPVQVIFSSGKDVDLLPQRSNKGNATQYLQQHLAMEPSQ
It's a finicky formatting problem, but I think the following code:
import sys
class EveryN(object):
    """File-like wrapper that re-flows written text into tagged lines.

    Every output line starts with a zero-padded tag giving the 1-based
    position of its first character, then a space, then at most ``n``
    characters of the written text.
    """

    def __init__(self, n, outs):
        if n < 1:
            # A non-positive width could never make progress in write().
            raise ValueError("n must be at least 1")
        self.n = n        # chars/line
        self.outs = outs  # output stream
        self.numo = 1     # next tag to write
        self.tll = 0      # tot chars on this line

    def write(self, s):
        """Write *s*, inserting a tag and a newline every ``n`` characters."""
        # Looping on `s` makes an empty write a no-op; it used to emit a
        # stray tag at the start of a line and skip ahead in the numbering.
        while s:
            if self.tll == 0:  # start of line: emit tag
                self.outs.write('%4.4d ' % self.numo)
                self.numo += self.n
            # write up to N chars/line, no more
            numw = min(len(s), self.n - self.tll)
            self.outs.write(s[:numw])
            self.tll += numw
            if self.tll >= self.n:
                self.tll = 0
                self.outs.write('\n')
            s = s[numw:]

    def flush(self):
        """Flush the wrapped stream, if it supports flushing."""
        # Objects installed as sys.stdout are expected to offer flush().
        flush = getattr(self.outs, 'flush', None)
        if flush is not None:
            flush()
if __name__ == '__main__':
sys.stdout = EveryN(60, sys.stdout)
for i, a in enumerate('abcdefgh'):
print a*(5+ i*5),
shows how to do it -- the output when running for demonstration purposes as the main script (five a's, ten b's, etc, with spaces in-between) is:
0001 aaaaa bbbbbbbbbb ccccccccccccccc dddddddddddddddddddd eeeeee
0061 eeeeeeeeeeeeeeeeeee ffffffffffffffffffffffffffffff ggggggggg
0121 gggggggggggggggggggggggggg hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
0181 hhhhhhh
# test data
data = range(10)
the_dict = dict((i, str(i)*200) for i in range( 10 ))
# your loops as a generator
lines = ( the_dict[i] for i in data if i in the_dict )
def format( line ):
    """Split *line* into 60-character blocks, each prefixed with its offset.

    Every block is rendered as a zero-padded 1-based start position, a
    space, and the block itself; blocks are joined with newlines. An empty
    *line* gives an empty string.
    """
    numbered = []
    offset = 0
    chunk = line[offset:offset + 60]
    while chunk:  # an empty slice means the input is exhausted
        numbered.append("%04d %s" % (offset + 1, chunk))
        offset += 60
        chunk = line[offset:offset + 60]
    return '\n'.join(numbered)
for line in lines:
print format(line)
I haven't tested it on actual data, but I believe the code below would do the job. It first builds up the whole string, then outputs it a 60-character line at a time. It uses the three-argument version of range() to count by 60.
s = ''.join(dict[i] for i in data if i in dict)
for i in range(0, len(s), 60):
print '%04d %s' % (i+1, s[i:i+60])
It seems like you're looking for textwrap
The textwrap module provides two convenience functions, wrap() and
fill(), as well as TextWrapper, the class that does all the work, and
a utility function dedent(). If you’re just wrapping or filling one or
two text strings, the convenience functions should be good enough;
otherwise, you should use an instance of TextWrapper for efficiency.