How to get rid of the rest of the text after getting the results I want? - python

import urllib.request
import json
from collections import Counter
def count_coauthors(author_id):
coauthors_dict = {}
url_str = ('https://api.semanticscholar.org/graph/v1/author/47490276?fields=name,papers.authors')
respons = urllib.request.urlopen(url_str)
text = respons.read().decode()
for line in respons:
print(line.decode().rstip())
data = json.loads(text)
print(type(data))
print(list(data.keys()))
print(data["name"])
print(data["authorId"])
name = []
for lines in data["papers"]:
for authors in lines["authors"]:
name.append(authors.get("name"))
print(name)
count = dict()
names = name
for i in names:
if i not in count:
count[i] = 1
else:
count[i] += 1
print(count)
c = Counter(count)
top = c.most_common(10)
print(top)
return coauthors_dict
author_id = '47490276'
cc = count_coauthors(author_id)
top_coauthors = sorted(cc.items(), key=lambda item: item[1], reverse=True)
for co_author in top_coauthors[:10]:
print(co_author)
This is how my code looks this far, there are no error. I need to get rid of the rest of the text when I run it, so it should look like this:
('Diego Calvanese', 47)
('D. Lanti', 28)
('Martín Rezk', 21)
('Elem Güzel Kalayci', 18)
('B. Cogrel', 17)
('E. Botoeva', 16)
('E. Kharlamov', 16)
('I. Horrocks', 12)
('S. Brandt', 11)
('V. Ryzhikov', 11)
I have tried using rstrip and split on my 'c' variable but it doesn't work. Im only allowed importing what I already have imported and must use the link which is included.
Tips on simplifying or bettering the code is also appreciated!
("Extend the program below so that it prints the names of the top-10 coauthors together with the numbers of the coauthored publications")

From what I understand you are not quite sure where your successful output originates from. It is not the 5 lines at the end.
Your result is printed by the print(top) on line 39. This top variable is what you want to return from the function, as the coauthors_dict you are currently returning never actually gets any data written to it.
You will also have to slightly adjust your sorted(...) as you now have a list and not a dictionary, but you should then get the correct result.

If I understand correctly you are wanting this function to return a count of each distinct co-author (excluding the author), which it seems like you already have in your count variable, which you don't return. The variable you DO return is empty.
Instead consider:
import urllib.request
import json
from collections import Counter
def count_coauthors(author_id):
url_str = (f'https://api.semanticscholar.org/graph/v1/author/{author_id}?fields=name,papers.authors')
response = urllib.request.urlopen(url_str)
text = response.read().decode()
data = json.loads(text)
names = [a.get("name") for l in data["papers"] for a in l["authors"] if a['authorId'] != author_id]
#The statement above can be written long-hand like:
#names=[]
#for l in data["papers"]:
# for a in l["authors"]:
# if a['authorId'] != author_id:
# names.append(a.get("name"))
return list(Counter(names).items())
author_id = '47490276'
cc = count_coauthors(author_id)
top_coauthors = sorted(cc, key=lambda item: item[1], reverse=True)
for co_author in top_coauthors[:10]:
print(co_author)
('Diego Calvanese', 47)
('D. Lanti', 28)
('Martín Rezk', 21)
('Elem Güzel Kalayci', 18)
('B. Cogrel', 17)
('E. Botoeva', 16)
('E. Kharlamov', 16)
('I. Horrocks', 12)
('S. Brandt', 11)
('V. Ryzhikov', 11)
You might also consider moving the top N logic into the function as an optional paramter:
import urllib.request
import json
from collections import Counter
def count_coauthors(author_id, top=0):
url_str = (f'https://api.semanticscholar.org/graph/v1/author/{author_id}?fields=name,papers.authors')
response = urllib.request.urlopen(url_str)
text = response.read().decode()
data = json.loads(text)
names = [a.get("name") for l in data["papers"] for a in l["authors"] if a['authorId'] != author_id]
name_count = list(Counter(names).items())
top = top if top!=0 else len(name_count)
return sorted(name_count, key=lambda x: x[1], reverse=True)[:top]
author_id = '47490276'
for auth in count_coauthors(author_id, top=10):
print(auth)

Related

Is there a way to retrieve the number of citations per year of an article using the inspirehep website?

I have a database of scientific articles with their authors, the date of publication (on arXiV) and their respective arXiV id. Now, I want to add to this database the number of citations each year after the article has been created.
For instance, I would to like to retrieve the graph on the right hand side (example).
Is there an API that could help me?
I could use this method here opencitationAPI, but I wondered if there was a more straightforward way using the inspirehep data.
I figured out how to do that by using the inspirehep api. A sleeping time should also be considered.
import pandas as pd
import requests
from collections import defaultdict
ihep_search_arxiv = "https://inspirehep.net/api/arxiv/"
ihep_search_article = "https://inspirehep.net/api/literature?sort=mostcited&size=100&page=1&q=refersto%3Arecid%3A"
year = [str(x+1) for x in range(2009,2022)]
def count_year(year, input_list):
#counting the number of citations each year
year_count = {}
for y in year:
if input_list[0] == 'NaN':
year_count[y] = 0
else:
year_count[y] = input_list.count(y)
return year_count
def get_cnumber():
citation_url = []
for id in arxiv_id:
inspirehep_url_arxiv = f"{ihep_search_arxiv}{id}"
control_number = requests.get(inspirehep_url_arxiv).json()["metadata"]["control_number"]
citation_url.append(f"{ihep_search_article}{control_number}")
return citation_url
def get_citations():
citation_url = get_cnumber()
citation_per_year = pd.DataFrame(columns=year)
citation_date = defaultdict(list)
for i, url in enumerate(citation_url):
data_article = requests.get(url).json()
if len(data_article["hits"]["hits"]) == 0:
citation_date[i].append('NaN')
else :
for j, _ in enumerate(data_article["hits"]["hits"]):
citation_date[i].append(data_article["hits"]["hits"][j]["created"][:4])
for p, _ in enumerate(citation_date):
citation_per_year = citation_per_year.append(count_year(year,citation_date[p]), True)
citation_per_year.insert(0,"arxiv_id",arxiv_id,True)
return citation_per_year
arxiv_id = recollect_data() #list of arxiv ids collected in a separate way
print(get_citations())

Attempting to Traverse a search API Collecting results in Python 3.9

I am attempting to yield all results from a search api.
There are 3 key cases:
Case 1: There are 0 results yielded
Case 2: There are 1-9 results yielded
Case 3: There are 10 results yielded
In both cases 1 and 2, we can conclude that there are no results deeper. i.e if we searched for "iiiiia" and it yields 0 then that means there are no results for "iiiiia" and no results deeper than "iiiiia". If "iiiiia" yields 1-9 results, then we can concluded "iiiiia" yields 109 results and no results deeper.
In case 3, if we search for "iiiiia" and it yields 10, we can conclude that "iiiiia" has 10 results, and there may or may not be results deeper as 10 means 10+ results possible. In this case we would have to move one layer down the chain and traverse through those results. i.e "iiiiia0"-"iiiiiaz" and if any of those yield 10 we would also have to go one layer deeper.
an example of how this may look:
a
a0
a00
a000
a0000
a0001
...
a000z
a001
a002
a003
a0030
...
a0034
a00340
...
a0034z
a0035...
Here is my attempted code:
import json
from time import sleep
import urllib3
import requests
import re
import random
import sys
import numpy
# Saving the reference of the standard output
original_stdout = sys.stdout
header = {}
link = '/search?query='
http = requests.session()
fileName = '/output.txt'
hasCompleted = 0
number = '11'
def search(target):
global targetList
global link
global header
global http
resp = http.request('GET',link+target,headers=header)
match = re.findall('"key":"(.+?)","',resp.text)
if len(match) == 10:
return False
if len(match) == 0:
return " "
elif len(match) < 10:
return resp.text
def treeSearch():
global fileName
global number
global hasCompleted
if hasCompleted == 1:
new = (int(int(number, 36) / 36) + 1)
new = numpy.base_repr(new, 36)
number = new
hasCompleted = 0
if hasCompleted == 0:
x = 0
new = int(int(number, 36)*36)
new = numpy.base_repr(new, 36)
number = new
while x < 37:
new = int(int(number, 36) + 1)
new = numpy.base_repr(new, 36)
number = new
result = search(number)
print(number)
if result:
with open(fileName, 'a+') as f:
sys.stdout = f
print(result)
sys.stdout = original_stdout
x = x + 1
else:
treeSearch()
temp = number
number = (int(int(number, 36) / 36) + 1) #maybe not + 1
print(number)
number = numpy.base_repr(number, 36)
print(number)
result = search(number)
if not result == " ":
new = int(int(number, 36)*36)
new = numpy.base_repr(new, 36)
number = new
hasCompleted = 1
treeSearch()
Here is my output:
111
1111
11111
111111
1111111
11111111
111111111
1111111111
11111111111
111111111111
1111111111111
11111111111111
111111111111111
1111111111111111
1111111111111112
1111111111111113
1111111111111114
1111111111111115
1111111111111116
1111111111111117
1111111111111118
1111111111111119
111111111111111A
111111111111111B
111111111111111C
111111111111111D
111111111111111E
111111111111111F
111111111111111G
111111111111111H
111111111111111I
111111111111111J
111111111111111K
111111111111111L
111111111111111M
111111111111111N
111111111111111O
111111111111111P
111111111111111Q
111111111111111R
111111111111111S
111111111111111T
111111111111111U
111111111111111V
111111111111111W
111111111111111X
111111111111111Y
111111111111111Z
1111111111111120
1111111111111121
6316397706306666889217
11111111110QR5T
11111111110QR5U
11111111110QR5V
11111111110QR5W
11111111110QR5X
11111111110QR5Y
11111111110QR5Z
11111111110QR60
11111111110QR61
11111111110QR62
11111111110QR63
11111111110QR64
11111111110QR65
11111111110QR66
11111111110QR67
11111111110QR68
11111111110QR69
11111111110QR6A
11111111110QR6B
11111111110QR6C
11111111110QR6D
11111111110QR6E
11111111110QR6F
11111111110QR6G
11111111110QR6H
11111111110QR6I
11111111110QR6J
11111111110QR6K
11111111110QR6L
11111111110QR6M
11111111110QR6N
11111111110QR6O
11111111110QR6P
11111111110QR6Q
11111111110QR6R
11111111110QR6S
11111111110QR6T
11111111110QR6U
175455491841851850753
11111111110L4X
11111111110L4Y
11111111110L4Z
11111111110L50
11111111110L51
11111111110L52
11111111110L53
11111111110L54
11111111110L55
11111111110L56
11111111110L57
11111111110L58
11111111110L59
11111111110L5A
11111111110L5B
11111111110L5C
11111111110L5D
11111111110L5E
11111111110L5F
11111111110L5G
11111111110L5H
11111111110L5I
11111111110L5J
11111111110L5K
11111111110L5L
11111111110L5M
11111111110L5N
11111111110L5O
11111111110L5P
11111111110L5Q
11111111110L5R
11111111110L5S
11111111110L5T
11111111110L5U
11111111110L5V
11111111110L5W
11111111110L5X
11111111110L5Y
4873763662273662977
11111111110XT
11111111110XU
11111111110XV
11111111110XW
11111111110XX
11111111110XY
11111111110XZ
11111111110Y0
11111111110Y1
11111111110Y2
11111111110Y3
11111111110Y4
11111111110Y5
11111111110Y6
11111111110Y7
11111111110Y8
11111111110Y9
11111111110YA
11111111110YB
11111111110YC
11111111110YD
11111111110YE
11111111110YF
11111111110YG
11111111110YH
11111111110YI
11111111110YJ
11111111110YK
11111111110YL
11111111110YM
11111111110YN
11111111110YO
11111111110YP
11111111110YQ
11111111110YR
11111111110YS
11111111110YT
11111111110YU
135382323952046193
11111111110X
11111111110Y
11111111110Z
111111111110
111111111111
111111111121
111111111122
111111111123
111111111124
111111111125
111111111126
111111111127
111111111128
111111111129
11111111112A
11111111112B
11111111112C
11111111112D
11111111112E
11111111112F
11111111112G
11111111112H
11111111112I
11111111112J
11111111112K
11111111112L
11111111112M
11111111112N
11111111112O
11111111112P
11111111112Q
11111111112R
11111111112S
11111111112T
11111111112U
11111111112V
11111111112W
11111111112X
11111111112Y
11111111112Z
111111111130
111111111131
3760620109779064
11111111114
111111111141
111111111142
111111111143
111111111144
111111111145
111111111146
111111111147
111111111148
111111111149
11111111114A
11111111114B
11111111114C
11111111114D
11111111114E
11111111114F
11111111114G
11111111114H
11111111114I
11111111114J
11111111114K
11111111114L
11111111114M
11111111114N
11111111114O
11111111114P
11111111114Q
11111111114R
11111111114S
11111111114T
11111111114U
11111111114V
11111111114W
11111111114X
11111111114Y
3760620109779066
11111111116
11111111117
11111111118
11111111119
1111111111A
1111111111B
1111111111C
1111111111D
1111111111E
1111111111F
1111111111G
1111111111H
1111111111I
1111111111J
1111111111K
1111111111L
1111111111M
1111111111N
1111111111O
1111111111P
1111111111Q
1111111111R
1111111111S
1111111111T
1111111111U
1111111111V
1111111111W
1111111111X
1111111111Y
1111111111Z
11111111120
11111111121
11111111122
11111111123
11111111124
11111111125
11111111126
11111111127
104461669716087
1111111113
1111111114
1111111115
1111111116
1111111117
1111111118
1111111119
111111111A
111111111B
111111111C
111111111D
111111111E
111111111F
111111111G
111111111H
111111111I
111111111J
111111111K
111111111L
111111111M
111111111N
111111111O
111111111P
111111111Q
111111111R
111111111S
111111111T
111111111U
111111111V
111111111W
111111111X
111111111Y
111111111Z
1111111120
1111111121
1111111122
1111111123
1111111124
2901713047671
111111113
111111114
111111115
111111116
111111117
111111118
111111119
11111111A
11111111B
11111111C
11111111D
11111111E
11111111F
11111111G
11111111H
11111111I
11111111J
11111111K
11111111L
11111111M
11111111N
11111111O
11111111P
11111111Q
11111111R
11111111S
11111111T
11111111U
11111111V
11111111W
11111111X
11111111Y
11111111Z
111111120
111111121
111111122
111111123
111111124
80603140215
11111113
11111114
11111115
11111116
11111117
11111118
11111119
1111111A
1111111B
1111111C
1111111D
1111111E
1111111F
1111111G
1111111H
1111111I
1111111J
1111111K
1111111L
1111111M
1111111N
1111111O
1111111P
1111111Q
1111111R
1111111S
1111111T
1111111U
1111111V
1111111W
1111111X
1111111Y
1111111Z
11111120
11111121
11111122
11111123
11111124
2238976119
1111113
11111131
11111132
11111133
11111134
11111135
11111136
11111137
11111138
11111139
1111113A
1111113B
1111113C
1111113D
1111113E
1111113F
1111113G
1111113H
1111113I
1111113J
1111113K
1111113L
1111113M
1111113N
1111113O
1111113P
1111113Q
1111113R
1111113S
1111113T
1111113U
1111113V
1111113W
1111113X
1111113Y
1111113Z
11111140
11111141
2238976121
1111115
11111151
11111152
11111153
11111154
11111155
11111156
11111157
11111158
11111159
1111115A
1111115B
1111115C
1111115D
1111115E
1111115F
1111115G
1111115H
1111115I
1111115J
1111115K
1111115L
1111115M
1111115N
1111115O
1111115P
1111115Q
1111115R
1111115S
1111115T
1111115U
1111115V
1111115W
1111115X
1111115Y
1111115Z
11111160
11111161
2238976123
1111117
11111171
11111172
11111173
11111174
11111175
11111176
11111177
11111178
11111179
1111117A
1111117B
1111117C
1111117D
1111117E
1111117F
1111117G
1111117H
1111117I
1111117J
1111117K
1111117L
1111117M
1111117N
1111117O
1111117P
1111117Q
1111117R
1111117S
1111117T
1111117U
1111117V
1111117W
1111117X
1111117Y
1111117Z
11111180
11111181
2238976125
1111119
111111A
111111B
111111C
111111D
111111E
111111F
111111G
111111H
111111I
111111J
111111K
111111L
111111M
111111N
111111O
111111P
111111Q
111111R
111111S
111111T
111111U
111111V
111111W
111111X
111111Y
111111Z
1111120
1111121
1111122
1111123
1111124
1111125
1111126
1111127
1111128
1111129
111112A
62193783
111113
1111131
1111132
1111133
1111134
1111135
1111136
1111137
1111138
1111139
111113A
111113B
111113C
111113D
111113E
111113F
111113G
111113H
111113I
111113J
111113K
111113L
111113M
111113N
111113O
111113P
111113Q
111113R
111113S
111113T
111113U
111113V
111113W
111113X
111113Y
111113Z
1111140
1111141
62193785
111115
My code traverses in only deeper once then comes out, I will keep working on my code however I am hoping to find an easier solution or possibly a library that can perform this style of search. Thanks!

'itertools._grouper' object has no attribute 'user'

Why can't I convert the loop group in groupby as list? Currently, I am working on Django==2.2.1 and when I try this data = [...] below into python console, it is working fine.
from itertools import groupby
from operator import itemgetter
#login_required
def list(request, template_name='cart/list.html'):
# I also try with this dummy data
test_data = [{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0}]
print(type(data)) # a list
sorted_totals = sorted(test_data, key=itemgetter('total_order'))
for agent_name, group in groupby(sorted_totals, key=lambda x: x['agent_name']):
print(agent_name, list(group)) # I stopped here when converting the `group` as list.
But, I am getting an error looking like this when I try it at views in Django.
I also tried it with defaultdict
from collections import defaultdict
#login_required
def list(request, template_name='cart/list.html'):
test_data = [{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0}]
grouped = defaultdict(list)
for data_total in test_data:
grouped[data_total['agent_name']].append(data_total) # stoped here
grouped_out = []
for agent_name, group in grouped.items():
total_order = 0
total_pcs = 0
total_kg = 0
if isinstance(group, list):
for data_total in group:
total_order += data_total.get('total_order')
total_pcs += data_total.get('total_pcs')
total_kg += data_total.get('total_kg')
grouped_out.append({
'agent_name': agent_name,
'total_order': total_order,
'total_pcs': total_pcs,
'total_kg': total_kg
})
But the error I found stoped by wrapper view. If we following the previous issue, it referenced with this _wrapped_view
Finally, I fixed it manually by using a dict.
test_data = [{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':5.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agentbeli','total_pcs':1,'total_kg':6.0},{'total_order':1,'agent_name':'agent123','total_pcs':1,'total_kg':7.0}]
grouped = {}
for data_total in test_data:
agent_name = data_total.get('agent_name')
if agent_name in grouped:
new_data = grouped[agent_name] # dict
new_data['total_order'] += data_total.get('total_order')
new_data['total_pcs'] += data_total.get('total_pcs')
new_data['total_kg'] += data_total.get('total_kg')
grouped[agent_name].update(**new_data)
else:
grouped[agent_name] = data_total
And the result of grouped is look like this:
{'agent123': {'agent_name': 'agent123',
'total_kg': 18.0,
'total_order': 3,
'total_pcs': 3},
'agentbeli': {'agent_name': 'agentbeli',
'total_kg': 17.0,
'total_order': 3,
'total_pcs': 3}}

How can I count different values per same key with Python?

I have a code which is able to give me the list like this:
Name id number week number
Piata 4 6
Mali 2 20,5
Goerge 5 4
Gooki 3 24,64,6
Mali 5 45,9
Piata 6 1
Piata 12 2,7,8,27,16 etc..
with the below code:
import csv
from datetime import date
datedict = defaultdict(set)
with open('d:/info.csv', 'r') as csvfile:
filereader = csv.reader(csvfile, 'excel')
#passing the header
read_header = False
start_date=date(year=2009,month=1,day=1)
#print((seen_date - start_date).days)
tdic = {}
for row in filereader:
if not read_header:
read_header = True
continue
# reading the rest rows
name,id,firstseen = row[0],row[1],row[3]
try:
seen_date = datetime.datetime.strptime(firstseen, '%d/%m/%Y').date()
deltadays = (seen_date-start_date).days
deltaweeks = deltadays/7 + 1
key = name,id
currentvalue = tdic.get(key, set())
currentvalue.add(deltaweeks)
tdic[key] = currentvalue
except ValueError:
print('Date value error')
pass
Right now I want to convert my list to a list that give me number of ids for each name and its weeks numbers like the below list:
Name number of ids weeknumbers
Mali 2 20,5,45,9
Piata 3 1,6,2,7,8,27,16
Goerge 1 4
Gooki 1 24,64,6
Can anyone help me with writing the code for this part?
Since it looks like your csv file has headers (which you are currently ignoring) why not use a DictReader instead of the standard reader class? If you don't supply fieldnames the DictReader will assume the first line contains them, which will also save you from having to skip the first line in your loop.
This seems like a great opportunity to use defaultdict and Counter from the collections module.
import csv
from datetime import date
from collections import defaultdict, Counter
datedict = defaultdict(set)
namecounter = Counter()
with open('d:/info.csv', 'r') as csvfile:
filereader = csv.DictReader(csvfile)
start_date=date(year=2009,month=1,day=1)
for row in filereader:
name,id,firstseen = row['name'], row['id'], row['firstseen']
try:
seen_date = datetime.datetime.strptime(firstseen, '%d/%m/%Y').date()
except ValueError:
print('Date value error')
pass
deltadays = (seen_date-start_date).days
deltaweeks = deltadays/7 + 1
datedict[name].add(deltaweeks)
namecounter.update([name]) # Without putting name into a list, update will index each character
This assumes that (name, id) is unique. If this is not the case then you can use anotherdefaultdict for namecounter. I've also moved the try-except statement so it is more explicit in what you are testing.
givent that :
tdict = {('Mali', 5): set([9, 45]), ('Gooki', 3): set([24, 64, 6]), ('Goerge', 5): set([4]), ('Mali', 2): set([20, 5]), ('Piata', 4): set([4]), ('Piata', 6): set([1]), ('Piata', 12): set([8, 16, 2, 27, 7])}
then to output the result above:
names = {}
for ((name, id), more_weeks) in tdict.items():
(ids, weeks) = names.get(name, (0, set()))
ids = ids + 1
weeks = weeks.union(more_weeks)
names[name] = (ids, weeks)
for (name, (id, weeks)) in names.items():
print("%s, %s, %s" % (name, id, weeks)

Python: File formatting

I have a for loop which references a dictionary and prints out the value associated with the key. Code is below:
for i in data:
if i in dict:
print dict[i],
How would i format the output so a new line is created every 60 characters? and with the character count along the side for example:
0001
MRQLLLISDLDNTWVGDQQALEHLQEYLGDRRGNFYLAYATGRSYHSARELQKQVGLMEP
0061
DYWLTAVGSEIYHPEGLDQHWADYLSEHWQRDILQAIADGFEALKPQSPLEQNPWKISYH
0121 LDPQACPTVIDQLTEMLKETGIPVQVIFSSGKDVDLLPQRSNKGNATQYLQQHLAMEPSQ
It's a finicky formatting problem, but I think the following code:
import sys
class EveryN(object):
def __init__(self, n, outs):
self.n = n # chars/line
self.outs = outs # output stream
self.numo = 1 # next tag to write
self.tll = 0 # tot chars on this line
def write(self, s):
while True:
if self.tll == 0: # start of line: emit tag
self.outs.write('%4.4d ' % self.numo)
self.numo += self.n
# wite up to N chars/line, no more
numw = min(len(s), self.n - self.tll)
self.outs.write(s[:numw])
self.tll += numw
if self.tll >= self.n:
self.tll = 0
self.outs.write('\n')
s = s[numw:]
if not s: break
if __name__ == '__main__':
sys.stdout = EveryN(60, sys.stdout)
for i, a in enumerate('abcdefgh'):
print a*(5+ i*5),
shows how to do it -- the output when running for demonstration purposes as the main script (five a's, ten b's, etc, with spaces in-between) is:
0001 aaaaa bbbbbbbbbb ccccccccccccccc dddddddddddddddddddd eeeeee
0061 eeeeeeeeeeeeeeeeeee ffffffffffffffffffffffffffffff ggggggggg
0121 gggggggggggggggggggggggggg hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
0181 hhhhhhh
# test data
data = range(10)
the_dict = dict((i, str(i)*200) for i in range( 10 ))
# your loops as a generator
lines = ( the_dict[i] for i in data if i in the_dict )
def format( line ):
def splitter():
k = 0
while True:
r = line[k:k+60] # take a 60 char block
if r: # if there are any chars left
yield "%04d %s" % (k+1, r) # format them
else:
break
k += 60
return '\n'.join(splitter()) # join all the numbered blocks
for line in lines:
print format(line)
I haven't tested it on actual data, but I believe the code below would do the job. It first builds up the whole string, then outputs it a 60-character line at a time. It uses the three-argument version of range() to count by 60.
s = ''.join(dict[i] for i in data if i in dict)
for i in range(0, len(s), 60):
print '%04d %s' % (i+1, s[i:i+60])
It seems like you're looking for textwrap
The textwrap module provides two convenience functions, wrap() and
fill(), as well as TextWrapper, the class that does all the work, and
a utility function dedent(). If you’re just wrapping or filling one or
two text strings, the convenience functions should be good enough;
otherwise, you should use an instance of TextWrapper for efficiency.

Categories

Resources