Python making matching pairs - python

I have a .txt file with the following contents:
Crista
Jame
7,3
2,0
Wiki
Rok
4,1
6,2
3,2
6,8
Pope
Lokk
5,2
0,1
3,1
Sam
Antony
4,3
9,1
My code to find all names and append them to the names[] list, and to find all digits and append them to the digits[] list (if there are more than two lines with digits in a row I didn't need them in the list before):
import re
f=open('mine.txt')
names=[]
digits=[]
count=0
for line in f:
line = line.rstrip()
if re.search('^[a-zA-Z]', line):
name=line
names.append(name)
if re.findall('^\d{1}:\d{1}', line):
if count < 2 :
digit=line
digits.append(digit)
count += 1
elif line != "" :
count = 0
Then I made pairs for matching names and digits:
my_pairs_dig=list()
while(digits):
a = digits.pop(0); b = digits.pop(0)
my_pairs_dig.append((a,b))
my_pairs_dig
my_pairs_names = list()
while(names):
a = names.pop(0); b = names.pop(0)
my_pairs_names.append((a,b))
my_pairs_names
outp=list(zip(my_pairs_names,my_pairs_dig))
And got this output:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2')), (('Pope', 'Lokk'), ('5,2', '0,1')), (('Sam', 'Antony'),('4,3', '9,1'))]
But the plans have changed, and now my desired output is:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2'), ('3,2', '6,8')), (('Pope', 'Lokk'), ('5,2', '0,1'), ('3,1')), (('Sam', 'Antony'),('4,3', '9,1'))]
How can I rewrite my code to get the desired output?

Try this
def group_records(lines):
    """Group a flat list of lines into ``(names, digits...)`` records.

    A record starts with two consecutive alphabetic lines (the names) and is
    followed by every line up to the next alphabetic line (the digits).

    Args:
        lines: Iterable of strings, with or without surrounding whitespace.

    Returns:
        A list of tuples.  Each tuple holds the ``(name1, name2)`` pair, then
        a tuple with the first two digit lines and, when there are more than
        two, a second tuple with the remaining ones.
    """
    # Blank lines (including the trailing "" produced by split("\n") on a
    # file that ends with a newline) carry no data, so drop them up front.
    data = [line.strip() for line in lines if line.strip()]
    res = []
    i = 0
    while i < len(data):
        # A non-alphabetic line outside a record, or a single name with no
        # partner, cannot start a record: step over it so the loop always
        # makes progress.
        if not data[i].isalpha() or i + 1 >= len(data):
            i += 1
            continue
        names = (data[i], data[i + 1])
        i += 2
        digits = []
        while i < len(data) and not data[i].isalpha():
            digits.append(data[i])
            i += 1
        digits = tuple(digits)
        if len(digits) > 2:
            res.append((names, digits[:2], digits[2:]))
        else:
            res.append((names, digits))
    return res


if __name__ == "__main__":
    with open('test.txt', 'r') as fp:
        res = group_records(fp.read().split("\n"))
    print(res)
Output:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2'), ('3,2', '6,8')), (('Pope', 'Lokk'), ('5,2', '0,1'), ('3,1',)), (('Sam', 'Antony'), ('4,3', '9,1'))]

Try this:
import re

# A line that starts with a letter is treated as a name; anything else that
# is not blank is treated as a digits line.
_NAME_LINE = re.compile(r'^[a-zA-Z]')


def pair_records(lines):
    """Build ``((name1, name2), (d1, d2), ...)`` records from raw lines.

    Args:
        lines: Iterable of strings (for example an open text file).

    Returns:
        A list with one tuple per complete pair of names.  The digit lines
        collected for that pair follow the names, chunked two at a time; a
        trailing odd line becomes a one-element tuple.
    """
    result = []
    names, digits = [], []

    def flush():
        # Emit the pending record only once both names have been seen.
        if len(names) == 2:
            pairs = (tuple(digits[i:i + 2]) for i in range(0, len(digits), 2))
            result.append((tuple(names), *pairs))
            names.clear()
            digits.clear()

    for line in lines:
        # Strip first: a "blank" line read from a file still contains "\n",
        # so testing it before stripping would never filter anything.
        line = line.rstrip()
        if not line:
            continue
        if _NAME_LINE.search(line):
            flush()
            names.append(line)
        else:
            digits.append(line)
    flush()  # the last record has no following name line to trigger it
    return result


if __name__ == "__main__":
    # 'mine.txt' is the file name used in the question this answers.
    with open('mine.txt') as f:
        result = pair_records(f)
    print(result)
Output:
[(('Crista', 'Jame'), ('7,3', '2,0')), (('Wiki', 'Rok'), ('4,1', '6,2'), ('3,2', '6,8')), (('Pope', 'Lokk'), ('5,2', '0,1'), ('3,1',)), (('Sam', 'Antony'), ('4,3', '9,1'))]
This is based on your assumption:
that's always two names and then 2,3 or 4 lines with numbers

Related

Is there any method to count different items in the text file for every matched string and store in dataframe?

The text file looks like
data/File_10265.data:
Apple:2kg
Apple:3kg
Banana:1kg
Banana:4kg
Some string1
data/File_10276.data:
Apple:6kg
Apple:5kg
Apple:3kg
Banana:2kg
Banana:4kg
Banana:2kg
Banana:4kg
Extra line
data/File_10278.data:
Apple:3kg
Banana:2kg
Banana:4kg
Banana:2kg
Banana:7kg
Some words
The code is as follows:
import re
import pandas as pd
f = open("Samplefruit.txt", "r")
lines = f.readlines()
Apple_count=0
Banana_count=0
File_count=0
Filename_list=[]
Apple_list=[]
Banana_list=[]
for line in lines:
match1=re.findall('data/(?P<File>[^\/]+(?=\..*data))',line)
if match1:
Filename_list.append(match1[0])
print('Match found:',match1)
if line.startswith("Apple"):
Apple_count+=1
elif line.startswith("Banana"):
Banana_count+=1
Apple_list.append(Apple_count)
Banana_list.append(Banana_count)
df=pd.DataFrame({'Filename': Filename_list,'Apple':
Apple_list,'Banana':
Banana_list})
The desired output:
Filename: |Apple |Banana
File_10265|2 |2
File_10276|3 |4
File_10278|1 |4
Maybe there is a more efficient way to do this but here's one solution:
from collections import Counter

# Count how often each distinct line occurs, keeping first-seen order.
with open('filetest.txt') as f:
    counts = Counter(f.readlines())

# Each key still ends with the newline it was read with (except possibly the
# last line of the file), so the count is emitted right after that newline.
if counts:
    # Open the output once instead of reopening it for every line.
    with open('file.txt', 'a') as f1:
        for line, count in counts.items():
            entry = line + str(count)
            print(entry)
            f1.write(entry)
You simply open the file, read all lines into a list, then get rid of any duplicates. Then you loop through the list (now with the duplicates removed), and use the .count (docs) function to get the number of occurrences of each unique item in the list.
Try this,
pattern = re.compile(r"data/File_[\d]+.data:")
lines = text.split("\n")
files = itertools.groupby(lines, lambda line:pattern.search(line) == None)
for k, content in files:
if k == True:
content = list(content)
all_words = list(set(content))
counts = {word:content.count(word) for word in all_words if word != ""}
print(counts)
Output -
{'Banana:': 2, 'Apple:': 2}
{'Banana:': 4, 'Apple:': 3}
{'Banana:': 4, 'Apple:': 1}
NOTE: New changes have been made to the code as per the changes in the question.
Try this:
import re
text = {}
def unit_cal(val1, val2):
    """Add the leading integer quantities of two values, keeping val1's unit.

    Args:
        val1: String such as ``"2kg"``; supplies the first quantity and the
            unit of the result.
        val2: A second string such as ``"5kg"``, or a falsy value (callers
            pass ``False``) when there is nothing to add yet.

    Returns:
        The summed quantity followed by the first alphabetic run found in
        ``val1`` (for example ``"7kg"``).  A missing number counts as 0 and a
        missing unit as an empty string.
    """
    def first_int(value):
        # Only the first run of digits is used, as in "12kg" -> 12.
        found = re.search(r"[0-9]+", value)
        return int(found.group()) if found else 0

    total = first_int(val1)
    if val2:
        total += first_int(val2)
    unit = re.search(r"[a-zA-Z]+", val1)
    # Drop the unit part of the expression to return only the value.
    return str(total) + (unit.group() if unit else "")
# Fill `text` as {header line: {item name: summed quantity with unit}}.
temp_key = None
with open("item.txt", "r") as f1:
    for line in f1:
        if "data" in line:
            # A header line opens a new group; the raw line (including its
            # newline) is used as the key.
            temp_key = line
            text[temp_key] = {}
            continue
        temp_word = line.strip().split(":")
        # Skip blank lines, free-text lines without "name:quantity", and
        # anything that appears before the first header.
        if temp_key is None or len(temp_word) < 2:
            continue
        item, quantity = temp_word[0], temp_word[1]
        totals = text[temp_key]
        # False tells unit_cal there is no running total for this item yet.
        totals[item] = unit_cal(quantity, totals.get(item, False))

# Assemble the report in one pass and join once.
parts = []
for main_key, items in text.items():
    parts.append(main_key + "\n")
    parts.extend(f"{sub_key} : {value}\n\n" for sub_key, value in items.items())
final_text = "".join(parts)

print(final_text)  # final output is displayed in the console
with open("new_items.txt", "w") as f2:
    f2.write(final_text)  # the output is also written to a new file
Output:
data/File_10265.data:
Apple : 5kg
Banana : 5kg
data/File_10276.data:
Apple : 14kg
Banana : 12kg
data/File_10278.data:
Apple : 3kg
Banana : 15kg
Here is my own answer. Thanks to @Mani, @CarySwoveland, @Zero, and @M B for your support. The code is as follows:
import pandas as pd

# text maps each file name to its {'Apple': n, 'Banana': n} line counts.
text = {}
Filename = None
with open(r"Samplefruit.txt", "r") as file:
    for line in file:
        if "data" in line:
            # Header such as "data/File_10265.data:" -> "File_10265".
            Filename = line.split('/')[-1].split('.')[0]
            Apple_count = 0
            Banana_count = 0
            print('----------------')
            print(Filename)
            # Register the file right away so one without any fruit lines
            # still gets a row of zeros.
            text[Filename] = {'Apple': Apple_count, 'Banana': Banana_count}
        elif Filename is not None and ("Apple" in line or "Banana" in line):
            # Lines seen before the first header have no file to belong to
            # and are ignored by the guard above.
            if line.startswith("Apple"):
                Apple_count += 1
            elif line.startswith("Banana"):
                Banana_count += 1
            print('Apple:', Apple_count)
            print('Banana:', Banana_count)
            text[Filename] = {'Apple': Apple_count, 'Banana': Banana_count}

df = pd.DataFrame(
    {
        "Filename": list(text),
        "Apple": [x['Apple'] for x in text.values()],
        "Banana": [x['Banana'] for x in text.values()],
    }
)
print(df)

Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages

10.2 Write a program to read through the mbox-short.txt and figure out the distribution by hour of the day for each of the messages. You can pull the hour out from the 'From ' line by finding the time and then splitting the string a second time using a colon.
From stephen.marquard@uct.ac.za Sat Jan 5 09:14:16 2008
Once you have accumulated the counts for each hour, print out the counts, sorted by hour as shown below.
My Code:
fname = input("Enter file:")
fhandle = open(fname)
dic={}
for line in fhandle:
if not line.startswith("From "):
continue
else:
line=line.split()
line=line[5] # accesing the list using index and splitting it
line=line[0:2]
for bline in line:
dic[bline]=dic.get(bline,0)+1 # Using this line we created a dictionary having keys and values
#Now it's time to access the dictionary and sort in some way.
lst=[]
for k1,v1 in dic.items(): # dictionary er key value pair access korar jonno items method use kora hoyechhe
lst.append((k1,v1)) # dictionary er keys and corresponding values ke lst te append korlam
lst.sort() #lst take sort korlam. sorting is done through key
#print(lst)
for k1,v1 in lst: # we are able to access this list using key value pair as it was basically a dictionary before, It is just appended
print(k1,v1)
#print(dic)
#print(dic)
Desired Output:
04 3
06 1
07 1
09 2
10 3
11 6
14 1
15 2
16 4
17 2
18 1
19 1
My Output:
enter image description here
I don't understand what's going wrong.
Working code. I have broken it down into the simplest form I can, so it should be easy for you to understand.
# Count messages per hour of day from the "From " lines of a mailbox file.
d = dict()
fname = input('enter the file name : ')
try:
    fopen = open(fname, 'r')
except OSError:
    # Without a file there is nothing to count; stop here rather than
    # continuing with an undefined file handle.
    print('wrong file name !!!')
    raise SystemExit(1)

with fopen:
    for line in fopen:
        stline = line.strip()
        # "From " (with the space) matches the envelope line and excludes
        # the "From:" header.
        if not stline.startswith('From '):
            continue
        spline = stline.split()
        if len(spline) < 6:
            continue  # no time field on this line
        # Sixth word is the time "HH:MM:SS"; keep the hour part.
        hour = spline[5].split(':')[0]
        d[hour] = d.get(hour, 0) + 1

lst = sorted(d.items())
for k, v in lst:
    print(k, v)
name = input("Enter file:")
if len(name) < 1 : name = "mbox-short.txt"
handle = open(name)
emailcount = dict()
for line in handle:
if not line.startswith("From "): continue
line = line.split()
line = line[1]
emailcount[line] = emailcount.get(line, 0) +1
bigcount = None
bigword = None
for word,count in emailcount.items():
if bigcount == None or count > bigcount:
bigcount = count
bigword = word
print(bigword, bigcount)
# Histogram of message hours taken from the "From" lines of a mailbox file.
name = input("Enter file:")
if len(name) < 1:
    name = "mbox-short.txt"

counts = {}
with open(name) as handle:
    for line in handle:
        word = line.split()
        # word[5] is read below, so the line needs at least six words.
        if len(word) < 6 or word[0] != "From":
            continue
        # "09:14:16" -> "09"; take the hour directly instead of slicing the
        # printed form of a list.
        hour = word[5].split(":")[0]
        counts[hour] = counts.get(hour, 0) + 1

lst = sorted(counts.items())
for k, v in lst:
    print(k, v)
counts=dict()
fill=open("mbox-short.txt")
for line in fill :
if line.startswith("From "):
x=line.split()
b=x[5]
y=b.split(":")
f=y[0]
counts[f]=counts.get(f,0)+1
l=list()
for k,v in counts.items():
l.append((k,v))
l.sort()
for k,v in l:
print(k,v)
I listened carefully to the online lessons; this is my code, based on what I learned in class. I think it will be easy for you to understand.
fn = input('Please enter file: ')
if len(fn) < 1: fn = 'mbox-short.txt'
hand = open(fn)
di = dict()
for line in hand:
ls = line.strip()
wds = line.split()
if 'From' in wds and len(wds) > 2:
hours = ls.split()
hour = hours[-2].split(':')
ch = hour[0]
di[ch] = di.get(ch, 0) + 1
tmp = list()
for h,t in di.items():
newt = (h,t)
tmp.append(newt)
tmp = sorted(tmp)
for h,t in tmp:
print(h,t)
name = input("Enter file:")
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
counts = dict()
for line in handle:
line = line.strip()
if not line.startswith("From ") : continue
line = line.split()
hr = line[5].split(":")
hr = hr[0:1]
for piece in hr:
counts[piece] = counts.get(piece,0) + 1
lst = list()
for k,v in counts.items():
lst.append((k,v))
lst = sorted(lst)
for k,v in lst:
print(k,v)
# Distribution of messages by hour of day, read from the "From " lines.
fname = input("Enter file:")
dic = {}
with open(fname) as fhandle:
    for line in fhandle:
        if not line.startswith('From '):
            continue
        words = line.split()
        if len(words) < 6:
            continue  # no time field on this line
        # The sixth word is the time "HH:MM:SS"; split it on ':' and keep
        # the whole hour, not its individual characters.
        bline = words[5].split(':')[0]
        # Build a dictionary mapping each hour to its message count.
        dic[bline] = dic.get(bline, 0) + 1

# items() yields the (hour, count) pairs; sorting them orders by hour.
lst = sorted(dic.items())
for k1, v1 in lst:
    print(k1, v1)

How do I separate name and sum of numbers?

I'm struggling to add those numbers together and put names apart.
So, I need to print each line containing names and total numbers.
e.g. Peter Jones: 155
File 'test1.txt' example:
Marshall Rogers, 88, 21, 90
Richard Lao, 30
Peter Jones, 23, 54,78
AABB CC EE RR rest, 90, 3, 3, 4
Here's my code:
def find_their_numbers(files):
    """Print each record's name followed by its raw number fields.

    Every line of the file is split on commas.  The first field is printed
    as ``name:`` without a newline, and each remaining field is then printed
    unchanged on a line of its own.

    Args:
        files: Path of the text file to read.
    """
    # Read everything up front so the file is closed before printing.
    with open(files) as file:
        lines = file.read().splitlines()
    for line in lines:
        name, *numbers = line.split(',')
        print(name + ':', end='')
        for number in numbers:
            print(number)
find_their_numbers('test1.txt')
You can do that without finding individually each number:
def find_their_numbers(text_file):
    """Print ``name: total`` for every record in a comma-separated file.

    Each non-blank line holds a name followed by comma-separated integers.
    The integers are summed and printed next to the name.

    Args:
        text_file: Path of the text file to read.

    Raises:
        ValueError: If a non-empty field after the name is not an integer.
    """
    with open(text_file) as f:
        lines = f.read().splitlines()
    for line in lines:
        if not line.strip():
            continue  # a blank line would otherwise print a bogus ": 0"
        name, *numbers = line.split(',')
        # Ignore empty fields such as the one left by a trailing comma.
        total = sum(int(x) for x in numbers if x.strip())
        print(f"{name}: {total}")
Sample test:
>>> find_their_numbers('test1.txt')
Marshall Rogers: 199
Richard Lao: 30
Peter Jones: 155
AABB CC EE RR rest: 100
Try this:
# Read all lines first; the with-block closes the file once they are loaded.
with open(files) as file:
    lines = file.read().splitlines()
for name in lines:
    names = name.split(',')
    # names[0] is the name; every later field is converted to int and summed.
    print(f"{names[0]}: {sum(map(int,names[1:]))}")
where sum(map(int,names[1:])) will slice names from the second element, convert all the elements to integers and sum them.
You can use unpacking to separate the name from the rest of the components:
with open(fileName, 'r') as file:
    # A file object yields its lines directly (the method is readlines(),
    # not readLines(), and is not needed here).
    for line in file:
        line = line.rstrip('\n')
        if not line.strip():
            continue  # skip blank lines
        # First field is the name; the star collects the remaining fields.
        name, *numbers = line.split(',')
        print(name + ":", sum(map(int, numbers)))

Lists in dictionary

I have a text file of format like this
10:45 a b c
x 0 1 2
y 4 5 6
z 7 8 9
I want to make x a key with the list [0, 1, 2] as its value, and the same for y and z.
while(1):
line = f.readline()
if time in line:
print (line)
L1.append(line)
for count,line in enumerate(f):
if (i < 3):
L1.append(line)
print ("Line{} : {}".format(count,line.strip()))
i=i+1
#print(L1)
for k in range(1):
print(L1[k])
test1 = L1[k]
a1 = test1.split()
print (a1[1])
dict = {a1[1]: L1[k] for a1[1] in L1[k]}
print (dict)
for k in range(1,3):
#print("hey")
print (L1[k]) #will list the single row
test = L1[k]
#print(test)
a = test.split()
print (a[0])
dict = {a[0]:L1[k] for a[0] in L1[k]}
print (dict)
Any idea what i am doing wrong here?
P.S. - I am new to python
You could try this:
my_dict = {}
lines = f.readlines()
# The first line is the header ("10:45 a b c"); slicing skips it without
# failing on an empty file.
for line in lines[1:]:
    # split() with no argument also drops the trailing newline, so the last
    # value is "2" rather than "2\n".
    line_list = line.split()
    if not line_list:
        continue  # blank line
    key, *values = line_list
    my_dict[key] = values
This will accomplish what it is I think you need (assuming your text file is stored in the same directory and you replace 'test.txt' with your filename):
with open('test.txt', 'r') as values:
contents = values.read().strip().split()
new_dict = {}
i = 4
while i <= len(contents)-4:
new_dict.update({contents[i]: contents[i+1:i+4]})
i += 4
print(new_dict)
or this, if you want the values as integers:
with open('test.txt', 'r') as values:
contents = values.read().strip().split()
new_dict = {}
i = 4
while i <= len(contents)-4:
new_dict.update({contents[i]: [int(contents[i+1]),int(contents[i+2]),int(contents[i+3])]})
i += 4
print(new_dict)
Try this
import string
start_found = False
result_dict = {}
for line in open("stack1_input.txt", mode='r'):
if (line.startswith("10:45")):
start_found = True
continue
if start_found == True:
values = line.split()
if len(values) == 4:
result_dict[values[0]] = values[1:]
print (result_dict)

Calculation from text file

I have a text file containing:
SKT:SSG:2:1
NJW:UIA:1:0
SKT:TRP:3:2
SSG:NJW:0:2
I want to calculate the number of wins by each team corresponding to the number in the text file. Example:
SKT: 2
NJW: 2
UIA: 0
SSG: 0
Here's what i have so far:
fileName = input("Enter the file name:")
match = open(fileName)
table = []
for line in match:
contents = line.strip().split(':')
table.append(contents)
dictionary = {}
for line in table:
#how do i code the index of the team and it's score?
.
.
Just to test my understanding: to calculate how many times each team wins, Python has to read that, for example, SKT scored 2 against SSG's 1 in game 1, which makes SKT the winner, so SKT's count goes up by 1.
However, I'm confused about how to associate each team name with its score. Any help is appreciated. Regards.
you can create a dict to store all the team winning score.
res = {}
for line in match:
team1,team2,score1,score2 = line.split(':')
if team1 not in res: res[team1] = 0
if team2 not in res: res[team2] = 0
if int(score1) == int(score2):
continue
else:
winner = team1 if int(score1) > int(score2) else team2
res[winner] += 1
You could use a dictionary.
def count_wins(lines):
    """Count wins per team from ``TEAM1:TEAM2:SCORE1:SCORE2`` lines.

    Args:
        lines: Iterable of result lines (for example an open text file).

    Returns:
        A dict mapping every team that appears to its number of wins.  Teams
        without a win are present with 0, and draws count for nobody.

    Raises:
        ValueError: If a non-blank line does not have four ':'-separated
            fields or a score is not an integer.
    """
    d = {}
    for line in lines:
        line = line.strip()
        if not line:
            continue
        x, y, sx, sy = line.split(":")
        # Compare as integers: as strings, "10" sorts before "9" and a
        # trailing newline on the last field turns a draw into a win.
        sx, sy = int(sx), int(sy)
        d.setdefault(x, 0)
        d.setdefault(y, 0)
        if sx > sy:
            d[x] += 1
        elif sx < sy:
            d[y] += 1
    return d


if __name__ == "__main__":
    fileName = input("Enter the file name:")
    with open(fileName) as match:
        d = count_wins(match)
    print(d)
Result:
{'SKT': 2, 'SSG': 0, 'NJW': 2, 'UIA': 0, 'TRP': 0}
Using collections.defaultdict simplifies the procedure:
import collections

# Wins per team; a defaultdict(int) starts every unseen team at 0.
scores = collections.defaultdict(int)
for line in table:
    # Each row of `table` is already a list of fields (the line was split on
    # ':' when the table was built), so it is unpacked directly.
    teamA, teamB, scoreA, scoreB = line
    # Touching the keys registers both teams even if they never win.
    scores[teamA] += 0
    scores[teamB] += 0
    # Count the win for the higher score; a draw counts for nobody.
    if int(scoreA) > int(scoreB):
        scores[teamA] += 1
    elif int(scoreB) > int(scoreA):
        scores[teamB] += 1

Categories

Resources