Python 3.X combining similar lines in .txt files together - python

A question regarding combining values from a text file into a single variable and printing it.
An example I can give is a .txt file such as this:
School, 234
School, 543
I want to know the steps needed to combine both of the "School" lines into a single variable "school" with a value of 777.
I know that we will need to open the .txt file for reading and then splitting it apart with the .split(",") method.
Code Example:
schoolPopulation = open("SchoolPopulation.txt", "r")
for line in schoolPopulation:
line = line.split(",")
Could anyone please advise me on how to tackle this problem?

Python has a rich standard library, where you can find classes for many typical tasks. Counter is what you need in this situation:
from collections import Counter

# Running total per name (first comma-separated field); a Counter starts
# every unseen key at 0, so "+=" needs no membership test.
c = Counter()
with open('SchoolPopulation.txt', 'r') as fh:
    for line in fh:
        if not line.strip():
            continue  # tolerate blank lines, e.g. a trailing newline at EOF
        name, val = line.split(',')
        # strip() so "School" and "School " accumulate under the same key;
        # int() itself ignores the surrounding whitespace/newline in val.
        c[name.strip()] += int(val)
print(c)

Something like this?
# Sum the second comma-separated field per lower-cased name.
results = {}
# "with" closes the file even if a malformed line raises part-way through.
with open("SchoolPopulation.txt", "r") as schoolPopulation:
    for line in schoolPopulation:
        if not line.strip():
            continue  # skip blank lines instead of failing on parts[1]
        parts = line.split(",")
        name = parts[0].strip().lower()
        val = int(parts[1])
        # get(..., 0) folds the "first time seen" and "seen before" cases.
        results[name] = results.get(name, 0) + val
print(results)
You could also use defaultdict and the with keyword.
from collections import defaultdict

with open("SchoolPopulation.txt", "r") as schoolPopulation:
    # defaultdict(int) starts every unseen name at 0, so "+=" always works.
    results = defaultdict(int)
    for line in schoolPopulation:
        if not line.strip():
            continue  # skip blank lines instead of failing on parts[1]
        parts = line.split(",")
        name = parts[0].strip().lower()
        val = int(parts[1])
        results[name] += val
print(results)
If you'd like to display your results nicely you can do something like
# Print one "name: total" line per entry of `results`.
for key, total in results.items():
    print("%s: %d" % (key, total))

# Accumulate the population while consecutive lines name the same school.
school = population = prev = ''
pop_count = 0
with open('SchoolPopulation.txt', 'r') as infile:
    for line in infile:
        if not line.strip():
            continue  # skip blank lines instead of failing on fields[1]
        fields = line.split(',')
        school = fields[0]
        population = int(fields[1])
        if school == prev or prev == '':
            # Add the parsed integer; adding the raw text field to an int
            # raises TypeError.
            pop_count += population
        else:
            pass  # do something else here
        prev = school

Related

This produces an empty dictionary, and I don't know why

this is the start of something I'm trying to do as a weekly TAFE worksheet. All it requires is the emails sent in the text file to be tallied up and see who sent the most emails. Still quite new to all of it so sorry if noob question. This seems to me like it should grab the email addresses on each relevant line in the file and add it as a key to the dictionary.
if len(fname) < 1:
fname = "mbox-short.txt"
fhand = open(fname)
frequency = dict()
for line in fhand:
if line.startswith("From "):
line = line.split()
frequency.get(line[1], 0) + 1
print(frequency)
You need to store something in the dictionary, but your code never does: it computes the new count and then discards it. Consider doing this instead:
# `fname` is expected to be set before this snippet runs; an empty value
# falls back to the sample mailbox file.
if len(fname) < 1:
    fname = "mbox-short.txt"
frequency = dict()
# "with" closes the file once the tally is complete.
with open(fname) as fhand:
    for line in fhand:
        if line.startswith("From "):
            words = line.split()
            if len(words) < 2:
                continue  # a bare "From " line carries no address
            # Assign the incremented count back; calling get() alone only
            # computes a value and throws it away.
            frequency[words[1]] = frequency.get(words[1], 0) + 1
print(frequency)

Extracting ip address from a file in python and counting how many times the ip shows up

I have to use a text file and extract the most frequent ip address and count how many times they come up
def anaylse_log(parameter):
myfile = open("sample_log_1 test.txt", "r")
iPdata = myfile.readlines()
mydict = {}
ipAddress = []
item_list = []
result_file = []
counter = ()
def extract_log(myfile):
#split the file line by line
for line in myfile:
splitData = line.split()
ipAddress = splitData[0]
numbers = splitData[1]
ipAddress.append(ipAddress)
numbers.append(numbers)
if numbers in mydict:
#if numbers is already a key in the dictionary
#increase the count
mydict[numbers] += 1
else:
# Otherwise if it's not yet in the dictionary
# Initialise it to 1
mydict[numbers] = 1
return numbers
myfile.close()
def find_most_frequent(maximum,iPdata):
with open("sample_log_1 text", "r") as myfile:
for text in myfile:
if str(maximum) in text:
return maximum
with open("resultss.csv", "w") as file:
file.write(maximum(maximum))
#This will put the dictionary into tuples and give each key a value
item_list = [(k, v) for k, v in mydict.items()]
#This will sort the list by v
item_list.sort(key=lambda x:x[1], reverse=True)
maximum = mydict()
def main(myfile,mydict,iPdata):
result_file = open("resultss.csv", "w")
main()
I had to fix the spacing for the code to be editable; I hope this is OK and that you are able to run it. I have been stuck on this for a while, and I thought I was calling the functions too.
Suppose your log file is like
15.25.7.3
25.25.2.5
25.25.2.5
115.25.7.3
215.25.7.3
25.25.2.5
Here is a simple way to count ips
# Map each IP address (one per line of ip.log) to its number of occurrences.
ip_count_dict = {}
with open('ip.log', 'r') as f:
    ip_file = f.read()
# if separated by comma
# ip_list = ip_file.split(',')
# if separated by \n new line
ip_list = ip_file.splitlines()
for ip in ip_list:
    ip = ip.strip()
    if not ip:
        continue  # a blank line is not an address; do not count ''
    ip_count_dict[ip] = ip_count_dict.get(ip, 0) + 1
print(ip_count_dict)
Output: {'15.25.7.3': 1, '25.25.2.5': 3, '115.25.7.3': 1, '215.25.7.3': 1}
Instead of manually counting IPs as you loop through your log, try this:
from collections import Counter

# "with" closes the file; a bare open(...).read() leaves that to the
# garbage collector.
with open("resultss.csv") as log_file:
    log_entries = log_file.read().split("\n")
# The IP is the first comma-separated field. Blank entries (such as the one
# produced by a trailing newline) are skipped so '' is not counted as an IP.
ip_list = [log.split(",")[0] for log in log_entries if log.strip()]
counts = Counter(ip_list)
print(counts)
This works with a CSV file format like:
10.10.10.1,asdf,31
5.9.7.11,aajbczxz,54
5.9.7.11,zzzzz,2

Incorrectly reading lines of a text File in python

So basically i want to iterate the lines of a text file that has this format:
-----------------------------------------
Code: 0123456789
EGGS: 3 7.00 21.00
BACON: 1 3.50 3.50
COFFEE: 2 14.20 28.40
TOTAL: 52.90
-----------------------------------------
and i have the following code to read the lines one by one:
with open(filename, "rt", encoding="utf-8") as f:
for line in f:
prevline = line
line.split()
if '-' in line:
temp = f.readline().split(':') #Get Code
print(temp)
AFM = temp[1]
print(AFM)
else:
tempProducts = line.split(':') #Get Product in a list
productName = tempProducts[0] #Store Product Name in a variable
productStats = tempProducts[1] #Store Product Stats in a list
productStats = productStats.split(" ")
for value in productStats:
valueArray.append(float(value))
products.update({productName:valueArray})
if '-' in f.readline():
rec = Receipt(AFM,products)
products={}
valueArray=[]
receipts.append(rec)
else:
line=prevline
Note that I want to skip the lines made of '------------' characters. The code works, but it only processes every second line: the second, then the fourth, then the sixth (Code, BACON, TOTAL). How can I fix this? Edit: there are multiple receipts in the file, so I need to skip the '----------' line each time.
with open(filename, "rt", encoding="utf-8") as f:
    old_list = list(f)  # Saving all the lines including '----'
# New list without the '----' lines. Filtering by content (rather than
# slicing off the first and last line) also removes the separators that sit
# between receipts when the file holds more than one.
new_list = [line for line in old_list if not line.lstrip().startswith('-')]
You can iterate just through new_list with your .split logic.
See if this does the job
# Build one Receipt per "Code ... TOTAL" group of lines.
# `filename`, `Receipt` and `receipts` come from the surrounding program.
with open(filename, "rt", encoding="utf-8") as f:
    AFM = None
    products = {}
    for line in f:
        # Skip separator rows (they contain '-') and rows with no
        # "name: values" shape, such as blank lines.
        if '-' in line or ':' not in line:
            continue
        if 'Code' in line:
            # strip() drops the space after the colon and the newline.
            AFM = line.split(':')[1].strip()
            print(AFM)
            products = {}  # a Code line starts a new receipt
        else:
            tempProducts = line.split(':')  # Get Product in a list
            productName = tempProducts[0]  # Store Product Name in a variable
            productStats = tempProducts[1]  # Text after the colon
            # split() with no argument ignores runs of spaces and the
            # newline. Each product gets its own fresh list; iterating the
            # raw string would visit it one character at a time.
            valueArray = [float(value) for value in productStats.split()]
            products[productName] = valueArray
            if 'TOTAL' in line:
                # TOTAL is the last row of a receipt (and is stored in
                # products like any other row).
                rec = Receipt(AFM, products)
                receipts.append(rec)
To anyone seeing this post now: please consider it closed. I did not provide enough information and the code was messed up. Sorry for wasting your time.

Parsing specific contents in a file

I have a file that looks like this
!--------------------------------------------------------------------------DISK
[DISK]
DIRECTION = 'OK'
TYPE = 'normal'
!------------------------------------------------------------------------CAPACITY
[CAPACITY]
code = 0
ID = 110
I want to read sections [DISK] and [CAPACITY].. there will be more sections like these. I want to read the parameters defined under those sections.
I wrote a following code:
file_open = open(myFile,"r")
all_lines = file_open.readlines()
count = len(all_lines)
file_open.close()
my_data = {}
section = None
data = ""
for line in all_lines:
line = line.strip() #remove whitespace
line = line.replace(" ", "")
if len(line) != 0: # remove white spaces between data
if line[0] == "[":
section = line.strip()[1:]
data = ""
if line[0] !="[":
data += line + ","
my_data[section] = [bit for bit in data.split(",") if bit != ""]
print my_data
key = my_data.keys()
print key
Unfortunately I am unable to get those sections and the data under that. Any ideas on this would be helpful.
As others already pointed out, you should be able to use the ConfigParser module.
Nonetheless, if you want to implement the reading/parsing yourself, you should split it up into two parts.
Part 1 would be the parsing at file level: splitting the file up into blocks (in your example you have two blocks: DISK and CAPACITY).
Part 2 would be parsing the blocks itself to get the values.
You know you can ignore the lines starting with !, so let's skip those:
with open('myfile.txt', 'r') as f:
content = [l for l in f.readlines() if not l.startswith('!')]
Next, read the lines into blocks:
def partition_by(l, f):
    """Split *l* into consecutive lists, starting a new one where *f* is true.

    Args:
        l: Iterable of elements to partition.
        f: Predicate; an element for which it returns true begins a new
            partition (and is that partition's first element).

    Yields:
        Non-empty lists of consecutive elements. Elements that precede the
        first match form a partition of their own. Nothing is yielded for
        an empty input.
    """
    t = []
    for e in l:
        if f(e) and t:
            # A new partition starts here; hand out the finished one first.
            yield t
            t = []
        t.append(e)
    if t:
        yield t
blocks = partition_by(content, lambda l: l.startswith('['))
and finally read in the values for each block:
def parse_block(block):
    """Parse one section: a header line followed by ``key = value`` lines.

    Args:
        block: Iterable of lines; the first is the header (e.g. ``[DISK]``).

    Returns:
        A ``(block_name, values)`` tuple. ``block_name`` is the stripped
        header without its first and last character; ``values`` maps each
        stripped key to its stripped value. Lines without ``=`` are ignored.

    Raises:
        StopIteration: If *block* is empty.
    """
    gen = iter(block)
    block_name = next(gen).strip()[1:-1]
    values = {}
    for entry in gen:
        # Split on the first '=' only, so a value that itself contains '='
        # is kept whole instead of being dropped.
        key, sep, value = entry.partition('=')
        if sep:
            values[key.strip()] = value.strip()
    return block_name, values
result = [parse_block(b) for b in blocks]
That's it. Let's have a look at the result:
# Print each section name followed by its tab-indented "key = value" pairs.
# print() with a single pre-formatted string behaves the same on Python 2
# and Python 3.
for section, values in result:
    print('%s :' % section)
    for k, v in values.items():
        print('\t%s = %s' % (k, v))
output:
DISK :
DIRECTION = 'OK'
TYPE = 'normal'
CAPACITY :
code = 0
ID = 110
Are you able to make a small change to the text file? If you can make it look like this (only changed the comment character):
#--------------------------------------------------------------------------DISK
[DISK]
DIRECTION = 'OK'
TYPE = 'normal'
#------------------------------------------------------------------------CAPACITY
[CAPACITY]
code = 0
ID = 110
Then parsing it is trivial:
from ConfigParser import SafeConfigParser
parser = SafeConfigParser()
parser.read('filename')
And getting data looks like this:
(Pdb) parser
<ConfigParser.SafeConfigParser instance at 0x100468dd0>
(Pdb) parser.get('DISK', 'DIRECTION')
"'OK'"
Edit based on comments:
If you're using <= 2.7, then you're a little SOL.. The only way really would be to subclass ConfigParser and implement a custom _read method. Really, you'd just have to copy/paste everything in Lib/ConfigParser.py and edit the values in line 477 (2.7.3):
if line.strip() == '' or line[0] in '#;': # add new comment characters in the string
However, if you're running 3'ish (not sure what version it was introduced in offhand, I'm running 3.4(dev)), you may be in luck: ConfigParser added the comment_prefixes __init__ param to allow you to customize your prefix:
parser = ConfigParser(comment_prefixes=('#', ';', '!'))
If the file is not big, you can load it and use Regexes to find parts that are of interest to you.

Finding a line in a file then reading next few lines in Python

I have a plain text file with the following data:
id=1
name=Scott
occupation=Truck driver
age=23
id=2
name=Dave
occupation=Waiter
age=16
id=3
name=Susan
occupation=Computer programmer
age=29
I'm trying to work out the best way to get to any point in the file given an id string, then grab the rows underneath to extract the data for use in my program. I can do something like:
def get_person_by_id(id):
file = open('rooms', 'r')
for line in file:
if ("id=" + id) in line:
print(id + " found")
But I'm not sure how I can now go through the next few lines and do line.split("=") or similar to extract the info (into a list, dict or whatever) so that I can use it in my program. Any pointers?
One option would be to load the entire thing into memory, which would save you from reading the file every time:
# Load every record once; records are separated by an empty line.
with open('rooms') as f:
    chunks = f.read().split('\n\n')

# Maps an id string to that person's remaining fields.
people_by_id = {}
for chunk in chunks:
    # Ignore empty rows (e.g. from the newline that ends the file); dict()
    # below would otherwise fail on a row without '='.
    rows = [row.strip() for row in chunk.split('\n') if row.strip()]
    if not rows:
        continue
    data = dict(row.split('=', 1) for row in rows)
    # The id becomes the lookup key and is removed from the stored fields.
    people_by_id[data.pop('id')] = data


def get_person_by_id(id):
    """Return the field dict stored for *id*, or None if it is unknown."""
    return people_by_id.get(id)
How about exiting from a for loop after finding the correct line:
def get_person_by_id(id):
    """Scan the file 'rooms' for the line ``id=<id>`` and stop there.

    Prints ``"<id> found"`` when the line exists. Returns None; the
    function only shows how to leave the file positioned just after the
    matching line.

    Args:
        id: The id to look for, as a string.
    """
    with open('rooms', 'r') as file:
        for line in file:
            # Compare the whole line: a substring test would let "id=3"
            # match "id=30" as well.
            if line.strip() == "id=" + id:
                print(id + " found")
                break
        #now you can continue processing your file:
        next_line = file.readline()
Maybe:
# Maps each id to a dict of that record's fields (the 'id' field included).
d = dict()
current = None  # record opened by the most recent 'id=' line
with open(filename) as f:
    for line in f:
        # partition() splits on the first '=' only; strip() removes the
        # trailing newline from the value.
        k, sep, v = line.strip().partition('=')
        if not sep:
            continue  # blank or malformed line
        if k == 'id':
            # Keep a direct reference to the new record; indexing
            # d.keys() is unavailable on Python 3 and relied on key order.
            current = d[v] = {}
        if current is not None:
            current[k] = v
And here is an iterative solution.
# One dict per record; an empty line closes the current record.
objects = []
current_object = None
# Text mode, so each line is a str and can be stripped with a str argument.
with open("info.txt", "r") as f:
    for line in f:
        line = line.strip("\r\n")
        if not line:
            current_object = None
            continue
        if current_object is None:
            # The first non-empty line after a gap opens a new record.
            current_object = {}
            objects.append(current_object)
        key, _, value = line.partition('=')
        current_object[key] = value
print(objects)
Another example of an iterative parser:
from itertools import takewhile
def entries(f):
    """Parse ``key=value`` records into a dict keyed by each record's id.

    A record is a run of consecutive lines that contain ``=``; any line
    without ``=`` (such as an empty line) ends the current record.

    Args:
        f: Iterable of lines, e.g. an open text file or a list of strings.

    Returns:
        A dict mapping each record's ``id`` value to a dict of the record's
        remaining fields.

    Raises:
        KeyError: If a record has no ``id`` field.
    """
    e = {}
    one = {}
    # A single pass over f works for lists as well as files, and any number
    # of separator lines between records is tolerated.
    for line in f:
        # Split on the first '=' only, so values may contain '='.
        key, sep, val = line.strip().partition('=')
        if sep:
            one[key] = val
        elif one:
            e[one.pop('id')] = one
            one = {}
    if one:
        # The last record is not followed by a separator line.
        e[one.pop('id')] = one
    return e
Example:
>>> with open('data.txt') as f:
..: print entries(f)['2']
{'age': '16', 'occupation': 'Waiter', 'name': 'Dave'}
Get all of the person's attributes and values (i.e. id, name, occupation, age, etc.) until you find
an empty line.
def get_person_by_id(id):
    """Return the record whose first line is ``id=<id>`` from the file 'rooms'.

    Prints ``"<id> found"`` when the record exists.

    Args:
        id: The id to look for, as a string.

    Returns:
        A dict of the record's attributes, including ``'id'``. The record
        ends at the first empty line, at the next ``id=`` line, or at the
        end of the file. An empty dict is returned when the id is absent.
    """
    person = {}
    found = False
    with open('rooms', 'r') as file:
        for line in file:
            line = line.strip()
            if found:
                # An empty line or the next record's id ends this record.
                if not line or line.startswith("id="):
                    break
            elif line == "id=" + id:
                # Whole-line comparison, so "id=1" does not match "id=12".
                print(id + " found")
                found = True
            else:
                continue  # still searching for the requested record
            attr, _, value = line.partition("=")
            person[attr] = value
    return person
This solution is a bit more forgiving of empty lines within records.
def read_persons(it):
    """Yield one dict per person from an iterable of ``key=value`` lines.

    A line whose key is ``id`` starts a new record. Lines without ``=``
    (for example empty lines) are ignored, so they may appear anywhere
    inside a record.

    Args:
        it: Iterable of lines, e.g. an open text file.

    Yields:
        A dict of the fields of each record, in input order.
    """
    person = dict()
    for l in it:
        try:
            # rstrip both '\r' and '\n' so CRLF files do not leave a stray
            # '\r' on every value.
            k, v = l.rstrip('\r\n').split('=', 1)
        except ValueError:
            # No '=' on this line: not a field, skip it.
            continue
        if k == 'id' and person:  # New record: emit the finished one
            yield person
            person = dict()
        person[k] = v
    if person:
        yield person

Categories

Resources