Counting an NCAA basketball team's wins - Python

I am trying to count the wins of certain college basketball teams, and I have a CSV file containing that data. When I run this code, no matter what I have tried, it always returns 0.
import csv
f = open("data.csv", 'r')
data = list(csv.reader(f))
def ncaa(team):
count = 0
for row in data:
if row[2] == team:
count += 1
return count
airforce_wins = ncaa("Air force")
akron_wins = ncaa("Akron")
print(akron_wins)

This will give you "1".
import csv
f = open("C:\\users/alex/desktop/data.csv", 'r')
data = list(csv.reader(f))
def ncaa(team, rows=None):
    """Count the rows whose second column equals ``team``.

    Args:
        team: Team name to look for. It is compared exactly
            (case-sensitively) with column index 1 of every row.
        rows: Optional iterable of already-parsed CSV rows (lists of
            strings). When omitted, the module-level ``data`` list is used.

    Returns:
        The number of rows whose column 1 equals ``team``.
    """
    if rows is None:
        rows = data
    # Column 1 holds the team name (corrected index).  csv.reader yields an
    # empty list for a blank line, so rows too short to have a second column
    # are skipped instead of raising IndexError.
    return sum(1 for row in rows if len(row) > 1 and row[1] == team)
airforce_wins = ncaa("Air force")
akron_wins = ncaa("Akron")
print(akron_wins)
However, I don't think you are counting the wins correctly. You are counting occurrences of a row in the file but, since each team only has one row, you will always get "1" for any team. Perhaps your wins are in another column, and that is the value you need to look up once you find your team's row.

Try this instead before the function definition:
import csv
with open("data1.csv", 'r') as f:
data = csv.reader(f,delimiter=',')
I don't think using list(reader_object) is correct.

Related

Arguments when Creating Functions

I am trying to create a function from weather data on a .csv file: When given temperature and a location, the function returns the number of entries that exceed that temperature in the specific location. After the condition statement I am unsure of what I need to write.
I have read the dictionary in a previous cell.
import csv
given_location = input ('Enter given location:')
given_temp = input('Enter given temp:')
count = 0
def daysOver (smalldict, location, temp):
reader = csv.Dictreader(dataFile)
for row in reader:
if row ['Location'] == given_location and line['MaxTemp'] > given_temp:
count = row
return count
print('Number of days over',given_temp, 'in', given_location,':', count)
You probably want to replace count = row with count += 1.

How to get a running total for a column in a csv file while depending on a unique variable in a different column?

import csv
def getDataFromFile(filename, dataList):
file = open(filename, "r")
csvReader = csv.reader(file)
for aList in csvReader:
dataList.append(aList)
file.close()
def getTotalByYear(expendDataList):
total = 0
for row in expendDataList:
expenCount = float(row[2])
total += expenCount**
Rtotal = input(print("Enter 'every' or a particular year. "))
if Rtotal == 'every' or == 'Every':
print(expenCount)
As you can see, I got the running total for column 2 if you type every or Every, but I don't understand how to compute a running total for column 2 that depends on a particular value in column one.
In this case my CSV file has three columns of data. A year field, an item field, and an expenditure field. How do I get a running total of the expenditure field based on a certain year?
expendDataList = []
fname = "expenditures.csv"
getDataFromFile(fname, expendDataList)
getTotalByYear(expendDataList)
Producing a running total is a good task for a generator function. This example uses the filter built-in function to filter out unwanted years (a generator expression or list comprehension could be used instead). Then it iterates over the selected rows to produce the results.
import csv
def running_totals(year, filename='year-item-expenditure.csv'):
    """Yield the running total of the ``Expenditure`` column.

    Args:
        year: Either ``'every'`` (any letter case) to include all rows, or
            a year string that is compared exactly with each row's
            ``Year`` column.
        filename: Path of the CSV file to read; it must have a header row
            containing ``Year`` and ``Expenditure``.

    Yields:
        The cumulative expenditure (a float) after each selected row.

    Raises:
        KeyError: If a required column is missing from the header.
        ValueError: If an ``Expenditure`` value is not numeric.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(filename, newline='') as f:
        reader = csv.DictReader(f)
        # With a predicate of None, filter() keeps every truthy item; the
        # rows are dicts keyed by the header, so all of them pass.
        predicate = None if year.lower() == 'every' else lambda row: row['Year'] == year
        total = 0
        for row in filter(predicate, reader):
            total += float(row['Expenditure'])
            yield total
totals = running_totals('2019')
for total in totals:
print(total)
Another approach would be to use itertools.accumulate, though you still have to perform all of the filtering, so there's not much benefit to doing this unless you need performance.
import csv
import itertools
def running_totals(year, filename='year-item-expenditure.csv'):
    """Yield the running total of the ``Expenditure`` column via accumulate.

    Args:
        year: Either ``'every'`` (any letter case) to include all rows, or
            a year string that is compared exactly with each row's
            ``Year`` column.
        filename: Path of the CSV file to read; it must have a header row
            containing ``Year`` and ``Expenditure``.

    Yields:
        The cumulative expenditure (a float) after each selected row.

    Raises:
        KeyError: If a required column is missing from the header.
        ValueError: If an ``Expenditure`` value is not numeric.
    """
    # newline='' lets the csv module do its own newline handling.
    with open(filename, newline='') as f:
        reader = csv.DictReader(f)
        predicate = None if year.lower() == 'every' else lambda row: row['Year'] == year
        # Create a generator expression that yields expenditures as floats
        expenditures = (float(row['Expenditure']) for row in filter(predicate, reader))
        # Iterate inside the with-block so the file stays open while the
        # lazy pipeline is being consumed.
        for total in itertools.accumulate(expenditures):
            yield total

String Replace for Multiple Lines In A CSV

Below is a snippet from a csv file. The first column is the product number, 2 is the stock level, 3 is the target level, and 4 is the distance from target (target minus stock level.)
34512340,0,95,95
12395675,3,95,92
56756777,70,95,25
90673412,2,95,93
When the stock level gets to 5 or below, I want to have the stock levels updated from python when a user requests it.
I am currently using this piece of code which I have adapted from just updating one line in the CSV. It isn't working though. The first line is written back to the file as 34512340,0,95,95 and the rest of the file is deleted.
choice = input("\nTo update the stock levels of the above products, type 1. To cancel, enter anything else.")
if choice == '1':
with open('stockcontrol.csv',newline='') as f:
for line in f:
data = line.split(",")
productcode = int(data[0])
target = int(data[2])
stocklevel = int(data[1])
if stocklevel <= 5:
target = str(target)
import sys
import csv
data=[]
newval= target
newtlevel = "0"
f=open("stockcontrol.csv")
reader=csv.DictReader(f,fieldnames=['code','level', 'target', 'distancefromtarget'])
for line in reader:
line['level']= newval
line['distancefromtarget']= newtlevel
data.append('%s,%s,%s,%s'%(line['code'],line['level'],line['target'],line['distancefromtarget']))
f.close()
f=open("stockcontrol.csv","w")
f.write("\n".join(data))
f.close()
print("The stock levels were updated successfully")
else:
print("Goodbye")
Here is the code that I had changing one line in the CSV file and works:
with open('stockcontrol.csv',newline='') as f:
for line in f:
if code in line:
data = line.split(",")
target = (data[2])
newlevel = stocklevel - quantity
updatetarget = int(target) - int(newlevel)
stocklevel = str(stocklevel)
newlevel = str(newlevel)
updatetarget = str(updatetarget)
import sys
import csv
data=[]
code = code
newval= newlevel
newtlevel = updatetarget
f=open("stockcontrol.csv")
reader=csv.DictReader(f,fieldnames=['code','level', 'target', 'distancefromtarget'])
for line in reader:
if line['code'] == code:
line['level']= newval
line['distancefromtarget']= newtlevel
data.append('%s,%s,%s,%s'%(line['code'],line['level'],line['target'],line['distancefromtarget']))
f.close()
f=open("stockcontrol.csv","w")
f.write("\n".join(data))
f.close()
What can I change to make the code work? I basically want the program to loop through each line of the CSV file, and if the stock level (column 2) is equal to or less than 5, update the stock level to the target number in column 3, and then set the number in column 4 to zero.
Thanks,
The code below reads each line and checks the value of column 2. If it is less than or equal to 5, the value of column 2 is changed to the value of column 3 and the last column is set to 0; otherwise all the columns are left unchanged.
import sys
import csv
# Restock pass over stockcontrol.csv: every product whose level is 5 or less
# gets its level reset to the target and its distance-from-target set to 0;
# all other rows are written back unchanged.
data = []
with open("stockcontrol.csv", newline='') as f:
    reader = csv.DictReader(f, fieldnames=['code', 'level', 'target', 'distancefromtarget'])
    for line in reader:
        if int(line['level']) <= 5:
            line['level'] = line['target']
            line['distancefromtarget'] = 0
        # Appended for every row, not only the restocked ones, so that no
        # product is dropped when the file is rewritten.
        data.append("%s,%s,%s,%s" % (line['code'], line['level'], line['target'], line['distancefromtarget']))
# The whole file has been read into `data` by now, so it is safe to truncate
# it and write the updated rows back.
with open("stockcontrol.csv", "w") as f:
    f.write("\n".join(data))
Coming to issues in your code:
You are first reading the file without using the csv module and getting the values in each column by splitting the line. You are again using the DictReader method of csv module to read the values you already had.

Making various groupings

My data set is a list of people either working together or alone.
I have a row for each project, and columns with names of all the people who worked on that project. If column 2 is the first empty column in a row, it was a solo job. If column 4 is the first empty column in a row, there were 3 people working together.
I have the code to find all pairs. In the output data set, a square N x N is created with every actor labelling columns and rows. Cells (A,B) and (B,A) contain how many times that pair worked together. A working with B is treated the same as B working with A.
An example of the input data, in a comma delimited fashion:
A,.,.
A,B,.
B,C,E
B,F,.
D,F,.
A,B,C
D,B,.
E,C,B
X,D,A
F,D,.
B,.,.
F,.,.
F,X,C
C,F,D
I am using Python 3.2. The code that does this:
import csv
import collections
import itertools
# Build a symmetric co-worker count matrix from connect.csv and write it to
# connection_grid.csv.  grid[(a, b)] is how often a and b shared a project;
# grid[(a, a)] is how often a worked alone.
grid = collections.Counter()
with open("connect.csv", "r", newline="") as fp:
    reader = csv.reader(fp)
    for line in reader:
        # clean empty names
        line = [name.strip() for name in line if name.strip()]
        if not line:
            # blank row: nothing to count
            continue
        # count single works
        if len(line) == 1:
            grid[line[0], line[0]] += 1
        # do pairwise counts
        for pair in itertools.combinations(line, 2):
            grid[pair] += 1
            # mirror the pair so (A, B) and (B, A) always agree
            grid[pair[::-1]] += 1
actors = sorted({pair[0] for pair in grid})
# newline="" stops the csv writer from emitting blank lines on Windows.
with open("connection_grid.csv", "w", newline="") as fp:
    writer = csv.writer(fp)
    writer.writerow([''] + actors)
    for actor in actors:
        line = [actor] + [grid[actor, other] for other in actors]
        writer.writerow(line)
My questions are:
If I had a column with months and years, is it possible to make a matrix spreadsheet for each month year? (i.e., for 2011, I would have 12 matrices)?
For whatever breakdown I use, is it possible to make a variable such that the variable name is a combo of all the people who worked together? e.g. 'ABD' would mean a project Person A, Person B, and Person D worked together on and would equal how many times ABD worked as a group of three, in whatever order. Projects can hold up to 20 people so it would have to be able to make groups of 2 to 20. Also, it would be easiest if the variables should be in alphabetical order.
1) Sort your projects by month & year, then create a new 'grid' for every month. e.g.:
Pull the month & year from every row. Remove month & year from the row, then add the remaining data to a dictionary. In the end you get something like {(month, year): [line, line, ...]} . From there, it's easy to loop through each month/year and create a grid, output spreadsheet, etc.
2) ''.join(sorted(list)).replace('.','') gives you the persons who worked together listed alphabetically.
import csv
import collections
import itertools
# Per-month co-worker grids plus a count of every distinct group.
#   grids[(month, year)] -> Counter keyed by (actor, other)
#   groups[name]         -> number of projects done by exactly that group,
#                           where name is the members joined alphabetically
grids = collections.defaultdict(collections.Counter)
groups = collections.Counter()
with open("connect.csv", "r", newline="") as fp:
    reader = csv.reader(fp)
    for line in reader:
        if not line:
            # blank row: there is no date column to pop
            continue
        # extract month/year from the last column
        date = line.pop(-1)
        month, year = date.split('/')
        # clean empty names
        line = [name.strip() for name in line if name.strip()]
        # generate group name
        group = ''.join(sorted(line)).replace('.', '')
        # increment group count
        groups[group] += 1
        # fetch the grid for this month, creating it on first use
        grid = grids[(month, year)]
        # count single works
        if len(line) == 1:
            grid[line[0], line[0]] += 1
        # do pairwise counts
        for pair in itertools.combinations(line, 2):
            grid[pair] += 1
            grid[pair[::-1]] += 1
for date, grid in grids.items():
    actors = sorted({pair[0] for pair in grid})
    # Filename from date; `date` is the (month, year) tuple used as the key
    filename = "connection_grid_%s_%s.csv" % date
    with open(filename, "w", newline="") as fp:
        writer = csv.writer(fp)
        writer.writerow([''] + actors)
        for actor in actors:
            line = [actor] + [grid[actor, other] for other in actors]
            writer.writerow(line)
with open('groups.csv', 'w', newline='') as fp:
    writer = csv.writer(fp)
    for item in sorted(groups.items()):
        writer.writerow(item)

Parse CSV file and aggregate the values

I'd like to parse a CSV file and aggregate the values. The city column has repeating values (sample):
CITY,AMOUNT
London,20
Tokyo,45
London,55
New York,25
After parsing the result should be something like:
CITY, AMOUNT
London,75
Tokyo,45
New York,25
I've written the following code to extract the unique city names:
def main():
contrib_data = list(csv.DictReader(open('contributions.csv','rU')))
combined = []
for row in contrib_data:
if row['OFFICE'] not in combined:
combined.append(row['OFFICE'])
How do I then aggregate values?
Tested in Python 3.2.2:
import csv
from collections import defaultdict
# Sum AMOUNT per CITY from test.csv and write one row per city to out.csv.
cities = defaultdict(int)
with open('test.csv', newline='') as infile:
    reader = csv.DictReader(infile)
    for row in reader:
        cities[row["CITY"]] += int(row["AMOUNT"])
# The with-block closes (and therefore flushes) the output file instead of
# leaving that to interpreter shutdown.
with open('out.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(["CITY", "AMOUNT"])
    writer.writerows([city, total] for city, total in cities.items())
Result:
CITY,AMOUNT
New York,25
London,75
Tokyo,45
As for your added requirements:
import csv
from collections import defaultdict
def default_factory():
    """Return a fresh per-city accumulator ``[total, max, min, count]``.

    A new list is built on every call so that each city gets its own
    independent accumulator when this is used as a ``defaultdict`` factory.
    """
    return [0, None, None, 0]
# Aggregate AMOUNT per CITY from test.csv into [total, max, min, count],
# turn the count into the mean, and write the result to out.csv.
cities = defaultdict(default_factory)
with open('test.csv', newline='') as infile:
    reader = csv.DictReader(infile)
    for row in reader:
        amount = int(row["AMOUNT"])
        stats = cities[row["CITY"]]
        stats[0] += amount
        # The builtins max()/min() are used here rather than rebinding the
        # names max and min, which would shadow them for later code.
        stats[1] = amount if stats[1] is None else max(stats[1], amount)
        stats[2] = amount if stats[2] is None else min(stats[2], amount)
        stats[3] += 1
for stats in cities.values():
    stats[3] = stats[0] / stats[3]  # calculate mean (replaces the count slot)
with open('out.csv', 'w', newline='') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(["CITY", "AMOUNT", "max", "min", "mean"])
    writer.writerows([city] + stats for city, stats in cities.items())
This gives you
CITY,AMOUNT,max,min,mean
New York,25,25,25,25.0
London,75,55,20,37.5
Tokyo,45,45,45,45.0
Note that under Python 2, you'll need the additional line from __future__ import division at the top to get correct results.
Using a dict with the value as the AMOUNT might do the trick. Something like the following-
Suppose you read one line at a time and city indicates the current city and amount indicates the current amount -
main_dict = {}
---for loop here---
if city in main_dict:
main_dict[city] = main_dict[city] + amount
else:
main_dict[city] = amount
---end for loop---
At the end of the loop you will have aggregate values in main_dict.

Categories

Resources