Sorting Columns within csv files(Python) - python

Having issues with code. was given a file called "racing.csv" that stored the variables found in the "Drive" class. Concept behind the problem is that the program should sort the racetimes (lowest to highest) and assign points to the top 3 racers then export this data to a new file. All code is working fine aside from when I'm calling the shortBubbleSort on Drive and isn't sorting the racetimes correctly. Help is appreciated.
import csv
class Drive(object):
driver = ""
team = ""
racetime = 0.0
points = 0
def __init__(self,driver,team,racetime,points):
self.driver = driver
self.team = team
self.racetime = racetime
self.points = points
f = open('racing.csv', 'r')
csv_f = list(csv.reader(f))
driverclasses = []
for i in range(len(csv_f)):
d = Drive(csv_f[i][0],csv_f[i][1],csv_f[i][2],csv_f[i][3])
driverclasses.append(d)
for row in csv_f:
print (row)
for x in range(0, 6):
csv_f[x][2]=(input("Enter Racetime"))
def shortBubbleSort(alist):
exchanges = True
passnum = len(alist)-1
while passnum > 0 and exchanges:
exchanges = False
for i in range(passnum):
if alist[i]>alist[i+1]:
exchanges = True
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp
passnum = passnum-1
shortBubbleSort(Drive)
print(csv_f)
csv_f[0][3] = 25
csv_f[1][3] = 18
csv_f[2][3] = 15
f = open('RacingResults.csv', 'w')
for row in csv_f:
print (row)

Does this help?
**range function sintax*: range([start], stop[, step])
start: Starting number of the sequence.
stop: Generate numbers up to, but not including this number.
step: Difference between each number in the sequence.
def shortBubbleSort(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i]>alist[i+1]:
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp

Related

How to re-write a while statement as an if statement in Python 2.7?

I wrote a script that searches an excel document for 'X', and when it finds an 'X' it copies the first column and first row associated with the 'X' into a CSV file.
I've been told that there's a better way to do this with 'if' statements. Not quite sure how.
Here's the code:
import xlrd
import csv
###Grab the data from sheet 1
def get_row_values(workSheet, row):
to_return = []
num_cells = myWorksheet.ncols - 1
curr_cell = -1
while curr_cell < num_cells:
curr_cell += 1
cell_value = myWorksheet.cell_value(row, curr_cell)
to_return.append(cell_value)
return to_return
file_path = 'foo.xlsx'
output = []
#Write the data
myWorkbook = xlrd.open_workbook(file_path)
myWorksheet = myWorkbook.sheet_by_name('foosheet')
num_rows = myWorksheet.nrows - 1
curr_row = 0
column_names = get_row_values(myWorksheet, curr_row)
#print("TOTAL ENTRIES:")
#print len(column_names)
#print("-----")
framework_name = myWorksheet.cell(0,2)
framework_version = myWorksheet.cell(0,3)
while curr_row < num_rows:
curr_row += 1
row = myWorksheet.row(curr_row)
this_row = get_row_values(myWorksheet, curr_row)
x = 0
while x <len(this_row):
if this_row[x] == 'x':
output.append(['', fooname, foo_version,
foo_names[x], foo_row[0]])
myData = [["foo1", "foo2",
"foo3", "foo4", "foo5"]]
myFile = open('./results/barTemp.csv', 'w')
with myFile:
writer = csv.writer(myFile)
writer.writerows(myData)
writer.writerows(output)
x += 1
#print output
myFile.close()
myWorkbook.release_resources()
Its not necessarily better. Still the same runtime-complexity.
The difference would be a more compact line:
For example, you can change
while x < len(this_row):
to
for x in this_row:
but I see that you use the 'x' index to find column_names[x] so another approach might be better such as
for x in range(len(this_row)):

Python: object of type '_io.TextIOWrapper' has no len()

I keep getting the error when running my code:
TypeError: object of type '_io.TextIOWrapper' has no len() function
How do I get it to open/read the file and run it through the loop?
Here's a link to the file that I am trying to import:
download link of the DNA sequence
def mostCommonSubstring():
dna = open("dna.txt", "r")
mink = 4
maxk = 9
count = 0
check = 0
answer = ""
k = mink
while k <= maxk:
for i in range(len(dna)-k+1):
sub = dna[i:i+k]
count = 0
for i in range(len(dna)-k+1):
if dna[i:i+k] == sub:
count = count + 1
if count >= check:
answer = sub
check = count
k=k+1
print(answer)
print(check)
The problem occurs due to the way you are opening the text file.
You should add dna = dna.read() to your code.
so your end code should look something like this:
def mostCommonSubstring():
dna = open("dna.txt", "r")
dna = dna.read()
mink = 4
maxk = 9
count = 0
check = 0
answer = ""
k = mink
while k <= maxk:
for i in range(len(dna)-k+1):
sub = dna[i:i+k]
count = 0
for i in range(len(dna)-k+1):
if dna[i:i+k] == sub:
count = count + 1
if count >= check:
answer = sub
check = count
k=k+1
print(answer)
print(check)
#tfabiant : I suggest this script to read and process a DNA sequence.
To run this code, in the terminal: python readfasta.py fastafile.fasta
import string, sys
##########I. To Load Fasta File##############
file = open(sys.argv[1])
rfile = file.readline()
seqs = {}
##########II. To Make fasta dictionary####
tnv = ""#temporal name value
while rfile != "":
if ">" in rfile:
tnv = string.strip(rfile)
seqs[tnv] = ""
else:
seqs[tnv] += string.strip(rfile)
rfile = file.readline()
##############III. To Make Counts########
count_what = ["A", "T", "C", "G", "ATG"]
for s in seqs:
name = s
seq = seqs[s]
print s # to print seq name if you have a multifasta file
for cw in count_what:
print cw, seq.count(cw)# to print counts by seq

Bubble sort descending list in Python

How can I arrange a list, with bubble sort, but in descending order?
I searched in other topics, but I couldn't find an answer.
This is my working implementation of Bubblesort code:
from timeit import default_timer as timer
import resource
start = timer()
def bubbleSort(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i]>alist[i+1]:
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp
with open('lista.txt', 'r') as f:
long_string = f.readline()
alist = long_string.split(',')
bubbleSort(alist)
f = open("bubble.txt", "w")
print >>f,(alist)
print resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
end = timer()
print(end - start)
f.close()
You need to replace the greater than in the following if statement if alist[i]<alist[i+1]: with a smaller than if alist[i]<alist[i+1]:. You also need to return alist leaving you with the following.
def bubbleSort(alist):
for passnum in range(len(alist)-1,0,-1):
for i in range(passnum):
if alist[i]<alist[i+1]:
temp = alist[i]
alist[i] = alist[i+1]
alist[i+1] = temp
return(alist)
Write like this alist[i]<alist[i+1]

Python CSV judge numbers' composition and replace it

I have a python script to run PostgreSQL and store its output in a CSV file. The script and the file looks like,
import sys, os
os.chdir('C:\Users\Heinz\Desktop')
print os.getcwd()
#set up psycopg2 environment
import psycopg2
#driving_distance module
query = """
select *
from driving_distance ($$
select
gid as id,
source::int4 as source,
target::int4 as target,
cost::double precision as cost,
rcost::double precision as reverse_cost
from network
$$, %s, %s, %s, %s
)
"""
#make connection between python and postgresql
conn = psycopg2.connect("dbname = 'TC_area' user = 'postgres' host = 'localhost' password = 'xxxx'")
cur = conn.cursor()
#count rows in the table
cur.execute("select count(*) from network")
result = cur.fetchone()
k = result[0] + 1 #number of points = number of segments + 1
#run loops
rs = []
i = 1
while i <= k:
cur.execute(query, (i, 100000000000, False, True))
rs.append(cur.fetchall())
i = i + 1
#import csv module
import csv
import tempfile
import shutil
j = 0
h = 0
ars = []
element = list(rs)
#export data to every row
filename = 'distMatrix.csv'
with open(filename, 'wb') as f:
writer = csv.writer(f, delimiter = ',')
while j <= k - 1:
while h <= k - 1:
rp = element[j][h][2]
ars.append(rp)
h = h + 1
else:
h = 0
writer.writerow(ars)
ars = []
j = j + 1
#concerning about flow-connection
with open(filename, 'rb') as f, tempfile.NamedTemporaryFile(mode='wb', delete=False) as g:
writer = csv.writer(g, delimiter = ',')
for row in csv.reader(f):
row = [element if float(element) < 10**6 else 0 for element in row]
writer.writerow(row)
shutil.move(g.name, filename)
conn.close()
The numbers in the CSV file are paths calculated by PostgreSQL, and I know they are all composed by the following numbers, let's call them generator
0, 1, 0.844, 0.69, 0.567, 0.387, 0.156, 0.31, 0.433, 0.613
I want to write some codes that can judge these 2 conditions, and then edit every field in this CSV file,
if numbers in the CSV file are just the same as one of the generator, then they stay the same as their original number
if numbers in the CSV file are not one of the generator, then the code can judge this number composed by what generator, for example, 2 = 1 + 1, and then change the addition to multiplication, for the last example, replace this number by 1 * 1
I think these additional codes should be implemented in this part of the script,
#export data to every row
filename = 'distMatrix.csv'
with open(filename, 'wb') as f:
writer = csv.writer(f, delimiter = ',')
while j <= k - 1:
while h <= k - 1:
rp = element[j][h][2]
ars.append(rp)
h = h + 1
else:
h = 0
writer.writerow(ars)
ars = []
j = j + 1
But how to do this task? Please give me some suggestions and hints, thank you.
I am using python 2.7.4 under Windows 8.1 x64.
The second part of your requirement is somewhat confusing. But it sounds like to me you need a generator function to provide values on demand from a list and a way to test if the number is in a list...
list = [ 0, 1, 0.844, 0.69, 0.567, 0.387, 0.156, 0.31, 0.433, 0.613 ]
def gen():
for i in range(len(list)):
yield list[i]
g = gen()
def test_the_number(nbr):
if nbr-int(nbr) in list:
print("Number in list")
else:
print(next(g))
nbr = 5 # not in list
test_the_number(nbr)
nbr =777 # also not in the list
test_the_number(nbr)
nbr = 0.844 # In the list
test_the_number(nbr)

Determining Nearest Neighbour in Dijkstra

Ok, I have changed my code a little, but I am getting confused on what variable names should be passed to my nearestNeighbour function. These two functions work ok:
infinity = 1000000
invalid_node = -1
startNode = 0
#Values to assign to each node
class Node:
def __init__(self):
self.distFromSource = infinity
self.previous = invalid_node
self.visited = False
#read in all network nodes
#node = the distance values between nodes
def network():
f = open ('network.txt', 'r')
theNetwork = [[int(networkNode) for networkNode in line.split(',')] for line in f.readlines()]
#theNetwork = [[int(node) for node in line.split(',')] for line in f.readlines()]
#print theNetwork
return theNetwork
#for each node assign default values
#populate table with default values
def populateNodeTable():
nodeTable = []
index = 0
f = open('network.txt', 'r')
for line in f:
networkNode = map(int, line.split(','))
nodeTable.append(Node())
#print "The previous node is " ,nodeTable[index].previous
#print "The distance from source is " ,nodeTable[index].distFromSource
#print networkNode
index +=1
nodeTable[startNode].distFromSource = 0
return nodeTable
So, all well and good. However, my next function is giving me an error, and despite me changing variable names in the brackets I can't work out the problem. Here is the next function code and the error message:
def nearestNeighbour(nodeTable, theNetwork):
listOfNeighbours = []
nodeIndex = 0
for networkNode in nodeTable[currentNode]:
if networkNode != 0 and networkNode.visited == False:
listOfNeighbours.append(nearestNode)
nodeIndex +=1
print listOfNeighbours
## #print node.distFromSource, node.previous, node.visited
##
return listOfNeighbours
for networkNode in nodeTable[currentNode]:
TypeError: iteration over non-sequence
I think you want nodeTable[node], not node[nodeTable], and similarly with theNetwork[node].

Categories

Resources