How can I print a new line on the output file? When I try to add the new line with "/n" it just prints /n
This is what I have so far.
``
inputFile = open("demofile1.txt", "r")
outFile = open("Ji
string = line.split(',')
go =(string)[3::]
bo = [float(i) for i in go]
total = sum(bo)
pine = ("%8.2f"%total)
name = string[2] + "," + " " + string[1]
kale = (string[0] + " " + name + " " + "/n")
se)
Current Result
8
53 Baul
A999999
You need to use \n, not /n. So this line:
kale = (string[0] + " " + name + " " + "/n")
Should be:
kale = (string[0] + " " + name + " " + "\n")
Also, please do consider using a str formatter, so all these lines:
go =(string)[3::]
bo = [float(i) for i in go]
total = sum(bo)
pine = ("%8.2f"%total)
name = string[2] + "," + " " + string[1]
kale = (string[0] + " " + name + " " + "/n")
str1 = ''.join(kale)
str2 = ''.join(pine)
outFile.write(str1 + " " + str2 + " ")
Will become:
outFile.write("{} {} {:8.2f}\n".format(string[0], string[2] + ", " + string[1], sum(bo))
Related
def complete_stage_purge_process(self, target_cnxn, stage_table, process_cd):
self.logger.debug(datetime.now())
self.logger.debug('complete_stage_purge_process')
delete_dt = datetime.today() - timedelta(days=30)
delete_dt = str(delete_dt)
run_pk_sql = "select run_pk from " + schemaName.PROCESS.value + "." + tableName.RUN_LOG.value + " where " + ProcessRunlog.ETL_MODIFIED_DTM.value + " <= '" + delete_dt + "' and " + \
ProcessRunlog.PROCESS_PK.value + " = (select " + ProcessRunlog.PROCESS_PK.value + " from " + schemaName.PROCESS.value + "." + \
tableName.PROCESS.value + " where " + \
Process.PROCESS_CODE.value + " = '" + process_cd + "') "
delete_sql = "delete from " + schemaName.STAGE.value + "." + stage_table + " where run_pk in (" + run_pk_sql + ")"
print(delete_sql)
print(target_cnxn)
try:
trgt_cursor = target_cnxn.cursor()
trgt_cursor.execute(delete_sql)
self.logger.debug("deletes processed successfully ")
except:
self.logger.exception('Error in processing deletes')
raise
But when added commit after trgt_cursor.execute(delete_sql) then below error is thrown. Could someone please help on how to handle this
AttributeError: 'psycopg2.extensions.cursor' object has no attribute 'commit'
I'm currently using openpyxl to use an excel file that has mul-indices (2 Levels of headers) and i'm trying to do the operations depending on the subheaders a header has.
I have some exp. doing this in pandas but for this Project i have to use openpyxl which i barly made any use of before.
Only thing i could think off is the manual way:
iterating over the rows
saving the first row as header and 2nd row as subheader
do some cleaning.
manually save the headers with their subheaders in dics. then filleing in the values by iterating over all the cols
my code is as follows:
#reading the excel file
path = r'path to file'
wb = load_workbook(path) #loading the excel table
ws = wb.active #grab the active worksheet
#Setting the doc Header
for h in ws.iter_rows(max_row = 1, values_only = True): #getting the first row (Headers) in the table
header = list(h)
for sh in ws.iter_rows(min_row = 1 ,max_row = 2, values_only = True):
sub_header = list(sh)
#removing all of the none Values
header = list(filter(None, header))
sub_header = list(filter(None, sub_header))
print(header)
print(sub_header)
#creating a list of all the Columns in the excel file
col_list = []
for col in ws.iter_cols(min_row=3,min_col = 1): #Iteration over every single row starting from the third row since first two are the headers
col = [cell.value for cell in col] #Creating a list from each row
col = list(filter(None, col)) #removing the none values from each row
col_list.append(col) #creating a list of all rows (starting from the 3d one)
#print (col_list)
But i'm sure there must be a better way that i wasnt able to find in the docs or by checking this website.
Thanks in advance!
My goal in the end is to automate this part of my code by iterating over the header and use the subheaders of that head and their values each time
code:
#bulding the templates using yattag "yattag.org"
doc , tag , text = Doc().tagtext()
#building the tags of the xml file
with tag("Data"): #root tag
for row in row_list :
with tag("Row"):
with tag("Input"):
with tag(header[0].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[0] + " the Precentage of Students with "+ sub_header[0] + " is "+ str(row[1]) + " whereas the " + sub_header[1] + " are " + str(row[2]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[0] + " | " + sub_header[0]+ " | " + str(row[1]) + " | " + sub_header[1] + " | " + str(row[2]))
with tag(header[1].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[1] + " the Precentage of Students with "+ sub_header[2] + " is "+ str(row[3]) + " whereas the " + sub_header[3] + " are " + str(row[4]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[1] + " | " + sub_header[2]+ " | " + str(row[3]) + " | " + sub_header[3] + " | " + str(row[4]))
with tag(header[2].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[2] + " the Precentage of Students with "+ sub_header[4] + " is "+ str(row[5]) + " whereas the " + sub_header[5] + " are " + str(row[6]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[2] + " | " + sub_header[4]+ " | " + str(row[5]) + " | " + sub_header[5] + " | " + str(row[6]))
with tag(header[3].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[3] + " the Precentage of Students with "+ sub_header[6] + " is "+ str(row[7]) + " whereas the " + sub_header[7] + " are " + str(row[8]) +" and for " + sub_header[8] + str(row[9]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[3] + " | " + sub_header[6]+ " | " + str(row[7]) + " | " + sub_header[7] + " | " + str(row[8]) + " | " + sub_header[8] + " | " + str(row[9]))
with tag(header[4].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[4] + " the Precentage of Students with "+ sub_header[9] + " is "+ str(row[10]) + " whereas the " + sub_header[10] + " are " + str(row[11]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[4] + " | " + sub_header[9]+ " | " + str(row[10]) + " | " + sub_header[10] + " | " + str(row[11]))
with tag(header[5].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[5] + " the Precentage of Students with "+ sub_header[11] + " is "+ str(row[12]) + " whereas the " + sub_header[12] + " are " + str(row[13]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[5] + " | " + sub_header[11]+ " | " + str(row[12]) + " | " + sub_header[12] + " | " + str(row[13]))
with tag(header[6].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[6] + " the Precentage of Students with "+ sub_header[13] + " is "+ str(row[14]) + " whereas the " + sub_header[14] + " are " + str(row[15]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[6] + " | " + sub_header[13]+ " | " + str(row[14]) + " | " + sub_header[14] + " | " + str(row[15]))
#print(doc.getvalue())
result = indent(
doc.getvalue(),
indentation=' ',
indent_text=True
)
#saving the xml file
with open("output.xml", "w") as f:
f.write(result)
Unless Pandas is completely off the table I think you might be able to do something pandas and openpyxl. The documentation mentions reading data from openpyxl into a pandas dataframe: Working with Pandas and Numpy.
Could you use:
data = ws.values
df = DataFrame(data[2:,:], index=data[0], columns=data[1])
There may be some filtering necessary with regards to the None values.
This is my file text:
Covid-19 Data
Country / Number of infections / Number of Death
USA 124.356 2.236
Netherlands 10.866 771
Georgia 90 NA
Germany 58.247 455
I created a function to calculate the ratio of deaths compared to the infections, however it does not work, because some of the values aren't floats.
f=open("myfile.txt","w+")
x="USA" + " " + " " + "124.356" + " " + " " + "2.236"
y="Netherlands" + " " + " " + "10.866" + " " + " " + "771"
z="Georgia" + " " + " " + "90" + " " + " " + "NA"
w="Germany" + " " + " " + "58.247" + " " + " " + "455"
f.write("Covid-19 Data" + "\n" + "Country" + " " + "/" + " " + "Number of infections" + " " + "/" + " " + "Number of Death" + "\n")
f.write(x + "\n")
f.write(y + "\n")
f.write(z + "\n")
f.write(w)
f.close()
with open("myfile.txt", "r") as file:
try:
for i in file:
t = i.split()
result=float(t[-1])/float(t[-2])
print(results)
except:
print("fail")
file.close()
Does someone have an idea how to solve this problem ?
You can do the following:
with open("myfile.txt", "r") as file:
for i in file:
t = i.split()
try:
result = float(t[-1]) / float(t[-2])
print(result)
except ValueError:
pass
At the time you don't know if the values you are trying to divide are numeric values or not, therefore surrounding the operation with a try-catch should solve your problem.
If you want to become a bit more "clean" you can do the following:
def is_float(value):
try:
float(value)
except ValueError:
return False
return True
with open("myfile.txt", "r") as file:
for i in file:
t = i.split()
if is_float(t[-1]) and is_float(t[-2]):
result = float(t[-1]) / float(t[-2])
print(result)
The idea is the same, however.
I used the same file that you attached in your example. I created this function hopefully it helps:
with open("test.txt","r") as reader:
lines = reader.readlines()
for line in lines[2:]:
line = line.replace(".","") # Remove points to have the full value
country, number_infections, number_deaths = line.strip().split()
try:
number_infections = float(number_infections)
number_deaths = float(number_deaths)
except Exception as e:
print(f"[WARNING] Could not convert Number of Infections {number_infections} or Number of Deaths {number_deaths} to float for Country: {country}\n")
continue
ratio = number_deaths/number_infections
print(f"Country: {country} D/I ratio: {ratio}")
As you can see I avoided the headers of your file using lines[2:] that means that I will start from row 3 of your file. Also, added try/exception logic to avoid non-float converts. Hope this helps!
Edit
Just noticed that the format for thousands is used with "." instead "," in that case the period was removed in line 7.
The results for this execution is:
Country: USA D/I ratio: 0.017980636237897647
Country: Netherlands D/I ratio: 0.07095527332965212
[WARNING] Could not convert Number of Infections 90.0 or Number of Deaths NA to float for Country: Georgia
Country: Germany D/I ratio: 0.007811561110443456
Fixed the following:
The first two lines in your text-file are headers. These need to be skipped
'NA' Can't be converted to zero
If there is a 0 in your data, your program would crash. Now it wouldn't.
f=open("myfile.txt","w+")
x="USA" + " " + " " + "124.356" + " " + " " + "2.236"
y="Netherlands" + " " + " " + "10.866" + " " + " " + "771"
z="Georgia" + " " + " " + "90" + " " + " " + "NA"
w="Germany" + " " + " " + "58.247" + " " + " " + "455"
f.write("Covid-19 Data" + "\n" + "Country" + " " + "/" + " " + "Number of infections" + " " + "/" + " " + "Number of Death" + "\n")
f.write(x + "\n")
f.write(y + "\n")
f.write(z + "\n")
f.write(w)
f.close()
with open("myfile.txt", "r") as file:
#Skipping headers
next(file)
next(file)
try:
for i in file:
t = i.split()
#Make sure your code keeps working when one of the numbers is zero
x = 0
y = 0
#There are some NA's in your file. Strings not representing
#a number can't be converted to float
if t[1] != "NA":
x = t[1]
if t[2] != "NA":
y = t[2]
if x == 0 or y == 0:
result = 0
else:
result=float(x)/float(y)
print(t[0] + ": " + str(result))
except:
print("fail")
file.close()
Output:
USA: 55.615384615384606
Netherlands: 0.014093385214007782
Georgia: 0
Germany: 0.12801538461538461
Your header line in the file is Covid-19 Data. this is the first line and when you call t=i.split() you then have a list t which has data ['Covid-19', 'Data']
you cannot convert these to floats since they have letters in them. Instead you should read the first 2 header line before the loop and do nothing with them. However you are then going to have issues with Georgia as "NA" also cannot be converted to a float.
A few other points, its not good practice to have a catch all exception. Also you dont need to close the file explicitly if you open the file using a with statement.
This script was created by an ex-lab member that was quite a bit more adapt at Python scripting than I am.
I am attempting to find Cooccupancy between annotated peaks in "exon" regions of the entire human h19 genome. However, after trying to get this to run for about an hour I am looking for help.
Here is the script:
#!/usr/bin/python
import math
import sys
import re
import csv
import MySQLdb
import itertools
import argparse
# format for execution: ./findCooccupancy.py <loci file> <comma separated list of marks to check> <window size> <outputfile>
# example: ./findCooccupancy.py AllGenes.txt PolII-ChIP,KAP1-ChIP,Hexim 150 output.txt
# format of loci file:
# chr2 12345678 12345900 GENEA 1 +
# chr4 987654321 98765000 GENEB 1 -
# etc...
locifile = sys.argv[1]
marks = sys.argv[2]
window = int(sys.argv[3])
outputfile = sys.argv[4]
loci = list(csv.reader(open(locifile, 'rb'),delimiter='\t'))
#loci = list(itertools.chain.from_iterable(loci))
db = MySQLdb.connect(host="localhost",user="snrnp",passwd="snrnp",db="snrnp")
cur = db.cursor()
cntdict = {}
for mark in marks.split(","):
cntdict[mark] = []
counter = 1
for locus in loci:
print "Working on line# " + str(counter)
counter += 1
if str(locus[5]) == "+":
exon = locus[1]
else:
exon = locus[2]
for mark in marks.split(","):
# this is incredibly dirty. sorry. I don't have time to do this better
if mark == 'PolII-ChIP':
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")")
#print "select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")"
else:
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and ((chr_start < " + str(exon) + " and chr_end > " + str(exon) + ") or (abs(chr_start - " + str(exon) + ") < " + str(window) + ") or (abs(chr_end - " + str(exon) + ") < " + str(window) + "))")
#print "select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and ((chr_start < " + str(exon) + " and chr_end > " + str(exon) + ") or (abs(chr_start - " + str(exon) + ") < " + str(window) + ") or (abs(chr_end - " + str(exon) + ") < " + str(window) + "))"
cnt = cur.fetchone()[0]
if cnt > 0:
cntdict[mark].append(",".join(locus))
convertedlist = []
for key in cntdict.keys():
convertedlist.append(cntdict[key])
intersectlist = set(convertedlist[0]).intersection(*convertedlist[1:])
for key in cntdict.keys():
print str(key) + " hits: " + str(len(cntdict[key]))
print "\nTotal Intersection Count: " + str(len(intersectlist))
with open(outputfile, 'w') as outputwriter:
for line in intersectlist:
outputwriter.write(line + "\n")
This is the command line that I have been using:
./findCooccupancy.py ~/code/snRNP/analysis/from\ sequencing/KEC_Project/Pol-IIAnnotatedPeaksGenome.txt PolII-ChIP 150 KECExonOccupancy.txt
This is the latest error message I have received:
Working on line# 1
Traceback (most recent call last):
File "./findCooccupancy.py", line 41, in <module>
cur.execute("select count(*) from CHIP_PEAK where mark = '" + str(mark) + "' and chr = '" + str(locus[0]) + "' and (abs(summit - " + str(exon) + ") < " + str(window) + ")")
File "/Library/Python/2.7/site-packages/MySQLdb/cursors.py", line 205, in execute
self.errorhandler(self, exc, value)
File "/Library/Python/2.7/site-packages/MySQLdb/connections.py", line 36, in defaulterrorhandler
raise errorclass, errorvalue
_mysql_exceptions.OperationalError: (1054, "Unknown column 'Start' in 'where clause'")
I try to update a game of n in a row. But when I try to update the array matrix i get the "string out of range error.
I made a while statement with ind < len(board_height).
What am I doing wrong here?
Traceback (most recent call last):
File "matrix.py", line 61, in <module>
drop_disk(print_board(1))
File "matrix.py", line 23, in print_board
(matrix[0][4]) + " " + str(matrix[0][5]) + " " + str(matrix[0][6]) +" |")
IndexError: string index out of range
This is what my terminal spits out at me.
import tkinter
def print_board(y):
"""Prints the board"""
matrix = [
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0]]
matrix = str(y)
print("\n")
print("| " + str(matrix[0][0]) + " " + str(matrix[0][1]) + " " + str(matrix[0][2]) + " " + str(matrix[0][3]) + " " + str
(matrix[0][4]) + " " + str(matrix[0][5]) + " " + str(matrix[0][6]) +" |")
print("| " + str(matrix[1][0]) + " " + str(matrix[1][1]) + " " + str(matrix[1][2]) + " " + str(matrix[1][3]) + " " + str
(matrix[1][4]) + " " + str(matrix[1][5]) + " " + str(matrix[1][6]) +" |")
print("| " + str(matrix[2][0]) + " " + str(matrix[2][1]) + " " + str(matrix[2][2]) + " " + str(matrix[2][3]) + " " + str
(matrix[2][4]) + " " + str(matrix[2][5]) + " " + str(matrix[2][6]) +" |")
print("| " + str(matrix[3][0]) + " " + str(matrix[3][1]) + " " + str(matrix[3][2]) + " " + str(matrix[3][3]) + " " + str (matrix[3][4]) + " " + str(matrix[3][5]) + " " + str(matrix[3][6]) +" |")
print("| " + str(matrix[4][0]) + " " + str(matrix[4][1]) + " " + str(matrix[4][2]) + " " + str(matrix[4][3]) + " " + str (matrix[4][4]) + " " + str(matrix[4][5]) + " " + str(matrix[4][6]) +" |")
print("| " + str(matrix[5][0]) + " " + str(matrix[5][1]) + " " + str(matrix[5][2]) + " " + str(matrix[5][3]) + " " + str (matrix[5][4]) + " " + str(matrix[5][5]) + " " + str(matrix[5][6]) +" |")
print("| " + str(matrix[6][0]) + " " + str(matrix[6][1]) + " " + str(matrix[6][2]) + " " + str(matrix[6][3]) + " " + str (matrix[6][4]) + " " + str(matrix[6][5]) + " " + str(matrix[6][6]) +" |")
print("="*17)
return matrix
def drop_disk(matrix):
"Drops the disk in one of the seven columns"
board_height = 7
empty = 0
row = 0
col = 0
ind = 0
player1 = input("Wat is de naam van speler 1?\n")
player2 = input("Wat is de naam van speler 2?\n")
column = int(input(player1 + ", In welke colom wil je je stuk laten vallen (1-7)?\n"))
while ind < len(board_height):
for y in range(board_height):
if matrix[row][col] == empty:
y = matrix[row -1][col -1] = 1
ind += 1
return y
return -1
drop_disk(print_board(1))
print_board(1)
error is because you are reassigning matrix to str(y)
so matrix changes to str(y) which is actually '1'.
drop_disk(print_board(1)) # calls print_board(1) and sets y = '1'
def print_board(y): # assign y='1'
matrix = [
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0]]
matrix = str(y) # matrix = 1