Iterating through top row of excel sheet - python

I have the following section of code which is using openpyxl to search through the top row of a spreadsheet and find the first element that does not contain a value. It returns the following error when I run it. Is there a better way to do this? Or how do I get rid of the error?
# Scan row 1 of the worksheet one cell at a time until a cell with no value is found.
val = "something"
j = 1
titleIndex = None
while val != None:
# Build a coordinate string of the form "<char>1", where <char> is '#' shifted by j.
# NOTE(review): '#' is ASCII 35, so with j = 1 this yields '$1', which is not a column letter.
# The traceback quoted below shows ord('A') instead — confirm which version was actually run.
val = lecture['%s1' % chr(ord('#') + j)].internal_value
print val
print j
j += 1
else:
# Runs once the loop condition becomes false: j was already advanced past the
# empty cell, hence the "- 1" when rebuilding its coordinate.
titleIndex = '%s1' % chr(ord('#') + j - 1)
File "C:\Users\ecustodio\Documents\Python Scripts\ExcelIterate.py",
line 14, in set_title
val = lecture['%s1' % chr(ord('A') + j)].internal_value File "C:\Users\ecustodio\AppData\Local\Continuum\anaconda2\lib\site-packages\openpyxl\worksheet\worksheet.py",
line 345, in getitem
min_col, min_row, max_col, max_row = range_boundaries(key) File "C:\Users\ecustodio\AppData\Local\Continuum\anaconda2\lib\site-packages\openpyxl\utils\cell.py",
line 135, in range_boundaries
raise ValueError("{0} is not a valid coordinate or range") ValueError: {0} is not a valid coordinate or range

As far as I can see, the line
val = lecture['%s1' % chr(ord('A') + j)].internal_value
provided in the error message differs from the one in the code:
val = lecture['%s1' % chr(ord('#') + j)].internal_value
Please, check the value of '%s1' % chr(ord('#') + j) or whatever before requesting the item from lecture. And be sure that your lecture is really an existing worksheet.

Related

Python - I am getting an error that says 'substring not found'

I am trying to make a transposition cipher encryption function for a class project.
from string import ascii_lowercase
def swap(s: str, index0: int, index1: int):
    """Return a copy of ``s`` with the characters at the two positions exchanged.

    The indices may be given in either order. ``None`` is returned when the
    larger index is past the end of ``s`` or the smaller one is negative.
    """
    lo, hi = (index0, index1) if index0 < index1 else (index1, index0)
    if lo < 0 or hi >= len(s):
        return None
    # First put the later character into the earlier slot ...
    half_swapped = s[:lo] + s[hi] + s[lo + 1:]
    # ... then the earlier character into the later slot.
    return half_swapped[:hi] + s[lo] + s[hi + 1:]
# Apply one swap to s for every character of key and return the result.
def swap_encrypt(s: str, key:str):
ret = s
for key_chr in key:
# Position of the key character in 'abc...z'; str.index raises
# ValueError ("substring not found") for anything that is not a lowercase ASCII letter.
index = ascii_lowercase.index(key_chr)
# Wrap both positions so they stay inside the string: the chosen position
# is swapped with the one after it (wrapping back to 0 at the end).
swap_this = index % len(ret)
with_this = (swap_this + 1) % len(ret)
ret = swap(ret, swap_this, with_this)
return ret
# Module-level placeholders; main2 below assigns its own local s and key.
s = ''
key = ''
# Prompt for a message and a keyword, encrypt the message and print the result.
def main2():
s = input('Enter your message: ')
# cleanup() is not shown in this snippet — presumably it normalizes the text; verify.
s = cleanup(s)
key = input('Enter your keyword: ')
key = cleanup(key)
ret= swap_encrypt((s), (key))
print(cleanup(ret))
main2()
I am getting the error 'substring not found', is there something I am doing wrong?
If my input is =(‘SLOTH POWER’) for s, (‘TOP’) for the key, my output should be: ‘RLOTPOHWES’
Is there also another way to do this that limits the functions used to ord(), len(), and range()? If so, could I be shown how as well?
error:
Traceback (most recent call last):
File "c:\Users\darks\OneDrive\Documents\7\ciphers.py", line 139, in <module>
main2()
File "c:\Users\darks\OneDrive\Documents\7\ciphers.py", line 136, in main2
ret= swap_encrypt((s), (key))
File "c:\Users\darks\OneDrive\Documents\7\ciphers.py", line 123, in swap_encrypt
index = ascii_lowercase.index(key_chr)
ValueError: substring not found
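It can't find the character in ascii_lowercase because your keyword is uppercase: the lookup ascii_lowercase.index(key_chr) runs on each character of key, so 'TOP' fails on 'T'. Try "top" instead of "TOP", or call key.lower() before the loop (and s.lower() as well if the message should be lowercase too).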

New to Python, using Sublime Text: "TypeError: not all arguments converted during string formatting" — works on all other queries but this one

# For the one workbook of interest, read every data row of the 'New Dept Data'
# sheet into a tuple and insert the tuples in batches.
for fi in dirs:
file_nm = fi
if file_nm == "XXX_PRODCost.xlsx":
print 'Beginning file:', file_nm
wb=open_workbook('Labor_Cost' + '\\' + file_nm)
sh=wb.sheet_by_name('New Dept Data')
values=[]
# Start at row 1, i.e. the first row (index 0) is skipped.
for r in range(1, sh.nrows):
# NOTE(review): v has 8 slots (indices 0-7), so the v[8] assignment below
# would raise IndexError — confirm the intended size.
v = [None]*8
#v[0] = str(sh.cell(r,0).value)[:6] # YM
v[3] = sh.cell(r,2).value #
#v[1] = sh.cell(r,1).value #
v[0] = sh.cell(r,4).value # DEPT
v[1] = sh.cell(r,5).value # DEPT Job
v[4] = time # append date
v[2] = sh.cell(r,6).value # PROD #
v[7] = sh.cell(r,7).value #
v[8] = sh.cell(r,3).value #
values.append(tuple(v))
# lst_split() is not shown here — presumably it yields index groups used to batch the rows; verify.
for i in lst_split(len(values)):
# NOTE(review): this statement has 5 %s placeholders while each row tuple is
# built from an 8-element list — confirm the two agree.
cnxn.cursor().executemany('insert into LABOR."stgLaborFlagsX" values (%s,%s,%s,%s,%s)', values[min(i):max(i)+1])
I continue to get this error:
Traceback (most recent call last):
File "xxx_ALL.py", line 128, in <module>
cnxn.cursor().executemany('insert into LABOR."xxxxPROD" values (%s,%s,%s,%s,%s,%s,%s)', values[min(i):max(i)+1])
File "C:\Python27\lib\site-packages\snowflake\connector\cursor.py", line 702, in executemany
param, self))
TypeError: not all arguments converted during string formatting
That is because your insert statement is expecting 7 data items, but your code is making a slice of the list values whose rows have 8 or more elements. Print values[min(i):max(i)+1] before your executemany() call to diagnose the problem.

Python Pandas How to save output to csv

Hello, I am currently working on my project. I want to find candidate text blocks by using the algorithm below.
My input is a csv document which contain :
HTML column : the html code in a line
TAG column : the tag of html code in a line
Words : the text inside the tag in a line
TC : the number of words in a line
LTC : the number of anchor words in a line
TG : the number of tag in a line
P : the number of tag p and br in a line
CTTD : TC + (0.2*LTC) + TG - P
CTTDs : the smoothed CTTD
This is my algorithm to find candidate of text block. I make the csv file into dataframe using pandas. I am using CTTDs,TC and TG column to find the candidate.
from ListSmoothing import get_filepaths_smoothing
import pandas as pd
import numpy as np
import csv
# Collect the input CSV paths, then derive the two thresholds for each file.
filenames = get_filepaths_smoothing(r"C:\Users\kimhyesung\PycharmProjects\newsextraction\smoothing")
# Running counter, later used to number the output files.
index = 0
for f in filenames:
file_html=open(str(f),"r")
df = pd.read_csv(file_html)
#df = pd.read_csv('smoothing/Smoothing001.csv')
# Column arrays the thresholds are computed from.
news = np.array(df['CTTDs'])
new = np.array(df['TG'])
# Smallest and largest non-zero values of the CTTDs column.
minval = np.min(news[np.nonzero(news)])
maxval = np.max(news[np.nonzero(news)])
# The CTTD threshold sits a fraction j of the way from minval to maxval.
j = 0.2
thetaCTTD = minval + j * (maxval-minval)
#maxGap = np.max(new[np.nonzero(new)])
#minGap = np.min(new[np.nonzero(new)])
# The gap threshold is the smallest non-zero value of the TG column.
thetaGap = np.min(new[np.nonzero(new)])
#print thetaCTTD
#print maxval
#print minval
#print thetaGap
# Walk the rows of df and collect candidate row ranges as
# {k: {'start': ..., 'end': ...}}, where k counts up from 0.
# thetaCTTD is compared with the 'CTTDs' column; thetaGAP with a running gap counter.
def create_candidates(df, thetaCTTD, thetaGAP):
k = 0
TB = {}
TC = 0
# NOTE(review): range(0, len(df) - 1) stops one short of len(df) — confirm the last row is meant to be left out.
for index in range(0, len(df) - 1):
start = index
# NOTE(review): DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0;
# .loc is the usual replacement — confirm the pandas version in use.
if df.ix[index]['CTTDs'] > thetaCTTD:
start = index
gap = 0
TC = df.ix[index]['TC']
# NOTE(review): this inner loop rebinds the same name `index` that the outer loop uses.
for index in range(index + 1, len(df) - 1):
# Rows whose 'TG' is 0 are skipped without touching gap or TC.
if df.ix[index]['TG'] == 0:
continue
# Stop extending once a low row is met after the gap allowance is used up.
elif df.ix[index]['CTTDs'] <= thetaCTTD and gap >= thetaGAP:
break
elif df.ix[index]['CTTDs'] <= thetaCTTD:
gap += 1
TC += df.ix[index]['TC']
# Skip candidates with no accumulated TC or with no rows beyond the start.
if (TC < 1) or (start == index):
continue
TB.update({
k: {
'start': start,
'end': index - 1
}
})
k += 1
return TB
def get_unique_candidate(TB):
    """Drop or trim candidate ranges that collide with their successor.

    ``TB`` maps consecutive integer keys (0, 1, 2, ...) to dicts holding
    ``'start'`` and ``'end'``. Each entry is compared with the entry stored
    under the next key:

    * same ``'end'``: the successor is removed from the result;
    * successor starts strictly inside this range: this range's ``'end'`` is
      moved to just before the successor's ``'start'``.

    Returns a new dict. ``TB`` and its nested dicts are left unmodified.
    Raises KeyError if the keys are not consecutive.
    """
    # Work from the argument itself (not a global) and copy the nested dicts so
    # that trimming an 'end' never leaks back into the caller's data.
    unique = {key: dict(value) for key, value in TB.items()}
    last_key = len(TB) - 1
    for key in sorted(TB):
        if key == last_key:
            break
        current, following = TB[key], TB[key + 1]
        if current['end'] == following['end']:
            del unique[key + 1]
        elif current['start'] < following['start'] < current['end']:
            # This entry may already have been removed as a duplicate of its
            # predecessor; there is nothing left to trim in that case.
            if key in unique:
                unique[key]['end'] = following['start'] - 1
    return unique
# Number the output file after the running counter, e.g. textcandidate001.csv.
index += 1
stored_file = "textcandidate/textcandidate" + '{0:03}'.format(index) + ".csv"
tb = create_candidates(df, thetaCTTD, thetaGap)
TB = get_unique_candidate(tb)
# Collect the rows of every candidate range, tagging each with its candidate number.
df_list = []
for (k, d) in TB.items():
    # .loc slices by label, so the 'end' row is included. copy() makes the
    # slice an independent frame, so adding a column does not write to a view of df.
    candidate_df = df.loc[d['start']:d['end']].copy()
    candidate_df['candidate'] = k
    df_list.append(candidate_df)
output_df = pd.concat(df_list)
# to_csv opens, writes and closes the file by path on its own. The separate
# open(stored_file, "wb") handle and csv.writer were never written to, and
# `filewrite.close` (without parentheses) never closed the handle, so both are dropped.
output_df.to_csv(stored_file)
ThetaCTTD is 10.36 and thethaGap is 1.
The output is
The output means there are 2 candidate text blocks. The first candidate text block starts at line number 215 and ends at line number 225 (as in the picture below). The other candidate text block starts at line number 500 and ends at line number 501.
My question is how to save the output into csv and not only the number of line but the range of the text block and the others column will appear as the output too?
My expected output is like the screenshot of candidate text block is like this one
Assuming your output is a list of dictionaries:
pd.concat([df.loc[d['start']:d['end']] for (k, d) in TB.iteritems()])
Note that we slice by label, so d['end'] will be included.
Edit: add the candidate number in a new column.
It's cleaner to write a loop than to do two concat operations:
# Build one frame per candidate range, then concatenate once at the end.
df_list = []
for (k, d) in TB.items():
    # Label-based slice (the 'end' row is included). copy() gives an independent
    # frame, so adding the column does not trigger SettingWithCopyWarning.
    candidate_df = df.loc[d['start']:d['end']].copy()
    candidate_df['candidate'] = k
    df_list.append(candidate_df)
output_df = pd.concat(df_list)
It's also faster to concatenate all dataframes at once at the end.

Inserting a string into a list of integers

I am trying to make a script where a '-' is put in between all odd digits in a given number (ie 991453 would be 9-9-145-3), but for some reason python wont allow me to insert a str into a list of integers. The error I keep on getting is 'TypeError: not all arguments converted during string formatting'
My code:
# Intended to put a '-' between neighbouring odd digits of text.
def DashInsert(text):
# One int per character of text.
list_int = map(int, list(text))
for i in xrange(len(list_int)-1):
if (list_int[i] % 2 == 1) and (list_int[i+1] % 2 == 1):
print i
# NOTE: this inserts a str into the very list the loop is indexing, so a later
# list_int[i] can be '-' instead of an int, and '-' % 2 is string formatting.
list_int.insert(i+1,'-')
return list_int
Here is my actual input and error:
999472
0
Traceback (most recent call last):
File "DashInsert.py", line 17, in
print DashInsert(string)
File "DashInsert.py", line 11, in DashInsert
if (list_int[i] % 2 == 1) and (list_int[i+1] % 2 == 1):
TypeError: not all arguments converted during string formatting
Your error is because you are modifying the list that you are iterating over. When you insert - into the list, that becomes the target of % and you get a TypeError.
In Python, % is an operator for string formatting and '-' is a string; that is why you get a less than clear error:
>>> '-' % 2
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: not all arguments converted during string formatting
For strings you use % this way:
>>> 'x %s y %s %i' % ('and', 'is', 13)
'x and y is 13'
The fix to your code is to append to a separate list:
def DashInsert(s):
    """Return the digits of ``s`` as strings, with '-' between odd neighbours.

    ``s`` is an iterable of digit characters; ``"991453"`` gives
    ``['9', '-', '9', '-', '1', '4', '5', '-', '3']``. An empty ``s`` gives
    ``[]``. Raises ValueError if a character cannot be converted with ``int``.
    """
    # Build a real list rather than a map object, so it can be indexed and
    # sliced on both Python 2 and Python 3.
    digits = [int(ch) for ch in s]
    if not digits:
        return []
    rtr = []
    # Pair every digit with its successor; the last digit is added afterwards.
    for current, following in zip(digits, digits[1:]):
        rtr.append(str(current))
        if current % 2 == 1 and following % 2 == 1:
            rtr.append('-')
    rtr.append(str(digits[-1]))
    return rtr
You could do this through regex.
>>> import re
>>> s = 991453
>>> re.sub(r'(?<=[13579])(?=[13579])', r'-', str(s))
'9-9-145-3'
I suspect this is horrible code but it works-
number = 991453
number_list = []
for i, item in enumerate(str(number)):
    try:
        # Look one character ahead to see whether both neighbours are odd.
        both_odd = int(item) % 2 != 0 and int(str(number)[i + 1]) % 2 != 0
    except (IndexError, ValueError):
        # IndexError: the last character has no successor.
        # ValueError: a non-digit character such as a leading '-'.
        both_odd = False
    number_list.append(item + '-' if both_odd else item)
print(''.join(number_list))
Edit: Actually, there's no need to make a list so we can do this-
number = 991453
dash_number = ''
for i, item in enumerate(str(number)):
    dash_number += item
    try:
        # A dash follows this digit only when it and the next one are both odd.
        if int(item) % 2 != 0 and int(str(number)[i + 1]) % 2 != 0:
            dash_number += '-'
    except (IndexError, ValueError):
        # IndexError: the last character has no successor.
        # ValueError: a non-digit character such as a leading '-'.
        pass
print(dash_number)
Edit: Here's how to do it without the try/except.
number = 991453
digits = str(number)
dash_number = ''
# Walk over every digit together with its successor; the final digit has no
# successor and is appended after the loop.
for current, following in zip(digits, digits[1:]):
    dash_number += current
    if int(current) % 2 != 0 and int(following) % 2 != 0:
        dash_number += '-'
dash_number += digits[-1]
print(dash_number)

(Python) List index out of range when trying to pull data out of a .CSV?

This program pulls data out of two .CSV files, which are linked here:
https://drive.google.com/folderview?id=0B1SjPejhqNU-bVkzYlVHM2oxdGs&usp=sharing
It's supposed to look for anything after a comma in each of the two files, but my range logic is somehow wrong. I'm getting a traceback that points to line 101:
"line 101, in calc_corr: sum_smokers_value = sum_smokers_value + float(s_percent_smokers_data[r][1])
IndexError: list index out of range"
I assume it would do the same for the other times [k][1] shows up.
many thanks in advance if there's a way to fix this.
the program so far is:
# this program opens two files containing data and runs a correlation calculation
import math
def main():
    """Run the smokers / lung-cancer correlation and print the r value.

    Prompts for the two data files, reads them, computes the correlation and
    prints it. An IOError raised along the way is reported and the run is
    abandoned.
    """
    try:
        print('does smoking directly lead to lung cancer?')
        print('''let's find out, shall we?''''')
        print('to do so, this program will find correlation between the instances of smokers, and the number of people with lung cancer.')
        percent_smokers, percent_cancer = retrieve_csv()
        try:
            s_percent_smokers_data, c_percent_cancer_data = read_csv(percent_smokers, percent_cancer)
        finally:
            # Both files are fully consumed (or unusable) at this point.
            percent_smokers.close()
            percent_cancer.close()
        correlation = calc_corr(s_percent_smokers_data, c_percent_cancer_data)
        # The name printed here must match the variable assigned above; the
        # misspelling 'corretation' raised NameError.
        print('r_value =', correlation)
    except IOError as e:
        print(str(e))
        print('this program has been cancelled. run it again.')
def retrieve_csv():
    """Prompt for the two data file names and return both files opened for reading.

    A file is asked for until it opens; a file that has already opened is not
    asked for again. After five failed attempts in total, any file that did
    open is closed and an IOError is raised.

    Returns:
        (percent_smokers, percent_cancer) as open text-mode file objects.
    """
    max_failures = 5
    percent_smokers = None
    percent_cancer = None
    num_times_failed = 0
    while (percent_smokers is None or percent_cancer is None) and num_times_failed < max_failures:
        try:
            if percent_smokers is None:
                percent_smokers_input = input('what is the name of the file containing the percentage of smokers per state?')
                percent_smokers = open(percent_smokers_input, 'r')
            if percent_cancer is None:
                percent_cancer_input = input('what is the name of the file containing the number of cases of lung cancer contracted?')
                percent_cancer = open(percent_cancer_input, 'r')
        except IOError:
            print('a file was not located. try again.')
            num_times_failed = num_times_failed + 1
    if percent_smokers is None or percent_cancer is None:
        # Do not leak whichever file did open before giving up.
        for opened in (percent_smokers, percent_cancer):
            if opened is not None:
                opened.close()
        raise IOError('you have failed too many times.')
    return (percent_smokers, percent_cancer)
def read_csv(percent_smokers, percent_cancer):
    """Read both open files line by line, in lockstep, into lists of fields.

    The first line of each file is read and discarded. Every following line is
    stripped and split on ','. Reading stops when both files are exhausted at
    the same time.

    Returns:
        (smokers_rows, cancer_rows), each a list of field lists.

    Raises:
        IOError: one file runs out of lines before the other.
    """
    smokers_rows = []
    cancer_rows = []
    # Discard the first line of each file.
    percent_smokers.readline()
    percent_cancer.readline()
    while True:
        smoker_line = percent_smokers.readline()
        cancer_line = percent_cancer.readline()
        smokers_done = smoker_line == ''
        cancer_done = cancer_line == ''
        if smokers_done and cancer_done:
            break
        if smokers_done:
            raise IOError('smokers file error')
        if cancer_done:
            raise IOError('cancer file error')
        smokers_rows.append(smoker_line.strip().split(','))
        cancer_rows.append(cancer_line.strip().split(','))
    return (smokers_rows, cancer_rows)
def calc_corr(s_percent_smokers_data, c_percent_cancer_data):
    """Return the Pearson correlation coefficient r of the two data columns.

    Each argument is a list of rows; the value taken from every row is the
    element at index 1, converted with ``float``. Rows are paired by position,
    and the number of pairs is the length of ``s_percent_smokers_data``.

    Raises:
        IndexError: a row has no element at index 1, or the cancer list is
            shorter than the smokers list.
        ValueError: a value cannot be converted to float.
        ZeroDivisionError: there are no rows, or a column has zero variance.
    """
    numbers = len(s_percent_smokers_data)
    # Convert once up front instead of calling float() repeatedly per row.
    smokers = [float(row[1]) for row in s_percent_smokers_data]
    cancer = [float(c_percent_cancer_data[k][1]) for k in range(numbers)]

    sum_smokers_value = sum(smokers)
    sum_cancer_cases_values = sum(cancer)
    sum_smokers_sq = sum(x ** 2 for x in smokers)
    sum_cancer_cases_sq = sum(y ** 2 for y in cancer)
    # Sum of the pairwise products x*y (a product, not a power).
    sum_value_products = sum(x * y for x, y in zip(smokers, cancer))

    numerator_value = (numbers * sum_value_products) - (sum_smokers_value * sum_cancer_cases_values)
    smokers_spread = (numbers * sum_smokers_sq) - (sum_smokers_value ** 2)
    cancer_spread = (numbers * sum_cancer_cases_sq) - (sum_cancer_cases_values ** 2)
    # abs() guards sqrt against a tiny negative value caused by float rounding.
    denominator_value = math.sqrt(abs(smokers_spread * cancer_spread))
    return numerator_value / denominator_value
main()
The values in each row of your data files are not comma separated, but rather tab separated. You need to change the ',' delimiter character you're splitting on for '\t'. Or perhaps use the csv module and tell it that your delimiter is '\t'. You can read more about the csv module in the documentation.

Categories

Resources