Getting the subheaders in openpyxl - python

I'm currently using openpyxl to use an excel file that has mul-indices (2 Levels of headers) and i'm trying to do the operations depending on the subheaders a header has.
I have some exp. doing this in pandas but for this Project i have to use openpyxl which i barly made any use of before.
Only thing i could think off is the manual way:
iterating over the rows
saving the first row as header and 2nd row as subheader
do some cleaning.
manually save the headers with their subheaders in dics. then filleing in the values by iterating over all the cols
my code is as follows:
#reading the excel file
path = r'path to file'
wb = load_workbook(path) #loading the excel table
ws = wb.active #grab the active worksheet
#Setting the doc Header
for h in ws.iter_rows(max_row = 1, values_only = True): #getting the first row (Headers) in the table
header = list(h)
for sh in ws.iter_rows(min_row = 1 ,max_row = 2, values_only = True):
sub_header = list(sh)
#removing all of the none Values
header = list(filter(None, header))
sub_header = list(filter(None, sub_header))
print(header)
print(sub_header)
#creating a list of all the Columns in the excel file
col_list = []
for col in ws.iter_cols(min_row=3,min_col = 1): #Iteration over every single row starting from the third row since first two are the headers
col = [cell.value for cell in col] #Creating a list from each row
col = list(filter(None, col)) #removing the none values from each row
col_list.append(col) #creating a list of all rows (starting from the 3d one)
#print (col_list)
But i'm sure there must be a better way that i wasnt able to find in the docs or by checking this website.
Thanks in advance!
My goal in the end is to automate this part of my code by iterating over the header and use the subheaders of that head and their values each time
code:
#bulding the templates using yattag "yattag.org"
doc , tag , text = Doc().tagtext()
#building the tags of the xml file
with tag("Data"): #root tag
for row in row_list :
with tag("Row"):
with tag("Input"):
with tag(header[0].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[0] + " the Precentage of Students with "+ sub_header[0] + " is "+ str(row[1]) + " whereas the " + sub_header[1] + " are " + str(row[2]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[0] + " | " + sub_header[0]+ " | " + str(row[1]) + " | " + sub_header[1] + " | " + str(row[2]))
with tag(header[1].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[1] + " the Precentage of Students with "+ sub_header[2] + " is "+ str(row[3]) + " whereas the " + sub_header[3] + " are " + str(row[4]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[1] + " | " + sub_header[2]+ " | " + str(row[3]) + " | " + sub_header[3] + " | " + str(row[4]))
with tag(header[2].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[2] + " the Precentage of Students with "+ sub_header[4] + " is "+ str(row[5]) + " whereas the " + sub_header[5] + " are " + str(row[6]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[2] + " | " + sub_header[4]+ " | " + str(row[5]) + " | " + sub_header[5] + " | " + str(row[6]))
with tag(header[3].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[3] + " the Precentage of Students with "+ sub_header[6] + " is "+ str(row[7]) + " whereas the " + sub_header[7] + " are " + str(row[8]) +" and for " + sub_header[8] + str(row[9]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[3] + " | " + sub_header[6]+ " | " + str(row[7]) + " | " + sub_header[7] + " | " + str(row[8]) + " | " + sub_header[8] + " | " + str(row[9]))
with tag(header[4].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[4] + " the Precentage of Students with "+ sub_header[9] + " is "+ str(row[10]) + " whereas the " + sub_header[10] + " are " + str(row[11]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[4] + " | " + sub_header[9]+ " | " + str(row[10]) + " | " + sub_header[10] + " | " + str(row[11]))
with tag(header[5].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[5] + " the Precentage of Students with "+ sub_header[11] + " is "+ str(row[12]) + " whereas the " + sub_header[12] + " are " + str(row[13]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[5] + " | " + sub_header[11]+ " | " + str(row[12]) + " | " + sub_header[12] + " | " + str(row[13]))
with tag(header[6].replace(' ','_').replace('\n','_')):
text("In " + dic[row[0]]+" the precentage of Students " + " regarding the " + header[6] + " the Precentage of Students with "+ sub_header[13] + " is "+ str(row[14]) + " whereas the " + sub_header[14] + " are " + str(row[15]) )
with tag("Row_Data"):
text(dic[row[0]] + " | " + header[6] + " | " + sub_header[13]+ " | " + str(row[14]) + " | " + sub_header[14] + " | " + str(row[15]))
#print(doc.getvalue())
result = indent(
doc.getvalue(),
indentation=' ',
indent_text=True
)
#saving the xml file
with open("output.xml", "w") as f:
f.write(result)

Unless Pandas is completely off the table I think you might be able to do something pandas and openpyxl. The documentation mentions reading data from openpyxl into a pandas dataframe: Working with Pandas and Numpy.
Could you use:
data = ws.values
df = DataFrame(data[2:,:], index=data[0], columns=data[1])
There may be some filtering necessary with regards to the None values.

Related

Below piece of code is not deleting records but the records are deleted from the query when run manually

def complete_stage_purge_process(self, target_cnxn, stage_table, process_cd):
self.logger.debug(datetime.now())
self.logger.debug('complete_stage_purge_process')
delete_dt = datetime.today() - timedelta(days=30)
delete_dt = str(delete_dt)
run_pk_sql = "select run_pk from " + schemaName.PROCESS.value + "." + tableName.RUN_LOG.value + " where " + ProcessRunlog.ETL_MODIFIED_DTM.value + " <= '" + delete_dt + "' and " + \
ProcessRunlog.PROCESS_PK.value + " = (select " + ProcessRunlog.PROCESS_PK.value + " from " + schemaName.PROCESS.value + "." + \
tableName.PROCESS.value + " where " + \
Process.PROCESS_CODE.value + " = '" + process_cd + "') "
delete_sql = "delete from " + schemaName.STAGE.value + "." + stage_table + " where run_pk in (" + run_pk_sql + ")"
print(delete_sql)
print(target_cnxn)
try:
trgt_cursor = target_cnxn.cursor()
trgt_cursor.execute(delete_sql)
self.logger.debug("deletes processed successfully ")
except:
self.logger.exception('Error in processing deletes')
raise
But when added commit after trgt_cursor.execute(delete_sql) then below error is thrown. Could someone please help on how to handle this
AttributeError: 'psycopg2.extensions.cursor' object has no attribute 'commit'

How to fix stopping loop in following code

I'm working at parser. Loop breaks after exception. Need ur help
def requestBarter():
response = requests.get(api url)
return response.json();
def responsePrint(id, json_data):
title = json_data[id]["title"]
tradable = json_data[id]["tradable"]
wishlist = json_data[id]["wishlist"]
library = json_data[id]["library"]
bundles = json_data[id]["bundles"]
cards = json_data[id]["cards"]
userreviews = json_data[id]["userreviews"]
print("ID: " + id + " | Titile: " + title + " | Tradable: " + str(tradable) + " | Wishlist: " + str(
wishlist) + " | Library: " + str(
library) + " | Bundles: " + str(bundles) + " | Cards: " + str(cards) + " | Userreviews: " + str(userreviews))
def responsePrintOnException(id, json_data):
title = json_data[id]["title"]
tradable = json_data[id]["tradable"]
wishlist = json_data[id]["wishlist"]
library = json_data[id]["library"]
bundles = json_data[id]["bundles"]
cards = json_data[id]["cards"]
print("ID: " + id + " | Titile: " + title + " | Tradable: " + str(tradable) + " | Wishlist: " + str(
wishlist) + " | Library: " + str(
library) + " | Bundles: " + str(bundles) + " | Cards: " + str(cards))
def getAll():
try:
json_data = requestBarter()
for id in json_data:
responsePrint(id, json_data)
except KeyError:
responsePrintOnException(id, json_data)
pass
if __name__ == '__main__':
getAll()
After KeyError getting out of loop so need help in the following code
Expected over 90000 lines. actual output - 30

How to get new line

How can I print a new line on the output file? When I try to add the new line with "/n" it just prints /n
This is what I have so far.
``
inputFile = open("demofile1.txt", "r")
outFile = open("Ji
string = line.split(',')
go =(string)[3::]
bo = [float(i) for i in go]
total = sum(bo)
pine = ("%8.2f"%total)
name = string[2] + "," + " " + string[1]
kale = (string[0] + " " + name + " " + "/n")
se)
Current Result
8
53 Baul
A999999
You need to use \n, not /n. So this line:
kale = (string[0] + " " + name + " " + "/n")
Should be:
kale = (string[0] + " " + name + " " + "\n")
Also, please do consider using a str formatter, so all these lines:
go =(string)[3::]
bo = [float(i) for i in go]
total = sum(bo)
pine = ("%8.2f"%total)
name = string[2] + "," + " " + string[1]
kale = (string[0] + " " + name + " " + "/n")
str1 = ''.join(kale)
str2 = ''.join(pine)
outFile.write(str1 + " " + str2 + " ")
Will become:
outFile.write("{} {} {:8.2f}\n".format(string[0], string[2] + ", " + string[1], sum(bo))

Python error: string index out of range

I try to update a game of n in a row. But when I try to update the array matrix i get the "string out of range error.
I made a while statement with ind < len(board_height).
What am I doing wrong here?
Traceback (most recent call last):
File "matrix.py", line 61, in <module>
drop_disk(print_board(1))
File "matrix.py", line 23, in print_board
(matrix[0][4]) + " " + str(matrix[0][5]) + " " + str(matrix[0][6]) +" |")
IndexError: string index out of range
This is what my terminal spits out at me.
import tkinter
def print_board(y):
"""Prints the board"""
matrix = [
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0]]
matrix = str(y)
print("\n")
print("| " + str(matrix[0][0]) + " " + str(matrix[0][1]) + " " + str(matrix[0][2]) + " " + str(matrix[0][3]) + " " + str
(matrix[0][4]) + " " + str(matrix[0][5]) + " " + str(matrix[0][6]) +" |")
print("| " + str(matrix[1][0]) + " " + str(matrix[1][1]) + " " + str(matrix[1][2]) + " " + str(matrix[1][3]) + " " + str
(matrix[1][4]) + " " + str(matrix[1][5]) + " " + str(matrix[1][6]) +" |")
print("| " + str(matrix[2][0]) + " " + str(matrix[2][1]) + " " + str(matrix[2][2]) + " " + str(matrix[2][3]) + " " + str
(matrix[2][4]) + " " + str(matrix[2][5]) + " " + str(matrix[2][6]) +" |")
print("| " + str(matrix[3][0]) + " " + str(matrix[3][1]) + " " + str(matrix[3][2]) + " " + str(matrix[3][3]) + " " + str (matrix[3][4]) + " " + str(matrix[3][5]) + " " + str(matrix[3][6]) +" |")
print("| " + str(matrix[4][0]) + " " + str(matrix[4][1]) + " " + str(matrix[4][2]) + " " + str(matrix[4][3]) + " " + str (matrix[4][4]) + " " + str(matrix[4][5]) + " " + str(matrix[4][6]) +" |")
print("| " + str(matrix[5][0]) + " " + str(matrix[5][1]) + " " + str(matrix[5][2]) + " " + str(matrix[5][3]) + " " + str (matrix[5][4]) + " " + str(matrix[5][5]) + " " + str(matrix[5][6]) +" |")
print("| " + str(matrix[6][0]) + " " + str(matrix[6][1]) + " " + str(matrix[6][2]) + " " + str(matrix[6][3]) + " " + str (matrix[6][4]) + " " + str(matrix[6][5]) + " " + str(matrix[6][6]) +" |")
print("="*17)
return matrix
def drop_disk(matrix):
"Drops the disk in one of the seven columns"
board_height = 7
empty = 0
row = 0
col = 0
ind = 0
player1 = input("Wat is de naam van speler 1?\n")
player2 = input("Wat is de naam van speler 2?\n")
column = int(input(player1 + ", In welke colom wil je je stuk laten vallen (1-7)?\n"))
while ind < len(board_height):
for y in range(board_height):
if matrix[row][col] == empty:
y = matrix[row -1][col -1] = 1
ind += 1
return y
return -1
drop_disk(print_board(1))
print_board(1)
error is because you are reassigning matrix to str(y)
so matrix changes to str(y) which is actually '1'.
drop_disk(print_board(1)) # calls print_board(1) and sets y = '1'
def print_board(y): # assign y='1'
matrix = [
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0],
[0,0,0,0,0,0,0]]
matrix = str(y) # matrix = 1

Get field name from an object retrieved by orm query

I have a small Django app, and when I query my DB with django orm I get my objects list.
What I want is the name of the db column where the query has matched.
Is possible?
Thanks in advance!
results = Verbs.objects.filter(
Q(fps__icontains = " " + word_name + " ") | Q(fps__endswith = " " + word_name) | Q(fps__startswith = word_name + " ") |
Q(sps__icontains = " " + word_name + " ") | Q(sps__endswith = " " + word_name) | Q(sps__startswith = word_name + " ") |
Q(tps__icontains = " " + word_name + " ") | Q(tps__endswith = " " + word_name) | Q(tps__startswith = word_name + " ") |
Q(fpp__icontains = " " + word_name + " ") | Q(fpp__endswith = " " + word_name) | Q(fpp__startswith = word_name + " ") |
Q(spp__icontains = " " + word_name + " ") | Q(spp__endswith = " " + word_name) | Q(spp__startswith = word_name + " ") |
Q(tpp__icontains = " " + word_name + " ") | Q(tpp__endswith = " " + word_name) | Q(tpp__startswith = word_name + " ")
).all()
What I want is the name of field where the query as matched. For example: fps or fpp ...
I think this is what you want--for each returned row it will print the matched fields, indented:
fields = ('fps', 'sps', 'tps', 'fpp', 'spp', 'tpp')
for r in results.values():
print 'Row with id=%s:' % r['id']
for f in fields:
if word_name in str(r[f]):
print ' Field %s matches' % f
Edited to account for non-string values and to look only at the desired fields.

Categories

Resources