Python pptx merging table rows - python

I have a table in powerpoint where (after rendering with some other function) every other row remains completely empty. I tried solving that by merging every empty row with the row below it, as follows:
def is_empty_row(row):
    """Return True when no cell in ``row`` contains any text."""
    return not any(cell.text for cell in row.cells)
def merge_empty_row(table, index):
    """Merge the row at ``index`` with the row directly below it.

    The merged region runs from the first cell of row ``index`` to the last
    cell of row ``index + 1``. Assumes no 2 consecutive rows are empty.

    Returns None without merging when looking up row ``index + 1`` raises
    IndexError, i.e. there is no row below. Any other error propagates.
    """
    row = table.rows[index]
    try:
        next_row = table.rows[index + 1]
    except IndexError:
        # Only "no such row" means "nothing to merge"; a bare except here
        # would also hide unrelated failures.
        return
    first_cell = row.cells[0]
    # Indexed from the front on purpose: this does not rely on the cell
    # collection supporting negative indexes.
    last_cell = next_row.cells[len(next_row.cells) - 1]
    first_cell.merge(last_cell)
def fix_tables(document):
    """Merge every empty table row with the row below it and save a copy.

    ``document`` is the path of the presentation to open. Each table shape on
    each slide is scanned; every row for which ``is_empty_row`` is true is
    passed to ``merge_empty_row``. The result is saved as
    ``<document without its extension>.out.pptx``.
    """
    import os.path

    ppt = Presentation(document)
    for slide in ppt.slides:
        for shape in slide.shapes:
            if shape.shape_type != MSO_SHAPE_TYPE.TABLE:
                continue
            table = shape.table
            for index in range(len(table.rows)):
                if is_empty_row(table.rows[index]):
                    merge_empty_row(table, index)
    # Strip only the final extension. Splitting on the first "." would cut
    # paths such as "./deck.pptx" or "v1.2/deck.pptx" at the wrong place.
    docname = os.path.splitext(document)[0]
    ppt.save(docname + '.out.pptx')
And I am calling this function from Django on a template pptx file, only to get the following error:
Exception Type: XMLSyntaxError at /amas/analysis/1178/report/download/34
Exception Value: Opening and ending tag mismatch: r line 2 and t, line 2, column 11532 (<string>, line 2)
Any ideas?

My first choice would be avoiding inserting empty rows. But if that weren't possible for some reason you could try deleting empty rows like this:
def delete_row(row):
    """Remove the ``_tr`` element behind ``row`` from its parent element."""
    element = row._tr
    parent = element.getparent()
    parent.remove(element)
# Snapshot the row objects first: the row count must come from the table
# itself (``rows`` does not exist yet at this point).
rows = [table.rows[i] for i in range(len(table.rows))]
# Decide what to delete before deleting anything, so removals cannot shift
# the positions still to be examined.
empty_rows = [r for r in rows if row_is_empty(r)]
for row in empty_rows:
    delete_row(row)
You need to identify the empty rows separately beforehand because otherwise deleting them in the middle of iteration can screw up the references (change which row rows[i] points to).

Related

Is it possible to update a row of data using position of column (e.g. like a list index) in Python / SQLAlchemy?

I am trying to compare two rows of data to one another which I have stored in a list.
# Compare the two stored rows position by position and report which side
# holds a value the other is missing.
for x in range(0, len_data_row):
    first = company_data[0][0][x]
    second = company_data[1][0][x]
    if first == second:
        print('MATCH 1: {} - {}'.format(x, first))
        # do nothing
    # Identity tests are the reliable way to check for None.
    if first is None and second is not None:
        print('MATCH 2: {} - {}'.format(x, second))
        # update first company_id with data from 2nd
    if first is not None and second is None:
        print('MATCH 3: {} - {}'.format(x, first))
        # update second company_id with data from 1st
Pseudocode of what I want to do:
If data at index[x] of a list is not None for row 2, but is blank for row 1, then write the value of row 2 at index[x] for row 1 data in my database.
The part I can't figure out is if in SQLAlchemy you can do specify which column is being updated by an "index" (I think in db-land index means something different than what I mean. What I mean is like a list index, e.g., list[1]). And also if you can dynamically specify which column is being updated by passing a variable to the update code? Here's what I'm looking to do (it doesn't work of course):
def some_name(column_by_index, column_value):
    """Set one column of the row whose id equals ``row_id``.

    ``column_by_index`` selects the column to update: an integer is treated
    as a position in ``table_name.c`` (like a list index), anything else is
    looked up as a column name. ``column_value`` is the value to store.

    Raises IndexError / KeyError when the position or name does not exist.
    """
    if isinstance(column_by_index, int):
        # Materialise the column collection so it can be indexed by position.
        column = list(table_name.c)[column_by_index]
    else:
        column = table_name.c[column_by_index]
    # A keyword argument would target a column literally named
    # "column_by_index"; the dict form lets the column be chosen at runtime.
    u = (
        table_name.update()
        .where(table_name.c.id == row_id)
        .values({column: column_value})
    )
    db.execute(u)
Thank you!

Python - ValueError: could not broadcast input array from shape (5) into shape (2)

I have written some code which takes in my dataframe, which consists of two columns - one is a string and the other is an idea count. The code takes in the dataframe, tries several delimiters and cross-references the result with the count to check it is using the correct one. The result I am looking for is to add a new column called "Ideas" which contains the list of broken-out ideas. My code is below:
def getIdeas(row):
    """Split the text in ``row[0]`` into exactly ``row[1]`` ideas.

    Each delimiter is tried in order, then a split on numbered markers such
    as "1)". The first split whose length equals the expected count is
    returned; if none matches, an empty list is returned.
    """
    text = str(row[0])
    expected = row[1]
    # Try to break on delimiters, most specific first
    for separator in (";;", ";", ",", "\\", "//"):
        parts = text.split(separator)
        if len(parts) == expected:
            return parts
    # Try to break on numbers "N)"
    numbered = re.split(r'[0-9]\)', text)
    return numbered if len(numbered) == expected else []
# k = getIdeas(str_contents3, idea_count3)
xl = pd.ExcelFile("data/Total Dataset.xlsx")
df = xl.parse("Sheet3")
df1 = df.iloc[:, 1:3]
# Keep only rows whose second column is non-zero; copy so the new column is
# added to an independent frame rather than to a slice of ``df``.
df1 = df1.loc[df1.iloc[:, 1] != 0].copy()
# Build the column from a plain list: getIdeas returns lists of varying
# length, which apply(axis=1) may try to broadcast across the row's columns
# ("could not broadcast input array from shape (5) into shape (2)").
df1["Ideas"] = [getIdeas(row) for _, row in df1.iterrows()]
When I run this I am getting an error
ValueError: could not broadcast input array from shape (5) into shape (2)
Could someone tell me how to fix this?
You have two options when using apply with axis=1: either return a single value, or return a list whose length matches the number of columns. If the length matches the number of columns, the list is broadcast across the entire row; if you return a single value, apply returns a pandas Series.
one work around would be not to use apply.
# Collect one list of ideas per row, then attach them as a new column.
result = [getIdeas(row) for _, row in df1.iterrows()]
df1['Ideas'] = result

Google chart input data

I have a python script to build inputs for a Google chart. It correctly creates column headers and the correct number of rows, but repeats the data for the last row in every row. I tried explicitly setting the row indices rather than using a loop (which wouldn't work in practice, but should have worked in testing). It still gives me the same values for each entry. I also had it working when I had this code on the same page as the HTML user form.
end1 = number of rows in the data table
end2 = number of columns in the data table represented by a list of column headers
viewData = data stored in database
# Query every row of the requested view through the 'default' connection.
# NOTE(review): the schema and view name are formatted straight into the SQL
# text - confirm both come from trusted values, not user input.
c = connections['default'].cursor()
c.execute("SELECT * FROM {0}.\"{1}\"".format(analysis_schema, viewName))
# All result rows, fetched at once.
viewData=c.fetchall()
# Cursor column metadata; curDesc[i][0] is used further down as the column name.
curDesc = c.description
# Number of fetched rows and number of described columns.
end1 = len(viewData)
end2 = len(curDesc)
Creates column headers:
# The column at position 2 is the string label column; its heading depends on
# whether activities or commodities are being charted.
labelColumn = curDesc[2][0]
colOrder = [labelColumn]
if activityOrCommodity == "activity":
    tableDescription = {labelColumn: ("string", "Activity")}
elif activityOrCommodity in ("commodity", "aa_commodity"):
    tableDescription = {labelColumn: ("string", "Commodity")}
# Every column from position 3 on is numeric and labelled with its own name.
for i in range(3, end2):
    attValue = curDesc[i][0]
    tableDescription[attValue] = ("number", attValue)
    colOrder.append(attValue)
Creates row data:
data=[]
values = {}
for i in range(0,end1):
for j in range(2, end2):
if j == 2:
values[curDesc[j][0]] = viewData[i][j].encode("utf-8")
else:
values[curDesc[j][0]] = viewData[i][j]
data.append(values)
dataTable = gviz_api.DataTable(tableDescription)
dataTable.LoadData(data)
return dataTable.ToJSon(columns_order=colOrder)
An example javascript output:
var dt = new google.visualization.DataTable({cols:[{id:'activity',label:'Activity',type:'string'},{id:'size',label:'size',type:'number'},{id:'compositeutility',label:'compositeutility',type:'number'}],rows:[{c:[{v:'AA26FedGovAccounts'},{v:49118957568.0},{v:1.94956132673}]},{c:[{v:'AA26FedGovAccounts'},{v:49118957568.0},{v:1.94956132673}]},{c:[{v:'AA26FedGovAccounts'},{v:49118957568.0},{v:1.94956132673}]},{c:[{v:'AA26FedGovAccounts'},{v:49118957568.0},{v:1.94956132673}]},{c:[{v:'AA26FedGovAccounts'},{v:49118957568.0},{v:1.94956132673}]}]}, 0.6);
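It seems you're appending values to data, but values is never reset between iterations: the same dict object is appended every time, so every entry in data ends up showing the last row's values.
I assume this is not intended, right? If so, just move the creation of values (values = {}) inside the first for loop in your row-building code.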

selectionModel and selectedRows used selected rows but PyQt4.QtCore.QModelIndex object at 0x12xxxxxx

def listedensecilensatirlar(self):
    """Print what ``selectedRows()`` returns for ``self.ui.tableWidget``.

    The printed value is the list handed back by the widget's selection
    model (model index objects), not the text of the cells.
    """
    adada = self.ui.tableWidget.selectionModel().selectedRows()
    # Parenthesised form: valid on Python 3 and, for a single argument,
    # prints the same thing on Python 2.
    print(adada)
I have chosen the line in each row I want to achieve but the model did not read the index. I choose what I want to get as text data contained in rows.
This is a picture of my problem: i.stack.imgur.com/APFPl.png
If you want to get the text from the items in the selected rows, you could try this:
# Only index.row() is used below, so the default column of selectedRows() is
# enough; no ``column`` variable has to exist beforehand.
indexes = tablewidget.selectionModel().selectedRows()
for index in sorted(indexes):
    row = index.row()
    rowtext = []
    for col in range(tablewidget.columnCount()):
        item = tablewidget.item(row, col)
        # item() gives None for a cell that never had an item set.
        rowtext.append(item.text() if item is not None else '')
    print(rowtext)
But note that selectedRows only returns rows in which all items are selected.
There is a good answer on the top, but try this one too.
# For each selected row (in sorted order) print the text of the model item
# in column ``number``.
selectionModel = table.selectionModel()
for indexRow in sorted(selectionModel.selectedRows()):
    cellItem = table_model.item(indexRow.row(), column=number)
    print(cellItem.text())

How can I do windowed query on multiple columns primary key?

Based on example found here but I guess I'm not understanding it. This works for single column primary keys but fails on multiple ones.
This is my code
@classmethod
def column_windows(cls, q, columns, windowsize, where=None):
    """Yield WHERE clauses that break a query into windows of rows.

    A row-numbered query over ``columns`` is built from the criterion of
    ``q``; every ``windowsize``-th row supplies the start value of one
    window. One clause is yielded per window: ``columns >= start`` and,
    except for the last window, ``columns < next start``.

    Requires a database that supports window functions,
    i.e. Postgresql, SQL Server, Oracle.

    ``where`` is accepted but not used by this implementation.

    NOTE(review): the range comparison is applied to ``columns`` exactly as
    passed in, and each boundary row is unpacked as a single value, so this
    looks usable for a single column only - confirm before relying on it
    with a multi-column key.
    """
    # Here is the thing... how to compare...
    def int_for_range(start_id, end_id):
        # None marks the open-ended last window. A falsy but real boundary
        # value (e.g. 0) must still produce an upper bound.
        if end_id is not None:
            return and_(
                columns >= start_id,
                columns < end_id
            )
        return columns >= start_id

    if isinstance(columns, Column):
        columns_k = (columns,)
    else:
        columns_k = tuple(columns)

    # Select the key column(s); iterating the normalised tuple means a
    # single Column argument is handled as well.
    q2 = None
    for c in columns_k:
        if q2 is None:
            q2 = q.session.query(c)
        else:
            q2 = q2.add_column(c)
    cols = columns_k
    q2 = q2.add_column(func.row_number().over(order_by=columns_k).label('rownum'))
    q2 = q2.filter(q._criterion).from_self(cols)
    if windowsize > 1:
        # Keep only the first row of every window.
        q2 = q2.filter("rownum %% %d=1" % windowsize)
    # Debug output of the boundary rows (this iterates the query once more).
    for res in q2:
        print(res)
    intervals = [key for key, in q2]
    for position, start in enumerate(intervals):
        # The next window's start is this window's exclusive end.
        if position + 1 < len(intervals):
            end = intervals[position + 1]
        else:
            end = None
        yield int_for_range(start, end)
@classmethod
def windowed_query(cls, q, columns, windowsize):
    """Break a Query into windows on a given column.

    For every WHERE clause produced by ``cls.column_windows`` the query is
    filtered by that clause, ordered by ``columns``, and its rows are
    yielded one at a time.
    """
    for whereclause in cls.column_windows(q, columns, windowsize):
        # NOTE(review): ``columns`` is passed to order_by() as a single
        # argument - confirm this is what is wanted for a multi-column key.
        for row in q.filter(whereclause).order_by(columns):
            yield row
Now I have the problem when comparing the set of columns of the primary key. Well I guess kind of recursive clause generating function should do it... Let's try it...
Well, result is not what expected but got it to work: Now it really windows any query keeping all in place, multi column unique ordering, and so on:
Here is my code, hope it may be useful for someone else:
@classmethod
def window_query(cls, q, windowsize, windows=None):
    """Yield the rows of ``q`` window by window.

    q=Query object we want to window results
    windowsize=The number of elements each window has
    windows=The window, or window list, numbers: 1-based to query.
        When omitted or empty, windows are fetched in order until one
        comes back empty. When given, exactly the requested windows are
        fetched, in the order given, and empty ones are skipped.

    Rows are numbered in a subquery ordered by the query's own ORDER BY
    columns followed by ``cls.getBestUniqueColumns()``; the query is joined
    to that subquery on the unique columns and filtered by row number.
    """
    windowselect = False
    if windows:
        # Accept a single window number as well as any iterable of numbers,
        # and always work on a private copy so the caller's list is not
        # consumed by the pop() calls below.
        if isinstance(windows, int):
            windows = [windows]
        else:
            windows = list(windows)
        windowselect = True
    # Appending u_columns to ordered counting subquery will ensure unique ordering
    u_columns = list(cls.getBestUniqueColumns())
    # o_columns is the list of order by columns for the query
    # (``or ()`` covers a query whose _order_by is unset/falsy)
    o_columns = list(q._order_by or ())
    # we append columns from u_columns not in o_columns to ensure unique
    # ordering but keeping the desired one
    sq_o_columns = list(o_columns)
    for col in u_columns:
        if col not in sq_o_columns:
            sq_o_columns.append(col)
    # we select unique columns in subquery that we'll need to join in parent query
    sub = q.session.query(*u_columns)
    # we add row counting column, counting on the combined ordering+unique
    # columns (passed to over() as a tuple)
    sub = sub.add_column(
        func.row_number().over(order_by=tuple(sq_o_columns)).label('rownum')
    ).filter(q._criterion)
    # Prepare sub query to use as subquery
    sub = sub.subquery('lacrn')
    # Prepare join ON clause comparing the unique columns defined by u_columns
    joinclause = and_(*[col == sub.c[col.key] for col in u_columns])
    # Make the joining
    q = q.join(sub, joinclause)
    i = -1
    while True:
        if windowselect:
            # We want selected-windows-results to be returned
            if not windows:
                break
            i = windows.pop(0) - 1
        else:
            # We want all-windows-results to be returned
            i = i + 1
        res = q.filter(and_(sub.c.rownum > (i * windowsize),
                            sub.c.rownum <= ((i + 1) * windowsize))).all()
        if not (res or windowselect):
            # An all-windows run ends at the first empty window. A
            # selected-windows run must keep going: a requested window may
            # not exist and the requests are unordered.
            # EX: [1,2,9999999999999,3] : Assuming the third page required
            # has no results it will return pages 1, 2, and 3
            break
        for row in res:
            yield row

Categories

Resources