In my QTableView there are four columns. The 0th column holds dates in dd-MM-yyyy format, and the other three columns contain strings, so sorting them is not a problem (it can be done using the QSortFilterProxyModel class). But for column 0 I want the sort to compare the date fields from right to left — year, then month, then day — for both ascending and descending order.
Here is simple example of customSortingModel
# Wire a QStandardItemModel to the view through the custom sorting proxy.
# Fix: QtGui has no ``TableView`` class — the widget is ``QTableView``.
self.tableView = QtGui.QTableView(self)
self.table_model = QtGui.QStandardItemModel(0, 0)
self.proxyModel = CustomSortingModel(self)
self.proxyModel.setSourceModel(self.table_model)
self.tableView.setModel(self.proxyModel)
class CustomSortingModel(QtGui.QSortFilterProxyModel):
    """Sort proxy that compares column 2 numerically and column 3 as dates.

    All other columns fall back to the raw (string) comparison.
    """

    def lessThan(self, left, right):
        # Both indexes come from the same column during a sort.
        column = left.column()
        left_value = left.data()
        right_value = right.data()

        if column == 2:
            # Numeric column: compare as floats, not lexicographically.
            return float(left_value) < float(right_value)

        if column == 3:
            # Two-digit years parse as 19xx; addYears(100) shifts them to 20xx.
            fmt = "d/M/yy"
            left_date = QtCore.QDateTime.fromString(left_value, fmt).addYears(100)
            right_date = QtCore.QDateTime.fromString(right_value, fmt).addYears(100)
            return left_date < right_date

        return left_value < right_value
Related
I have Dataset like this:
ORDER_CODE
ITEM_ID
ITEM_NAME
TOTALPRICE
123
id1
name1
345
321
id2
name2
678
and a function that calculates which items were sold together — which ones were most popular or more expensive.
out:
ITEM_ID
sold together
id1
[ id33, id23, id12 ]
id2
[ id56, id663 ]
I using this Func:
def freq(df, hit_list=None, top_n=15):
    """Return, for every item, the other items most often sold with it.

    Parameters
    ----------
    df : pandas.DataFrame
        Order lines with at least the columns 'ORDER_CODE', 'ITEM_ID'
        and 'TOTALPRICE'.
    hit_list : iterable, optional
        Item ids to exclude from the results (the original code had the
        invalid placeholder ``[list of ID's]`` here).
    top_n : int, optional
        Maximum number of companion items kept per item (original kept 15).

    Returns
    -------
    pandas.DataFrame
        Columns 'ITEM_ID' and 'sold together' — a list of companion item
        ids (as strings) ordered by co-occurrence count, ties broken by
        total turnover, both descending.

    Fixes over the original:
    - ``response_ur`` (undefined) replaced by the ``df`` argument;
    - ``result = res.append(...)`` (``res`` undefined, and
      ``DataFrame.append`` was removed in pandas 2.x) replaced by
      collecting row dicts and building the frame once;
    - ``df['ID']`` replaced by ``df['ITEM_ID']`` (assumed typo — there is
      no 'ID' column in the described dataset; TODO confirm).
    """
    excluded = set(hit_list) if hit_list else set()

    # Total turnover per item, used to break frequency ties.
    turnover = (
        df[['ITEM_ID', 'TOTALPRICE']]
        .groupby('ITEM_ID', as_index=False)
        .sum()
        .rename(columns={'ITEM_ID': 'inum', 'TOTALPRICE': 'turn'})
    )
    turnover['inum'] = turnover['inum'].astype(str)

    rows = []
    for item in df['ITEM_ID'].unique():
        # All orders containing this item, then every line of those orders.
        orders = df.loc[df['ITEM_ID'] == item, 'ORDER_CODE'].unique()
        basket = df.loc[df['ORDER_CODE'].isin(orders), 'ITEM_ID']

        counts = {}
        for other in basket:
            if other != item and other not in excluded:
                counts[other] = counts.get(other, 0) + 1

        companions = pd.DataFrame(
            {'inum': list(counts.keys()), 'freq': list(counts.values())}
        )
        companions['inum'] = companions['inum'].astype(str)
        companions = companions.merge(turnover, on='inum', how='inner')
        companions = companions.sort_values(['freq', 'turn'], ascending=False)

        rows.append({'ITEM_ID': item,
                     'sold together': companions['inum'].head(top_n).tolist()})

    return pd.DataFrame(rows, columns=['ITEM_ID', 'sold together'])
I tried adding a merge for attaching ITEM_NAME inside the function on each iteration, but it is far too slow. My dataset has about 10kk (10 million) rows.
I need to add one more column to my output containing the list of ITEM_NAMEs for the items in the 'sold together' list — and it needs to be calculated fast.
UPD:
Here's what is needed:
item_id
list_of items
list_of_names
sum
id_01
[id, id, id, id]
[name, name....]
num
Where list_of items - 'list of most common' items, which were purchased with 'item_id'
This might do it:
import pandas as pd

# Toy order data: one row per order line.
orders = {
    'ORDER_CODE': ['123', '321', '123', '123', '321', '555'],
    'ITEM_ID': [1, 2, 5, 5, 4, 6],
    'ITEM_NAME': ['name1', 'name2', 'name3', 'name4', 'name5', 'name6'],
    'TOTALPRICE': [10, 20, 50, 50, 40, 60],
}
df = pd.DataFrame(orders)

# Collapse each order into item/name lists plus the summed order price.
aggregations = {'ITEM_ID': list, 'ITEM_NAME': list, 'TOTALPRICE': 'sum'}
result = df.groupby('ORDER_CODE').agg(aggregations)
Further good answer how to create a list in a group by aggregation:
I have a long list of names in Column A and its x,y coordinates in column B and C. I want the script to find:
a partial string match in the names in column a and also pick the one with the same y coordinates:
find the max and min values of the x coordinates
append ‘start’ to the max x value cell name. Append ‘end’ to the min x value cell name
Sheet
`
import xlwings as xw
# Active workbook captured at import time.
# NOTE(review): module-level state — rename() below depends on this global;
# consider opening the book inside the function instead.
wb = xw.books.active
def rename():
    """Group rows named 'DAD' by Y coordinate and rename them (question code).

    NOTE(review): kept as-is; this version has several visible problems:
    - ``name`` in the max-row expression is undefined (NameError at runtime).
    - ``cell.value`` may be None for blank cells, so ``'DAD' in cell.value``
      can raise TypeError.
    - ``sht.range(f'C{cell.row}')`` is a single cell, so the inner ``for yv``
      loop runs exactly once per matching row.
    """
    sht = wb.sheets['new Sheet']
    dict1 = {}   # {y_coord: [x_coords]} for rows whose name contains 'DAD'
    ccount = 0   # counts consecutive rows with equal Y values
    # NOTE(review): ``name`` is undefined here — presumably 'new Sheet'
    # (or index 0) was intended; confirm against the workbook.
    mr = sht.range(
        'A' + str(wb.sheets[name].cells.last_cell.row)).end('up').row
    for cell in sht.range(f'A2:A{mr}'):
        if 'DAD' in cell.value:
            # x_coord = int(sht.range(f'B{cell.row}').value)
            # y_coord = int(sht.range(f'C{cell.row}').value)
            for yv in sht.range(f'C{cell.row}'):
                # Compare this Y value with the cell directly below it.
                if yv.value == yv.offset(1, 0).value:
                    ccount = ccount+1
                    if ccount >= 4:
                        y_coord = int(sht.range(f'C{yv.row}').value)
                        x_coord = int(sht.range(f'B{yv.row}').value)
                        if y_coord in dict1:
                            dict1[y_coord] += [x_coord]
                        else:
                            dict1[y_coord] = [x_coord]
    count = 0
    start = 2
    # Walk each Y group: largest X becomes *_START, smallest *_END,
    # everything in between DAD_<count>.
    for key, value in dict1.items():
        value.sort(reverse=True)
        for enum, i in enumerate(value, start):
            if enum == start:
                count = 2
                first_name = [f'A{enum}+_START', i, key]
                sht.range(f'A{enum}').value = first_name
            elif enum == len(value)+start-1:
                last_name = [f'A{enum}+_END', i, key]
                sht.range(f'A{enum}').value = last_name
            else:
                between_names = [f'DAD_{count}', i, key]
                sht.range(f'A{enum}').value = between_names
            # Increment the in-between name counter every second row.
            if enum % 2 == 0:
                count += 1
Your requirement and the data you are working with is clearer so now I can give a better example.
The code will create a dictionary using the Y coord as the key and the x cords as values.
Using your updated information, the dictionary 'dict1' would have three keys: 67475, 57475 and 47475. Every X coordinate whose name includes the text 'DAD' is added as a value under the corresponding Y coordinate key.
Then simply iterate through each key, sort the values in reverse so we know the first item is MAX value and the last is MIN value and then print each Y coord group with generated name and X coord separately.
def rename(fn):
    """Group 'DAD' rows by Y coordinate, rename them START/END/DAD_n and
    write the result to an 'Output' sheet of workbook *fn*.

    Parameters
    ----------
    fn : str
        Path of the workbook to open, process and save.

    Fixes over the original:
    - ``wb.save(filename)`` referenced the module-level global instead of
      the ``fn`` parameter; the function now saves the book it opened.
    - blank cells in column A made ``'DAD' in cell.value.upper()`` raise
      TypeError; blank cells are now skipped.
    """
    out_sheet = 'Output'  # Name of sheet to write output to
    dict1 = {}            # Unsorted data: {y_coord: [x_coords]}
    header_list = []      # Header texts copied from row 1

    ### Use a context manager with Xlwings
    with xw.App() as app:
        ### Open Workbook and input sheet
        wb = xw.Book(fn)
        sht = wb.sheets['new Sheet']
        ### Get maximum rows in the sheet
        mr = sht.range('A' + str(wb.sheets[0].cells.last_cell.row)).end('up').row

        ### Extract the data: iterate column A rows 1 - max rows
        for cell in sht.range(f'A1:A{mr}'):
            ### Row 1 holds the headers
            if cell.row == 1:
                for header_cell in sht.range(f'A1:C1'):
                    header_list.append(header_cell.value)
                continue
            ### Skip blank cells (fix), then match names containing 'DAD'
            if cell.value and 'DAD' in cell.value.upper():
                ### Collect the X coord under its Y coord key
                x_coord = int(sht.range(f'B{cell.row}').value)
                y_coord = int(sht.range(f'C{cell.row}').value)
                if y_coord in dict1:
                    dict1[y_coord] += [x_coord]
                else:
                    dict1[y_coord] = [x_coord]

        ### Create the sheet 'Output' for write if not exist
        if out_sheet not in wb.sheet_names:
            sht2 = wb.sheets.add(out_sheet, after=wb.sheets[0])
            ### Add headers to new sheet
            sht2['A1'].value = header_list
            sht2.range('A1:C1').font.bold = True
        else:
            sht2 = wb.sheets['Output']

        ### Print and write data to the screen and output sheet
        count = 0
        start = 2
        for key, value in dict1.items():
            ### Sort x cords into descending order
            value.sort(reverse=True)
            for enum, i in enumerate(value, start):
                if enum == start:
                    ### First x cord is max value
                    count = 2
                    tstr_start = ['DAD_START', i, key]
                    print(tstr_start, end=' ')
                    sht2.range(f'A{enum}').value = tstr_start
                elif enum == len(value)+start-1:
                    ### Last x cord is min value
                    tstr_end = ['DAD_END', i, key]
                    print(tstr_end)
                    sht2.range(f'A{enum}').value = tstr_end
                else:
                    ### In-between cells: name with 'count', bumped every 2nd loop
                    tstr_mid = [f'DAD_{count}', i, key]
                    print(tstr_mid, end=' ')
                    sht2.range(f'A{enum}').value = tstr_mid
                if enum % 2 == 0:
                    count += 1
            print('')
            start = enum+2
        print('\n--------------\n')
        ### Save Workbook (fix: was wb.save(filename) — a global)
        wb.save(fn)
# Script entry point: process a fixed workbook file.
if __name__ == '__main__':
    filename = 'foo.xlsx'
    rename(filename)
I have a list of items in a 'variable:value' format, but the same 'variable' can appear multiple times. The only thing I know is that all values that follow the 'ID' category belong to the same 'ID', so I know how many rows I need (3 in this example).
I need to create a dataframe from this list. The problem I am encountering is that I cannot add a string value to my DF ('could not convert str to float'). I am not sure how to proceed.
# Raw 'variable:value' entries; every record begins at an 'ID' entry.
mylist = ['ID:1', 'Date: Oct 2', 'B:88', 'C:noun', 'D:44', 'ID:2', 'B:55', 'C:noun', 'D:45', 'ID:3',
          'Date:Sept 5', 'B:55', 'C:verb']

# Distinct category names (the text before the first ':').
categories = []
for entry in mylist:
    categories.append(entry.split(':', 1)[0])
variables = list(set(categories))

# Build one dict per record instead of poking a float ndarray.
# Fixes over the original:
# - np.empty((3, n)) creates a float matrix, which is what caused the
#   'could not convert str to float' error when string values arrived;
# - ``df[counter, tracker] = value`` adds a tuple-named *column*, it does
#   not set a cell;
# - the bare ``float(value)`` calls discarded their results.
records = []
current = {}
for entry in mylist:
    category, value = entry.split(':', 1)
    if category == 'ID' and current:
        records.append(current)   # previous record is complete
        current = {}
    current[category] = value
if current:
    records.append(current)       # flush the final record

df = pd.DataFrame(records, columns=variables)
In addition, I've tried converting the items in the list to dictionary, but I am not sure if that's the best way to achieve my goal:
# Second attempt (question code): still broken.
# NOTE(review): ``df[counter] = ...`` assigns an integer-named *column*,
# not a row, and both branches of the if/else are identical, so the
# "ID" test changes nothing except the counter.
df = np.empty((3,len(variables)))
df = pd.DataFrame(df, columns = variables)
mydict = {}
counter = -1
for i in mylist:
    item = i.split(":")
    category = item[0]   # text before the ':'
    value = item[1]      # text after the ':'
    # NOTE(review): rebinding mydict discards all previously seen pairs.
    mydict = {category:value}
    if category == "ID":
        counter = counter + 1
        df[counter] = pd.DataFrame.from_dict(mydict)
    else:
        df[counter] = pd.DataFrame.from_dict(mydict)
Edit:
I solved it. Code below:
# Rebuild the dataframe: one row per record, a record starting at each 'ID'.
# Fixes over the posted "solved" version:
# - ``DataFrame.append`` was removed in pandas 2.0; rows are collected in
#   a list and handed to the constructor once instead;
# - the original appended the final record *before* merging the last
#   entry into it, silently dropping the last field ('C:verb');
# - no need for the empty leading row + ``iloc[1:]`` trick.
records = []
mydict = {}
for entry in mylist:
    category, value = entry.split(':', 1)
    if category == 'ID' and mydict:
        records.append(mydict)    # previous record is complete
        mydict = {}
    mydict[category] = value
if mydict:
    records.append(mydict)        # flush the final record
df = pd.DataFrame(records, columns=variables)
Perhaps this works
# Long-to-wide alternative: one row per entry, then pivot on the key.
# Fix: the original referenced the undefined name ``my_list`` — the list
# defined in the question is ``mylist``.
df = pd.DataFrame([e.split(':') for e in mylist],
                  columns=['key', 'value'])
df = df.pivot(columns='key', values='value')  # not tested in the original
I am using merge to join two data frames together. The 2 data frames are data from a database table over two different dates. I need to work out what changed. The number of rows will be different, but I just to join the newer data set to the older data set with an inner join, and see what changed.
At the moment, I am taking advantage of the _x and _y naming of the data frames, and the .columns data to let me compare the fields for differences after the merge.
There must be an easier way to do this. I did try the new compare() method added in pandas 1.1.0, but it doesn't seem to accept frames of different shapes (i.e. differing row counts, in my case), rendering it useless to me.
def get_changed_records(df_old, df_new, file_info):
    """Inner-join two snapshots of a table and report per-row column changes.

    Parameters
    ----------
    df_old, df_new : pandas.DataFrame
        Older and newer snapshots of the same table.
    file_info : dict
        Must provide 'compare_col' (comma-separated join key names) plus
        'old_file_name' and 'new_file_name' (used for logging only).

    Returns
    -------
    list
        One entry per changed row; each entry is a list of
        ['COLUMN_CHANGE', column, before_value, after_value, row, join_keys].

    Fixes over the original: the header print contained a stray
    " current_file_name " literal instead of a variable; the null/value
    comparison chain of ``== True`` / ``== False`` tests is collapsed into
    three explicit cases; debug separators removed; semicolons dropped.
    """
    join_keys = file_info["compare_col"].split(",")
    print("Changed Records: comparing previous file",
          file_info["old_file_name"], "with new file",
          file_info["new_file_name"])

    columns = df_new.columns
    # Merge suffixes: _x = new (left frame), _y = old (right frame).
    df_merged = df_new.merge(df_old, how='inner', on=join_keys, indicator=True)

    changed_records = []
    for _, row in df_merged.iterrows():
        changes = []
        for col in columns:
            if col in join_keys:
                after_col = before_col = col   # join keys are unsuffixed
            else:
                after_col = col + '_x'
                before_col = col + '_y'
            after_val = row[after_col]
            before_val = row[before_col]

            before_null = pd.isnull(before_val)
            after_null = pd.isnull(after_val)
            if before_null and after_null:
                changed = False            # NaN -> NaN counts as unchanged
            elif before_null or after_null:
                changed = True             # value appeared or disappeared
            else:
                changed = after_val != before_val

            if changed:
                print("COLUMN_CHANGE:", col,
                      "before", before_val, "after", after_val)
                changes.append(['COLUMN_CHANGE', col, before_val, after_val,
                                row, join_keys])
        if changes:
            changed_records.append(changes)

    print("changed records", len(changed_records))
    return changed_records
I am trying to create an excel spreadsheet from a pandas.pivot_table.
I don't want to use to_excel as it:
Renders at half the speed
Doesn't allow cell formatting
Doesn't compute cell widths
Doesn't allow me to create other cells with formulas
etc.
create a dataframe from a list of lists
convert to a pivot table with pivot_table
convert to records with to_records
Now I can create a worksheet, but I need the column headers for the indexes, which I can save as they were specified when creating the pivot table, but how do I get the inferred column_names deduced from the distinct values?
import pandas
import datetime
import logging
import math
import random
from dateutil.relativedelta import relativedelta
import xlsxwriter
logging.basicConfig(level=logging.INFO)
def _add_months(day, months):
    """Return *day* shifted forward by *months* months, clamping the
    day-of-month to the target month's length (a stdlib replacement for
    dateutil's ``relativedelta(months=...)``)."""
    import calendar
    month_index = day.month - 1 + months
    year = day.year + month_index // 12
    month = month_index % 12 + 1
    return day.replace(year=year, month=month,
                       day=min(day.day, calendar.monthrange(year, month)[1]))


def get_sales_data(*, num_records, num_custs, num_products, date_from, number_of_months):
    """Generate random, de-duplicated shipment rows.

    Parameters
    ----------
    num_records : int
        NOTE(review): accepted but never used by the original either;
        kept for interface compatibility.
    num_custs, num_products, number_of_months : int
        Size of the customer/product/month grid to draw from.
    date_from : datetime.date
        First shipment month; subsequent months are added to it.

    Returns
    -------
    list of (cust_name, product, ship_date, qty) tuples, one at most per
    (cust, product, date) key, with qty in 1..20 (zero-qty draws dropped).

    Fix: the original looped ``for product_nbr in (0, num_products)`` — a
    two-element tuple — so only products 0 and ``num_products`` were ever
    generated; ``range(num_products)`` was intended.
    """
    print("number of months %s" % number_of_months)
    sales = {}
    for cust_nbr in range(num_custs):
        cust_name = "cust " + "{0:0>3}".format(cust_nbr)
        for month_delta in range(number_of_months):
            ship_date = _add_months(date_from, month_delta)
            for product_nbr in range(num_products):  # fixed: was (0, num_products)
                product = "product " + str(product_nbr)
                qty = random.randint(0, 20)
                if qty > 0:
                    # Dict keyed by (cust, product, date) de-duplicates rows.
                    key = (cust_name, product, ship_date)
                    sales[key] = (cust_name, product, ship_date, qty)
    return list(sales.values())
def to_excel(workbook, sheetname, dataframe):
    """Write rows of records to a new worksheet, sizing columns to fit.

    Parameters
    ----------
    workbook : xlsxwriter.Workbook (or compatible object)
    sheetname : str
        Name of the worksheet to create.
    dataframe : sequence of equal-length row sequences
        e.g. the record array from ``pivot_table.to_records()``.

    Rows are written starting at row 1, leaving row 0 free for a caller-
    supplied header. None and NaN cells are skipped.

    Fixes over the original:
    - a column with no writable value left its max width as None and
      crashed on ``width + 1``; such columns are now skipped;
    - NaN cells no longer inflate the column width (they are not written);
    - an empty ``dataframe`` no longer crashes on ``dataframe[0]``.
    """
    worksheet = workbook.add_worksheet(sheetname)
    if not len(dataframe):
        return
    max_widths = [None] * len(dataframe[0])
    for row_index, row in enumerate(dataframe, start=1):
        for col_index, datum in enumerate(row):
            if datum is None:
                continue
            if isinstance(datum, float) and math.isnan(datum):
                continue  # skip NaN entirely (original skipped the write only)
            worksheet.write(row_index, col_index, datum)
            width = len(str(datum))
            if max_widths[col_index] is None or width > max_widths[col_index]:
                max_widths[col_index] = width
    for col_index, width in enumerate(max_widths):
        if width is not None:  # fix: None + 1 crashed for all-empty columns
            worksheet.set_column(col_index, col_index, width + 1)
# ---- Script driver: build sample data, pivot it, write the crosstab ----

# Get a list of raw shipment rows.
from_date = datetime.date(2015, 1, 1)
to_date = datetime.date(2017, 7, 1)  # NOTE(review): unused by get_sales_data
matrix = get_sales_data(num_records=3000, num_products=3, date_from=from_date,
                        number_of_months=30, num_custs=1000)

# Get a dataframe.
labels = ["cust_name", "product", "ship_date", "qty"]
dataframe = pandas.DataFrame.from_records(matrix, columns=labels)
print(dataframe)

# Pivot: (customer, product) rows, ship dates as columns.
pivot_table = pandas.pivot_table(dataframe, columns='ship_date', values='qty',
                                 index=['cust_name', 'product'])
print(pivot_table)

# Convert back to records (index columns become leading fields).
records = pivot_table.to_records()

# Write the workbook.
# Fixes: the original opened a file handle it never closed and misspelled
# the extension ('.xslx'); passing the path string lets xlsxwriter manage
# the file itself.
workbook = xlsxwriter.Workbook("/tmp/crosstab.xlsx")
to_excel(workbook, "Cust Product By Month", records)
workbook.close()