Very simply, this code should be outputting a summary file, which it isn't, in a specified output directory. I can't figure out why
I have tried editing the configuration as well as changing directories.
import os
import pandas as pd
def summarise(indir, outfile):
os.chdir(indir)
filelist = ""
dflist = []
colnames = ["DSP Code", "Report Date", "Initial Date", "End Date", "Transaction Type", "Sale Type",
"Distribution Channel", "Products Origin ID", "Product ID", "Artist", "Title", "Units Sold",
"Retail Price", "Dealer Price", "Additional Revenue", "Warner Share", "Entity to be billed",
"E retailer name", "E retailer Country", "End Consumer Country", "Price Code", "Currency Code"]
for filename in filelist:
print(filename)
df = pd.read_csv('SYB_M_20171001_20171031.txt', header=None, encoding='utf-8', sep='\t',
names=colnames, skiprows=1, usecols=['Units Sold', 'Dealer Price', 'End Consumer Country',
'Currency Code'])
# Multiplying units by dealer price will give you sum of file
df['Sum of Revenue'] = df['Units Sold'] * df['Dealer Price']
# Get those first two columns
d = {'Sum of Revenue': 'Total Revenue', 'Units Sold': 'Total Units'}
for col, newcol in d.items():
df.loc[df.index[0], newcol] = df[col].sum()
# Add the rest for every country:
s = df.groupby('End Consumer Country')['Units Sold'].sum().to_frame().T.add_suffix(' Total')
s.index = [df.index[0]]
df = pd.concat([df, s], 1, sort=False)
df.to_csv(outfile + r"\output.csv", index=None)
dflist.append(filename)
summarise(r"O:\James Upson\Sound Track Your Brand Testing\SYB Test",
r"O:\James Upson\Sound Track Your Brand Testing\SYB Test Formatted")
I am expecting an output file called 'output.csv'
Hmm, ok, I see filelist = "" and then for filename in filelist:
Your trying to loop over an empty list
Related
The code below creates a table with the values of all columns centered and the column titles also centered. I align the values in the local_team column to the right and the values in the visitor_team column to the left:
ef dfi_image(list_dict,name_file):
df = pd.DataFrame(list_dict)
df = df[['time','competition','local_team','score','visitor_team','channels']]
df = df.style.set_table_styles([dict(selector='th', props=[('text-align', 'center'),('background-color', '#40466e'),('color', 'white')])])
df.set_properties(**{'text-align': 'center'}).hide(axis='index')
df.set_properties(subset=['local_team'], **{'text-align': 'right'}).hide(axis='index')
df.set_properties(subset=['visitor_team'], **{'text-align': 'left'}).hide(axis='index')
dfi.export(df, name_file + ".png")
dfi_image(games,'table_dfi.png')
But the title of local_team doesn't align to the right and neither the title of visitor_team aligns to the left and I need them with equal alignment to the values of these columns.
So i try:
def dfi_image(list_dict,name_file):
df = pd.DataFrame(list_dict)
df = df[['time','competition','local_team','score','visitor_team','channels']]
df = df.style.set_table_styles([
dict(selector='th', props=[('text-align', 'center'), ('background-color', '#40466e'), ('color', 'white')]),
dict(selector='th.col_heading.local_team', props=[('text-align', 'right')]),
dict(selector='th.col_heading.visitor_team', props=[('text-align', 'left')])
])
df.set_properties(**{'text-align': 'center'}).hide(axis='index')
df.set_properties(subset=['local_team'], **{'text-align': 'right'}).hide(axis='index')
df.set_properties(subset=['visitor_team'], **{'text-align': 'left'}).hide(axis='index')
dfi.export(df, name_file + ".png")
dfi_image(games,'table_dfi.png')
how to proceed for the result is equal this image:
Here is one way to do it with Pandas Styler.set_table_styles:
import pandas as pd
df = pd.DataFrame(
{
"col1": [1000000000, 2000000000],
"col2": [4000000000, 5000000000],
"col3": [7000000000, 8000000000],
"col4": [10000000000, 11000000000],
"col5": [13000000000, 14000000000],
}
)
To center align all column headers except col2 (right align) and col3 (left align):
styles = (
{
key: [{"selector": "th", "props": [("text-align", "center")]}]
for key in df.columns
if key not in ["col2", "col3"]
}
| {"col2": [{"selector": "th", "props": [("text-align", "right")]}]}
| {"col3": [{"selector": "th", "props": [("text-align", "left")]}]}
)
df.style.set_table_styles(styles)
Output in a Jupyter notebook cell:
engine = sqlalchemy.create_engine('sqlite:///CRYPTO.db')
client = Client()
info = client.get_exchange_info()
# Create empty dict and get klines in iteration
columns = [
'Time', 'Open', 'High', 'Low', 'Close', 'Volume', 'Close Time', 'Asset Volume',
'Number Of Trades', 'Buy Base', 'Buy Quote', 'Ignore',
]
klines = {}
for symbol in USDTsymbol:
r_klines = client.get_historical_klines(
symbol, Client.KLINE_INTERVAL_1MONTH,
"1 April 2018", "1 DEC, 2020"
)
r_klines2 = client.get_historical_klines(
symbol, Client.KLINE_INTERVAL_1WEEK,
"1 JAN 2021", "1 JUNE, 2021"
df = pd.DataFrame(r_klines + r_klines2, columns=columns)
klines[symbol] = df
print(df)
df.Time = pd.to_datetime(df.Time, unit='ms')
for df in klines:
data = klines[df]
data[columns].to_sql(symbol, engine, if_exists='replace')
-How to update database from last left off instead of redownloading data?
-->Will appreciate your help. Thank you
I want to use the .diff() function on the log_price column in my for loops. What I am after is the old log price value - the new log price value from the df_DC_product data frame. When I try to use .diff() inside the for loops it only returns NaN values. Any thoughts why this might be happening? Thank you for your help.
DC_list = data4['Geography'].drop_duplicates().tolist()
Product_List = data4['Product'].drop_duplicates().tolist()
# create multiple empty lists to store values in:
my_dict = {
"Product" : [],
"Geography" : [],
"Base Dollar Sales": [],
"Base Unit Sales" :[],
"Price Numerator" : [],
"Price Denominator": [],
"Demand Numerator" : [],
"Demand Denominator" : [],
"% Change in Price" : [],
"% Change in Demand": [],
"Price Elasticity of Demand" : []
}
dc_product_ped_with_metrics_all = []
for DC in DC_list:
df_DC = data4.copy()
# # Filtering to the loop's current DC
df_DC = df_DC.loc[(df_DC['Geography'] == DC)]
df_DC = df_DC.copy()
# Making a list of all of the current DC's Product to loop through
Product_list = df_DC['Product'].drop_duplicates().tolist()
for Product in Product_list:
df_DC_product = df_DC.copy()
# # Filtering to the Product
df_DC_product = df_DC_product.loc[(df_DC_product['Product'] == Product)]
df_DC_product = df_DC_product.copy()
# create container:
df_DC_product['pn'] = df_DC_product.iloc[:,5].diff()
df_DC_product['price_d'] = np.divide(df_DC_product.iloc[:,5].cumsum(),2)
df_DC_product['dn'] = df_DC_product.iloc[:,6].diff()
df_DC_product['dd'] = np.divide(df_DC_product.iloc[:,6].cumsum(),2)
df_DC_product['% Change in Demand'] = np.divide(df_DC_product['dn'],df_DC_product['dd'])*100
df_DC_product['% Change in Price'] = np.divide(df_DC_product['pn'],df_DC_product['price_d'])*100
df_DC_product['ped']= np.divide(df_DC_product['% Change in Demand'], df_DC_product['% Change in Price'])
Product = Product,
DC = DC
sales = df_DC_product['Base_Dollar_Sales'].sum()
qty = df_DC_product['Base_Unit_Sales'].sum()
price = df_DC_product['Price'].mean()
log_price = df_DC_product['log_price'].mean()
log_units = df_DC_product['log_units'].sum()
price_numerator = df_DC_product['pn'].mean()
price_denominator = df_DC_product['price_d'].sum()
demand_numerator = df_DC_product['dn'].mean()
demand_denominator = df_DC_product['dd'].sum()
delta_demand = df_DC_product['% Change in Demand'].sum()
delta_price = df_DC_product['% Change in Price'].mean()
ped = df_DC_product['ped'].mean()
dc_product_ped_with_metrics = [
Product,
DC,
sales,
qty,
price,
price_numerator,
price_denominator,
demand_numerator,
demand_denominator,
delta_demand,
delta_price,
ped
]
dc_product_ped_with_metrics_all.append(dc_product_ped_with_metrics)
columns = [
'Product',
'Geography',
'Sales',
'Qty',
'Price',
'Price Numerator',
'Price Denominator',
'Demand Numerator',
'Demand Denominator',
'% Change in Demand',
'% Change in Price',
'Price Elasticity of Demand'
]
dc_product_ped_with_metrics_all = pd.DataFrame(data=dc_product_ped_with_metrics_all, columns=columns)
dc_product_ped_with_metrics_all
.append() doesn't update your dataframe inplace. You need to reassign the dataframe.
for DC in DC_list:
# your code
for Product in Product_list:
# your code
dc_product_ped_with_metrics_all = dc_product_ped_with_metrics_all.append(dc_product_ped_with_metrics)
I have two excel files from two different wholesalers with products and stock quantity information.
Some of the products in the two files are common, so they exist in both files.
The number of products in the files is different e.g. the first has 65000 products and the second has 9000 products.
I need to iterate through the products of the first file based on the common column 'EAN CODE' and check if the current product exists also in the EAN column of the 2nd file.
Afterwards check which product has the lower price (which has stock > 0) and print the matching row of this product to another output excel file.
import os
import re
from datetime import datetime
import pandas
from utils import recognize_excel_type
dataframes = []
input_directory = 'in'
for file in os.listdir(input_directory):
file_path = os.path.join(input_directory, file)
if file.lower().endswith('xlsx') or file.lower().endswith('xls'):
dataframes.append(
pandas.read_excel(file_path)
)
elif file.lower().endswith('csv'):
dataframes.append(
pandas.read_csv(file_path, delimiter=';')
)
combined_dataframe = pandas.DataFrame(columns=['Price', 'Stock', 'EAN Code'])
for dataframe in dataframes:
this_type = recognize_excel_type(dataframe)
if this_type == 'DIFOX':
dataframe.rename(columns={
'retail price': 'Price',
'availability (steps)': 'Stock',
'EAN number 1': 'EAN Code',
}, inplace=True)
tuned_dataframe = pandas.DataFrame(
dataframe[combined_dataframe.columns],
)
combined_dataframe = combined_dataframe.append(tuned_dataframe, ignore_index=True)
elif this_type == 'ECOM_VGA':
headers = dataframe.iloc[2]
dataframe = dataframe[3:]
dataframe.columns = headers
dataframe.rename(columns={
'Price (€)': 'Price',
'Stock': 'Stock',
'EAN Code': 'EAN Code',
}, inplace=True)
tuned_dataframe = pandas.DataFrame(
dataframe[combined_dataframe.columns],
)
combined_dataframe = combined_dataframe.append(tuned_dataframe, ignore_index=True)
elif this_type == 'MAXCOM':
dataframe.rename(columns={
'VK-Preis': 'Price',
'Verfügbar': 'Stock',
'EAN-Code': 'EAN Code',
}, inplace=True)
tuned_dataframe = pandas.DataFrame(
dataframe[combined_dataframe.columns],
)
combined_dataframe = combined_dataframe.append(tuned_dataframe, ignore_index=True)
combined_dataframe.dropna(inplace=True)
combined_dataframe['Stock'].replace('> ?', '', inplace=True, regex=True)
combined_dataframe['Price'].replace('> ?', '', inplace=True, regex=True)
combined_dataframe = combined_dataframe.astype(
{'Stock': 'int32', 'Price': 'float32'}
)
combined_dataframe = combined_dataframe[combined_dataframe['Stock'] > 0]
combined_dataframe = combined_dataframe.loc[combined_dataframe.groupby('EAN Code')['Price'].idxmin()]
combined_dataframe.to_excel('output_backup/output-{}.xlsx'.format(datetime.now().strftime('%Y-%m-%d')), index=False)
if os.path.exists('output/output.xlsx'):
os.remove("output/output.xlsx")
combined_dataframe.to_excel('output/output.xlsx'.format(datetime.now().strftime('%Y-%m-%d')), index=False)
print('Output saved to output directory')
for file in os.listdir(input_directory):
file_path = os.path.join(input_directory, file)
os.remove(file_path)
print('All input files removed')
I have a data frame which contains 3 columns(Issue id, Creator, Versions).I need to extract the row which does not contain the value "<JIRA Version" in the "versions" column(Which is the third and fifth row in my case.Similarly there could be multiple rows in the data frame)
Below is the code i'm trying, but this is actually printing all the rows from the data frame. Any help/suggestions are appreciated.
allissues = []
for i in issues:
d = {
'Issue id': i.id,
'creator' : i.fields.creator,
'resolution': i.fields.resolution,
'status.name': i.fields.status.name,
'versions': i.fields.versions,
}
allissues.append(d)
df = pd.DataFrame(allissues, columns=['Issue id', 'creator', 'versions'])
matchers = ['<JIRA Version']
for ind in df.values:
if matchers not in df.values:
print(df['versions'][ind], df['Issue id'][ind])
some minor changes in your code:
allissues = []
for i in issues:
d = {
'Issue id': i.id,
'creator' : i.fields.creator,
'resolution': i.fields.resolution,
'status.name': i.fields.status.name,
'versions': i.fields.versions,
}
allissues.append(d)
df = pd.DataFrame(allissues, columns=['Issue id', 'creator', 'versions'])
matchers = '<JIRA Version'
for ind,row in df.iterrows():
if matchers not in row.versions:
print(row['versions'], row['Issue id'])