Related
I've been trying to export a dataframe to a formatted Excel spreadsheet for work, and though most issues were solved elsewhere (Formatting an Excel file with XlsxWriter - Locale settings appear not to be applied to the output file), some remained.
The problem now is with the header and border formatting passed to date columns:
[]
I expected to have borders on all sides of each cell, but columns J, L and M, which receive datetime values, have border formatting issues (no borders at all). Also, there is an extra column (R) which is formatted.
Keep in mind that columns B, C and F were also formatted using xlsxwriter and have no border formatting problems.
Below is the code I have so far:
# Importar bibliotecas
import os
from typing import Self
import pandas as pd
import pandas.io.formats.excel
import pandas.io.excel
import numpy as np
import time
import xlsxwriter
# Build the formatted "Material Alto Custo" workbook from the report CSV.
template_excel_file = r"C:\CriarTabelaOpme\Modelo Material Alto Custo - Intranet.xlsx"
depara_nome_espec_file = r"C:\CriarTabelaOpme\Especialidade_Dicionario.csv"
report_csv_file = r"C:\CriarTabelaOpme\ReportServiceIntranet.csv"

sheet_name = 'Material Alto Custo'

# Column names of the report CSV; the same labels are used as the Excel header.
report_columns = [
    'TIPO', 'CODIGO', 'PTU', 'DESCRICAO', 'FORNECEDOR', 'VALOR',
    'COD_PRINCP_ATIVO', 'PRINCIPIO_ATIVO', 'ANVISA', 'VALIDADE_RMS',
    'FABRICANTE', 'DT_ATUALIZACAO', 'PTU_LIMITE', 'COD_ESP', 'NOME_ESPEC',
    'REFERENCIA', 'OBSERVACAO',
]
date_columns = ['VALIDADE_RMS', 'DT_ATUALIZACAO', 'PTU_LIMITE']

csv_dataframe = pd.read_csv(
    report_csv_file,
    sep=',',
    encoding="ISO-8859-1",
    engine='python',
    index_col=None,
    names=report_columns,
)
# Placeholder column that will receive the speciality name looked up below.
csv_dataframe.insert(16, "", "")
# Strip the thousands separator before converting the amount to float.
csv_dataframe["VALOR"] = csv_dataframe["VALOR"].apply(lambda x: x.replace(",", "")).astype('float')
for date_column in date_columns:
    csv_dataframe[date_column] = pd.to_datetime(csv_dataframe[date_column])

csv_depara_espec = pd.read_csv(
    depara_nome_espec_file,
    sep=',',
    header=None,
    encoding="ISO-8859-1",
    engine='python',
)

# Move the placeholder right after COD_ESP (the last original column is dropped),
# then shift the labels so the final header matches ``report_columns``.
csv_dataframe = csv_dataframe.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 16, 14, 15]]
# Named ``column_renames`` rather than ``dict`` so the builtin is not shadowed;
# identity mappings are omitted because they rename nothing.
column_renames = {'': 'NOME_ESPEC', 'NOME_ESPEC': 'REFERENCIA', 'REFERENCIA': 'OBSERVACAO'}
csv_dataframe = csv_dataframe.rename(columns=column_renames)

# COD_ESP is used as a *positional* row number into the de-para table;
# one vectorised lookup replaces the row-by-row ``iloc`` loop.
cod_esp_positions = csv_dataframe['COD_ESP'].to_numpy()
csv_dataframe['NOME_ESPEC'] = csv_depara_espec.iloc[cod_esp_positions, 1].to_numpy()

pandas.io.formats.excel.header_style = None

(max_row, max_col) = csv_dataframe.shape
last_col = max_col - 1  # set_column()/conditional_format() take inclusive, zero-based indexes

# The context manager saves and closes the workbook even if formatting fails.
with pd.ExcelWriter(
    template_excel_file,
    engine='xlsxwriter',
    date_format='dd/mm/yyyy',
    datetime_format='dd/mm/yyyy',
) as writer:
    csv_dataframe.to_excel(writer, sheet_name=sheet_name, index=False, header=report_columns)

    workbook = writer.book
    worksheet = writer.sheets[sheet_name]

    header_format = workbook.add_format({'bold': True,
                                         'font': 'Arial',
                                         'size': 10,
                                         'border': 1})
    font_size_and_border = workbook.add_format({'font': 'Arial', 'size': 10, 'border': 1})
    column_valor_format_and_border = workbook.add_format({'num_format': '[$R$-pt-BR] #,##0.00', 'font': 'Arial', 'size': 10, 'border': 1})
    column_date_format_and_border = workbook.add_format({'num_format': 'dd/mm/yyyy', 'font': 'Arial', 'size': 10, 'border': 1})
    column_left_zeroes_format_and_border = workbook.add_format({'num_format': '00000000', 'font': 'Arial', 'size': 10, 'border': 1})
    border_only = workbook.add_format({'border': 1})

    worksheet.set_row(0, None, header_format)
    # Stop at the last data column: using ``max_col`` here formatted one extra,
    # empty column to the right of the table.
    worksheet.set_column(0, last_col, 20.0, font_size_and_border)
    for col in (1, 2):
        worksheet.set_column(col, col, 20.0, column_left_zeroes_format_and_border)
    worksheet.set_column(5, 5, 20.0, column_valor_format_and_border)
    for col in (9, 11, 12):
        worksheet.set_column(col, col, 20.0, column_date_format_and_border)

    # pandas writes date/datetime cells with their own cell-level format, which
    # takes precedence over the column format and therefore drops the border.
    # A conditional format is applied on top of cell formats, so it restores
    # the border on every cell of the table (header row included).
    worksheet.conditional_format(0, 0, max_row, last_col,
                                 {'type': 'no_errors', 'format': border_only})
I've been trying to spot the error, but I have failed. Could someone help me out?
Thanks in advance for any help you can provide!
I have a multi sheet excel file like the one pictured below. I want to highlight with the condition:
if value 'app' in column 'Kind' matches with value 'v6.0' in column 'NetFrameworkVersion'
then highlight it yellow
if value 'functionapp' in column 'Kind' matches with value 'v4.0' in column 'NetFrameworkVersion'
then highlight it green
else highlight it blue
import pandas as pd
import pathlib
import xlsxwriter
# Write one worksheet per CSV file, then add borders, column widths and
# text-based highlighting to each sheet.  The ``with`` block saves and closes
# the workbook on exit, so no explicit ``writer.save()`` is needed (that method
# was removed in pandas 2.0).
with pd.ExcelWriter('*/environment.xlsx', engine='xlsxwriter') as writer:
    workbook = writer.book
    # Formats belong to the workbook, so they are created once and reused.
    border_fmt = workbook.add_format({'bottom': 1, 'top': 1, 'left': 1, 'right': 1})
    red_format = workbook.add_format({'bg_color': '#FFC7CE',
                                      'font_color': '#9C0006'})

    for filename in pathlib.Path('*/FD').glob('*.csv'):
        df = pd.read_csv(filename)
        # Select the columns of interest in a custom order.
        df_custom = df.filter(['Kind', 'NetFrameworkVersion', 'Use32BitWorkerProcess', 'AlwaysOn'])
        # Use the fourth dash-separated part of the file name as the sheet name.
        sheetname = filename.stem.split('-')[3]
        df_custom.style.to_excel(writer, sheet_name=sheetname, index=True, header=True)

        worksheet = writer.sheets[sheetname]

        # Border around the header row, the index column and every data cell.
        table_range = xlsxwriter.utility.xl_range(0, 0, len(df), len(df_custom.columns))
        worksheet.conditional_format(table_range, {'type': 'no_errors', 'format': border_fmt})

        # set_column() expects a column range ('C:Z'), not a cell range.
        worksheet.set_column('C:Z', 25)
        worksheet.set_column('B:B', 35)

        worksheet.conditional_format('F1:F1000', {'type': 'text',
                                                  'criteria': 'containing',
                                                  'value': 'True',
                                                  'format': red_format})
        worksheet.conditional_format('G1:G100', {'type': 'text',
                                                 'criteria': 'containing',
                                                 'value': 'False',
                                                 'format': red_format})
Example Picture:
Let df be the DataFrame from your picture.
Something like this should work:
import pandas as pd # Jinja2 must be installed
def styler(series):
    """Return the CSS background colour for every cell of one row.

    Args:
        series: a row exposing the ``'Kind'`` and ``'NetFrameworkVersion'``
            labels (as passed by ``Styler.apply(..., axis=1)``).

    Returns:
        A ``pd.Series`` with the same index as ``series`` in which every entry
        is the same ``background-color`` declaration: yellow for
        ``app``/``v6.0``, green for ``functionapp``/``v4.0``, blue otherwise.
    """
    kind = series['Kind']
    version = series['NetFrameworkVersion']
    if kind == 'app' and version == 'v6.0':
        # 'yello' is not a CSS colour name; it must be spelled 'yellow'.
        color_attr = 'background-color: yellow'
    elif kind == 'functionapp' and version == 'v4.0':
        color_attr = 'background-color: green'
    else:
        color_attr = 'background-color: blue'
    return pd.Series(color_attr, series.index)
left_df_p = df[['Kind', 'NetFrameworkVersion']]
colored_df = left_df_p.join(df[['Use32BitWorkerProcess', 'AlwaysOn']])
# ``Styler.apply`` returns a Styler instead of modifying the DataFrame, so the
# result has to be kept; discarding it loses the colours.  ``subset`` limits
# both the rows handed to ``styler`` and the coloured cells to the two columns
# it inspects.  Export with ``styled_df.to_excel(...)`` to keep the colours.
styled_df = colored_df.style.apply(
    styler, axis=1, subset=['Kind', 'NetFrameworkVersion']
)
Next time, please provide a Minimal, Reproducible Example, so that people can actually test the code. Instead of posting a picture, it is better to call and post the output of df.to_dict.
I have this df
df = pd.read_csv("00001234.csv")
and the head is:
from csv import reader

# Print the header row and then every data row of the CSV file.
file_name = "00001234.csv"
# newline="" lets the csv module handle line endings (and quoted newlines) itself.
with open(file_name, "r", newline="") as csv_file:
    csv_reader = reader(csv_file)
    # Default to an empty header so an empty file does not raise StopIteration.
    head = next(csv_reader, [])
    print("head:")
    print(", ".join(head))
    print("Values:")
    for row in csv_reader:
        print(", ".join(row))
print(head)
which shows this result:
['Player', 'Chips Won', 'Hands', 'WWSF', 'WSD', 'Call R & WSD']
I have this Dropdown:
# ``df.head`` is a method (it returns the first rows), so it can be neither
# iterated nor indexed; the header labels are available as ``df.columns``.
column_labels = list(df.columns)
dropdown1_scatter_chart = dcc.Dropdown(
    id="dropdown1_scatter_chart",
    options=[{"value": label, "label": label} for label in column_labels],
    # Pre-select the first column; fall back to no selection for an empty frame.
    value=column_labels[0] if column_labels else None,
    className="six columns",
    clearable=False
)
but it is wrong because of this error:
options=[{"value":label, "label":label} for label in df.head],
TypeError: 'method' object is not iterable
How can I use the header to choose in the dropdown?
I desperately need help here. I am trying to get the dimensions of a dataframe, but I always get 31 columns instead of 30: "Value should be 30, found 31". I tried reset_index(drop = True) but I still get the same error. Any help is appreciated. Stay safe.
def read_data(dataset_id):
    """Load one of the supported disease datasets into a DataFrame.

    Supported ``dataset_id`` values are ``'breast_cancer'``,
    ``'hyperthyroidism'``, ``'cervical_cancer'`` and ``'liver_cancer'``.
    Each branch reads its data file(s) by relative file name.  The class
    column ends up in position 0 (``diagnosis``, ``diag_class``, ``Biopsy``
    and ``Selector`` respectively).  The first 20 rows are printed before
    the frame is returned.

    Args:
        dataset_id: name of the dataset to load.

    Returns:
        The loaded ``pandas.DataFrame`` with a fresh 0..n-1 index.

    Raises:
        ValueError: if ``dataset_id`` is not one of the supported names.
    """
    # Begin CODE
    if dataset_id == 'breast_cancer':
        datafile = 'wdbc.data'
        # NOTE(review): three labels below end with a space ('mean_symmetry ',
        # 'std_err_symmetry ', 'worst_symmetry '); kept as-is because other
        # code may look the columns up by these exact names.
        bc_columns = ['ptid', 'diagnosis', 'mean_radius', 'mean_texture',
                      'mean_perimeter', 'mean_area',
                      'mean_smoothness', 'mean_compactness', 'mean_concavity',
                      'mean_concave_pts', 'mean_symmetry ',
                      'mean_fractal_dim', 'std_err_radius', 'std_err_texture',
                      'std_err_perimeter', 'std_err_area',
                      'std_err_smoothness', 'std_err_compactness',
                      'std_err_concavity', 'std_err_concave_pts',
                      'std_err_symmetry ', 'std_err_fractal_dim', 'worst_radius',
                      'worst_texture', 'worst_perimeter',
                      'worst_area', 'worst_smoothness', 'worst_compactness',
                      'worst_concavity', 'worst_concave_pts',
                      'worst_symmetry ', 'worst_fractal_dim']
        data = pd.read_csv(datafile, skipinitialspace=True, names=bc_columns)
        # 32 names minus 'ptid' leaves 31 columns: 'diagnosis' + 30 measurements.
        data.drop(labels=['ptid'], axis=1, inplace=True)
        # NOTE(review): the result is not used in this function; the call is
        # kept in case get_class_list_dict has side effects - confirm.
        bc_diag_class = get_class_list_dict(data['diagnosis'])
    elif dataset_id == 'hyperthyroidism':
        datafile1 = 'allhyper.data'  # tab delimited, no header
        datafile2 = 'allhyper.test'  # comma delimited, no header
        ht_columns = ['age', 'Gender', 'on thyroxine', 'query on thyroxine',
                      'on antithyroid medication', 'sick',
                      'pregnant', 'thyroid surgery', 'I131 treatment',
                      'query hypothyroid', 'query hyperthyroid',
                      'lithium', 'goitre', 'tumor', 'hypopituitary', 'psych',
                      'TSH measured', 'TSH', 'T3 measured',
                      'T3', 'TT4 measured', 'TT4', 'T4U measured', 'T4U',
                      'FTI measured', 'FTI', 'TBG measured', 'TBG',
                      'referral source', 'diag_class']
        data1 = pd.read_csv(datafile1, sep='\t', skipinitialspace=True,
                            names=ht_columns)
        data2 = pd.read_csv(datafile2, skipinitialspace=True, names=ht_columns)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        data = pd.concat([data1, data2], ignore_index=True)
        data = data.replace(to_replace='?', value=float('nan'))
        # The raw column holds "<class><any char>|<patient id>"; keep only the
        # class part and put it first.  The pattern is the original one, now
        # written as a raw string so that "\|" is not an invalid escape.
        diag_class = data.pop('diag_class').str.split(pat=r'.\|', expand=True)[0]
        data.insert(0, 'diag_class', diag_class)
        ht_numeric = ['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI', 'TBG']
        data[ht_numeric] = data[ht_numeric].apply(pd.to_numeric)
    elif dataset_id == 'cervical_cancer':
        datafile = 'risk_factors_cervical_cancer.csv'
        cc_columns = ('Age', 'Num_sex_partners', 'First_sex_intercourse',
                      'Num_pregnancies',
                      'Smokes', 'Smokes_years', 'Smokes_packs_year',
                      'Hormonal_Contraceps',
                      'Hormonal_Contraceps_years', 'IUD', 'IUD_years', 'STD',
                      'STD_number',
                      'STD_condylomatosis', 'STDscervical_condylomatosis',
                      'STD_vaginal_condylomatosis',
                      'STD_vulvo_perin_condylomatosis', 'STD_syphilis',
                      'STD_pelvic_inflam_disease',
                      'STD_genital_herpes', 'STD_molluscum_contagiosum',
                      'STD_AIDS', 'STD_HIV', 'STD_HepB',
                      'STD_HPV', 'STD_Num_diagnosis',
                      'STD_Time_since_first_diag', 'STDs_Time_since_last_diag',
                      'Dx_Cancer', 'Dx_CIN', 'Dx_HPV', 'Dx', 'Hinselmann', 'Schiller',
                      'Citology', 'Biopsy')
        # The file's own header row is read and then replaced by cc_columns.
        data = pd.read_csv(datafile, skipinitialspace=True)
        data.columns = cc_columns
        data = data.replace(to_replace='?', value=float('nan'))
        # Keep 'Biopsy' as the class column (moved to the front) and drop the
        # remaining diagnosis/test-result columns.
        biopsy_class = data.pop('Biopsy')
        data.drop(labels=['Dx_Cancer', 'Dx_CIN', 'Dx_HPV', 'Dx', 'Hinselmann',
                          'Schiller', 'Citology'],
                  axis=1, inplace=True)
        data.insert(0, 'Biopsy', biopsy_class)
        cc_numeric = ['Num_sex_partners', 'First_sex_intercourse',
                      'Num_pregnancies', 'Smokes_years', 'Smokes_packs_year',
                      'Hormonal_Contraceps_years', 'IUD_years',
                      'STD_number', 'STD_Time_since_first_diag',
                      'STDs_Time_since_last_diag']
        data[cc_numeric] = data[cc_numeric].apply(pd.to_numeric)
    elif dataset_id == 'liver_cancer':
        # comma delimited, no header
        datafile = 'Indian Liver Patient Dataset (ILPD).csv'
        ld_columns = ['Age', 'Gender', 'TB', 'DB', 'Alkphos', 'Sgpt', 'Sgot',
                      'TP', 'ALB', 'A/G Ratio', 'Selector']
        data = pd.read_csv(datafile, skipinitialspace=True, names=ld_columns)
        data['Gender'] = data['Gender'].replace({'Male': 'M', 'Female': 'F'})
        data.insert(0, 'Selector', data.pop('Selector'))
    else:
        # Previously an unknown id fell through with data=None and failed
        # later with an unrelated AttributeError.
        raise ValueError('unknown dataset_id: {!r}'.format(dataset_id))
    data.reset_index(drop=True, inplace=True)
    # End CODE
    print(data.head(20))
    return data
def dimensions(dataset_id, dataset):
    """Return ``(number of rows, number of columns)`` of ``dataset``.

    Args:
        dataset_id: unused; kept so existing callers keep working.
        dataset: the DataFrame to measure.

    Returns:
        A ``(num_inst, num_feat)`` tuple.  Every column is counted, including
        a class/label column if the frame has one.
    """
    num_inst = len(dataset)
    # Count the columns directly instead of via ``dataset.iloc[0]``, which
    # raises IndexError when the frame has no rows.
    num_feat = len(dataset.columns)
    return (num_inst, num_feat)
If you want to drop a column from a DataFrame, you can do it like this.
To drop a single column:
df.drop(['column_name'], axis = 1)
If you want to drop multiple columns:
df.drop(['Column1', 'Column2'], axis = 1)
If you want to drop columns based on some other condition instead of the column name, leave a comment below and I'll update the answer accordingly. Hope it helps!
I have been working on this for far too long.
How do I set the format of a cell that I either have written a formula to or will be writing a formula to?
Every other write_(), except write_formula(), includes a format parameter.
for example:
ws.write_number(1,1,quantity, fmt)
ws.write_number(1,2,price, fmt)
# ws.write_formula("C1","=A1*B1",fmt) <-- doesn't exists
ws.write_formula("C1","=A1*B1")
This works:
extendedprice = (quantity*price)
ws.write_formula("C1", "=A1*B1", extendedprice, fmt)
I even figured out I can:
ws.write_number(1,1,quantity, fmt)
if (<price has an error>):
ws.write_number(1,2,"n/a",fmt)
ws.write_formula("C1", "=A1*B1", "n/a", fmt)
else:
ws.write_number(1,2,price,fmt)
ws.write_formula("C1", "=A1*B1", (quantity*price), fmt)
A format can be applied to a formula with XlsxWriter in the same way as any other data type:
import xlsxwriter
# Demonstrates that write() takes a format for strings, numbers and formulas
# alike, using both (row, col) and 'A1'-style addressing.
with xlsxwriter.Workbook('test.xlsx') as workbook:
    worksheet = workbook.add_worksheet()
    my_format = workbook.add_format({'bold': True, 'color': 'red'})

    # Rows 1-3 of column A, addressed by zero-based (row, col).
    for row_index, cell_value in enumerate(('Hello', 123, '=1+1')):
        worksheet.write(row_index, 0, cell_value, my_format)

    # Rows 4-6 of column A, addressed in A1 notation.
    for cell_ref, cell_value in (('A4', 'World'), ('A5', 456), ('A6', '=2+2')):
        worksheet.write(cell_ref, cell_value, my_format)
# Leaving the ``with`` block closes the workbook and writes the file.
Output: