Python / Excel Automation - Text from 2 cells into 1 cell

Python / Excel Automation - Text from 2 cells into 1 cell - python

I'm using python to automate the process of creating multiple name tags using an excel sheet list.
My problem is that I need to take the 'name' column and 'enterprise' column values and put them in a single cell of a new document.
Like this:
To this:
Right now I'm using openpyxl and although I manage to transfer one of the columns I can't do it with both.
The code below was one of the things that I've tried.
import openpyxl as xl
e = xl.load_workbook('etiquetas.xlsx')
eplan = e['Planilha1']
c = xl.load_workbook('Crachá Relação 15.10.19.xlsx')
cplan = c['Plan1']
maxlinhas = cplan.max_row
for i in range (2, maxlinhas+1):
nome = cplan.cell(row = i, column = 1).value
preenchernome = eplan.cell(row = i-1, column = 1)
empresa = cplan.cell(row=i, column=2).value
preencherempresa = eplan.cell(row=i - 1, column=1)
preenchernome.value = nome, empresa
e.save('teste.xlsx')
But this code returns the following error:
ValueError: Cannot convert ('Gladisson Garcia Westphal', 'Agro Divel') to Excel

As per the docs preenchernome.value can only have one value
try to use this
preenchernome.value = '{}\n{}'.format(nome, empresa)

The value, that is passed on the target cell should be a single string. Thus:
wksTarget.cell(row = i, column = 1).value = '{}\n{}'.format(name, family)
should be ok. This is the whole code, that worked for me:
import openpyxl as xl
import os
wbSource = xl.load_workbook(os.path.dirname(os.path.realpath(__file__)) + '\myExcel.xlsx')
wksSourse = wbSource['Sheet1']
wbTarget = xl.load_workbook(os.path.dirname(os.path.realpath(__file__)) + '\Target.xlsx')
wksTarget = wbTarget['Sheet1']
for i in range (1, wksSourse.max_row+1):
name = wksSourse.cell(row = i, column = 1).value
family = wksSourse.cell(row = i, column = 2).value
wksTarget.cell(row = i, column = 1).value = '{}\n{}'.format(name, family)
wbTarget.save(os.path.dirname(os.path.realpath(__file__)) + '\Target.xlsx')
wbTarget.close()

Related

Why openpyxl is hidding some rows?

I'm having a little trouble with openpyxl. I'm updating an excel with historic data something like this:
The excel has 2 sheets with this data, both are almost the same. The thing is that after I add the new rows and aply the formats, in the second sheet for some reason a lot of row are hidden, like this:
This is my code:
from copy import copy
import pandas as pd
from openpyxl import load_workbook
datos = pd.Dataframe(my_data) # This is not right but I already have the data in a DF
today = datos[col1].loc[0].strftime("%Y-%m-%d")
tomorrow = datos[col1].loc[1].strftime("%Y-%m-%d")
historic = 'my_excel.xlsx'
logger.info('Reading Excel')
wb = load_workbook(historic)
sheets = wb.sheetnames
for sheet in sheets:
logger.warning(f'************* {sheet} *************')
ws = wb[sheet]
logger.info(f'active: {wb.active}')
logger.info(f"len D: {len(ws['D'])}")
logger.info(f"len E: {len(ws['E'])}")
max_row = len(ws['D'])
logger.info('Last D cell')
last_D_cell = ws.cell(row=max_row, column=4)
last_D_cell_value = last_D_cell.value.strftime("%d/%m/%Y")
logger.info('New Cell')
new_D_cell = ws.cell(row=max_row + 1, column=4)
new_D_cell.font = copy(last_D_cell.font)
new_D_cell.border = copy(last_D_cell.border)
new_D_cell.number_format = copy(last_D_cell.number_format)
new_D_cell.alignment = copy(last_D_cell.alignment)
new_D_cell.fill = copy(last_D_cell.fill)
new_D_cell.value = datos[col1].loc[1].strftime("%d/%m/%Y")
logger.info('Penultimate D Cell')
penultimate_D_cell = ws.cell(row=max_row - 1, column=4)
last_D_cell.font = copy(penultimate_D_cell.font)
last_D_cell.border = copy(penultimate_D_cell.border)
last_D_cell.number_format = copy(penultimate_D_cell.number_format)
last_D_cell.alignment = copy(penultimate_D_cell.alignment)
last_D_cell.fill = copy(penultimate_D_cell.fill)
logger.info('Last E Cell')
last_E_cell = ws[f'E{max_row}']
new_E_cell = ws.cell(row=max_row + 1, column=5)
new_E_cell.font = copy(last_E_cell.font)
new_E_cell.border = copy(last_E_cell.border)
new_E_cell.number_format = copy(last_E_cell.number_format)
new_E_cell.alignment = copy(last_E_cell.alignment)
new_E_cell.fill = copy(last_E_cell.fill)
new_E_cell.value = tomorrow_value
logger.info('Penultimate E')
penultimate_E_cell = ws[f'E{max_row - 1}']
last_E_cell.font = copy(penultimate_E_cell.font)
last_E_cell.border = copy(penultimate_E_cell.border)
last_E_cell.number_format = copy(penultimate_E_cell.number_format)
last_E_cell.alignment = copy(penultimate_E_cell.alignment)
last_E_cell.fill = copy(penultimate_E_cell.fill)
logger.info('SAving Excel')
wb.save(historic)
With this code, the last sheet it works will have the hidden rows, and I don't know why is this happening.
Hope someone can help me thanks
EDIT: I'm on Ubuntu 20.4 LTS, and the resulting files has been opened in both Ubuntu and Windows 10 and the same situation appears.

How can I collapse or expand a pivot table in python using win32com

I'm creating a pivot table using the win32com.client module.
I'm managing to create it exactly as I want,
but the finishing touches that I need is to collapse the entire pivot table.
any suggestions?
This is my Code:
**
def insert_pt_field_set(pt):
field_rows = {}
field_rows['Period'] = pt.PivotFields('Period')
field_rows['Name'] = pt.PivotFields('Name')
field_values = {}
field_values['Name'] = pt.PivotFields("Name")
field_rows['Period'].Orientation = 1
field_rows['Period'].Position = 1
field_rows['Name'].Orientation = 1
field_rows['Name'].Position = 2
field_values['Name'].Orientation = 4
field_values['Name'].Function = -4112
xlApp = win32.Dispatch('Excel.Application')
xlApp.Visible = True
wb = xlApp.Workbooks.Open('output.xlsx')
ws = wb.Worksheets('Sheet1')
ws_pivot = wb.Worksheets('Pivot_Sheet1')
pt_cache = wb.PivotCaches().Create(1, ws.Range("A1").CurrentRegion)
pt = pt_cache.CreatePivotTable(ws_pivot.Range("B3"), "PivotName")
pt.ColumnGrand = True
pt.RowGrand = False
pt.SubtotalLocation(2)
pt.RowAxisLayout(2)
pt.TableStyle2 = "PivotStyleMedium2"
# Entering the function that arrange the fields in the pivot table
insert_pt_field_set(pt)
wb.Close(True)
**

Working in Excel 365
If you need to roll over the field:
ws.PivotTables("PivotName").PivotFields("SomeFiled").DrillTo("SomeFiled")
Or
ws.PivotTables("PivotName").PivotFields("SomeFiled").ShowDetail = False
If you just want to hide the fields:
ws.Columns('A:B').OutlineLevel = 2
ws.Columns('A:B').EntireColumn.Hidden = True
Similar question
VBA Pivot Table Collapse all fields
It says it's better to use "DrillTo". maybe
More information here
https://learn.microsoft.com/en-us/office/vba/api/excel.pivotfield.drillto

Is there a way to store output dataframes and appending them to the last output in the same dataframe

I am trying to fetch data from API for 50 parcels. I want them to be in a single data frame. While running this loop the data frame is storing only the last parcel which is satisfying the loop condition. Is there any way to store all the previous outputs also in the same dataframe.
For e.g upon running this code it only returns the data frame for foreign id=50, I want the dataframe for all 1-50.
import requests
import pandas as pd
foreign=1
while (foreign <=50):
s1_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s1?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs1product_end_time%2Cs1product_ron%2Ccohvh_avg%2Ccohvv_avg%2Cvhvv_avg'
s2_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s2?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs2product_start_time%2Cs2product_ron%2Cndvi_avg'
position = 101
foreign_n=str(foreign)
s1_time_series_url_p6 = s1_time_series_url_p6[:position] + foreign_n + s1_time_series_url_p6[position+1:]
s2_time_series_url_p6 = s2_time_series_url_p6[:position] + foreign_n + s2_time_series_url_p6[position+1:]
r_s1_time_series_p6 = requests.get(s1_time_series_url_p6)
r_s2_time_series_p6 = requests.get(s2_time_series_url_p6)
json_s1_time_series_p6 = r_s1_time_series_p6.json()
json_s2_time_series_p6 = r_s2_time_series_p6.json()
df_s1_time_series_p6 = pd.DataFrame(json_s1_time_series_p6['s1_time_series'])
df_s2_time_series_p6 = pd.DataFrame(json_s2_time_series_p6['s2_time_series'])
df_s2_time_series_p6.s2product_start_time=df_s2_time_series_p6.s2product_start_time.str[0:11]
df_s1_time_series_p6.s1product_end_time=df_s1_time_series_p6.s1product_end_time.str[0:11]
dfinal_p6 = df_s1_time_series_p6.merge(df_s2_time_series_p6, how='inner', left_on='s1product_end_time', right_on='s2product_start_time')
cols_p6 = ['parcel_foreign_id_x', 's1product_ron','parcel_foreign_id_y','s2product_ron']
dfinal_p6[cols_p6] = dfinal_p6[cols_p6].apply(pd.to_numeric, errors='coerce', axis=1)
dfinal_p6

The issue is resolved by first creating an empty data frame and then appending the outputs in the dataframe within the loop.
The updated code is as follows:
column_names = ["parcel_foreign_id_x", "s1product_end_time", "s1product_ron","cohvh_avg", "cohvv_avg", "vhvv_avg","parcel_foreign_id_y", "s2product_start_time", "s2product_ron", "ndvi_avg" ]
df = pd.DataFrame(columns = column_names)
foreign=1
while (foreign <=50):
s1_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s1?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs1product_end_time%2Cs1product_ron%2Ccohvh_avg%2Ccohvv_avg%2Cvhvv_avg'
s2_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s2?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs2product_start_time%2Cs2product_ron%2Cndvi_avg'
position = 101
foreign_n=str(foreign)
s1_time_series_url_p6 = s1_time_series_url_p6[:position] + foreign_n + s1_time_series_url_p6[position+1:]
s2_time_series_url_p6 = s2_time_series_url_p6[:position] + foreign_n + s2_time_series_url_p6[position+1:]
r_s1_time_series_p6 = requests.get(s1_time_series_url_p6)
r_s2_time_series_p6 = requests.get(s2_time_series_url_p6)
json_s1_time_series_p6 = r_s1_time_series_p6.json()
json_s2_time_series_p6 = r_s2_time_series_p6.json()
df_s1_time_series_p6 = pd.DataFrame(json_s1_time_series_p6['s1_time_series'])
df_s2_time_series_p6 = pd.DataFrame(json_s2_time_series_p6['s2_time_series'])
df_s2_time_series_p6.s2product_start_time=df_s2_time_series_p6.s2product_start_time.str[0:11]
df_s1_time_series_p6.s1product_end_time=df_s1_time_series_p6.s1product_end_time.str[0:11]
dfinal_p6 = df_s1_time_series_p6.merge(df_s2_time_series_p6, how='inner', left_on='s1product_end_time', right_on='s2product_start_time')
cols_p6 = ['parcel_foreign_id_x', 's1product_ron','parcel_foreign_id_y','s2product_ron']
dfinal_p6[cols_p6] = dfinal_p6[cols_p6].apply(pd.to_numeric, errors='coerce', axis=1)
df = pd.concat([dfinal_p6,df],ignore_index = True)
foreign = foreign+1

Pandas Dataframe Only Returning first Row of JSON Data

I'm working on a web scraping project, and have all the right code that returns me the json data in the format that I want if I used the #print command below, but when I got to run the same code except through Pandas Dataframe it only returns the first row of Data that I'm looking for. Just running the print, it returns the expected 17 rows of data I'm looking for. Dataframe to CSV gives me the first row only. Totally stumped! So grateful for anyone's help!
for item in response['body']:
DepartureDate = item['legs'][0][0]['departDate']
ReturnDate = item['legs'][1][0]['departDate']
Airline = item['legs'][0][0]['airline']['code']
Origin = item['legs'][0][0]['depart']
Destination = item['legs'][0][0]['destination']
OD = (Origin + Destination)
TrueBaseFare = item['breakdown']['baseFareAmount']
YQYR = item['breakdown']['fuelSurcharge']
TAX = item['breakdown']['totalTax']
TTL = item['breakdown']['totalFareAmount']
MARKEDUPTTL = item['breakdown']['totalCalculatedFareAmount']
MARKUP = ((MARKEDUPTTL - TTL) / (TTL)*100)
FBC = item['fareBasisCode']
#print(DepartureDate,ReturnDate,Airline,OD,TrueBaseFare,YQYR,TAX,TTL,MARKEDUPTTL,MARKUP,FBC)
MI = pd.DataFrame(
{'Dept': [DepartureDate],
'Ret': [ReturnDate],
'AirlineCode': [Airline],
'Routing': [OD],
'RealFare': [TrueBaseFare],
'Fuel': [YQYR],
'Taxes': [TAX],
'RealTotal': [TTL],
'AgencyTotal': [MARKEDUPTTL],
'Margin': [MARKUP],
'FareBasis': [FBC],
})
df = pd.DataFrame(MI)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
df.to_csv('MITest7.csv')

When you print all your values after the cycle, you will see that you get only the last values. To resolve this problem you need to create lists and put there your values.
Try this:
DepartureDate = []
ReturnDate = []
Airline = []
Origin = []
Destination = []
OD = []
TrueBaseFare = []
YQYR = []
TAX = []
TTL = []
MARKEDUPTTL = []
MARKUP = []
FBC = []
for item in response['body']:
DepartureDate.append(item['legs'][0][0]['departDate'])
ReturnDate.append(item['legs'][1][0]['departDate'])
Airline.append(item['legs'][0][0]['airline']['code'])
Origin.append(item['legs'][0][0]['depart'])
Destination.append(item['legs'][0][0]['destination'])
OD.append((Origin[-1] + Destination[-1]))
TrueBaseFare.append(item['breakdown']['baseFareAmount'])
YQYR.append(item['breakdown']['fuelSurcharge'])
TAX.append(item['breakdown']['totalTax'])
TTL.append(item['breakdown']['totalFareAmount'])
MARKEDUPTTL.append(item['breakdown']['totalCalculatedFareAmount'])
MARKUP.append(((MARKEDUPTTL[-1] - TTL[-1]) / (TTL[-1])*100))
FBC.append(item['fareBasisCode'])

Can't create a new sheet in GoogleColab and make calculcation

I have an xlsx file on my computer with initial data (value) - two sheets were created. I decide to calculate some value in sheet1 and in sheet2. After this I try to save the result, but when I download a file from GoogleColab and get my start file with initial values.
And I don't know how to creat a new Sheet3 and make this calcuation in it: copy one column from Sheet1 and another column Sheet2 and make P_roz = P_nav + P-osvit the result in a new column in Sheet3.
How to download the xlsx file with the result in picture 2 and picture 4?
from google.colab import drive
drive.mount('/content/gdrive')
!pip install -q xlrd
import pandas as pd
import numpy as np
# Works with Sheet1
df_1 = pd.read_excel('my_path', name_sheet = 'Sheet1')
df_1['T_potyz'] = round((((1 / df_1['K_potyz']**2) - 1))**(1/2.0),3)
df_1['P_nav, кВт'] = df_1['P_ust, кВт']\
* df_1['K_poputy1']
df_1['Q_nav, кВАр'] = round(df_1['P_ust, кВт']\
* df_1['T_potyz'],3)
df_1['S_nav, кВА'] = round((df_1['P_nav, кВт']**2\
+ df_1['Q_nav, кВАр']**2)\
**(1/2.0),3)
df_1['P_nav, кВт'].sum(), df_1['Q_nav, кВАр'].sum(), df_1['S_nav, кВА'].sum()
sum_row1 = df_1[['P_nav, кВт', 'Q_nav, кВАр', 'S_nav, кВА']].sum()
sum_row1
# transpose the data and convert the Series to a DataFrame so that it is easier to concat onto our existing data.
#The T function allows us to switch the data from being row-based to column-based.
df_sum1 = pd.DataFrame(data = sum_row1).T
df_sum1
# to add the missing columns.
df_sum1 = df_sum1.reindex(columns = df_1.columns)
df_sum1
# add it to our existing one using append.
df_final_1 = df_1.append(df_sum1, ignore_index = True)
df_final_1.tail()
# Works with Sheet2
df_2 = pd.read_excel('my_path', sheet_name='Sheet2')
K_po = float(input("Коефіцієнт попиту загально освітлення: "))
df_2['P_ust'] = (round(df_2['k'] * df_2['P_put'] * 10**-3 *
df_2['A, м'] * df_2['B, м'],3))
df_2['P_osvit'] = round(K_po * df_2['P_ust'],3)
df_2['T_potyz2'] = round((((1 / df_2['K_poputy2']**2) - 1))**(1/2.0),3)
df_2['Q_osvit'] = round(df_2['P_osvit'] * df_2['T_potyz2'],3)
df_2['S_osvit'] = round((df_2['P_osvit']**2\
+ df_2['Q_osvit']**2)\
**(1/2.0),3)
df_2['P_osvit'].sum(), df_2['Q_osvit'].sum(), df_2['S_osvit'].sum()
sum_row2 = df_2[['P_osvit', 'Q_osvit', 'S_osvit']].sum()
sum_row2
df_sum2 = pd.DataFrame(data = sum_row2).T
df_sum2
df_sum2 = df_sum2.reindex(columns = df_2.columns)
df_sum2
df_final2 = df_2.append(df_sum2, ignore_index = True)
df_final2.tail()
# Here I want create a new Sheet3 and make some calculation
P_roz = df_1['P_nav, кВт'] + df_2['P_osvit']
Q_roz = df_1['Q_nav, кВАр'] + df_2['Q_osvit']
S_roz = (P_roz**2 + Q_roz**2)**(1 / 2.0)
frame_data3 = {'P_roz' : [P_roz], 'Q_roz' : [Q_roz], 'S_roz' : [S_roz]}
df_4 = pd.DataFrame(frame_data3)
df_4
Sheet 1 Sheet 1 + sum Sheet 2 Sheet 2 + sum

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Python / Excel Automation - Text from 2 cells into 1 cell - python

As per the docs preenchernome.value can only have one value try to use this preenchernome.value = '{}\n{}'.format(nome, empresa)

Related

Why openpyxl is hidding some rows?

How can I collapse or expand a pivot table in python using win32com

Is there a way to store output dataframes and appending them to the last output in the same dataframe

Pandas Dataframe Only Returning first Row of JSON Data

Can't create a new sheet in GoogleColab and make calculcation

Categories

Resources