I am currently creating an ADF v2 delete activity with Python. My code looks like this:
#Create a delete activity
blob_ls_name = 'AzureBlobLS'
ds_name = 'Dataset_Test'
dataset_name = DatasetReference(reference_name= ds_name)
ds_ls = LinkedServiceReference(reference_name=blob_ls_name)
logsettings = LogStorageSettings(linked_service_name = ds_ls, path = 'mycontainer/path')
act_name = 'CleanUp_Dest_Folder'
act_delete = DeleteActivity(name = act_name , dataset = dataset_name, log_storage_settings = logsettings)
Even though I create logsettings, the log settings are not applied to the delete activity that gets created; in the portal, nothing is selected for logging.
I can reproduce your issue: you missed enable_logging = True in the last line. Add it as shown below and it will work fine.
act_delete = DeleteActivity(name = act_name , dataset = dataset_name, enable_logging = True, log_storage_settings = logsettings )
My test sample:
It creates a pipeline with a delete activity.
from azure.common.credentials import ServicePrincipalCredentials
from azure.mgmt.datafactory import DataFactoryManagementClient
from azure.mgmt.datafactory.models import *
subscription_id = 'xxxxx'
credentials = ServicePrincipalCredentials(client_id='xxxx', secret='xxxx', tenant='xxx')
adf_client = DataFactoryManagementClient(credentials, subscription_id)
blob_ls_name = 'AzureBlobStorage2'
ds_name = 'Binary1'
dataset_name = DatasetReference(reference_name= ds_name)
ds_ls = LinkedServiceReference(reference_name=blob_ls_name)
logsettings = LogStorageSettings(linked_service_name = ds_ls, path = 'test/d2')
act_name = 'CleanUp_Dest_Folder'
act_delete = DeleteActivity(name = act_name , dataset = dataset_name, enable_logging = True, log_storage_settings = logsettings )
rg_name = 'xxxx'
df_name = 'joyfactory'
p_name = 'Pipeline1234'
params_for_pipeline = {}
p_obj = PipelineResource(
    activities=[act_delete], parameters=params_for_pipeline)
p = adf_client.pipelines.create_or_update(rg_name, df_name, p_name, p_obj)
print(p.activities[0].log_storage_settings.linked_service_name)
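If you want to verify it end to end, you can also trigger a run right after creating the pipeline (an optional extra; create_run is the standard management-client call and reuses the variables above):
# trigger the pipeline so the delete activity runs and writes its log to the configured path
run = adf_client.pipelines.create_run(rg_name, df_name, p_name, parameters={})
print(run.run_id)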
Check in the portal:
I want to change the Azure voice from Python, with these characteristics:
languageCode = 'es-MX'
ssmlGender = 'FEMALE'
voiceName = 'es-MX-DaliaNeural'
but I'm new to Azure, so I don't know how. This is my code:
import PyPDF2
import azure.cognitiveservices.speech as sdk
key = "fake key"
region = "fake region"
config = sdk.SpeechConfig(subscription=key, region=region)
synthesizer = sdk.SpeechSynthesizer(speech_config=config)
book = open("prueba.pdf", "rb")
reader = PyPDF2.PdfFileReader(book)
for num in range(0, reader.numPages):
    text = reader.getPage(num).extractText()
    result = synthesizer.speak_text_async(text).get()
According to the documentation (https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/how-to-speech-synthesis?tabs=browserjs%2Cterminal&pivots=programming-language-python#select-synthesis-language-and-voice), you should be able to do:
config.speech_synthesis_language = "es-MX"
config.speech_synthesis_voice_name = "es-MX-DaliaNeural"
The list of voices is here https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=tts
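Putting it together with the script above, the important detail is that the properties are set on the SpeechConfig before the SpeechSynthesizer is constructed (a minimal sketch reusing the same variable names):
# configure language and voice first, then build the synthesizer from that config
config = sdk.SpeechConfig(subscription=key, region=region)
config.speech_synthesis_language = "es-MX"
config.speech_synthesis_voice_name = "es-MX-DaliaNeural"
synthesizer = sdk.SpeechSynthesizer(speech_config=config)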
I have a Python file called 'clean_data.py' which has all the DataFrames I need, and I want to import them into another Python file called 'main.py' to use in creating a dashboard.
Is it possible to create a class in my clean_data.py, and if so, can someone direct me to an article (which I have struggled to find so far) so that I can figure it out?
The aim is to shift from CSV to an API over time, so I wanted to keep the data-wrangling side of things in a separate file and the web app components in the main.py file.
Any help would be much appreciated.
The code from the clean_data.py is:
import pandas as pd
import csv
import os # To access my file directory
print(os.getcwd()) # Lets me know the current working directory
fdi_data = pd.read_csv(r'Data/fdi_data.csv')
fdi_meta = pd.read_csv(r'Data/fdi_metadata.csv')
debt_data = pd.read_csv(r'Data/debt_data.csv')
debt_meta = pd.read_csv(r'Data/debt_metadata.csv')
gdp_percap_data = pd.read_csv(r'Data/gdp_percap_data.csv', header=2)
gdp_percap_meta = pd.read_csv(r'Data/gdp_percap_metadata.csv')
gov_exp_data = pd.read_csv(r'Data/gov_exp_data.csv', header=2)
gov_exp_meta = pd.read_csv(r'Data/gov_exp_metadata.csv')
pop_data = pd.read_csv(r'Data/pop_data.csv', header=2)
pop_meta = pd.read_csv(r'Data/pop_metadata.csv')
"""
'wb' stands for World Bank
"""
def wb_merge_data(data, metadata):
    merge = pd.merge(
        data,
        metadata,
        on='Country Code',
        how='inner'
    )
    return merge
fdi_merge = wb_merge_data(fdi_data, fdi_meta)
debt_merge = wb_merge_data(debt_data, debt_meta)
gdp_percap_merge = wb_merge_data(gdp_percap_data, gdp_percap_meta)
gov_exp_merge = wb_merge_data(gov_exp_data, gov_exp_meta)
pop_merge = wb_merge_data(pop_data, pop_meta)
def wb_drop_data(data):
    drop = data.drop(['Country Code', 'Indicator Name', 'Indicator Code', 'TableName', 'SpecialNotes', 'Unnamed: 5'], axis=1)
    return drop
fdi_merge = wb_drop_data(fdi_merge)
debt_merge = wb_drop_data(debt_merge)
gdp_percap_merge = wb_drop_data(gdp_percap_merge)
gov_exp_merge = wb_drop_data(gov_exp_merge)
pop_merge = wb_drop_data(pop_merge)
def wb_mr_data(data, value_name):
    data = data.melt(['Country Name', 'Region', 'IncomeGroup']).reset_index()
    data = data.rename(columns={'variable': 'Year', 'value': value_name})
    data = data.drop('index', axis=1)
    return data
fdi_merge = wb_mr_data(fdi_merge, 'FDI')
debt_merge = wb_mr_data(debt_merge, 'Debt')
gdp_percap_merge = wb_mr_data(gdp_percap_merge, 'GDP per Cap')
gov_exp_merge = wb_mr_data(gov_exp_merge, 'Gov Expend.')
pop_merge = wb_mr_data(pop_merge, 'Population')
def avg_groupby(data, col_cal, cn=False, ig=False, rg=False):
    if cn == True:
        return data.groupby('Country Name')[col_cal].mean().reset_index()
    elif ig == True:
        return data.groupby('IncomeGroup')[col_cal].mean().reset_index()
    elif rg == True:
        return data.groupby('Region')[col_cal].mean().reset_index()
"""
avg_cn_... For country
avg_ig_... Income Group
avg_rg_... Region
"""
avg_cn_fdi = avg_groupby(fdi_merge, 'FDI', cn=True)
avg_ig_fdi = avg_groupby(fdi_merge, 'FDI', ig=True)
avg_rg_fdi = avg_groupby(fdi_merge, 'FDI', rg=True)
avg_cn_debt = avg_groupby(debt_merge, 'Debt', cn=True)
avg_ig_debt = avg_groupby(debt_merge, 'Debt', ig=True)
avg_rg_debt = avg_groupby(debt_merge, 'Debt', rg=True)
avg_cn_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', cn=True)
avg_ig_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', ig=True)
avg_rg_gdp_percap = avg_groupby(gdp_percap_merge, 'GDP per Cap', rg=True)
avg_cn_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', cn=True)
avg_ig_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', ig=True)
avg_rg_gexp = avg_groupby(gov_exp_merge, 'Gov Expend.', rg=True)
avg_cn_pop = avg_groupby(pop_merge, 'Population', cn=True)
avg_ig_pop = avg_groupby(pop_merge, 'Population', ig=True)
avg_rg_pop = avg_groupby(pop_merge, 'Population', rg=True)
In Python, every file is a module. So if you want to re-use your code, you can simply import this module. For example,
# main.py
import clean_data
print(clean_data.avg_cn_fdi)
Maybe you needn't create a class for this.
You can import the whole Python file just like you'd import any other locally created module and have access to the DataFrames in it. Here's an example:
I created a file called temporary.py:
import pandas as pd
data = pd.read_csv("temp.csv")
And then in a separate file I was able to use data like so:
import temporary
print(temporary.data)
Or, you could also do:
from temporary import data
print(data)
All that being said, I don't believe that this would be the best way to handle your data.
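For instance, one alternative (just a sketch of the idea, not the only option) is to wrap the wrangling in a function, so that main.py decides when the CSVs, or later the API, get read, instead of everything running as a side effect of the import:
# clean_data.py (sketch): expose a loader function instead of module-level DataFrames
def load_fdi(data_path='Data'):
    fdi_data = pd.read_csv(f'{data_path}/fdi_data.csv')
    fdi_meta = pd.read_csv(f'{data_path}/fdi_metadata.csv')
    # reuse the existing helpers: merge, drop the unused columns, then melt/rename
    fdi_merge = wb_mr_data(wb_drop_data(wb_merge_data(fdi_data, fdi_meta)), 'FDI')
    return fdi_merge

# main.py (sketch)
# from clean_data import load_fdi
# fdi_merge = load_fdi()
When you switch from CSV to an API later, only the loader function needs to change.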
I am trying to use Python's InfluxDB package to upload a DataFrame into the database.
I am using write_points to write points into the database, as given in the documentation (https://influxdb-python.readthedocs.io/en/latest/api-documentation.html).
Every time I use it, it only writes the last line of the DataFrame instead of the complete DataFrame.
Is this the usual behavior, or is there some problem here?
Given below is my script:
from influxdb import InfluxDBClient, DataFrameClient
import pathlib
import numpy as np
import pandas as pd
import datetime
db_client = DataFrameClient('dbserver', port, 'username', 'password', 'database',
                            ssl=True, verify_ssl=True)
today = datetime.datetime.now().strftime('%Y%m%d')
path = pathlib.Path('/dir1/dir/2').glob(f'pattern_to_match*/{today}.filename.csv')
for file in path:
    order_start = pd.read_csv(f'{file}')
    if not order_start.empty:
        order_start['data_line1'] = (order_start['col1'] -
                                     order_start['col2']) * 1000
        order_start['data_line2'] = (order_start['col3'] -
                                     order_start['col4']) * 1000
        d1 = round(order_start['data_line1'].quantile(np.arange(0, 1.1, 0.1)), 3)
        d2 = round(order_start['data_line2'].quantile(np.arange(0, 1.1, 0.1)), 3)
        out_file = pd.DataFrame()
        out_file = out_file.append(d1)
        out_file = out_file.append(d2)
        out_file = out_file.T
        out_file.index = out_file.index.set_names(['percentile'])
        out_file = out_file.reset_index()
        out_file['percentile'] = out_file.percentile.apply(lambda x: f'{100*x:.0f}%')
        out_file['tag_col'] = str(file).split('/')[2]
        out_file['time'] = pd.to_datetime('today').strftime('%Y%m%d')
        out_file = out_file.set_index('time')
        out_file.index = pd.to_datetime(out_file.index)
        db_client.write_points(out_file, 'measurement', database='database',
                               retention_policy='rp')
Can anyone please help?
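A likely explanation, going by the script above: every row of out_file ends up with the same timestamp (today's date) and the same tag values, and InfluxDB treats points with identical measurement, tag set, and timestamp as the same point, so each row overwrites the previous one and only the last line survives. One way around it, as a sketch, is to write the percentile column as a tag so each row becomes a distinct series (tag_columns is a parameter of DataFrameClient.write_points):
# write 'percentile' and 'tag_col' as tags so rows sharing a timestamp no longer collide
db_client.write_points(out_file, 'measurement',
                       tag_columns=['percentile', 'tag_col'],
                       database='database', retention_policy='rp')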
Could anyone please help me set a title on a PowerPoint slide using the win32com library in Python? The following is the code. I have used slide layout 11, which denotes 'Title Only'.
import openpyxl as op
import pptx
import os
import win32com.client
import smtplib
os.chdir(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT')
path= r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\Summary2.xlsx'
wb = op.load_workbook(path)
ExcelApp = win32com.client.Dispatch("Excel.Application")
ExcelApp.Visible = False
workbook = ExcelApp.Workbooks.open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\Summary2.xlsx')
worksheet = workbook.Worksheets("Summary")
excelrange = worksheet.Range("A2:R24")
PptApp = win32com.client.Dispatch("Powerpoint.Application")
PptApp.Visible = True
z= excelrange.Copy()
PPtPresentation = PptApp.Presentations.Open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\PBC Performance Update.pptx')
pptSlide = PPtPresentation.Slides.Add(1,11)
#pptSlide.Title.Characters.Text ='Metrics'
#title = pptSlide.Shapes.Title
#title.Text ='Metrics Summary'
pptSlide.Shapes.PasteSpecial(z)
PPtPresentation.Save()
Just a small syntax issue:
PptApp = win32com.client.Dispatch("Powerpoint.Application")
PptApp.Visible = True
z= excelrange.Copy()
PPtPresentation = PptApp.Presentations.Open(r'C:\Users\aju.mathew.thomas\Desktop\PBC\Pepsi\PBC\Performance Reports\2019\PPT\PBC Performance Update.pptx')
pptSlide = PPtPresentation.Slides.Add(1,11)
title = pptSlide.Shapes.Title
title.TextFrame.TextRange.Text = 'My title here'
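For context, the commented attempts fail because the Slide object has no Title property and a Shape has no Text attribute; the title shape is reached through Shapes.Title, and its text lives under TextFrame.TextRange. If you want to guard against layouts without a title placeholder, a small check can be added (Shapes.HasTitle is part of the PowerPoint object model):
if pptSlide.Shapes.HasTitle:
    pptSlide.Shapes.Title.TextFrame.TextRange.Text = 'Metrics Summary'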
I have a Django Rest Framework view that generates python-docx objects, and I want to save such files in a model. I read in the python-docx documentation that you can pass a 'file-like' object when saving the document, but I don't understand what that means.
This is my view:
class RoomingWordView(viewsets.ViewSet):
    def list(self, request, *args, **kwargs):
        # ... some code
        documents_to_return = self.get_hotel_document(start_date, end_date, confirmed, hotel, bookings)
        return Response(documents_to_return)

    def get_hotel_document(self, start_date, end_date, confirmed, hotel, bookings):
        from django.core.files import File
        from docx import Document
        from docx.shared import Inches, Pt
        document = Document()
        section = document.sections[-1]
        section.left_margin = Inches(0.5)
        section.right_margin = Inches(0.5)
        style = document.styles['Normal']
        font = style.font
        font.name = 'Arial'
        font.size = Pt(10)
        document.add_heading("MY COMPANY")
        if confirmed:
            document.add_paragraph("ROOMING LIST DEL 01-12-2018 AL 31-01-2019 INCLUYE RESERVAS CONFIRMADAS")
        else:
            document.add_paragraph("ROOMING LIST DEL 01-12-2018 AL 31-01-2019")
        document.add_paragraph("Hotel: {}".format(hotel))
        table = document.add_table(rows=len(bookings), cols=10)
        hdr_cells = table.rows[0].cells
        hdr_cells[0].text = 'Booking'
        hdr_cells[1].text = 'Reservado a'
        hdr_cells[2].text = '# Pax'
        hdr_cells[3].text = 'Agencia'
        hdr_cells[4].text = 'Habs'
        hdr_cells[5].text = 'Hab./Plan'
        hdr_cells[6].text = 'Entrada'
        hdr_cells[7].text = 'Salida'
        hdr_cells[8].text = 'Confirmación'
        hdr_cells[9].text = 'Producción'
        for cell in table.rows[0].cells:
            paragraphs = cell.paragraphs
            for paragraph in paragraphs:
                for run in paragraph.runs:
                    run.underline = True
        for booking in bookings['bookings']:
            row_cells = table.add_row().cells
            row_cells[0].text = booking['booking']
            row_cells[1].text = "\n".join(booking['people'])
            row_cells[2].text = booking['pax']
            row_cells[3].text = booking['agency']
            row_cells[4].text = booking['rooms']
            row_cells[5].text = "{}\n{}".format(booking['room_type'], booking['plan_type'])
            row_cells[6].text = booking['check_in']
            row_cells[7].text = booking['check_out']
            row_cells[8].text = booking['confirmation']
            row_cells[9].text = str(booking['production'])
        for row in table.rows:
            for cell in row.cells:
                paragraphs = cell.paragraphs
                for paragraph in paragraphs:
                    for run in paragraph.runs:
                        font = run.font
                        font.size = Pt(8)
        file_object = "rooming_reports/Rooming {} {}-{}.docx".format(hotel, start_date, end_date)
        document.save(file_object)
        return file_object
I want the view to save the resulting document object in a model called RoomingWordDocument instead of saving it as a file on disk, but I don't know how to assign the document variable to the RoomingWordDocument.file field.
Assuming you have a model like this:
class RoomingWordDocument(models.Model):
    doc = models.FileField(null=True)
    name = models.CharField(max_length=50)
Then try this snippet
class RoomingWordView(viewsets.ViewSet):
    def list(self, request, *args, **kwargs):
        # your list method
        # return Response(documents_to_return)

    def get_hotel_document(self, start_date, end_date, confirmed, hotel, bookings):
        # your code
        # ......
        file_object = "rooming_reports/Rooming {} {}-{}.docx".format(hotel, start_date, end_date)
        document.save(file_object)
        # here is the new snippet
        # found a simple way now
        model_object = RoomingWordDocument.objects.create(name="somename")
        model_object.doc.name = file_object
        model_object.save()  # this is important

        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
        from django.core.files import File
        fp = open(file_object, 'rb')
        model_object = RoomingWordDocument.objects.create(name="somename")
        model_object.doc.save(file_object, File(fp))

        return Response("some response")
References:
1. StringIO in Python 2.x and 3.x
2. How to create a file and save it to a model's FileField?
3. Set Django's FileField to an existing file
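On the 'file-like object' part of the question: python-docx can also save straight into an in-memory stream, which you can then hand to the FileField without touching the disk first. A minimal sketch (model and field names as above; the filename is just an example):
from io import BytesIO
from django.core.files.base import ContentFile

buffer = BytesIO()
document.save(buffer)  # python-docx accepts any file-like object here
model_object = RoomingWordDocument.objects.create(name="somename")
# ContentFile wraps the raw bytes so FileField.save can store them
model_object.doc.save("Rooming.docx", ContentFile(buffer.getvalue()))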