Facing trouble creating a function to store the response in columns,
Like passing the city name and as the response getting details of max_temp, min_temp, Pressure. which I want to store that in the new column.
import pyowm
from pyowm.utils import config
from pyowm.utils import timestamps
api_key = {key from openweather(free)}
mgr = owm.weather_manager()
data =[]
def get_weather(city):
observation = mgr.weather_at_place(city)
l = observation.weather
Wind_Speed = l.wind()['speed']
Temp = l.temperature('celsius')['temp']
Max_temp = l.temperature('celsius')['temp_max']
Min_temp = l.temperature('celsius')['temp_min']
#Heat_index = l.heat_index
Humidity = l.humidity
Pressure = l.pressure['press']
weather = {"City": city,"Wind_Speed" : Wind_Speed, "Temp":
Temp,"Max_temp":Max_temp, "Min_temp":Min_temp, "Humidity":Humidity,
"Pressure":Pressure}
return weather
for city in df2['City']:
get_weather(city)
df = df.append(data, True)
Want to add each weather details as column based on city name
Want to create one function which stores all the details in columns,
don't want to create separate functions.
Data Frame is like:
You can return a dictionary from your fucntion.
def get_weather(city):
observation = mgr.weather_at_place(city)
l = observation.weather
Wind_Speed = l.wind()['speed']
Temp = l.temperature('celsius')['temp']
Max_temp = l.temperature('celsius')['temp_max']
resp=dict()
resp['Wind_Speed ']=Wind_Speed
resp['Temp']=Temp
resp['Max_temp']=Max_temp
return resp
df["Wind_speed"] = df["city"].apply(lambda x: get_weather(x)['Wind_Speed'])
df["Temp"] = df["city"].apply(lambda x: get_weather(x)['Temp'])
df["Max_temp"] = df["city"].apply(lambda x: get_weather(x)['Max_temp'])
Related
I am trying to fetch data from API for 50 parcels. I want them to be in a single data frame. While running this loop the data frame is storing only the last parcel which is satisfying the loop condition. Is there any way to store all the previous outputs also in the same dataframe.
For e.g upon running this code it only returns the data frame for foreign id=50, I want the dataframe for all 1-50.
import requests
import pandas as pd
foreign=1
while (foreign <=50):
s1_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s1?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs1product_end_time%2Cs1product_ron%2Ccohvh_avg%2Ccohvv_avg%2Cvhvv_avg'
s2_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s2?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs2product_start_time%2Cs2product_ron%2Cndvi_avg'
position = 101
foreign_n=str(foreign)
s1_time_series_url_p6 = s1_time_series_url_p6[:position] + foreign_n + s1_time_series_url_p6[position+1:]
s2_time_series_url_p6 = s2_time_series_url_p6[:position] + foreign_n + s2_time_series_url_p6[position+1:]
r_s1_time_series_p6 = requests.get(s1_time_series_url_p6)
r_s2_time_series_p6 = requests.get(s2_time_series_url_p6)
json_s1_time_series_p6 = r_s1_time_series_p6.json()
json_s2_time_series_p6 = r_s2_time_series_p6.json()
df_s1_time_series_p6 = pd.DataFrame(json_s1_time_series_p6['s1_time_series'])
df_s2_time_series_p6 = pd.DataFrame(json_s2_time_series_p6['s2_time_series'])
df_s2_time_series_p6.s2product_start_time=df_s2_time_series_p6.s2product_start_time.str[0:11]
df_s1_time_series_p6.s1product_end_time=df_s1_time_series_p6.s1product_end_time.str[0:11]
dfinal_p6 = df_s1_time_series_p6.merge(df_s2_time_series_p6, how='inner', left_on='s1product_end_time', right_on='s2product_start_time')
cols_p6 = ['parcel_foreign_id_x', 's1product_ron','parcel_foreign_id_y','s2product_ron']
dfinal_p6[cols_p6] = dfinal_p6[cols_p6].apply(pd.to_numeric, errors='coerce', axis=1)
dfinal_p6
The issue is resolved by first creating an empty data frame and then appending the outputs in the dataframe within the loop.
The updated code is as follows:
column_names = ["parcel_foreign_id_x", "s1product_end_time", "s1product_ron","cohvh_avg", "cohvv_avg", "vhvv_avg","parcel_foreign_id_y", "s2product_start_time", "s2product_ron", "ndvi_avg" ]
df = pd.DataFrame(columns = column_names)
foreign=1
while (foreign <=50):
s1_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s1?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs1product_end_time%2Cs1product_ron%2Ccohvh_avg%2Ccohvv_avg%2Cvhvv_avg'
s2_time_series_url_p6 = 'https://demodev2.kappazeta.ee/ard_api_demo/v1/time_series/s2?limit_to_rasters=true&parcel_foreign_id=0&properties=parcel_foreign_id%2Cs2product_start_time%2Cs2product_ron%2Cndvi_avg'
position = 101
foreign_n=str(foreign)
s1_time_series_url_p6 = s1_time_series_url_p6[:position] + foreign_n + s1_time_series_url_p6[position+1:]
s2_time_series_url_p6 = s2_time_series_url_p6[:position] + foreign_n + s2_time_series_url_p6[position+1:]
r_s1_time_series_p6 = requests.get(s1_time_series_url_p6)
r_s2_time_series_p6 = requests.get(s2_time_series_url_p6)
json_s1_time_series_p6 = r_s1_time_series_p6.json()
json_s2_time_series_p6 = r_s2_time_series_p6.json()
df_s1_time_series_p6 = pd.DataFrame(json_s1_time_series_p6['s1_time_series'])
df_s2_time_series_p6 = pd.DataFrame(json_s2_time_series_p6['s2_time_series'])
df_s2_time_series_p6.s2product_start_time=df_s2_time_series_p6.s2product_start_time.str[0:11]
df_s1_time_series_p6.s1product_end_time=df_s1_time_series_p6.s1product_end_time.str[0:11]
dfinal_p6 = df_s1_time_series_p6.merge(df_s2_time_series_p6, how='inner', left_on='s1product_end_time', right_on='s2product_start_time')
cols_p6 = ['parcel_foreign_id_x', 's1product_ron','parcel_foreign_id_y','s2product_ron']
dfinal_p6[cols_p6] = dfinal_p6[cols_p6].apply(pd.to_numeric, errors='coerce', axis=1)
df = pd.concat([dfinal_p6,df],ignore_index = True)
foreign = foreign+1
I am trying to convert excel database into python.
I have a trading data which I need to import into the system in xml format.
my code is following:
df = pd.read_excel("C:/Users/junag/Documents/XML/Portfolio2.xlsx", sheet_name="Sheet1", dtype=object)
root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
tree = ET.ElementTree(root)
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")
for row in df.itertuples():
Portfolio = ET.SubElement(Portfolios, "Portfolio", Name=row.Name, BaseCurrency=row.BaseCurrency2, TradingPower=str(row.TradingPower),
ValidationProfile=row.ValidationProfile, CommissionProfile=row.CommissionProfile)
PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")
if row.Type == "Cash":
PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume))
Cash = ET.SubElement(PortfolioPosition, 'Cash', Currency=str(row.Currency))
else:
PortfolioPosition = ET.SubElement(PortfolioPositions, "PortfolioPosition", Type=row.Type, Volume=str(row.Volume),
Invested=str(row.Invested), BaseInvested=str(row.BaseInvested))
Instrument = ET.SubElement(PortfolioPosition, 'Instrument', Ticker=str(row.Ticker), ISIN=str(row.ISIN), Market=str(row.Market),
Currency=str(row.Currency2), CFI=str(row.CFI))
ET.indent(tree, space="\t", level=0)
tree.write("Portfolios_converted2.xml", encoding="utf-8")
The output looks like this:
enter image description here
While I need it to look like this:
enter image description here
How can I improve my code to make the output xml look better? please advise
here the excel data:
Since you need a single <Portfolio> and <PortfolioPositions> as parent grouping, consider a nested loop by iterating through a list of data frames. Then, within each data frame loop through its rows:
import xml.etree.ElementTree as ET
import pandas as pd
import xml.dom.minidom as md
df = pd.read_excel("Input.xlsx", sheet_name="Sheet1", dtype=object)
# LIST OF DATA FRAME SPLITS
df_list = [g for i,g in df.groupby(
["Name", "BaseCurrency2", "TradingPower", "ValidationProfile", "CommissionProfile"]
)]
# ROOT LEVEL
root = ET.Element('trading-data')
root.set('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance')
# ROOT CHILD LEVEL
Portfolios = ET.SubElement(root, "Portfolios")
Defaults = ET.SubElement(Portfolios, "Defaults", BaseCurrency="USD")
# GROUP LEVEL ITERATION
for df in df_list:
Portfolio = ET.SubElement(
Portfolios,
"Portfolio",
Name = df["Name"][0],
BaseCurrency = df["BaseCurrency2"][0],
TradingPower = str(df["TradingPower"][0]),
ValidationProfile = df["ValidationProfile"][0],
CommissionProfile = df["CommissionProfile"][0]
)
PortfolioPositions = ET.SubElement(Portfolio, "PortfolioPositions")
# ROW LEVEL ITERATION
for row in df.itertuples():
if row.Type == "Cash":
PortfolioPosition = ET.SubElement(
PortfolioPositions,
"PortfolioPosition",
Type = row.Type,
Volume = str(row.Volume)
)
Cash = ET.SubElement(
PortfolioPosition,
"Cash",
Currency = str(row.Currency)
)
else:
PortfolioPosition = ET.SubElement(
PortfolioPositions,
"PortfolioPosition",
Type = row.Type,
Volume = str(row.Volume),
Invested = str(row.Invested),
BaseInvested = str(row.BaseInvested)
)
Instrument = ET.SubElement(
PortfolioPosition,
"Instrument",
Ticker = str(row.Ticker),
ISIN = str(row.ISIN),
Market = str(row.Market),
Currency = str(row.Currency2),
CFI = str(row.CFI)
)
# SAVE PRETTY PRINT OUTPUT
with open("Output.xml", "wb") as f:
dom = md.parseString(ET.tostring(root))
f.write(dom.toprettyxml().encode("utf-8"))
Converting excel to XML in python
import openpyxl
import xml.etree.ElementTree as ET
def convert_excel_to_xml(file_name, sheet_name):
wb = openpyxl.load_workbook(file_name)
sheet = wb[sheet_name]
root = ET.Element("root")
for row in sheet.rows:
for cell in row:
ET.SubElement(root, "cell", value=cell.value)
tree = ET.ElementTree(root)
tree.write("{}.xml".format(sheet_name))
Run the function
convert_excel_to_xml("test.xlsx", "Sheet1")
I'm working on a web scraping project, and have all the right code that returns me the json data in the format that I want if I used the #print command below, but when I got to run the same code except through Pandas Dataframe it only returns the first row of Data that I'm looking for. Just running the print, it returns the expected 17 rows of data I'm looking for. Dataframe to CSV gives me the first row only. Totally stumped! So grateful for anyone's help!
for item in response['body']:
DepartureDate = item['legs'][0][0]['departDate']
ReturnDate = item['legs'][1][0]['departDate']
Airline = item['legs'][0][0]['airline']['code']
Origin = item['legs'][0][0]['depart']
Destination = item['legs'][0][0]['destination']
OD = (Origin + Destination)
TrueBaseFare = item['breakdown']['baseFareAmount']
YQYR = item['breakdown']['fuelSurcharge']
TAX = item['breakdown']['totalTax']
TTL = item['breakdown']['totalFareAmount']
MARKEDUPTTL = item['breakdown']['totalCalculatedFareAmount']
MARKUP = ((MARKEDUPTTL - TTL) / (TTL)*100)
FBC = item['fareBasisCode']
#print(DepartureDate,ReturnDate,Airline,OD,TrueBaseFare,YQYR,TAX,TTL,MARKEDUPTTL,MARKUP,FBC)
MI = pd.DataFrame(
{'Dept': [DepartureDate],
'Ret': [ReturnDate],
'AirlineCode': [Airline],
'Routing': [OD],
'RealFare': [TrueBaseFare],
'Fuel': [YQYR],
'Taxes': [TAX],
'RealTotal': [TTL],
'AgencyTotal': [MARKEDUPTTL],
'Margin': [MARKUP],
'FareBasis': [FBC],
})
df = pd.DataFrame(MI)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
df.to_csv('MITest7.csv')
When you print all your values after the cycle, you will see that you get only the last values. To resolve this problem you need to create lists and put there your values.
Try this:
DepartureDate = []
ReturnDate = []
Airline = []
Origin = []
Destination = []
OD = []
TrueBaseFare = []
YQYR = []
TAX = []
TTL = []
MARKEDUPTTL = []
MARKUP = []
FBC = []
for item in response['body']:
DepartureDate.append(item['legs'][0][0]['departDate'])
ReturnDate.append(item['legs'][1][0]['departDate'])
Airline.append(item['legs'][0][0]['airline']['code'])
Origin.append(item['legs'][0][0]['depart'])
Destination.append(item['legs'][0][0]['destination'])
OD.append((Origin[-1] + Destination[-1]))
TrueBaseFare.append(item['breakdown']['baseFareAmount'])
YQYR.append(item['breakdown']['fuelSurcharge'])
TAX.append(item['breakdown']['totalTax'])
TTL.append(item['breakdown']['totalFareAmount'])
MARKEDUPTTL.append(item['breakdown']['totalCalculatedFareAmount'])
MARKUP.append(((MARKEDUPTTL[-1] - TTL[-1]) / (TTL[-1])*100))
FBC.append(item['fareBasisCode'])
Good evening,
I will start by saying I am very early in my coding journey. Currently using a number of excel sheets from government data for a pandas project. Each of these sheets represents a year. I am attempting to add a column to each dataframe before I concat the entire list so I know which year the data came from at each point. Currently, the code looks like this:
'''
df10 = pd.read_excel(r'C:\Market research\national_m2010_dl.xls')
df11 = pd.read_excel(r'C:\Market research\national_m2011_dl.xls')
df12 = pd.read_excel(r'C:\Market research\national_m2012_dl.xls')
df13 = pd.read_excel(r'C:\Market research\national_m2013_dl.xls')
df14 = pd.read_excel(r'C:\Market research\national_m2014_dl.xlsx')
df15 = pd.read_excel(r'C:\Market research\national_m2015_dl.xlsx')
df16 = pd.read_excel(r'C:\Market research\national_m2016_dl.xlsx')
df17 = pd.read_excel(r'C:\Market research\national_m2017_dl.xlsx')
df18 = pd.read_excel(r'C:\Market research\national_m2018_dl.xlsx')
df19 = pd.read_excel(r'C:\Market research\national_m2019_dl.xlsx')
df10['Year'] = '2010'
df11['Year'] = '2011'
df12['Year'] = '2012'
df13['Year'] = '2013'
df14['Year'] = '2014'
df15['Year'] = '2015'
df16['Year'] = '2016'
df17['Year'] = '2017'
df18['Year'] = '2018'
df19['Year'] = '2019'
'''
However, I am sure there is a cleaner way to do this and was wondering if there might be a better way. I originally attempted a For loop similar to this:
'''
for num in range(10,20):
df+str(num)['Year'] = '20'+str(num)
'''
but I had no luck. Thoughts?
Try this:
df_dic = dict()
for n in range(10,20): #remember, the second number in range is exclusive
year = f"20{n}"
df = pd.read_excel(f'C:\Market research\national_m{year}_dl.xls')
df["Year"] = year
df_dic[year] = df
instead of using df10, df11,... you can use df[10], df[11],... which is make the code very easy.
for num in range(10,20):
df[num] = pd.read_excel(r'C:\Market research\national_m20'+str(num)+'_dl.xlsx')
df[num]['Year'] = '20'+str(num)
I have written the following code that has a function model_data to perform a particular set of tasks. I have to pass the list of Badges and the type of category 1 or 2 along with an empty dataframe data.
But while running the code I am getting an error. I searched SO for answers but this type of Question was not found.
CODE
#Model Function
def model_data(badge_list, data):
for key, value in badge_list.items():
#Check for Post Type
if (value == 1):
badge_type = posts.loc[posts.PostTypeId == '1']
elif (value == 2):
badge_type = posts.loc[posts.PostTypeId == '2']
#Obtain required fields from Badge Data
badge_type = badge_type[['OwnerUserId', 'Id','Score', 'CreationDate']]
badge_type.columns = ['UserId', 'Id', 'Score','CreationDate']
Badge = key
#Obtain time when user first obtained Badge
badge_data = user_badge_dt(Badge)
#Find the number of posts made before and after 1 week of Badge Attainment
post_data = post_details(df1 = badge_data, df2 = badge_type)
post_data.date = pd.to_datetime(post_data.date)
#Calculate APR
post_data = APR(post_data)
#Calculate Score
post_data = score(df = post_data, post_type = badge_type)
#Generate Final Dataframe with Badge Count
data1 = badge_number(post_data)
data1 = data1[['1','2','3','date','Score','APR']]
#Append Dataframe
data = data.append(data1)
return data
#Function Call
questionBadge_list = {'Good Question':1, 'Explainer':2}
data = pd.DataFrame()
badge1_data = model_data(badge_list = questionBadge_list, data = data)
ERROR
IndexError: Too many levels: Index has only 1 level, not 2
ERROR LINE
The code line badge_data = user_badge_dt(Badge) gives this error so I am adding the complete function.
#Function to obtain UserId with the date-time of obtaining given badge for the first time
def user_badge_dt(badge):
#Creating DataFrame to obtain all UserId and date-Time of given badge
df = badges[['UserId','Date']].loc[badges.Name == badge]
#Obtaining the first date-time of badge attainment
v = df.groupby("UserId", group_keys=False)['Date'].nsmallest(1)
v.index = v.index.droplevel(1)
df['date'] = df['UserId'].map(v)
df.drop(columns='Date',inplace=True)
#Removing all duplicate values of Users
df.drop_duplicates(subset='UserId', inplace=True )
return df