I have a pandas dataframe that I have extracted from a JSON file for breweries I'm interested in. most of these columns are as nested list of dictionaries. However two columns 'hours' and 'memberships' are being problematic.
I'd like to extract the 'hours' column into 7 columns "Mon_Hours","Tue_hours"...'Sun_Hours'.
I have tried and tried to figure this out but these two columns are proving challenging.
Here is a link to the initial data: https://www.coloradobrewerylist.com/wp-json/cbl_api/v1/locations/?location-type%5Bnin%5D=404,405&page_size=1000&page_token=1
and here is my code:
import requests
import re
import pandas as pd
import numpy as np
import csv
import json
from datetime import datetime
### get the data from the Colorado Brewery list
url = "https://www.coloradobrewerylist.com/wp-json/cbl_api/v1/locations/?location-type%5Bnin%5D=404,405&page_size=1000&page_token=1"
payload={}
headers = {}
response = requests.request("GET", url, headers=headers, data=payload)
data=response.json()
### convert results to table
pd.set_option('display.max_columns', None)
brewdf = pd.DataFrame.from_dict(data['results'])
#brewdf
############################################
#### CLEAN UP NESTED LIST-DICT COLUMNS #####
############################################
## cleanup dogs column
dogs = pd.json_normalize(brewdf['dogs'])
dogs2 = dogs.squeeze()
dogsdf = pd.json_normalize(dogs2)
dogsdf = dogsdf.drop(columns =['id','slug'])
dogsdf = dogsdf.rename(columns={'name':'dogs_allowed'})
#dogsdf
## cleanup parking column
parking = pd.json_normalize(brewdf['parking'])
parking = parking.rename(columns = {0:'Parking1',1:'Parking2',2:'Parking3'})
a = pd.json_normalize(parking['Parking1'])
b = pd.json_normalize(parking['Parking2'])
c = pd.json_normalize(parking['Parking3'])
parkcombo = pd.concat([a,b,c],ignore_index=True, axis=1)
parkcombo = parkcombo.rename(columns = {2:'P1',5:'P2',8:'P3'})
parkcombo['parking_type'] = parkcombo['P1'].map(str) + ',' + parkcombo['P2'].map(str) + ',' + parkcombo['P3'].map(str)
parkcombo['parking_type'] = parkcombo['parking_type'].str.replace(",nan",'')
parkdf = parkcombo['parking_type'].to_frame()
#parkdf
## cleanup food type column
food = pd.json_normalize(brewdf['food_type'])
food
food = food.rename(columns = {0:'Food1',1:'Food2',2:'Food3',3:'Food4',4:'Food5',5:'Food6'})
a = pd.json_normalize(food['Food1'])
b = pd.json_normalize(food['Food2'])
c = pd.json_normalize(food['Food3'])
d = pd.json_normalize(food['Food4'])
e = pd.json_normalize(food['Food5'])
f = pd.json_normalize(food['Food6'])
foodcombo = pd.concat([a,b,c,d,e,f],ignore_index=True, axis =1)
foodcombo
foodcombo = foodcombo.rename(columns = {2:'F1',5:'F2',8:'F3',11:'F4',14:'F5',17:'F6'})
foodcombo['food_type'] = foodcombo['F1'].map(str) + ',' + foodcombo['F2'].map(str) + ',' + foodcombo['F3'].map(str) + ',' + foodcombo['F4'].map(str)+ ',' + foodcombo['F5'].map(str) + ',' + foodcombo['F6'].map(str)
foodcombo['food_type'] = foodcombo['food_type'].str.replace(",nan",'')
fooddf = foodcombo['food_type'].to_frame()
#fooddf
## cleanup patio column
patio = pd.json_normalize(brewdf['patio'])
patio = patio.rename(columns = {0:'P1',1:'P2',2:'P3'})
a = pd.json_normalize(patio['P1'])
b = pd.json_normalize(patio['P2'])
c = pd.json_normalize(patio['P3'])
patiocombo = pd.concat([a,b,c],ignore_index=True, axis =1)
patiocombo
patiocombo = patiocombo.rename(columns = {2:'P1',5:'P2',8:'P3'})
patiocombo['patio_type'] = patiocombo['P1'].map(str) + ',' + patiocombo['P2'].map(str) + ',' + patiocombo['P3'].map(str)
patiocombo['patio_type'] = patiocombo['patio_type'].str.replace(",nan",'')
patiodf = patiocombo['patio_type'].to_frame()
#patiodf
## clean visitor type column
visitor = pd.json_normalize(brewdf['visitors'])
visitor
visitor = visitor.rename(columns = {0:'V1',1:'V2',2:'V3'})
a = pd.json_normalize(visitor['V1'])
b = pd.json_normalize(visitor['V2'])
c = pd.json_normalize(visitor['V3'])
visitorcombo = pd.concat([a,b,c],ignore_index=True, axis =1)
visitorcombo
visitorcombo = visitorcombo.rename(columns = {2:'V1',5:'V2',8:'V3'})
visitorcombo['visitor_type'] = visitorcombo['V1'].map(str) + ',' + visitorcombo['V2'].map(str) + ',' + visitorcombo['V3'].map(str)
visitorcombo['visitor_type'] = visitorcombo['visitor_type'].str.replace(",nan",'')
visitordf = visitorcombo['visitor_type'].to_frame()
#visitordf
## clean tour type column
tour = pd.json_normalize(brewdf['tour_type'])
tour
tour = tour.rename(columns = {0:'T1',1:'T2',2:'T3',3:'T4'})
a = pd.json_normalize(tour['T1'])
b = pd.json_normalize(tour['T2'])
c = pd.json_normalize(tour['T3'])
d = pd.json_normalize(tour['T4'])
tourcombo = pd.concat([a,b,c,d],ignore_index=True, axis =1)
tourcombo
tourcombo = tourcombo.rename(columns = {2:'T1',5:'T2',8:'T3',11:'T4'})
tourcombo['tour_type'] = tourcombo['T1'].map(str) + ',' + tourcombo['T2'].map(str) + ',' + tourcombo['T3'].map(str) + ','+ tourcombo['T4'].map(str)
tourcombo['tour_type'] = tourcombo['tour_type'].str.replace(",nan",'')
tourdf = tourcombo['tour_type'].to_frame()
#tourdf
## clean other drinks column
odrink = pd.json_normalize(brewdf['otherdrinks_type'])
odrink
odrink = odrink.rename(columns = {0:'O1',1:'O2',2:'O3',3:'O4',4:'O5',5:'O6',6:'O7',7:'O8',8:'O9'})
a = pd.json_normalize(odrink['O1'])
b = pd.json_normalize(odrink['O2'])
c = pd.json_normalize(odrink['O3'])
d = pd.json_normalize(odrink['O4'])
e = pd.json_normalize(odrink['O5'])
f = pd.json_normalize(odrink['O6'])
g = pd.json_normalize(odrink['O7'])
h = pd.json_normalize(odrink['O8'])
i = pd.json_normalize(odrink['O9'])
odrinkcombo = pd.concat([a,b,c,d,e,f,g,h,i],ignore_index=True, axis =1)
odrinkcombo
odrinkcombo = odrinkcombo.rename(columns = {2:'O1',5:'O2',8:'O3',11:'O4',14:'O5',17:'O6',20:'O7',23:'O8',26:'O9'})
odrinkcombo['odrink_type'] = odrinkcombo['O1'].map(str) + ',' + odrinkcombo['O2'].map(str) + ',' + odrinkcombo['O3'].map(str) + ','+ odrinkcombo['O4'].map(str) + ','+ odrinkcombo['O5'].map(str)+ ','+ odrinkcombo['O6'].map(str)+ ','+ odrinkcombo['O7'].map(str)+','+ odrinkcombo['O8'].map(str)+','+ odrinkcombo['O9'].map(str)
odrinkcombo['odrink_type'] = odrinkcombo['odrink_type'].str.replace(",nan",'')
odrinkdf = odrinkcombo['odrink_type'].to_frame()
#odrinkdf
## clean to-go column
togo = pd.json_normalize(brewdf['togo_type'])
togo
togo = togo.rename(columns = {0:'TG1',1:'TG2',2:'TG3',3:'TG4',4:'TG5'})
a = pd.json_normalize(togo['TG1'])
b = pd.json_normalize(togo['TG2'])
c = pd.json_normalize(togo['TG3'])
d = pd.json_normalize(togo['TG4'])
e = pd.json_normalize(togo['TG5'])
togocombo = pd.concat([a,b,c,d,e],ignore_index=True, axis =1)
togocombo
togocombo = togocombo.rename(columns = {2:'TG1',5:'TG2',8:'TG3',11:'TG4',14:'TG5'})
togocombo['togo_type'] = togocombo['TG1'].map(str) + ',' + togocombo['TG2'].map(str) + ',' + togocombo['TG3'].map(str) + ','+ togocombo['TG4'].map(str) + ','+ togocombo['TG5'].map(str)
togocombo['togo_type'] = togocombo['togo_type'].str.replace(",nan",'')
togodf = togocombo['togo_type'].to_frame()
#togodf
## clean merch column
merch = pd.json_normalize(brewdf['merch_type'])
merch
merch = merch.rename(columns = {0:'M1',1:'M2',2:'M3',3:'M4',4:'M5',5:'M6',6:'M7',7:'M8',8:'M9',9:'M10',10:'M11',11:'M12'})
a = pd.json_normalize(merch['M1'])
b = pd.json_normalize(merch['M2'])
c = pd.json_normalize(merch['M3'])
d = pd.json_normalize(merch['M4'])
e = pd.json_normalize(merch['M5'])
f = pd.json_normalize(merch['M6'])
g = pd.json_normalize(merch['M7'])
h = pd.json_normalize(merch['M8'])
i = pd.json_normalize(merch['M9'])
j = pd.json_normalize(merch['M10'])
k = pd.json_normalize(merch['M11'])
l = pd.json_normalize(merch['M12'])
merchcombo = pd.concat([a,b,c,d,e,f,g,h,i,j,k,l],ignore_index=True, axis =1)
merchcombo
merchcombo = merchcombo.rename(columns = {2:'M1',5:'M2',8:'M3',11:'M4',14:'M5',17:'M6',20:'M7',23:'M8',26:'M9',29:'M10',32:'M11',35:'M12'})
merchcombo['merch_type'] = (merchcombo['M1'].map(str) + ',' + merchcombo['M2'].map(str) + ',' + merchcombo['M3'].map(str) + ','+ merchcombo['M4'].map(str) + ','
+ merchcombo['M5'].map(str) + ',' + merchcombo['M6'].map(str)+ ',' + merchcombo['M7'].map(str) + ',' + merchcombo['M8'].map(str)
+ ',' + merchcombo['M9'].map(str)+ ',' + merchcombo['M10'].map(str)+ ',' + merchcombo['M11'].map(str)+ ',' + merchcombo['M12'].map(str))
merchcombo['merch_type'] = merchcombo['merch_type'].str.replace(",nan",'')
merchdf = merchcombo['merch_type'].to_frame()
#merchdf
### clean description column
brewdf['description'] = brewdf['description'].str.replace(r'<[^<>]*>', '', regex=True)
#brewdf
### replace nan with null
brewdf = brewdf.replace('nan',np.nan)
brewdf = brewdf.replace('None',np.nan)
brewdf
cleanedbrewdf = brewdf.drop(columns = {'food_type','tour_type','otherdrinks_type','articles','merch_type','togo_type','patio','visitors','parking','dogs'})
mergedbrewdf = pd.concat([cleanedbrewdf,dogsdf,parkdf,fooddf,patiodf,
visitordf,tourdf,odrinkdf,togodf,merchdf,],ignore_index=False,axis=1)
mergedbrewdf
### remove non-existing
finalbrewdf = mergedbrewdf.loc[(mergedbrewdf['lon'].notnull())].copy()
finalbrewdf['lon'] = finalbrewdf['lon'].astype(float)
finalbrewdf['lat'] = finalbrewdf['lat'].astype(float)
finalbrewdf
Can someone please point me in the right direction for the hours and memberships columns? Also, is there a more efficient way to look through these different columns? They have different nested list-dict lengths which I thought might prevent me from writing a function.
I made this code in order that every time it runs once it gives me a list in the correct sequence, I wanted the code to form a list from the repetition forming p1+p2+p3, but the code gives me all the p1 plus all the p2 plus all the p3, I wish every time it ran I was given a list.
p11 = []
p22 = []
p33 = []
for n in range(0, quantidade):
print('EPISÓDIO', n+1)
for g in range(0, prot):
p1 = ' ' + protagonistasn[g] + ' ' + random.choice(açoes) + ' ' + random.choice(coadjuvantesn) + ' ' + random.choice(locais)
if g <= prot/2 :
p1_1 = p1.replace("#","a")
p11.append(p1_1)
print(p1_1)
elif g > prot/2 :
p1_1 = p1.replace("#", "o")
p11.append(p1_1)
print(p1_1)
for j in range(0, vil):
p2 = ' ' +viloesn[j]+ ' ' + random.choice(açoes) + ' ' + random.choice(protagonistasn) + ' ' + random.choice(locais)
if j <= vil/2:
p2_2 = p2.replace("#", "a")
p22.append(p2_2)
print(p2_2)
elif j > vil/2:
p2_2 = p2.replace("#", "o")
p22.append(p2_2)
print(p2_2)
for h in range(0, prot):
p3 = ' ' + protagonistasn[h] + ' ' + random.choice(açoes) + ' ' + random.choice(viloesn) + ' ' + random.choice(locais)
if h <= prot/2 :
p3_3 = p3.replace("#","a")
p33.append(p3_3)
print(p3_3)
elif h > prot/2 :
p3_3 = p3.replace("#", "o")
p33.append(p3_3)
print(p3_3)
p_total = [p11, p22, p33]
p_tot = []
If I understand you right, you want your p_total list to contain all the p1s, p2s, and p3s from the first iteration, and then all the p1s, p2s, and p3d from the second iteration. Is that right? If so:
p_total = []
for n in range(0, quantidade):
p11 = []
p22 = []
p33 = []
print('EPIS?DIO', n+1)
for g in range(0, prot):
...
print(p3_3)
p_total.extend( [p11, p22, p33] )
p_tot = []
So, start each loop with an empty p11, p22, and p33. At the end of each iteration, append those on to your p_total, instead of holding them until the end.
I wrote a webscraper which is downloading table tennis data. There is info about players, match score etc. I would like to display players which lost the most matches per day. I've created data frame and I would like to sum p1_status and p2_status, then I would like to display Surname and number of loses next to player.
https://gyazo.com/19c70e071db78071e83045bfcea0e772
Here is my code:
s = Service("D:/setka/chromedriver.exe")
option = webdriver.ChromeOptions()
driver = webdriver.Chrome(service=s)
hall = 10
num =1
filename = "C:/Users/filip/result2.csv"
f=open(filename,"w")
headers = "p1_surname, p1_name, p1_score, p2_surname, p2_name, p2_score, p1_status, p2_status \n"
f.write(headers)
while hall <= 10:
for period in [1]:
url = 'https://tabletennis.setkacup.com/en/schedule?date=2021-12-04&hall=' + \
str(hall) + '&' + 'period=' + str(period)
driver.get(url)
time.sleep(5)
divs = driver.find_elements(By.CSS_SELECTOR, "div.score-result")
for div in divs:
data = div.text.split()
#print(data)
if(num % 2) == 0:
f.write(str(data[0]) + "," + str(data[1]) + "," + str(data[2] + "," + "\n"))
else:
f.write(str(data[0]) + "," + str(data[1]) + "," + str(data[2] + ","))
num = num +1
hall =hall + 1
f.close()
df_results=pd.read_csv('C:/Users/filip/result2.csv', sep = r',',
skipinitialspace = True)
df_results.reset_index(drop=True, inplace=True)
df_results.loc[df_results['p1_score'] > df_results['p2_score'], ['p1_status','p2_status']] = ['won','lost']
df_results.loc[df_results['p1_score'] < df_results['p2_score'], ['p1_status','p2_status']] = ['lost','won']
df_results.loc[df_results['p1_score'] == df_results['p2_score'], ['p1_status','p2_status']] = ['not played','not played']
df_results.loc[((df_results['p1_score'] < 3) & (df_results['p1_score']!=0) & (df_results['p2_score'] <3) & (df_results['p2_score']!=0)), ['p1_status','p2_status']] = ['inplay','inplays']
df_results.loc[df_results['p1_status'] != df_results['p2_status'], ['match_status']] = ['finished']
df_results.loc[df_results['p1_status'] == df_results['p2_status'], ['match_status']] = ['not played']
df_results.loc[((df_results['p1_status'] =='inplay') & (df_results['p2_status']=='inplays')), ['match_status']] = ['inplay']
df_results = df_results.dropna(axis=1)
df_results.head(30)
Split your dataframe in 2 parts (p1_, p2_) to count defeats of each player then merge them:
Setup a MRE:
df = pd.DataFrame({'p1_surname': list('AABB'), 'p2_surname': list('CDCD'),
'p1_status': list('LWWW'), 'p2_status': list('WLLL')})
print(df)
# Output:
p1_surname p2_surname p1_status p2_status
0 A C L W
1 A D W L
2 B C W L
3 B D W L
>>> pd.concat([
df.filter(like='p1_').set_index('p1_surname')['p1_status'].eq('L').rename('loses'),
df.filter(like='p2_').set_index('p2_surname')['p2_status'].eq('L').rename('loses')]) \
.groupby(level=0).sum().rename_axis('surname').reset_index()
surname loses
0 A 1
1 B 0
2 C 1
3 D 2
I am currently doing a python exercise for my University studies. I am very stuck at this task:
The taylor polynomial of degree N for the exponential function e^x is given by:
N
p(x) = Sigma x^k/k!
k = 0
Make a program that (i) imports class Polynomial (found under), (ii) reads x and a series of N values from the command line, (iii) creates a Polynomial instance representing the Taylor polynomial, and (iv) prints the values of p(x) for the given N values as well as the exact value e^x. Try the program out with x = 0.5, 3, 10 and N = 2, 5, 10, 15, 25.
Polynomial.py
import numpy
class Polynomial:
def __init__(self, coefficients):
self.coeff = coefficients
def __call__(self, x):
"""Evaluate the polynomial."""
s = 0
for i in range(len(self.coeff)):
s += self.coeff[i]*x**i
return s
def __add__(self, other):
# Start with the longest list and add in the other
if len(self.coeff) > len(other.coeff):
result_coeff = self.coeff[:] # copy!
for i in range(len(other.coeff)):
result_coeff[i] += other.coeff[i]
else:
result_coeff = other.coeff[:] # copy!
for i in range(len(self.coeff)):
result_coeff[i] += self.coeff[i]
return Polynomial(result_coeff)
def __mul__(self, other):
c = self.coeff
d = other.coeff
M = len(c) - 1
N = len(d) - 1
result_coeff = numpy.zeros(M+N+1)
for i in range(0, M+1):
for j in range(0, N+1):
result_coeff[i+j] += c[i]*d[j]
return Polynomial(result_coeff)
def differentiate(self):
"""Differentiate this polynomial in-place."""
for i in range(1, len(self.coeff)):
self.coeff[i-1] = i*self.coeff[i]
del self.coeff[-1]
def derivative(self):
"""Copy this polynomial and return its derivative."""
dpdx = Polynomial(self.coeff[:]) # make a copy
dpdx.differentiate()
return dpdx
def __str__(self):
s = ''
for i in range(0, len(self.coeff)):
if self.coeff[i] != 0:
s += ' + %g*x^%d' % (self.coeff[i], i)
# Fix layout
s = s.replace('+ -', '- ')
s = s.replace('x^0', '1')
s = s.replace(' 1*', ' ')
s = s.replace('x^1 ', 'x ')
#s = s.replace('x^1', 'x') # will replace x^100 by x^00
if s[0:3] == ' + ': # remove initial +
s = s[3:]
if s[0:3] == ' - ': # fix spaces for initial -
s = '-' + s[3:]
return s
def simplestr(self):
s = ''
for i in range(0, len(self.coeff)):
s += ' + %g*x^%d' % (self.coeff[i], i)
return s
def _test():
p1 = Polynomial([1, -1])
p2 = Polynomial([0, 1, 0, 0, -6, -1])
p3 = p1 + p2
print p1, ' + ', p2, ' = ', p3
p4 = p1*p2
print p1, ' * ', p2, ' = ', p4
print 'p2(3) =', p2(3)
p5 = p2.derivative()
print 'd/dx', p2, ' = ', p5
print 'd/dx', p2,
p2.differentiate()
print ' = ', p5
p4 = p2.derivative()
print 'd/dx', p2, ' = ', p4
if __name__ == '__main__':
_test()
Now I'm really stuck at this, and I would love to get an explaination! I am supposed to write my code in a separate file. I'm thinking about making an instance of the Polynomial class, and sending in the list in argv[2:], but that doesn't seem to be working. Do I have to make a def to calculate the taylor polynomial for the different values of N before sending it in to the Polynomial class?
Any help is great, thanks in advance :)
Not quite finished, but this answers your main question I believe. Put class Polynomial in poly.p and import it.
from poly import Polynomial as p
from math import exp,factorial
def get_input(n):
''' get n numbers from stdin '''
entered = list()
for i in range(n):
print 'input number '
entered.append(raw_input())
return entered
def some_input():
return [[2,3,4],[4,3,2]]
get input from cmd line
n = 3
a = get_input(n)
b = get_input(n)
#a,b = some_input()
ap = p(a)
bp = p(b)
print 'entered : ',a,b
c = ap+bp
print 'a + b = ',c
print exp(3)
x = ap
print x
sum = p([0])
for k in range(1,5):
el = x
for j in range(1,k):
el el * x
print 'el: ',el
if el!=None and sum!=None:
sum = sum + el
print 'sum ',sum
output
entered : [2, 3, 4] [4, 3, 2]
a + b = 6*1 + 6*x + 6*x^2
20.0855369232
2*1 + 3*x + 4*x^2
sum 2*1 + 3*x + 4*x^2
el: 4*1 + 12*x + 25*x^2 + 24*x^3 + 16*x^4
sum 6*1 + 15*x + 29*x^2 + 24*x^3 + 16*x^4
el: 4*1 + 12*x + 25*x^2 + 24*x^3 + 16*x^4
el: 8*1 + 36*x + 102*x^2 + 171*x^3 + 204*x^4 + 144*x^5 + 64*x^6
sum 14*1 + 51*x + 131*x^2 + 195*x^3 + 220*x^4 + 144*x^5 + 64*x^6
el: 4*1 + 12*x + 25*x^2 + 24*x^3 + 16*x^4
el: 8*1 + 36*x + 102*x^2 + 171*x^3 + 204*x^4 + 144*x^5 + 64*x^6
el: 16*1 + 96*x + 344*x^2 + 792*x^3 + 1329*x^4 + 1584*x^5 + 1376*x^6 + 768*x^7 + 256*x^8
sum 30*1 + 147*x + 475*x^2 + 987*x^3 + 1549*x^4 + 1728*x^5 + 1440*x^6 + 768*x^7 + 256*x^8
I solved the task in the following way, though im not sure if it answers question (iv).
The output just compares the exact value of e**x to the calculated value from module Polynomial.
from math import factorial, exp
from Polynomial import *
from sys import *
#Reads x and N from the command line on the form [filename.py, x-value, N-value]
x = eval(argv[1])
N = eval(argv[2])
#Creating list of coefficients on the form [1 / i!]
list_coeff = [1./factorial(i) for i in range(N)]
print list_coeff
#Creating an instance of class Polynomial
p1 = Polynomial(list_coeff)
print 'Calculated value of e**%f = %f ' %(x, p1.__call__(x))
print 'Exact value of e**%f = %f'% (x, exp(x))
"""Test Execution
Terminal > python Polynomial_exp.py 0.5 5
[1.0, 1.0, 0.5, 0.16666666666666666, 0.041666666666666664]
Calculated value of e**0.500000 = 1.648438
Exact value of e**0.500000 = 1.648721
"""