Related
I have a json object like this:
[{'currency_pair': 'UOS_USDT',
'orders': [{'account': 'spot',
'amount': '1282.84',
'create_time': '1655394430',
'create_time_ms': 1655394430129,
'currency_pair': 'UOS_USDT',
'fee': '0',
'fee_currency': 'UOS',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208865523',
'left': '1282.84',
'point_fee': '0',
'price': '0.1949',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394430',
'update_time_ms': 1655394430129}],
'total': 1},
{'currency_pair': 'RMRK_USDT',
'orders': [{'account': 'spot',
'amount': '79.365',
'create_time': '1655394431',
'create_time_ms': 1655394431249,
'currency_pair': 'RMRK_USDT',
'fee': '0',
'fee_currency': 'RMRK',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208877018',
'left': '79.365',
'point_fee': '0',
'price': '2.52',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394431',
'update_time_ms': 1655394431249}],
'total': 1}]
I want to convert it to a dataframe.
The data comes from an api call to a crypto exchange. I converted this to json, using the .json() method. So it's proper json. I have tried:
df = pd.DataFrame(data)
df = pd.DataFrame(data["orders"])
df = pd.DataFrame(data["currency_pair"]["orders"])
and every other imaginable path.
I want a df which has as columns ["currency_pair", "amount", "create_time", "price", "side"]
I sometimes get the error TypeError: list indices must be integers or slices, not str, or the df is created but the orders object is not unpacked. All help gratefully received. Thank you.
import pandas as pd
data = [{'currency_pair': 'UOS_USDT',
'orders': [{'account': 'spot',
'amount': '1282.84',
'create_time': '1655394430',
'create_time_ms': 1655394430129,
'currency_pair': 'UOS_USDT',
'fee': '0',
'fee_currency': 'UOS',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208865523',
'left': '1282.84',
'point_fee': '0',
'price': '0.1949',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394430',
'update_time_ms': 1655394430129}],
'total': 1},
{'currency_pair': 'RMRK_USDT',
'orders': [{'account': 'spot',
'amount': '79.365',
'create_time': '1655394431',
'create_time_ms': 1655394431249,
'currency_pair': 'RMRK_USDT',
'fee': '0',
'fee_currency': 'RMRK',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208877018',
'left': '79.365',
'point_fee': '0',
'price': '2.52',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394431',
'update_time_ms': 1655394431249}],
'total': 1}]
Use:
df = pd.json_normalize(data, record_path=['orders'])
And keep the columns you need.
It's only one line and it should cover your case, since the 'currency_pair' you want is already in each 'orders' dictionary and, from what I understand of your data, it will always be the same as the 'currency_pair' value outside 'orders'. As you said, you don't need 'total' either.
Use:
df = pd.json_normalize(data, record_path=['orders'], meta=['currency_pair', 'total'], record_prefix='orders_')
If you want them all
import pandas as pd
data = [{'currency_pair': 'UOS_USDT',
'orders': [{'account': 'spot',
'amount': '1282.84',
'create_time': '1655394430',
'create_time_ms': 1655394430129,
'currency_pair': 'UOS_USDT',
'fee': '0',
'fee_currency': 'UOS',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208865523',
'left': '1282.84',
'point_fee': '0',
'price': '0.1949',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394430',
'update_time_ms': 1655394430129}],
'total': 1},
{'currency_pair': 'RMRK_USDT',
'orders': [{'account': 'spot',
'amount': '79.365',
'create_time': '1655394431',
'create_time_ms': 1655394431249,
'currency_pair': 'RMRK_USDT',
'fee': '0',
'fee_currency': 'RMRK',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208877018',
'left': '79.365',
'point_fee': '0',
'price': '2.52',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394431',
'update_time_ms': 1655394431249}],
'total': 1}]
# Flatten every order of every currency pair into its own row. Reading only
# orders[0] would silently drop any additional orders and raise IndexError
# for a pair whose 'orders' list is empty. Each order record already carries
# its own 'currency_pair', so no outer metadata is needed.
df = pd.json_normalize(data, record_path='orders')
# reindex (rather than df[[...]]) still yields the expected columns when the
# response contains no orders at all.
required_df = df.reindex(
    columns=['currency_pair', 'amount', 'create_time', 'price', 'side']
)
required_df
Result:
currency_pair amount create_time price side
0 UOS_USDT 1282.84 1655394430 0.1949 buy
1 RMRK_USDT 79.365 1655394431 2.52 buy
Hi, I hope this process can help you.
#Import pandas library
import pandas as pd
#Your data
data = [{'currency_pair': 'UOS_USDT',
'orders': [{'account': 'spot',
'amount': '1282.84',
'create_time': '1655394430',
'create_time_ms': 1655394430129,
'currency_pair': 'UOS_USDT',
'fee': '0',
'fee_currency': 'UOS',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208865523',
'left': '1282.84',
'point_fee': '0',
'price': '0.1949',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394430',
'update_time_ms': 1655394430129}],
'total': 1},
{'currency_pair': 'RMRK_USDT',
'orders': [{'account': 'spot',
'amount': '79.365',
'create_time': '1655394431',
'create_time_ms': 1655394431249,
'currency_pair': 'RMRK_USDT',
'fee': '0',
'fee_currency': 'RMRK',
'fill_price': '0',
'filled_total': '0',
'gt_discount': False,
'gt_fee': '0',
'iceberg': '0',
'id': '169208877018',
'left': '79.365',
'point_fee': '0',
'price': '2.52',
'rebated_fee': '0',
'rebated_fee_currency': 'USDT',
'side': 'buy',
'status': 'open',
'text': 'apiv4',
'time_in_force': 'gtc',
'type': 'limit',
'update_time': '1655394431',
'update_time_ms': 1655394431249}],
'total': 1}]
#Accessing nested values
#you could transform the specific column
#into a DataFrame and access its values with indices
#then parse the value to the type you need
#i.e
float(pd.DataFrame(data[0]['orders'])['amount'].values[0])
int(pd.DataFrame(data[0]['orders'])['create_time'].values[0])
float(pd.DataFrame(data[0]['orders'])['price'].values[0])
pd.DataFrame(data[0]['orders'])['side'].values[0]
# Build the column-oriented dictionary
# ["currency_pair", "amount", "create_time", "price", "side"]
# for every entry in `data`, instead of hard-coding indices 0 and 1.
# As before, only the first order of each entry is used.
first_orders = [(entry['currency_pair'], entry['orders'][0]) for entry in data]
custom_dictionary = {
    'currency_pair': [pair for pair, _ in first_orders],
    # The API delivers numbers as strings; convert them to real numeric types.
    'amount': [float(order['amount']) for _, order in first_orders],
    'create_time': [int(order['create_time']) for _, order in first_orders],
    'price': [float(order['price']) for _, order in first_orders],
    'side': [order['side'] for _, order in first_orders],
}
# Create a DataFrame from the custom dictionary
df = pd.DataFrame(custom_dictionary)
df
the dataframe (df) could look like:
custom DataFrame
I am trying to check whether a given order id matches the 'id' of any entry in the list below.
How do I search the complete list to find out whether the id is present or not?
order_id= 121553197332
inf = {data of array given below}
if inf[n]["id"] == order_id:
info = inf[n]["info"]
elif
do something here
return (info)
The array I need to check:
[{'amount': 0.3,
'id': '121553197332',
'info': {'avgFillPrice': None,
'id': '121553197332',
'ioc': False,
'liquidation': False,
'market': 'FTT/USD',
'postOnly': True,
'price': '40.0',
'reduceOnly': False,
'remainingSize': '0.3',
'side': 'buy',
'size': '0.3',
'status': 'open',
'type': 'limit'},
'side': 'buy',
'status': 'open',
'stopPrice': None,
'symbol': 'FTT/USD',
'trades': [],
'type': 'limit'},
{'amount': 0.3,
'id': '121553197726',
'info': {'avgFillPrice': None,
'future': None,
'id': '121553197726',
'ioc': False,
'liquidation': False,
'market': 'FTT/USD',
'postOnly': True,
'price': '40.062',
'side': 'buy',
'size': '0.3',
'status': 'open',
'type': 'limit'},
'postOnly': True,
'price': 40.062,
'remaining': 0.3,
'side': 'buy',
'status': 'open',
'stopPrice': None,
'symbol': 'FTT/USD',
'trades': [],
'type': 'limit'}]
I need to return the info of the array at last.
You can try using a for loop,
like this:
inf = [{ 'amount': 0.3,
'id': '121553197332',
'info': { 'avgFillPrice': None,
'id': '121553197332',
'ioc': False,
'liquidation': False,
'market': 'FTT/USD',
'postOnly': True,
'price': '40.0',
'reduceOnly': False,
'remainingSize': '0.3',
'side': 'buy',
'size': '0.3',
'status': 'open',
'type': 'limit' },
'side': 'buy',
'status': 'open',
'stopPrice': None,
'symbol': 'FTT/USD',
'trades': [],
'type': 'limit' },
{ 'amount': 0.3,
'id': '121553197726',
'info': { 'avgFillPrice': None,
'future': None,
'id': '121553197726',
'ioc': False,
'liquidation': False,
'market': 'FTT/USD',
'postOnly': True,
'price': '40.062',
'side': 'buy',
'size': '0.3',
'status': 'open',
'type': 'limit' },
'postOnly': True,
'price': 40.062,
'remaining': 0.3,
'side': 'buy',
'status': 'open',
'stopPrice': None,
'symbol': 'FTT/USD',
'trades': [],
'type': 'limit' }]
order_id = 121553197332
# Default so `info` is defined even when no entry matches.
info = None
for inner_data in inf:
    # The payload stores ids as strings while order_id is an int, so a plain
    # `==` between them never matches; compare both sides as strings.
    if str(inner_data['id']) == str(order_id):
        print(inner_data)
        info = inner_data["info"]
        break  # stop at the first matching entry
Use next:
# Reduced sample payload: two orders, ids stored as strings.
orders = [
    {'id': '121553197332', 'info': {'id': '121553197332'}},
    {'id': '121553197726', 'info': {'id': '121553197726'}},
]

# Integer id of the order we are looking for.
order_id = 121553197332

# Walk the list and keep the first entry whose id, read as an int, equals
# order_id; `found` stays None when nothing matches.
found = None
for candidate in orders:
    if int(candidate["id"]) == order_id:
        found = candidate
        break
Here is the snippet that is ready to run. target variable is the one you are searching for in the records.
data = [{'amount': 0.3,
'id': '121553197332',
'info': {'avgFillPrice': None,
'id': '121553197332',
'ioc': False,
'liquidation': False,
'market': 'FTT/USD',
'postOnly': True,
'price': '40.0',
'reduceOnly': False,
'remainingSize': '0.3',
'side': 'buy',
'size': '0.3',
'status': 'open',
'type': 'limit'},
'side': 'buy',
'status': 'open',
'stopPrice': None,
'symbol': 'FTT/USD',
'trades': [],
'type': 'limit'},
{'amount': 0.3,
'id': '121553197726',
'info': {'avgFillPrice': None,
'future': None,
'id': '121553197726',
'ioc': False,
'liquidation': False,
'market': 'FTT/USD',
'postOnly': True,
'price': '40.062',
'side': 'buy',
'size': '0.3',
'status': 'open',
'type': 'limit'},
'postOnly': True,
'price': 40.062,
'remaining': 0.3,
'side': 'buy',
'status': 'open',
'stopPrice': None,
'symbol': 'FTT/USD',
'trades': [],
'type': 'limit'}]
# Id to search for; kept as a string because the records store ids as strings.
target = '121553197726'
# Default so `info` is defined even when no record carries the target id.
info = None
for record in data:
    if record['id'] == target:
        info = record["info"]
        print(info)
I have a dataframe that I created from a Data Dictionary format in the following way:
df = pd.DataFrame( info_closed, columns = [ 'type', 'origQty', 'executedQty' ] )
The result is as follows:
type origQty executedQty
0 LIMIT 0.00362000 0.00362000
1 MARKET 0.00200000 0.00200000
2 MARKET 0.00150000 0.00150000
3 MARKET 0.00150000 0.00150000
4 LIMIT 0.00150000 0.00150000
5 LIMIT 0.00150000 0.00150000
6 MARKET 0.00199500 0.00199500
7 LIMIT 0.00150000 0.00150000
8 MARKET 0.00149800 0.00149800
9 LIMIT 0.00150000 0.00150000
10 LIMIT 0.00149900 0.00149900
11 LIMIT 0.00150000 0.00150000
12 MARKET 0.00149800 0.00149800
[... snip ...]
I am trying to create a result in the following manner:
type origQty executedQty Count
0 LIMIT 13.03 15.01 23
1 MARKET 122.01 40.00 54
[.. snip ...]
Basically, this would be a group_by(type) with a sum(origQty) and a sum(executedQty) within each 'type', plus a count of the records that were used to calculate those two sums.
I tried:
g = df.groupby(['type'])['origQty', 'executedQty'].sum().reset_index()
but the results come out as follows:
type origQty executedQty
0 LIMIT 0.003620000.001500000.001500000.001500000.0015... 0.003620000.001500000.001500000.001500000.0015...
1 LIMIT_MAKER 0.001499000.001500000.001500000.001500000.0014... 0.001499000.001500000.001500000.001500000.0014...
2 MARKET 0.002000000.001500000.001500000.001995000.0014... 0.002000000.001500000.001500000.001995000.0014...
3 STOP_LOSS_LIMIT 0.00150000 0.00150000
Question: what am I doing wrong?
TIA
ETA:
Thanks all for the provided solutions!
I ran some but I was still getting this type of output:
origQty
executedQty
type
LIMIT_MAKER 0.001499000.001500000.001500000.001500000.0014... 0.001499000.001500000.001500000.001500000.0014...
The original data was like this (it is a combination of data from the Binance exchange and the ccxt wrapper code). I was attempting to isolate the Binance data ~only~ (which is associated with ['info']).
[{'info': {'symbol': 'BTCUSDT', 'orderId': 2538903025, 'orderListId':
-1, 'clientOrderId': 'ENDsgXoqtv2ct5jizrfeQe', 'price': '9638.00000000', 'origQty': '0.00150000', 'executedQty': '0.00150000',
'cummulativeQuoteQty': '14.45700000', 'status': 'FILLED',
'timeInForce': 'GTC', 'type': 'LIMIT_MAKER', 'side': 'BUY',
'stopPrice': '0.00000000', 'icebergQty': '0.00000000', 'time':
1592879158045, 'updateTime': 1592879162299, 'isWorking': True,
'origQuoteOrderQty': '0.00000000'}, 'id': '2538903025',
'clientOrderId': 'ENDsgXoqtv2ct5jizrfeQe', 'timestamp': 1592879158045,
'datetime': '2020-06-23T02:25:58.045Z', 'lastTradeTimestamp': None,
'symbol': 'BTC/USDT', 'type': 'limit', 'side': 'buy', 'price': 9638.0,
'amount': 0.0015, 'cost': 14.457, 'average': 9638.0, 'filled': 0.0015,
'remaining': 0.0, 'status': 'closed', 'fee': None, 'trades': None},
{'info': {'symbol': 'BTCUSDT', 'orderId': 2539250884, 'orderListId':
-1, 'clientOrderId': '5UFBYwDF6b9qJ1UWNsvOYU', 'price': '9653.00000000', 'origQty': '0.00299700', 'executedQty': '0.00299700',
'cummulativeQuoteQty': '28.93004100', 'status': 'FILLED',
'timeInForce': 'GTC', 'type': 'LIMIT_MAKER', 'side': 'SELL',
'stopPrice': '0.00000000', 'icebergQty': '0.00000000', 'time':
1592883883927, 'updateTime': 1592884056113, 'isWorking': True,
'origQuoteOrderQty': '0.00000000'}, 'id': '2539250884',
'clientOrderId': '5UFBYwDF6b9qJ1UWNsvOYU', 'timestamp': 1592883883927,
'datetime': '2020-06-23T03:44:43.927Z', 'lastTradeTimestamp': None,
'symbol': 'BTC/USDT', 'type': 'limit', 'side': 'sell', 'price':
9653.0, 'amount': 0.002997, 'cost': 28.930041, 'average': 9653.0, 'filled': 0.002997, 'remaining': 0.0, 'status': 'closed', 'fee': None,
'trades': None}, {'info': {'symbol': 'BTCUSDT', 'orderId': 2539601261,
'orderListId': -1, 'clientOrderId': 'testme-15928890617592764',
'price': '9633.00000000', 'origQty': '0.00150000', 'executedQty':
'0.00150000', 'cummulativeQuoteQty': '14.44950000', 'status':
'FILLED', 'timeInForce': 'GTC', 'type': 'LIMIT_MAKER', 'side': 'BUY',
'stopPrice': '0.00000000', 'icebergQty': '0.00000000', 'time':
1592889061852, 'updateTime': 1592889136305, 'isWorking': True,
'origQuoteOrderQty': '0.00000000'}, 'id': '2539601261',
'clientOrderId': 'testme-15928890617592764', 'timestamp':
1592889061852, 'datetime': '2020-06-23T05:11:01.852Z',
'lastTradeTimestamp': None, 'symbol': 'BTC/USDT', 'type': 'limit',
'side': 'buy', 'price': 9633.0, 'amount': 0.0015, 'cost': 14.4495,
'average': 9633.0, 'filled': 0.0015, 'remaining': 0.0, 'status':
'closed', 'fee': None, 'trades': None}]
I pared it back by executing the following:
info_closed = []
for index,item in enumerate( orders_closed ):
info_closed.append( item['info'] )
The results of what I had is listed above in the first post.
I then ran:
df = pd.DataFrame( final_output, columns = [ 'type', 'origQty', 'executedQty' ] )
I am starting to wonder if there is something amiss with the dataframe ... will start looking at this area ...
try this, before groupby cast the values to float.
# The quantities arrive as strings, so summing them concatenates text.
# Cast to float first so the sums are numeric.
qty_cols = ['origQty', 'executedQty']
df[qty_cols] = df[qty_cols].astype(float)
(
    df.groupby('type')
    # Named aggregation: each output column is spelled out explicitly, and
    # the row count no longer relies on aggregating the grouping key itself.
    .agg(
        origQty=('origQty', 'sum'),
        executedQty=('executedQty', 'sum'),
        count=('origQty', 'size'),
    )
    .reset_index()
)
I am 99% sure you get the result you want by just doing this:
df.groupby(['type'])[['origQty', 'executedQty']].sum()
My Code:
import requests
import json
web_page = requests.get("http://api.bart.gov/api/etd.aspx?cmd=etd&orig=mont&key=MW9S-E7SL-26DU-VV8V&json=y")
response = web_page.text
parsed_json = json.loads(response)
#print(parsed_json)
print(parsed_json['root']['date'])
print(parsed_json['root']['time'])
print(parsed_json['root']['station']['name'])
How to extract value of destination and minutes from below in Python.
[{'name': 'Montgomery St.', 'abbr': 'MONT', 'etd': [{'destination': 'Daly City', 'abbreviation': 'DALY', 'limited': '0', 'estimate': [{'minutes': '39', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'WHITE', 'hexcolor': '#ffffff', 'bikeflag': '1', 'delay': '220'}]}, {'destination': 'SF Airport', 'abbreviation': 'SFIA', 'limited': '0', 'estimate': [{'minutes': '16', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '132'}, {'minutes': '26', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'BLUE', 'hexcolor': '#0099cc', 'bikeflag': '1', 'delay': '69'}]}]}]
Try this:
# Sample station record: one entry per destination under 'etd', each with a
# list of departure estimates.
json_obj = {'name': 'Montgomery St.', 'abbr': 'MONT', 'etd': [{'destination': 'Antioch', 'abbreviation': 'ANTC', 'limited': '0', 'estimate': [{'minutes': '1', 'platform': '2', 'direction': 'North', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '254'}]},
    {'destination': 'Daly City', 'abbreviation': 'DALY', 'limited': '0', 'estimate': [{'minutes': '39', 'platform': '1', 'direction': 'South', 'length': '0', 'color': 'BLUE', 'hexcolor': '#0099cc', 'bikeflag': '1', 'delay': '0'}]},
    {'destination': 'SF Airport', 'abbreviation': 'SFIA', 'limited': '0', 'estimate': [{'minutes': '38', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '0'}]}]}
for item in json_obj['etd']:
    dest = item['destination']
    # Walk every estimate rather than only estimate[0]: a destination may
    # list several departures, and an empty list must not raise IndexError.
    for estimate in item['estimate']:
        minute = estimate['minutes']
        print(dest, minute)
Output:
Antioch 1
Daly City 39
SF Airport 38
The problem is in parsed_json['root']['station']['name']. parsed_json['root']['station'] is a list, not a dict, so it doesn't have name key. You need to use index 0 or iterate over it
for station in parsed_json['root']['station']:
for etd in station['etd']:
for estimate in etd['estimate']:
print(etd['destination'], estimate['minutes'])
Output
Daly City 35
SF Airport 16
SF Airport 26
Try this to get json data:
import json

# Sample departure record for a single destination.
json_data = {
    'destination': 'Daly City',
    'abbreviation': 'DALY',
    'limited': '0',
    'estimate': [
        {
            'minutes': '39',
            'platform': '1',
            'direction': 'South',
            'length': '0',
            'color': 'BLUE',
            'hexcolor': '#0099cc',
            'bikeflag': '1',
            'delay': '0',
        },
    ],
}

# Serialise the dict to a JSON string, then parse that string back into
# Python objects.
data = json.dumps(json_data)
extract_json = json.loads(data)

destination = extract_json["destination"]
print("Destination: " + destination)
# The minutes value lives on the first entry of the 'estimate' list.
first_estimate = extract_json["estimate"][0]
print("Minutes: " + first_estimate["minutes"])
Output:
Destination: Daly City
Minutes: 39
Assuming the data is in d_MONT:
d_MONT = {'name': 'Montgomery St.', 'abbr': 'MONT', 'etd': [{'destination': 'Antioch', 'abbreviation': 'ANTC', 'limited': '0', 'estimate': [{'minutes': '1', 'platform': '2', 'direction': 'North', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '254'}]},
{'destination': 'Daly City', 'abbreviation': 'DALY', 'limited': '0', 'estimate': [{'minutes': '39', 'platform': '1', 'direction': 'South', 'length': '0', 'color': 'BLUE', 'hexcolor': '#0099cc', 'bikeflag': '1', 'delay': '0'}]},
{'destination': 'SF Airport', 'abbreviation': 'SFIA', 'limited': '0', 'estimate': [{'minutes': '38', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '0'}]}]}
This will find the next train to destinationRequired:
destinationList = d_MONT['etd']
destinationRequired = 'Daly City'
for destinationDict in destinationList:
    if destinationDict['destination'] == destinationRequired:
        # 'minutes' values are strings, so they must be compared numerically:
        # as text, '9' < '10' is False. min() keeps the first of equal values
        # and returns None for an empty estimate list.
        # NOTE(review): non-numeric values (the live feed may use a word such
        # as 'Leaving' - confirm) are ranked as 0 minutes here.
        earliest = min(
            (estimate['minutes'] for estimate in destinationDict['estimate']),
            key=lambda minutes: int(minutes) if minutes.isdigit() else 0,
            default=None,
        )
        print("Next train to {0}: {1} minutes".format(destinationRequired, earliest))
        break
else:
    # for/else: reached only when no destination matched (no break occurred).
    print("No trains to {0}".format(destinationRequired))
Note there are more Pythonic ways to do this, and the code example above does not follow PEP8, but I think it is important you understand the basic logic of how to do what you want rather than a complex Python one-liner.
You do not document the JSON object format, so I don't think it is safe to assume the list of trains to destination will be in order, therefore the safest is to step through each one and find the earliest. It isn't even clear if more than one train will ever be returned in the list, in which case a simple [0] would be sufficient rather than stepping through each one.
I am new to Python, trying to get a list of all the drop down values from the following website "https://www.sfma.org.sg/member/category" but failing to do so.
The below code is producing an empty list
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import re
import pandas as pd
page = "https://www.sfma.org.sg/member/category"
information = requests.get(page)
soup = BeautifulSoup(information.content, 'html.parser')
categories = soup.find_all('select', attrs={'class' :'w3-select w3-border'})
The desired output is the below list :-
['Alcoholic Beverage','Beer','Bottled
Beverage',..........,'Trader','Wholesaler']
Thanks !!
The options are loaded through Javascript, but the data is on the page. With some crude regexes you can extract it:
import re
import json
import requests
# Download the raw page source; the category data is read from the text of
# the page itself rather than from rendered <option> elements.
url = 'https://www.sfma.org.sg/member/category/'
text = requests.get(url).text
# Capture the right-hand side of the `var cObject = ...;` assignment.
# [0] takes the first match and raises IndexError if the page has none.
d = re.findall(r'var\s*cObject\s*=\s*(.*)\s*;', text)[0]
# Wrap every word that is directly followed by ':' in double quotes, so bare
# object keys become quoted JSON keys.
# NOTE(review): this also rewrites any word that precedes a ':' inside a
# value - confirm the values contain none.
d = re.sub(r'(\w+)(?=:)', r'"\1"', d)
# Swap the single quotes for double quotes, then parse the result as JSON.
d = json.loads(d.replace("'", '"'))
from pprint import pprint
# A wide output width keeps each category dict on a single printed line.
pprint(d, width=200)
Prints:
{'category': [{'cat_type': '1', 'id': '1', 'name': 'Alcoholic Beverage', 'permalink': 'alcoholic-beverage', 'status': '2'},
{'cat_type': '1', 'id': '2', 'name': 'Beer', 'permalink': 'beer', 'status': '2'},
{'cat_type': '1', 'id': '3', 'name': 'Bottled Beverage', 'permalink': 'bottled-beverage', 'status': '2'},
{'cat_type': '1', 'id': '4', 'name': 'Canned Beverage', 'permalink': 'canned-beverage', 'status': '2'},
{'cat_type': '1', 'id': '5', 'name': 'Carbonated Beverage', 'permalink': 'carbonated-beverage', 'status': '2'},
{'cat_type': '1', 'id': '6', 'name': 'Cereal / Grain Beverage', 'permalink': 'cereal-grain-beverage', 'status': '2'},
{'cat_type': '1', 'id': '7', 'name': 'Cider', 'permalink': 'cider', 'status': '2'},
{'cat_type': '1', 'id': '8', 'name': 'Coffee', 'permalink': 'coffee', 'status': '2'},
{'cat_type': '1', 'id': '9', 'name': 'Distilled Water', 'permalink': 'distilled-water', 'status': '2'},
{'cat_type': '1', 'id': '10', 'name': 'Fruit / Vegetable Juice', 'permalink': 'fruit-vegetable-juice', 'status': '2'},
{'cat_type': '1', 'id': '11', 'name': 'Herbal Beverage', 'permalink': 'herbal-beverage', 'status': '2'},
{'cat_type': '1', 'id': '12', 'name': 'Instant Beverage', 'permalink': 'instant-beverage', 'status': '2'},
{'cat_type': '1', 'id': '13', 'name': 'Milk', 'permalink': 'milk', 'status': '2'},
{'cat_type': '1', 'id': '14', 'name': 'Mineral Water', 'permalink': 'mineral-water', 'status': '2'},
...and so on.
EDIT: To print just names of categories, you can do this:
for c in d['category']:
print(c['name'])
Prints:
Alcoholic Beverage
Beer
Bottled Beverage
Canned Beverage
Carbonated Beverage
Cereal / Grain Beverage
Cider
...
Manufacturer
Restaurant
Retail Outlet
Supplier
Trader
Wholesaler
This is not really a proper question but still.
categories = soup.find("select", attrs={"name": "ctype"}).find_all('option')
result = [cat.get_text() for cat in categories]