Web scraping using Beautifulsoup to collect dropdown values - python

I am new to Python, trying to get a list of all the drop down values from the following website "https://www.sfma.org.sg/member/category" but failing to do so.
The below code is producing an empty list
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import re
import pandas as pd
page = "https://www.sfma.org.sg/member/category"
information = requests.get(page)
soup = BeautifulSoup(information.content, 'html.parser')
categories = soup.find_all('select', attrs={'class' :'w3-select w3-border'})
The desired output is the below list :-
['Alcoholic Beverage','Beer','Bottled
Beverage',..........,'Trader','Wholesaler']
Thanks !!

The options are loaded through Javascript, but the data is on the page. With some crude regexes you can extract it:
import re
import json
import requests
# Fetch the category page and keep the raw HTML/JavaScript as text.
url = 'https://www.sfma.org.sg/member/category/'
text = requests.get(url).text
# Capture whatever is assigned to the JavaScript variable `cObject`, up to the
# last ';' on that line ('.' does not match newlines). [0] takes the first
# match and raises IndexError if the page contains no such assignment.
d = re.findall(r'var\s*cObject\s*=\s*(.*)\s*;', text)[0]
# Wrap every run of word characters that is immediately followed by ':' in
# double quotes, so bare JavaScript object keys become JSON string keys.
# NOTE(review): this also rewrites any "word:" inside a value (for example a
# URL scheme), so it is only safe for data shaped like the output shown below.
d = re.sub(r'(\w+)(?=:)', r'"\1"', d)
# JSON only accepts double-quoted strings, so swap the quotes before parsing.
d = json.loads(d.replace("'", '"'))
from pprint import pprint
pprint(d, width=200)
Prints:
{'category': [{'cat_type': '1', 'id': '1', 'name': 'Alcoholic Beverage', 'permalink': 'alcoholic-beverage', 'status': '2'},
{'cat_type': '1', 'id': '2', 'name': 'Beer', 'permalink': 'beer', 'status': '2'},
{'cat_type': '1', 'id': '3', 'name': 'Bottled Beverage', 'permalink': 'bottled-beverage', 'status': '2'},
{'cat_type': '1', 'id': '4', 'name': 'Canned Beverage', 'permalink': 'canned-beverage', 'status': '2'},
{'cat_type': '1', 'id': '5', 'name': 'Carbonated Beverage', 'permalink': 'carbonated-beverage', 'status': '2'},
{'cat_type': '1', 'id': '6', 'name': 'Cereal / Grain Beverage', 'permalink': 'cereal-grain-beverage', 'status': '2'},
{'cat_type': '1', 'id': '7', 'name': 'Cider', 'permalink': 'cider', 'status': '2'},
{'cat_type': '1', 'id': '8', 'name': 'Coffee', 'permalink': 'coffee', 'status': '2'},
{'cat_type': '1', 'id': '9', 'name': 'Distilled Water', 'permalink': 'distilled-water', 'status': '2'},
{'cat_type': '1', 'id': '10', 'name': 'Fruit / Vegetable Juice', 'permalink': 'fruit-vegetable-juice', 'status': '2'},
{'cat_type': '1', 'id': '11', 'name': 'Herbal Beverage', 'permalink': 'herbal-beverage', 'status': '2'},
{'cat_type': '1', 'id': '12', 'name': 'Instant Beverage', 'permalink': 'instant-beverage', 'status': '2'},
{'cat_type': '1', 'id': '13', 'name': 'Milk', 'permalink': 'milk', 'status': '2'},
{'cat_type': '1', 'id': '14', 'name': 'Mineral Water', 'permalink': 'mineral-water', 'status': '2'},
...and so on.
EDIT: To print just names of categories, you can do this:
for c in d['category']:
print(c['name'])
Prints:
Alcoholic Beverage
Beer
Bottled Beverage
Canned Beverage
Carbonated Beverage
Cereal / Grain Beverage
Cider
...
Manufacturer
Restaurant
Retail Outlet
Supplier
Trader
Wholesaler

This is not really a proper question but still.
categories = soup.find("select", attrs={"name": "ctype"}).find_all('option')
result = [cat.get_text() for cat in categories]

Related

Scrape values from json python requests

So I am building a scraper for sizes on a site, and I am confused about how to extract the "eur" and "pieces" values from this JSON. Later I want to print every size like "EU 41 = Pieces 6"; I probably need a for loop.
Here ist the output of the json : "{'translations': {'en': {'lang': 'en', 'title': 'Nike Dunk Low Retro Premium', 'subtitle': 'Black / Pure Platinum-Anthracite', 'slug': 'nike-dunk-low-retro-premium', 'description': 'DH7913-001'}}, 'id': 'vpEW0nkBHBhvh4GFDXSb', 'prices': {'EUR': {'currency': 'EUR', 'value': 119}}, 'sizeSets': {'Men': {'name': 'Men', 'sizes': [{'id': '685d200c-c470-11eb-b5ee-a66da43170c1', 'us': '8', 'eur': '41', 'uk': '7', 'cm': '26', 'ean': '194955875308', 'pieces': 6}, {'id': '685d21c4-c470-11eb-9f9f-a66da43170c1', 'us': '8.5', 'eur': '42', 'uk': '7.5', 'cm': '26.5', 'ean': '194955875315', 'pieces': 18}, {'id': '685d232c-c470-11eb-8bda-a66da43170c1', 'us': '9', 'eur': '42.5', 'uk': '8', 'cm': '27', 'ean': '194955875322', 'pieces': 10}, {'id': '685d248a-c470-11eb-bf78-a66da43170c1', 'us': '9.5', 'eur': '43', 'uk': '8.5', 'cm': '27.5', 'ean': '194955875339', 'pieces': 17}, {'id': '685d25de-c470-11eb-8741-a66da43170c1', 'us': '10', 'eur': '44', 'uk': '9', 'cm': '28', 'ean': '194955875346', 'pieces': 15}, {'id': '685d2732-c470-11eb-bfb5-a66da43170c1', 'us': '10.5', 'eur': '44.5', 'uk': '9.5', 'cm': '28.5', 'ean': '194955875353', 'pieces': 5}, {'id': '685d2886-c470-11eb-ac68-a66da43170c1', 'us': '11', 'eur': '45', 'uk': '10', 'cm': '29', 'ean': '194955875360', 'pieces': 1}, {'id': '685d29e4-c470-11eb-8578-a66da43170c1', 'us': '11.5', 'eur': '45.5', 'uk': '10.5', 'cm': '29.5', 'ean': '194955875377', 'pieces': 2}, {'id': '685d2b38-c470-11eb-a729-a66da43170c1', 'us': '12', 'eur': '46', 'uk': '11', 'cm': '30', 'ean': '194955875384', 'pieces': 3}]}, 'Unisex': {'name': 'Unisex', 'sizes': []}, 'Women': {'name': 'Women', 'sizes': []}, 'Kids': {'name': 'Kids', 'sizes': []}}, 'images': ['0/08/083/0837c383a3212d52f2e4455e0d876f47.jpeg', 'c/ca/ca0/ca01c2ca1dfb35013a06723b60c062cc.jpeg', '8/8e/8e9/8e9d04f6d1e8712da6d85c3db98ff989.jpeg', '3/37/376/3769e3f56186e46b91c725d09dff3252.jpeg', 'a/aa/aa5/aa5a8934a05be2badfe9cff5e07f122c.jpeg', 
'7/7b/7b0/7b088912b94bb0b2e41d527d573d568d.jpeg', 'b/b8/b8b/b8b214b6e1a33d56880e412b7ef8fe01.jpeg', '5/56/562/562809e497cc98b69cf8789e3238e482.jpeg'], 'imagesPortrait': ['a/ab/abc/abc1eac4bcdf74bd899f8e2f7827f30c.jpeg'], 'createdAt': '2021-06-03T13:34:22+00:00', 'publishAt': '2021-06-10T10:00:00+00:00', 'openRegistrationAt': '2021-06-10T10:00:00+00:00', 'closeRegistrationAt': '2021-06-18T23:00:00+00:00', 'finished': True, 'headliner': False, 'code': 'DH7913-001', 'footshopLink': 'https://www.footshop.eu/en/723-limited-edition/orderby-activated_at/orderway-desc', 'soldout': False, 'deleted': False, 'limitedShipping': True, 'delayedExport': False, 'productIdentifier': '115147', 'status': 'Closed',
'resultAt': '2021-06-19T03:00:00+00:00'}"
from os import error
import requests
from bs4 import BeautifulSoup
from discord_webhook import DiscordWebhook,DiscordEmbed
import time
import json
URL= "https://releases.footshop.com/api/raffles/vpEW0nkBHBhvh4GFDXSb"
headers = {
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36"}
page = requests.get(URL,headers=headers)
soup = BeautifulSoup(page.content, 'html.parser')
site_info= page.json()
print(site_info)
For the JSON, I would recommend first pasting it into one of the JSON viewers you can find online, so you can see more clearly how the data is structured and where the information you want is located.
Something like this should get you the information you want:
# Print one line per men's size in the requested "EU <size> = Pieces <count>"
# form. In the response shown above 'eur' is a string and 'pieces' an int;
# str.format handles both without a manual str() conversion.
for s in site_info['sizeSets']['Men']['sizes']:
    print("EU {} = Pieces {}".format(s['eur'], s['pieces']))

Search for dictionary key, values that match in nested dictionary, wrapped in a list

consider this list:
d1 = [{'data': {'id': '1', 'label': 'ID #1', 'expanded': True}, 'classes': 'genesis'}, {'data': {'id': '2', 'label': 'ID #2', 'expanded': True}, 'classes': 'followerNode'}, {'data': {'id': '3', 'label': 'ID #3'}, 'classes': 'followerNode'}, {'data': {'id': '4', 'label': 'ID #4'}, 'classes': 'followerNode'}, {'data': {'id': '5', 'label': 'ID #5'}, 'classes': 'followerNode'}, {'data': {'id': '6', 'label': 'ID #6'}, 'classes': 'followerNode'}, {'data': {'id': '7', 'label': 'ID #7'}, 'classes': 'followerNode'}, {'data': {'id': '21', 'source': '2', 'target': '1'}, 'classes': 'followerEdge'}, {'data': {'id': '31', 'source': '3', 'target': '1'}, 'classes': 'followerEdge'}, {'data': {'id': '41', 'source': '4', 'target': '1'}, 'classes': 'followerEdge'}, {'data': {'id': '51', 'source': '5', 'target': '1'}, 'classes': 'followerEdge'}, {'data': {'id': '61', 'source': '6', 'target': '1'}, 'classes': 'followerEdge'}, {'data': {'id': '71', 'source': '7', 'target': '1'}, 'classes': 'followerEdge'}, {'data': {'id': '8', 'label': 'ID #8'}, 'classes': 'followerNode'}, {'data': {'id': '13', 'label': 'ID #13'}, 'classes': 'followerNode'}, {'data': {'id': '14', 'label': 'ID #14'}, 'classes': 'followerNode'}, {'data': {'id': '15', 'label': 'ID #15'}, 'classes': 'followerNode'}, {'data': {'id': '18', 'label': 'ID #18'}, 'classes': 'followerNode'}, {'data': {'id': '19', 'label': 'ID #19'}, 'classes': 'followerNode'}, {'data': {'id': '20', 'label': 'ID #20'}, 'classes': 'followerNode'}, {'data': {'id': '82', 'source': '8', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '132', 'source': '13', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '142', 'source': '14', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '152', 'source': '15', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '182', 'source': '18', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '192', 'source': '19', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': 
'202', 'source': '20', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '8', 'label': 'ID #8'}, 'classes': 'followerNode'}, {'data': {'id': '13', 'label': 'ID #13'}, 'classes': 'followerNode'}, {'data': {'id': '14', 'label': 'ID #14'}, 'classes': 'followerNode'}, {'data': {'id': '15', 'label': 'ID #15'}, 'classes': 'followerNode'}, {'data': {'id': '18', 'label': 'ID #18'}, 'classes': 'followerNode'}, {'data': {'id': '19', 'label': 'ID #19'}, 'classes': 'followerNode'}, {'data': {'id': '20', 'label': 'ID #20'}, 'classes': 'followerNode'}, {'data': {'id': '82', 'source': '8', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '132', 'source': '13', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '142', 'source': '14', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '152', 'source': '15', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '182', 'source': '18', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '192', 'source': '19', 'target': '2'}, 'classes': 'followerEdge'}, {'data': {'id': '202', 'source': '20', 'target': '2'}, 'classes': 'followerEdge'}]
how do I search for say "1" when there are other key types (like 'source' or 'target'):
I want to remove the sub dict which matches any of my search criteria... for instance id with val of 1... it should remove this:
{'data': {'id': '1', 'label': 'ID #1', 'expanded': True}, 'classes': 'genesis'}
Here's what I tried:
[{k:v} for lst in d1 for k, v in lst.items() if (("1" not in (v.get('source', None), (v!=None))) and
("1" not in (v.get('target', None), (v!=None))) and
("1" not in (v.get('id', None), (v!=None))))]
but I'm getting
AttributeError: 'str' object has no attribute 'get'
Each item in your list is a dictionary with two key-value pairs: the first has a dictionary as its value, the second a string. When you loop with for k, v in lst.items() over each item, it works fine for the first key-value pair, but for the second pair v is a string rather than a dictionary, and a string has no .get method, which is what causes the error.
I think this should do the trick:
# Keep only the entries whose 'source', 'target' and 'id' all differ from "1".
# Membership in a tuple compares whole values for equality; a substring test
# such as `"1" not in "13"` would wrongly drop ids like '13', '21' or '132'.
# Missing keys come back as None from .get(), which never equals "1".
[lst for lst in d1
 if "1" not in (lst["data"].get('source'),
                lst["data"].get('target'),
                lst["data"].get('id'))]
or
# Loop form of the same filter: print every entry whose 'source', 'target'
# and 'id' all differ from "1". Tuple membership compares whole values, so an
# id such as '13' is kept (a substring test would drop it). A bare `lst`
# expression shows nothing when run as a script, hence the explicit print.
for lst in d1:
    if "1" not in (lst["data"].get('source'),
                   lst["data"].get('target'),
                   lst["data"].get('id')):
        print(lst)
In your code, "for k, v in lst.items()" yields the keys 'data' and 'classes'; for 'classes' the value v is a string rather than a dictionary, hence the AttributeError.
To get dictionary with required Id:
ans = []
ID = '1'
for k in d1:
if k['data']['id'] == ID:
ans.append(k)

Extract value in Python

My Code:
import requests
import json
web_page = requests.get("http://api.bart.gov/api/etd.aspx?cmd=etd&orig=mont&key=MW9S-E7SL-26DU-VV8V&json=y")
response = web_page.text
parsed_json = json.loads(response)
#print(parsed_json)
print(parsed_json['root']['date'])
print(parsed_json['root']['time'])
print(parsed_json['root']['station']['name'])
How to extract value of destination and minutes from below in Python.
[{'name': 'Montgomery St.', 'abbr': 'MONT', 'etd': [{'destination': 'Daly City', 'abbreviation': 'DALY', 'limited': '0', 'estimate': [{'minutes': '39', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'WHITE', 'hexcolor': '#ffffff', 'bikeflag': '1', 'delay': '220'}]}, {'destination': 'SF Airport', 'abbreviation': 'SFIA', 'limited': '0', 'estimate': [{'minutes': '16', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '132'}, {'minutes': '26', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'BLUE', 'hexcolor': '#0099cc', 'bikeflag': '1', 'delay': '69'}]}]}]
Try this:
json_obj = {'name': 'Montgomery St.', 'abbr': 'MONT', 'etd': [{'destination': 'Antioch', 'abbreviation': 'ANTC', 'limited': '0', 'estimate': [{'minutes': '1', 'platform': '2', 'direction': 'North', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '254'}]},
{'destination': 'Daly City', 'abbreviation': 'DALY', 'limited': '0', 'estimate': [{'minutes': '39', 'platform': '1', 'direction': 'South', 'length': '0', 'color': 'BLUE', 'hexcolor': '#0099cc', 'bikeflag': '1', 'delay': '0'}]},
{'destination': 'SF Airport', 'abbreviation': 'SFIA', 'limited': '0', 'estimate': [{'minutes': '38', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '0'}]}]}
for item in json_obj['etd']:
dest = item['destination']
minute = item['estimate'][0]['minutes']
print(dest, minute)
Output:
Antioch 1
Daly City 39
SF Airport 38
The problem is in parsed_json['root']['station']['name']. parsed_json['root']['station'] is a list, not a dict, so it doesn't have name key. You need to use index 0 or iterate over it
for station in parsed_json['root']['station']:
for etd in station['etd']:
for estimate in etd['estimate']:
print(etd['destination'], estimate['minutes'])
Output
Daly City 35
SF Airport 16
SF Airport 26
Try this to get json data:
import json
# some JSON:
json_data= {'destination': 'Daly City', 'abbreviation': 'DALY', 'limited': '0', 'estimate': [{'minutes': '39', 'platform': '1', 'direction': 'South', 'length': '0', 'color': 'BLUE', 'hexcolor': '#0099cc', 'bikeflag': '1', 'delay': '0'}]}
# parse json_data:
data = json.dumps(json_data)
extract_json = json.loads(data)
print("Destination: "+extract_json["destination"])
print("Minutes: "+extract_json["estimate"][0]["minutes"])
Output:
Destination: Daly City
Minutes: 39
Assuming the data is in d_MONT:
d_MONT = {'name': 'Montgomery St.', 'abbr': 'MONT', 'etd': [{'destination': 'Antioch', 'abbreviation': 'ANTC', 'limited': '0', 'estimate': [{'minutes': '1', 'platform': '2', 'direction': 'North', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '254'}]},
{'destination': 'Daly City', 'abbreviation': 'DALY', 'limited': '0', 'estimate': [{'minutes': '39', 'platform': '1', 'direction': 'South', 'length': '0', 'color': 'BLUE', 'hexcolor': '#0099cc', 'bikeflag': '1', 'delay': '0'}]},
{'destination': 'SF Airport', 'abbreviation': 'SFIA', 'limited': '0', 'estimate': [{'minutes': '38', 'platform': '1', 'direction': 'South', 'length': '10', 'color': 'YELLOW', 'hexcolor': '#ffff33', 'bikeflag': '1', 'delay': '0'}]}]}
This will find the next train to destinationRequired:
# Find the soonest departure to destinationRequired among d_MONT's 'etd' entries.
destinationList = d_MONT['etd']
destinationRequired = 'Daly City'
for destinationDict in destinationList:
    if destinationDict['destination'] != destinationRequired:
        continue
    earliest = None
    for estimate in destinationDict['estimate']:
        minutes = estimate['minutes']
        # The feed gives minutes as strings; compare them as numbers, because
        # as strings '39' < '5' is True.
        # NOTE(review): BART is understood to report 'Leaving' instead of a
        # number for a train that is departing; any non-numeric value is
        # treated as 0 minutes here. Confirm against the API documentation.
        minutes = int(minutes) if minutes.isdigit() else 0
        if earliest is None or minutes < earliest:
            earliest = minutes
    if earliest is not None:
        print("Next train to {0}: {1} minutes".format(destinationRequired, earliest))
        break
    # An entry with an empty estimate list has no train to report; keep
    # looking so the for/else below can print the "No trains" message.
else:
    # Runs only when the loop finished without `break`, i.e. nothing was found.
    print("No trains to {0}".format(destinationRequired))
Note there are more Pythonic ways to do this, and the code example above does not follow PEP8, but I think it is important you understand the basic logic of how to do what you want rather than a complex Python one-liner.
You do not document the JSON object format, so I don't think it is safe to assume the list of trains to destination will be in order, therefore the safest is to step through each one and find the earliest. It isn't even clear if more than one train will ever be returned in the list, in which case a simple [0] would be sufficient rather than stepping through each one.

How to sort a list of dictionaries the right way in Python

I have list as follows:
data = [
{'items': [
{'key': u'3', 'id': 1, 'name': u'Typeplaatje'},
{'key': u'2', 'id': 2, 'name': u'Aanduiding van het chassisnummer '},
{'key': u'1', 'id': 3, 'name': u'Kilometerteller: Kilometerstand '},
{'key': u'5', 'id': 4, 'name': u'Inschrijvingsbewijs '},
{'key': u'4', 'id': 5, 'name': u'COC of gelijkvormigheidsattest '}
], 'id': 2, 'key': u'B', 'name': u'Onderdelen'},
{'items': [
{'key': u'10', 'id': 10, 'name': u'Koppeling'},
{'key': u'7', 'id': 11, 'name': u'Differentieel '},
{'key': u'9', 'id': 12, 'name': u'Cardanhoezen '},
{'key': u'8', 'id': 13, 'name': u'Uitlaat '},
{'key': u'6', 'id': 15, 'name': u'Batterij'}
], 'id': 2, 'key': u'B', 'name': u'Onderdelen'}
]
And I want to sort items by key.
Thus the wanted result is as follows:
res = [
{'items': [
{'key': u'1', 'id': 3, 'name': u'Kilometerteller: Kilometerstand '},
{'key': u'2', 'id': 2, 'name': u'Aanduiding van het chassisnummer '},
{'key': u'3', 'id': 1, 'name': u'Typeplaatje'},
{'key': u'4', 'id': 5, 'name': u'COC of gelijkvormigheidsattest '},
{'key': u'5', 'id': 4, 'name': u'Inschrijvingsbewijs '},
], 'id': 2, 'key': u'B', 'name': u'Onderdelen'},
{'items': [
{'key': u'6', 'id': 15, 'name': u'Batterij'},
{'key': u'7', 'id': 11, 'name': u'Differentieel '},
{'key': u'8', 'id': 13, 'name': u'Uitlaat '},
{'key': u'9', 'id': 12, 'name': u'Cardanhoezen '},
{'key': u'10', 'id': 10, 'name': u'Koppeling'}
], 'id': 2, 'key': u'B', 'name': u'Onderdelen'}
]
I've tried as follows:
res = []
for item in data:
new_data = {
'id': item['id'],
'key': item['key'],
'name': item['name'],
'items': sorted(item['items'], key=lambda k : k['key'])
}
res.append(new_data)
print(res)
The first is sorted fine, but the second one not.
What am I doing wrong and is there a better way of doing it?
Your sort is wrong in the second case because the keys are strings, and strings are compared character by character, so '10' (which starts with '1') sorts before '6'. A slight modification to your sorting function would do the trick:
'items': sorted(item['items'], key=lambda k : int(k['key'])
I'm doing an int because you want to sort them as if they are numbers. Here it is in your code:
res = []
for item in data:
new_data = {
'id': item['id'],
'key': item['key'],
'name': item['name'],
'items': sorted(item['items'], key=lambda k : int(k['key']) )
}
res.append(new_data)
print(res)
And here's the result:
[{'id': 2,
'items': [{'id': 3, 'key': '1', 'name': 'Kilometerteller: Kilometerstand '},
{'id': 2, 'key': '2', 'name': 'Aanduiding van het chassisnummer '},
{'id': 1, 'key': '3', 'name': 'Typeplaatje'},
{'id': 5, 'key': '4', 'name': 'COC of gelijkvormigheidsattest '},
{'id': 4, 'key': '5', 'name': 'Inschrijvingsbewijs '}],
'key': 'B',
'name': 'Onderdelen'},
{'id': 2,
'items': [{'id': 15, 'key': '6', 'name': 'Batterij'},
{'id': 11, 'key': '7', 'name': 'Differentieel '},
{'id': 13, 'key': '8', 'name': 'Uitlaat '},
{'id': 12, 'key': '9', 'name': 'Cardanhoezen '},
{'id': 10, 'key': '10', 'name': 'Koppeling'}],
'key': 'B',
'name': 'Onderdelen'}]
You need to replace the old items in the data with the sorted items based on key numerically instead of string sort. So use int(item['key']) in sort like,
>>> data
[{'items': [{'key': '1', 'id': 3, 'name': 'Kilometerteller: Kilometerstand '}, {'key': '2', 'id': 2, 'name': 'Aanduiding van het chassisnummer '}, {'key': '3', 'id': 1, 'name': 'Typeplaatje'}, {'key': '4', 'id': 5, 'name': 'COC of gelijkvormigheidsattest '}, {'key': '5', 'id': 4, 'name': 'Inschrijvingsbewijs '}], 'id': 2, 'key': 'B', 'name': 'Onderdelen'}, {'items': [{'key': '6', 'id': 15, 'name': 'Batterij'}, {'key': '7', 'id': 11, 'name': 'Differentieel '}, {'key': '8', 'id': 13, 'name': 'Uitlaat '}, {'key': '9', 'id': 12, 'name': 'Cardanhoezen '}, {'key': '10', 'id': 10, 'name': 'Koppeling'}], 'id': 2, 'key': 'B', 'name': 'Onderdelen'}]
>>>
>>> for item in data:
... item['items'] = sorted(item['items'], key=lambda x: int(x['key']))
...
>>> import pprint
>>> pprint.pprint(data)
[{'id': 2,
'items': [{'id': 3, 'key': '1', 'name': 'Kilometerteller: Kilometerstand '},
{'id': 2, 'key': '2', 'name': 'Aanduiding van het chassisnummer '},
{'id': 1, 'key': '3', 'name': 'Typeplaatje'},
{'id': 5, 'key': '4', 'name': 'COC of gelijkvormigheidsattest '},
{'id': 4, 'key': '5', 'name': 'Inschrijvingsbewijs '}],
'key': 'B',
'name': 'Onderdelen'},
{'id': 2,
'items': [{'id': 15, 'key': '6', 'name': 'Batterij'},
{'id': 11, 'key': '7', 'name': 'Differentieel '},
{'id': 13, 'key': '8', 'name': 'Uitlaat '},
{'id': 12, 'key': '9', 'name': 'Cardanhoezen '},
{'id': 10, 'key': '10', 'name': 'Koppeling'}],
'key': 'B',
'name': 'Onderdelen'}]
So list comes with a handy method called sort which sorts itself inplace. I'd use that to your advantage:
for d in data:
d['items'].sort(key=lambda x: int(x['key']))
Results:
[{'id': 2,
'items': [{'id': 3, 'key': '1', 'name': 'Kilometerteller: Kilometerstand '},
{'id': 2, 'key': '2', 'name': 'Aanduiding van het chassisnummer '},
{'id': 1, 'key': '3', 'name': 'Typeplaatje'},
{'id': 5, 'key': '4', 'name': 'COC of gelijkvormigheidsattest '},
{'id': 4, 'key': '5', 'name': 'Inschrijvingsbewijs '}],
'key': 'B',
'name': 'Onderdelen'},
{'id': 2,
'items': [{'id': 15, 'key': '6', 'name': 'Batterij'},
{'id': 11, 'key': '7', 'name': 'Differentieel '},
{'id': 13, 'key': '8', 'name': 'Uitlaat '},
{'id': 12, 'key': '9', 'name': 'Cardanhoezen '},
{'id': 10, 'key': '10', 'name': 'Koppeling'}],
'key': 'B',
'name': 'Onderdelen'}]

How to make dictionary data in json format in python

I am new to Python and am trying to understand how to work with dictionaries, but I got stuck.
I have data like below:
[{'mesure':'10', 'name': 'mumbai', 'age': '15', 'class':'kg1'}, {'mesure':'20', 'name': 'hyd', 'age': '20', 'class':'kg2'},{'mesure':'11', 'name': 'mumbai', 'age': '145', 'class':'kg6'}, {'mesure':'21', 'name': 'hyd', 'age': '20', 'class':'kg2'}, {'mesure':'40', 'name': 'pune', 'age': '30', 'class':'kg4'}, {'mesure':'30', 'name': 'chennai', 'age': '25', 'class':'kg3'}, {'mesure':'41', 'name': 'pune', 'age': '30', 'class':'kg7'}, {'mesure':'22', 'name': 'hyd', 'age': '20', 'class':'kg2'}{'mesure':'12', 'name': 'mumbai', 'age': '40', 'class':'kg7'}, {'mesure':'46', 'name': 'pune', 'age': '30', 'class':'kg8'}]
I want to convert it in format like:
[{"Name": "mumbai",
"data": [{'mesure':'10', 'name': 'mumbai', 'age': '15', 'class':'kg1'},
{'mesure':'11', 'name': 'mumbai', 'age': '145', 'class':'kg6'},
{'mesure':'12', 'name': 'mumbai', 'age': '40', 'class':'kg7'}]}
{"Name": "hyd",
"data":[{'mesure':'20', 'name': 'hyd', 'age': '20', 'class':'kg2'},
{'mesure':'21', 'name': 'hyd', 'age': '20', 'class':'kg2'},
{'mesure':'22', 'name': 'hyd', 'age': '20', 'class':'kg2'}]}
{"Name": "pune",
"data":[{'mesure':'40', 'name': 'pune', 'age': '30', 'class':'kg4'},
{'mesure':'41', 'name': 'pune', 'age': '30', 'class':'kg7'},
{'mesure':'46', 'name': 'pune', 'age': '30', 'class':'kg8'}]}]
I Tried:
def dir_data(data):
main_list = []
main_dir = []
for i in data:
names = i["name"]
main_dir.append({"name": names, "data": i})
print(main_dir)
if __name__== "__main__":
data = [{'mesure':'10', 'name': 'mumbai', 'age': '15', 'class':'kg1'}, {'mesure':'20', 'name': 'hyd', 'age': '20', 'class':'kg2'},{'mesure':'11', 'name': 'mumbai', 'age': '145', 'class':'kg6'}, {'mesure':'21', 'name': 'hyd', 'age': '20', 'class':'kg2'}, {'mesure':'40', 'name': 'pune', 'age': '30', 'class':'kg4'}, {'mesure':'30', 'name': 'chennai', 'age': '25', 'class':'kg3'}, {'mesure':'41', 'name': 'pune', 'age': '30', 'class':'kg7'}, {'mesure':'22', 'name': 'hyd', 'age': '20', 'class':'kg2'}{'mesure':'12', 'name': 'mumbai', 'age': '40', 'class':'kg7'}, {'mesure':'46', 'name': 'pune', 'age': '30', 'class':'kg8'}]
dir_data(data)
I tried above code but couldn't get exact output so please guide me to get it....
Thank you
def dir_data(data):
    """Group records by their 'name' value, keeping first-seen order.

    Args:
        data: iterable of dicts, each with a hashable 'name' entry.

    Returns:
        A list with one ``{"Name": <name>, "data": [<records>]}`` dict per
        distinct name, ordered by the first appearance of each name. The
        records inside each group keep their input order.
    """
    items = []
    # Maps each name to its group dict so a record is filed with a single
    # lookup instead of scanning the list of names seen so far.
    groups = {}
    for record in data:
        name = record['name']
        group = groups.get(name)
        if group is None:
            group = {"Name": name, "data": []}
            groups[name] = group
            items.append(group)
        group['data'].append(record)
    return items
data = [{'mesure':'10', 'name': 'mumbai', 'age': '15', 'class':'kg1'},
{'mesure':'20', 'name': 'hyd', 'age': '20', 'class':'kg2'},
{'mesure':'11', 'name': 'mumbai', 'age': '145', 'class':'kg6'},
{'mesure':'21', 'name': 'hyd', 'age': '20', 'class':'kg2'},
{'mesure':'40', 'name': 'pune', 'age': '30', 'class':'kg4'},
{'mesure':'30', 'name': 'chennai', 'age': '25', 'class':'kg3'},
{'mesure':'41', 'name': 'pune', 'age': '30', 'class':'kg7'},
{'mesure':'22', 'name': 'hyd', 'age': '20', 'class':'kg2'},
{'mesure':'12', 'name': 'mumbai', 'age': '40', 'class':'kg7'},
{'mesure':'46', 'name': 'pune', 'age': '30', 'class':'kg8'}
]
print(dir_data(data))
Try that one.
The code you have written runs, but the function does not return anything, a comma is missing in data, and there are some mistakes in the way the function is called.
Just call the function like this,
def dir_data(data):
    """Wrap every record in a ``{"name": ..., "data": ...}`` dict.

    Args:
        data: iterable of dicts, each with a 'name' entry.

    Returns:
        A list with one dict per input record, in input order. Note that
        records are NOT grouped: two records with the same name produce two
        separate entries, each holding a single record under "data".
    """
    return [{"name": record["name"], "data": record} for record in data]
data = [{'mesure':'10', 'name': 'mumbai', 'age': '15', 'class':'kg1'}, {'mesure':'20', 'name': 'hyd', 'age': '20', 'class':'kg2'},{'mesure':'11', 'name': 'mumbai', 'age': '145', 'class':'kg6'}, {'mesure':'21', 'name': 'hyd', 'age': '20', 'class':'kg2'}, {'mesure':'40', 'name': 'pune', 'age': '30', 'class':'kg4'}, {'mesure':'30', 'name': 'chennai', 'age': '25', 'class':'kg3'}, {'mesure':'41', 'name': 'pune', 'age': '30', 'class':'kg7'}, {'mesure':'22', 'name': 'hyd', 'age': '20', 'class':'kg2'},{'mesure':'12', 'name': 'mumbai', 'age': '40', 'class':'kg7'}, {'mesure':'46', 'name': 'pune', 'age': '30', 'class':'kg8'}]
dir_data(data)
You can get desired solution by using below code
test_data = [{'mesure': '10', 'name': 'mumbai', 'age': '15', 'class': 'kg1'}, {'mesure': '20', 'name': 'hyd', 'age': '20', 'class': 'kg2'}, {'mesure': '11', 'name': 'mumbai', 'age': '145', 'class': 'kg6'}, {'mesure': '21', 'name': 'hyd', 'age': '20', 'class': 'kg2'}, {'mesure': '40', 'name': 'pune', 'age': '30', 'class': 'kg4'}, {'mesure': '30', 'name': 'chennai', 'age': '25', 'class': 'kg3'}, {'mesure': '41', 'name': 'pune', 'age': '30', 'class': 'kg7'}, {'mesure': '22', 'name': 'hyd', 'age': '20', 'class': 'kg2'}, {'mesure': '12', 'name': 'mumbai', 'age': '40', 'class': 'kg7'}, {'mesure': '46', 'name': 'pune', 'age': '30', 'class': 'kg8'}]
dic = dict()
for i in test_data:
dic.setdefault(i['name'].title(),[]).append(i)
result = [{"name":k ,"data":v} for k,v in dic.items()]
Output
[{'data': [{'class': 'kg4', 'age': '30', 'name': 'pune', 'mesure': '40'},
{'class': 'kg7', 'age': '30', 'name': 'pune', 'mesure': '41'},
{'class': 'kg8', 'age': '30', 'name': 'pune', 'mesure': '46'}], 'name': 'Pune'},
{'data': [{'class': 'kg3', 'age': '25', 'name': 'chennai', 'mesure': '30'}], 'name': 'Chennai'},
{ 'data': [{'class': 'kg2', 'age': '20', 'name': 'hyd', 'mesure': '20'},
{'class': 'kg2', 'age': '20', 'name': 'hyd', 'mesure': '21'},
{'class': 'kg2', 'age': '20', 'name': 'hyd', 'mesure': '22'}], 'name': 'Hyd'},
{
'data': [{'class': 'kg1', 'age': '15', 'name': 'mumbai', 'mesure': '10'},
{'class': 'kg6', 'age': '145', 'name': 'mumbai', 'mesure': '11'},
{'class': 'kg7', 'age': '40', 'name': 'mumbai', 'mesure': '12'}], 'name': 'Mumbai'}]
Try this:
import json
data = [{'mesure':'10', 'name': 'mumbai', 'age': '15', 'class':'kg1'}, {'mesure':'20', 'name': 'hyd', 'age': '20', 'class':'kg2'},{'mesure':'11', 'name': 'mumbai', 'age': '145', 'class':'kg6'}, {'mesure':'21', 'name': 'hyd', 'age': '20', 'class':'kg2'}, {'mesure':'40', 'name': 'pune', 'age': '30', 'class':'kg4'}, {'mesure':'30', 'name': 'chennai', 'age': '25', 'class':'kg3'}, {'mesure':'41', 'name': 'pune', 'age': '30', 'class':'kg7'}, {'mesure':'22', 'name': 'hyd', 'age': '20', 'class':'kg2'},{'mesure':'12', 'name': 'mumbai', 'age': '40', 'class':'kg7'}, {'mesure':'46', 'name': 'pune', 'age': '30', 'class':'kg8'}]
def dir_data(data):
    """Group records by 'name' and return the groups as a JSON string.

    Args:
        data: iterable of JSON-serialisable dicts, each with a 'name' entry.

    Returns:
        The ``json.dumps`` encoding of a list with one
        ``{"Name": <name>, "data": [<records>]}`` object per distinct name.
        Groups appear in the order their name is first seen in *data*.
    """
    # A dict keeps insertion order, so the output is the same on every run;
    # iterating a set of names gives an arbitrary order. Filing each record
    # as it is read also avoids rescanning *data* once per name.
    grouped = {}
    for item in data:
        grouped.setdefault(item['name'], []).append(item)
    main_dir = [{"Name": name, "data": name_data}
                for name, name_data in grouped.items()]
    return json.dumps(main_dir)
Below is the solution which will give you the exact result as described by you:
def checkKey(dict, key):
    """Return True if *key* is present in *dict*, otherwise False."""
    # The first parameter shadows the builtin `dict`; the name is kept so
    # that existing keyword callers keep working.
    return key in dict
def dir_data(data):
    """Group records by their 'name' value.

    Args:
        data: iterable of dicts, each with a hashable 'name' entry.

    Returns:
        A list with one ``{'Name': <name>, 'data': [<records>]}`` dict per
        distinct name, in the order each name is first seen in *data*.
    """
    # Collect the records of each name; the dict is created here so the
    # function does not depend on any module-level state.
    tem_dict = {}
    for item in data:
        tem_dict.setdefault(item['name'], []).append(item)
    # Build a fresh result dict per group so entries never share state.
    return [{'Name': name, 'data': records}
            for name, records in tem_dict.items()]
let me know if this works for you or not.

Categories

Resources