Python: Make duplicate keys unique json response - python

I am extracting data with POST requests. The request payloads are stored in a list, and each request sends a different payload, like this:
cookies = {
"_sp_id.cf1a": "205e16a5-8970-4c92-97b8-969eebfcbb63.1647289721.7.1648545896.1648465133.73852927-e047-4c36-bae7-90c001509900",
"_sp_ses.cf1a": "*",
}
headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:98.0) Gecko/20100101 Firefox/98.0",
"Accept": "text/plain, */*; q=0.01",
"Accept-Language": "en-US,en;q=0.5",
"Accept-Encoding": "gzip, deflate, br",
"Referer": "https://www.tradingview.com/",
"Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
"Origin": "https://www.tradingview.com",
"Connection": "keep-alive",
# Requests sorts cookies= alphabetically
# 'Cookie': '_sp_id.cf1a=205e16a5-8970-4c92-97b8-969eebfcbb63.1647289721.7.1648545896.1648465133.73852927-e047-4c36-bae7-90c001509900; _sp_ses.cf1a=*',
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-site",
"Sec-GPC": "1",
# Requests doesn't support trailers
# 'TE': 'trailers',
}
data = [
'{"filter":[{"left":"name","operation":"nempty"},{"left":"exchange","operation":"in_range","right":["AMEX","NASDAQ","NYSE"]},{"left":"High.All","operation":"eless","right":"high"},{"left":"is_primary","operation":"equal","right":true},{"left":"subtype","operation":"nequal","right":"preferred"}],"options":{"lang":"en","active_symbols_only":true},"markets":["america"],"symbols":{"query":{"types":[]},"tickers":[]},"columns":["logoid","name","close","change","change_abs","Recommend.All","volume","Value.Traded","market_cap_basic","price_earnings_ttm","earnings_per_share_basic_ttm","number_of_employees","sector","description","type","subtype","update_mode","pricescale","minmov","fractional","minmove2","currency","fundamental_currency_code"],"sort":{"sortBy":"name","sortOrder":"asc"},"range":[0,150]}',
'{"filter":[{"left":"name","operation":"nempty"},{"left":"exchange","operation":"in_range","right":["AMEX","NASDAQ","NYSE"]},{"left":"High.All","operation":"eless","right":"high"},{"left":"is_primary","operation":"equal","right":true},{"left":"subtype","operation":"nequal","right":"preferred"}],"options":{"lang":"en","active_symbols_only":true},"markets":["america"],"symbols":{"query":{"types":[]},"tickers":[]},"columns":["logoid","name","change|1","change|5","change|15","change|60","change|240","change","change|1W","change|1M","Perf.3M","Perf.6M","Perf.YTD","Perf.Y","beta_1_year","Volatility.D","description","type","subtype","update_mode","currency","fundamental_currency_code"],"sort":{"sortBy":"name","sortOrder":"asc"},"range":[0,150]}',
]
# Send every payload in ``data`` to the scanner endpoint through a single
# httpx.Client, which is closed when the ``with`` block exits.
with httpx.Client() as client:
    for d in data:
        # NOTE: ``r`` is rebound on every pass, so once the loop has finished
        # only the response to the last payload is still reachable.
        r = client.post(
            "https://scanner.tradingview.com/america/scan",
            headers=headers,
            cookies=cookies,
            data=d,
        )
I store the POST payloads in a list, iterate over it, and pass each payload to the request; so far so good. Each POST request returns a JSON object, and this is where the problem begins: every request returns a different JSON object, but the keys are the same. For example,
'{"filter":[{"left":"name","operation":"nempty"},{"left":"exchange","operation":"in_range","right":["AMEX","NASDAQ","NYSE"]},{"left":"High.All","operation":"eless","right":"high"},{"left":"is_primary","operation":"equal","right":true},{"left":"subtype","operation":"nequal","right":"preferred"}],"options":{"lang":"en","active_symbols_only":true},"markets":["america"],"symbols":{"query":{"types":[]},"tickers":[]},"columns":["logoid","name","change|1","change|5","change|15","change|60","change|240","change","change|1W","change|1M","Perf.3M","Perf.6M","Perf.YTD","Perf.Y","beta_1_year","Volatility.D","description","type","subtype","update_mode","currency","fundamental_currency_code"],"sort":{"sortBy":"name","sortOrder":"asc"},"range":[0,150]}'
this post data returns this json:
{
"totalCount": 141,
"data": [
{
"s": "NYSE:ABBV",
"d": [
"abbvie",
"ABBV",
0,
0.13602918,
0.2662209,
0.37497288,
0.6400696,
0.39670241,
0.39670241,
9.60952832,
20.52236029,
48.81477398,
19.62333826,
51.75676942,
0.5128781,
1.56710337,
"AbbVie Inc.",
"stock",
"common",
"delayed_streaming_900",
"USD",
"USD"
]
}
]
}
and post data
{"filter":[{"left":"name","operation":"nempty"},{"left":"exchange","operation":"in_range","right":["AMEX","NASDAQ","NYSE"]},{"left":"High.All","operation":"eless","right":"high"},{"left":"is_primary","operation":"equal","right":true},{"left":"subtype","operation":"nequal","right":"preferred"}],"options":{"lang":"en","active_symbols_only":true},"markets":["america"],"symbols":{"query":{"types":[]},"tickers":[]},"columns":["logoid","name","close","change","change_abs","Recommend.All","volume","Value.Traded","market_cap_basic","price_earnings_ttm","earnings_per_share_basic_ttm","number_of_employees","sector","description","type","subtype","update_mode","pricescale","minmov","fractional","minmove2","currency","fundamental_currency_code"],"sort":{"sortBy":"name","sortOrder":"asc"},"range":[0,150]}'
returns this, which has the same keys as the previous response but different values:
{
"totalCount": 141,
"data": [
{
"s": "NYSE:ABBV",
"d": [
"abbvie",
"ABBV",
161.97,
0.39670241,
0.64,
0.42121212,
4516453,
731529892.41,
286085169370,
25.01356652,
6.4775,
50000,
"Health Technology",
"AbbVie Inc.",
"stock",
"common",
"delayed_streaming_900",
100,
1,
"false",
0,
"USD",
"USD"
]
}
]
}
Hence I am getting just one json object which is the last one in the loop instead of both.
Desired Output:
{
"overview": {
"totalCount": 141,
"data": [
{
"s": "NYSE:ABBV",
"d": [
"abbvie",
"ABBV",
161.97,
0.39670241,
0.64,
0.42121212,
4516453,
731529892.41,
286085169370,
25.01356652,
6.4775,
50000,
"Health Technology",
"AbbVie Inc.",
"stock",
"common",
"delayed_streaming_900",
100,
1,
"false",
0,
"USD",
"USD"
]
}
]
},
"performance": {
"totalCount": 141,
"data": [
{
"s": "NYSE:ABBV",
"d": [
"abbvie",
"ABBV",
0,
0.13602918,
0.2662209,
0.37497288,
0.6400696,
0.39670241,
0.39670241,
9.60952832,
20.52236029,
48.81477398,
19.62333826,
51.75676942,
0.5128781,
1.56710337,
"AbbVie Inc.",
"stock",
"common",
"delayed_streaming_900",
"USD",
"USD"
]
}
]
}
}
I am looking for a way to handle this duplication in the JSON responses: I want to keep the data returned for every payload by giving the duplicate keys unique names.

Related

Web Scraping content-type:JSON

I am attempting to scrape location details from
here.
Using BeautifulSoup I got an empty list ([]) as a result. The issue is that the data I want to scrape is not available in the page source. In Developer Tools > Network, the content type is JSON, so I tried the code below:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import json
url = 'https://ngc.taleo.net/careersection/ng_pro_intl_aujobs/jobsearch.ftl?lang=en_GB&location=756140022608&radiusType=K&searchExpanded=true&radius=1&portal=34140031600&_ga=2.197392303.1699610010.1604351575-1311873605.1579627290'
s = requests.Session()
cookies = {
'locale': 'en-GB',
'_gcl_au': '1.1.79711829.1614933155',
'_ga': 'GA1.2.693390019.1614933178',
'__atssc': 'google^%^3B1',
'_gid': 'GA1.2.1213481278.1618077337',
'__atuvc': '1^%^7C10^%^2C0^%^7C11^%^2C9^%^7C12^%^2C14^%^7C13^%^2C28^%^7C14',
'__atuvs': '6071e67dc413e3d6001',
}
headers = {
'Connection': 'keep-alive',
'Pragma': 'no-cache',
'Cache-Control': 'no-cache',
'sec-ch-ua': '^\\^Google',
'tzname': 'Asia/Calcutta',
'sec-ch-ua-mobile': '?0',
'tz': 'GMT+05:30',
'Content-Type': 'application/json',
'Accept': 'application/json, text/javascript, */*; q=0.01',
'User-Agent': '###MY USER AGENT HERE####',
'X-Requested-With': 'XMLHttpRequest',
'Origin': 'https://ngc.taleo.net',
'Sec-Fetch-Site': 'same-origin',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Dest': 'empty',
'Referer': 'https://ngc.taleo.net/careersection/ng_pro_intl_aujobs/jobsearch.ftl?lang=en_GB&location=756140022608&radiusType=K&searchExpanded=true&radius=1&portal=34140031600&_ga=2.197392303.1699610010.1604351575-1311873605.1579627290',
'Accept-Language': 'en-US,en;q=0.9',
}
params = (
('lang', 'en_GB'),
('portal', '34140031600'),
)
data = '^{^\\^multilineEnabled^\\^:true,^\\^sortingSelection^\\^:^{^\\^sortBySelectionParam^\\^:^\\^3^\\^,^\\^ascendingSortingOrder^\\^:^\\^false^\\^^},^\\^fieldData^\\^:^{^\\^fields^\\^:^{^\\^KEYWORD^\\^:^\\^^\\^,^\\^LOCATION^\\^:^\\^756140022608^\\^,^\\^JOB_TITLE^\\^:^\\^^\\^^},^\\^valid^\\^:true^},^\\^filterSelectionParam^\\^:^{^\\^searchFilterSelections^\\^:^[^{^\\^id^\\^:^\\^POSTING_DATE^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^LOCATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_FIELD^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_TYPE^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_SCHEDULE^\\^,^\\^selectedValues^\\^:^[^]^}^]^},^\\^advancedSearchFiltersSelectionParam^\\^:^{^\\^searchFilterSelections^\\^:^[^{^\\^id^\\^:^\\^ORGANIZATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^LOCATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_FIELD^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_NUMBER^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^URGENT_JOB^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^EMPLOYEE_STATUS^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^STUDY_LEVEL^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^WILL_TRAVEL^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_SHIFT^\\^,^\\^selectedValues^\\^:^[^]^}^]^},^\\^pageNo^\\^:1^}'
response = s.post(url, headers=headers, cookies=cookies, data=data).json()
#res_json = json.loads(response)
#print(response.status_code)
But on the response line I got this error: JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Would really appreciate any help on this!!
I am unfortunately currently limited to using only requests or other popular python libraries.
Thanks in advance..
You need to send JSON data, you can use json = data with the python requests module. You also need to format the data into a dictionary:
import requests
r = requests.post("https://ngc.taleo.net/careersection/rest/jobboard/searchjobs",
params={
"lang": "en_GB",
"location": "756140022608",
"radiusType": "K",
"searchExpanded": "true",
"radius": "1",
"portal": "34140031600"
},
headers={
"tzname": "Asia/Calcutta",
"tz": "GMT+05:30"
},
json={
"multilineEnabled": True,
"sortingSelection": {
"sortBySelectionParam": "3",
"ascendingSortingOrder": "false"
},
"fieldData": {
"fields": {
"KEYWORD": "",
"LOCATION": "756140022608",
"JOB_TITLE": ""
},
"valid": True
},
"filterSelectionParam": {
"searchFilterSelections": [{
"id": "POSTING_DATE",
"selectedValues": []
}, {
"id": "LOCATION",
"selectedValues": []
}, {
"id": "JOB_FIELD",
"selectedValues": []
}, {
"id": "JOB_TYPE",
"selectedValues": []
}, {
"id": "JOB_SCHEDULE",
"selectedValues": []
}]
},
"advancedSearchFiltersSelectionParam": {
"searchFilterSelections": [{
"id": "ORGANIZATION",
"selectedValues": []
}, {
"id": "LOCATION",
"selectedValues": []
}, {
"id": "JOB_FIELD",
"selectedValues": []
}, {
"id": "JOB_NUMBER",
"selectedValues": []
}, {
"id": "URGENT_JOB",
"selectedValues": []
}, {
"id": "EMPLOYEE_STATUS",
"selectedValues": []
}, {
"id": "STUDY_LEVEL",
"selectedValues": []
}, {
"id": "WILL_TRAVEL",
"selectedValues": []
}, {
"id": "JOB_SHIFT",
"selectedValues": []
}]},
"pageNo": 1
})
print(r.json())

Filtering JSON in python

I want to filter a JSON file so that it only shows me the entries whose content type is application/json.
For now this is my code :
import json
# Load the whole JSON document into memory.
with open('rob.json', 'r', encoding="utf8") as original_file:
    data = json.load(original_file)

# Iterate over the top-level JSON value and print every item whose 'value'
# equals 'application/json'.
# NOTE(review): this subscript is where the reported
# "TypeError: string indices must be integers" is raised.
for line in data:
    if line['value'] == 'application/json':
        print(line)
The code I have written is very basic as I am quite a beginner when it comes to scripting. However it is not working and I have an error:
TypeError: string indices must be integers
I require some help on why I am having this error and whether there is a better alternative to filter a JSON file.
TIA
You have to understand the structure of the returned data. It is a dictionary containing one key ("log") that is also a dictionary. That dictionary contains an "entries" key which is a list. That list consists of dictionaries that have keys for "request" and "response". The "request" key has a "headers" key, which is a list of dictionaries containing "name" and "value" keys.
import json
# Load the whole JSON document; the entries of interest live under
# data['log']['entries'].
with open('rob.json', encoding='utf8') as f:
    data = json.load(f)

# Traverse the list of log entries:
for entry in data['log']['entries']:
    # Traverse the list of headers:
    for header in entry['response']['headers']:
        # Look for the appropriate name and value (exact, case-sensitive match).
        if header['name'] == 'Content-Type' and header['value'] == 'application/json':
            # I just print the request as the response is very long...
            print(json.dumps(entry['request'], indent=2))
Output:
{
"method": "GET",
"url": "http://ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min.map",
"httpVersion": "HTTP/1.1",
"headers": [
{
"name": "Pragma",
"value": "no-cache"
},
{
"name": "Accept-Encoding",
"value": "gzip,deflate,sdch"
},
{
"name": "Host",
"value": "ajax.googleapis.com"
},
{
"name": "Accept-Language",
"value": "en-US,en;q=0.8"
},
{
"name": "User-Agent",
"value": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36"
},
{
"name": "Accept",
"value": "*/*"
},
{
"name": "Referer",
"value": "http://ericduran.github.io/chromeHAR/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Cache-Control",
"value": "no-cache"
}
],
"queryString": [],
"cookies": [],
"headersSize": 412,
"bodySize": 0
}
import json
# Load the whole JSON document into memory.
with open('rob.json', 'r', encoding="utf8") as original_file:
    data = json.load(original_file)

# Walk every response header of every log entry.
for entry in data["log"]["entries"]:
    res = entry["response"]
    for header in res["headers"]:
        # Substring test on the header value only: the header name is not
        # checked, and values that merely contain "application/json" match too.
        if "application/json" in header["value"]:
            print(header)
I don't really know what you are looking for, but I'm pretty sure this code will print every header whose value includes "application/json".

Unable to send an application in the right way using post requests having multiple parameters

I'm trying to send an application after filling in a form available in a webpage using python. I've tried to mimic the process that I see in chrome dev tools but it seems I've gone somewhere wrong and that is the reason when I execute the following script I get this error:
{
"message":"415 Unsupported Media Type returned for /apply-app/rest/jobs/PIDFK026203F3VBQB79V77VIY-87592/submissions with message: ",
"key":"Exception_server_error",
"errorId":"d6b128bd-426d-4bee-8dbb-03e232829f5e"
}
It seems to me that I need to use the value of token and version in an automatic manner as they are different in every application but I don't find them in page source and stuff.
I've selected No as value for all the dropdowns (when there is any) within Additional Information.
Link to the application page
Link to the attachment that I've used thrice.
I've tried with:
import requests
main_link = "https://karriere.hsbc.de/stellenangebote/stellenboerse/apply?jobId=PIDFK026203F3VBQB79V77VIY-87592&langCode=de_DE"
post_link = "https://emea3.recruitmentplatform.com/apply-app/rest/jobs/PIDFK026203F3VBQB79V77VIY-87592/submissions"
payload = {
"candidateIdentity":{"firstName":"syed","lastName":"mushfiq","email":"mthmt80#gmail.com"},
"answeredDocuments":[{"documentType":"answeredForm","formId":"hsbc_bewerbungsprozess_pers_nliche_daten",
"answers":[
{"questionId":"form_of_address","type":"options","value":["form_of_address_m"]},
{"questionId":"academic_title","type":"simple","value":" Dr.","questionIds":[]},
{"questionId":"first_name","type":"simple","value":"syed","questionIds":[]},
{"questionId":"last_name","type":"simple","value":"mushfiq","questionIds":[]},
{"questionId":"e-mail_address","type":"simple","value":"mthmt80#gmail.com","questionIds":[]},
{"questionId":"phone__mobile_","type":"phone","countryCode":"+880","isoCountryCode":"BD","subscriberNumber":"1790128884"}]},
{"documentType":"answeredForm","formId":"hsbc_bewerbungsprozess_standard_fragebogen","answers":[{"questionId":"custom_question_450","type":"options","value":["custom_question_450_ja"]},
{"questionId":"custom_question_451","type":"options","value":["custom_question_451_nein"]},
{"questionId":"custom_question_452","type":"options","value":["custom_question_452_unter_keine_der_zuvor_genannten"]},
{"questionId":"custom_question_580","type":"options","value":["custom_question_580_nein_978"]},
{"questionId":"custom_question_637","type":"options","value":["custom_question_637_nein"]},
{"questionId":"custom_question_579","type":"options","value":["custom_question_579_nein"]},
{"questionId":"custom_question_583","type":"options","value":["custom_question_583_hsbc_deutschland_karriereseite"]}]},
#============The following three lines are supposed to help upload three files============
{"documentType":"attachment","attachmentId":"cover_letter","token":"2d178469-cdb5-4d65-9f67-1e7637896953","filename": open("demo.pdf","rb")},
{"documentType":"attachment","attachmentId":"attached_resume","token":"81a5a661-66bb-4918-a35c-ec260ffb7d02","filename": open("demo.pdf","rb")},
{"documentType":"attachment","attachmentId":"otherattachment","token":"4c3f7500-b072-48d4-83cf-0af1399bc8ba","filename": open("demo.pdf","rb")}],
#============The version's value should not be hardcoded=========================
"version":"V2:3:14dfac80702d099625d0274121b0dba68ac0fd96:861836b7d86adae8cc1ce69198b69b8ca59e2ed5","lastModifiedDate":1562056029000,"answeredDataPrivacyConsents":[{"identifier":"urn:lms:ta:tlk:data-privacy-consent:mtu531:101","consentProvided":True},
{"identifier":"urn:lms:ta:tlk:data-privacy-consent:mtu531:102","consentProvided":True}],
"metaInformation":{"applicationFormUrl":"https://karriere.hsbc.de/stellenangebote/stellenboerse/apply?jobId=PIDFK026203F3VBQB79V77VIY-87592&langCode=de_DE","jobsToLink":[]}
}
def send_application(s, link):
    """POST the module-level ``payload`` to *link* and print the response text.

    Args:
        s: object exposing ``post`` (a ``requests.Session`` in this script).
        link: URL the application is submitted to.
    """
    # The dict is passed through ``data=`` here, not ``json=``.
    res = s.post(link, data=payload)
    print(res.text)
if __name__ == '__main__':
    # Use one session for the run; the ``with`` block closes it afterwards.
    with requests.Session() as s:
        send_application(s, post_link)
How can I send the application in the right way?
PS I can send the application manually multiple times using the same documents to the same email.
The best way to go about something like this is to open the page in a browser and view the network tab in the developer tools. From there as you're filling out the form you'll be able to see that each time you attach a document it sends an ajax request and receives the token in a json response. With those tokens you can build the final payload which should be submitted in json format.
Here's some example code that's working:
import requests
headers = {
'Host': 'emea3.recruitmentplatform.com',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134',
'Accept': 'application/json, text/javascript, */*; q=0.01',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate',
'apply-config-key': 'AAACEwAA-55cd88d4-c9fd-41ce-95a4-f238402b898f',
'Origin': 'https://karriere.hsbc.de',
'DNT': '1',
'Connection': 'close',
'Referer': 'https://karriere.hsbc.de/',
'Cookie': 'lumesse_language=de_DE'
}
main_link = "https://karriere.hsbc.de/stellenangebote/stellenboerse/apply?jobId=PIDFK026203F3VBQB79V77VIY-87592&langCode=de_DE"
post_link = "https://emea3.recruitmentplatform.com/apply-app/rest/jobs/PIDFK026203F3VBQB79V77VIY-87592/submissions"
ajax_link = "https://emea3.recruitmentplatform.com/apply-app/rest/jobs/PIDFK026203F3VBQB79V77VIY-87592/attachments"
def build_payload(cover_letter_token, attached_resume_token, otherattachment_token):
    """Assemble the JSON body for the final application submission.

    Args:
        cover_letter_token: token identifying the uploaded cover letter.
        attached_resume_token: token identifying the uploaded resume.
        otherattachment_token: token identifying the additional attachment.

    Returns:
        A dict holding the candidate identity, the two answered forms, the
        three attachment references, the form version, the privacy consents
        and the meta information. Every form answer is a fixed sample value;
        only the three tokens vary between calls.
    """

    def simple(question_id, value):
        # Answer of type "simple": a plain value plus an empty questionIds list.
        return {
            "questionId": question_id,
            "type": "simple",
            "value": value,
            "questionIds": [],
        }

    def option(question_id, choice):
        # Answer of type "options" with exactly one selected choice.
        return {
            "questionId": question_id,
            "type": "options",
            "value": [choice],
        }

    def attachment(attachment_id, token):
        # Reference to an already uploaded file; the file name is always demo.pdf.
        return {
            "documentType": "attachment",
            "attachmentId": attachment_id,
            "token": token,
            "filename": "demo.pdf",
        }

    def consent(identifier):
        # consentProvided is sent as the string "true", not as a boolean.
        return {
            "identifier": identifier,
            "consentProvided": "true",
        }

    personal_data_form = {
        "documentType": "answeredForm",
        "formId": "hsbc_bewerbungsprozess_pers_nliche_daten",
        "answers": [
            option("form_of_address", "form_of_address_m"),
            simple("academic_title", "prof"),
            simple("first_name", "Syed"),
            simple("last_name", "Mushfiq"),
            simple("e-mail_address", "mthmt80#gmail.com"),
            {
                "questionId": "phone__mobile_",
                "type": "phone",
                "countryCode": "+49",
                "isoCountryCode": "DE",
                "subscriberNumber": "30 33850062",
            },
        ],
    }

    questionnaire_form = {
        "documentType": "answeredForm",
        "formId": "hsbc_bewerbungsprozess_standard_fragebogen",
        "answers": [
            option("custom_question_450", "custom_question_450_ja"),
            option("custom_question_451", "custom_question_451_nein"),
            option(
                "custom_question_452",
                "custom_question_452_unter_keine_der_zuvor_genannten",
            ),
            option("custom_question_580", "custom_question_580_ja"),
            option("custom_question_637", "custom_question_637_nein"),
            option("custom_question_579", "custom_question_579_nein"),
            option("custom_question_583", "custom_question_583_linkedin"),
        ],
    }

    return {
        "candidateIdentity": {
            "firstName": "Syed",
            "lastName": "Mushfiq",
            "email": "mthmt80#gmail.com",
        },
        "answeredDocuments": [
            personal_data_form,
            questionnaire_form,
            attachment("cover_letter", cover_letter_token),
            attachment("attached_resume", attached_resume_token),
            attachment("otherattachment", otherattachment_token),
        ],
        "version": "V2:3:14dfac80702d099625d0274121b0dba68ac0fd96:861836b7d86adae8cc1ce69198b69b8ca59e2ed5",
        "lastModifiedDate": "1562056029000",
        "answeredDataPrivacyConsents": [
            consent("urn:lms:ta:tlk:data-privacy-consent:mtu531:101"),
            consent("urn:lms:ta:tlk:data-privacy-consent:mtu531:102"),
        ],
        "metaInformation": {
            "applicationFormUrl": "https://karriere.hsbc.de/stellenangebote/stellenboerse/apply?jobId=PIDFK026203F3VBQB79V77VIY-87592&langCode=de_DE",
            "jobsToLink": [],
        },
    }
def submit_attachment(s, link, f):
    """Upload the file at path *f* to *link* and return the token from the reply.

    Args:
        s: object exposing ``post`` (a ``requests.Session`` in this script).
        link: URL of the attachment endpoint.
        f: path of the file to upload.

    Returns:
        The ``token`` field of the JSON response, or ``None`` if it is absent.
    """
    # Read the bytes inside a context manager so the file handle is closed
    # as soon as the content is in memory instead of being left open.
    with open(f, 'rb') as fh:
        d = fh.read()
    # The upload is always sent under the file name "demo.pdf", together with
    # the form version as a plain (file-less) multipart field.
    r = s.post(
        link,
        files={
            'file': ('demo.pdf', d),
            'applicationProcessVersion': (
                None,
                'V2:3:14dfac80702d099625d0274121b0dba68ac0fd96:861836b7d86adae8cc1ce69198b69b8ca59e2ed5',
            ),
        },
    )
    r_data = r.json()
    return r_data.get('token')
def send_application(s, link, p):
    """POST *p* to *link* as a JSON body and return the response object.

    Args:
        s: object exposing ``post`` (a ``requests.Session`` in this script).
        link: URL the application is submitted to.
        p: payload passed through the ``json=`` keyword.
    """
    return s.post(link, json=p)
if __name__ == '__main__':
    # The names double as keys of token_dict, one per attachment slot.
    attachment_list = ["cover_letter_token", "attached_resume_token", "otherattachment_token"]
    token_dict = {}
    with requests.Session() as s:
        s.headers.update(headers)
        # Upload demo.pdf once per slot and keep the token returned each time.
        for at in attachment_list:
            rt = submit_attachment(s, ajax_link, "demo.pdf")
            token_dict[at] = rt
        # With all three tokens collected, build and submit the application.
        payload = build_payload(
            token_dict['cover_letter_token'],
            token_dict['attached_resume_token'],
            token_dict['otherattachment_token'],
        )
        rd = send_application(s, post_link, payload)
        print(rd.text)
        print(rd.status_code)

Convert curl POST statement with JSON payload to Python request

I can run this curl statement and it works perfectly. I have read many posts, but nothing I have tried in Python works.
curl -X POST "http://some.website.com" -H "accept: application/json" -H "authorization: Basic authcode" -H "Content-Type: application/json" -d "{ \"Fields\": [ \"string\" ], \"Filters\": [ { \"Field\": \"Item\", \"Operator\": \"=\", \"Value\": \"119001\" } ], \"PageSize\": 0, \"PageNumber\": 0}"
code tried so far
import requests
session = requests.Session()
url = 'http://some.website.com'
headers = {'accept': 'application/json', 'authorization': 'Basic authcode', 'Content-Type': 'application/json'}
data = {'Fields': 'string', 'Filters': { 'Field': 'Item', 'Operator': '=', 'Value': '119001' }, 'PageSize': 0, 'PageNumber': 0}
response = session.post(url, headers=headers, data=data)
print(response.status_code)
print(response.json())
Error = not valid JSON Value
I have also tried
import simplejson as json
# ...
# ...
response = session.post(url, headers=headers, data=json.dumps(data))
# ...
# ...
Failed = Error detecting JSON fields
I think it has something to do with the nested dict statement
Using https://httpbin.org/post I can see what data (headers and body) are received on server and I see the same result for curl
curl -X POST "http://httpbin.org/post" -H "accept: application/json" -H "authorization: Basic authcode" -H "Content-Type: application/json" -d "{\"Fields\": [\"string\"], \"Filters\": [{\"Field\": \"Item\", \"Operator\": \"=\", \"Value\": \"119001\"}], \"PageSize\": 0, \"PageNumber\": 0}"
# result
{
"args": {},
"data": "{\"Fields\": [\"string\"], \"Filters\": [{\"Field\": \"Item\", \"Operator\": \"=\", \"Value\": \"119001\"}], \"PageSize\": 0, \"PageNumber\": 0}",
"files": {},
"form": {},
"headers": {
"Accept": "application/json",
"Authorization": "Basic authcode",
"Content-Length": "122",
"Content-Type": "application/json",
"Host": "httpbin.org",
"User-Agent": "curl/7.58.0"
},
"json": {
"Fields": [
"string"
],
"Filters": [
{
"Field": "Item",
"Operator": "=",
"Value": "119001"
}
],
"PageNumber": 0,
"PageSize": 0
},
"origin": "83.23.32.69, 83.23.32.69",
"url": "https://httpbin.org/post"
}
and Python (using json=data or data=json.dumps(data) instead of data=data)
import requests
headers = {
'Accept': 'application/json',
'Authorization': 'Basic authcode',
# 'Content-Type': 'application/json',
# 'User-Agent': 'Mozilla/5.0',
}
data = {
"Fields": [ "string" ],
"Filters": [ { "Field": "Item", "Operator": "=", "Value": "119001" } ],
"PageSize": 0,
"PageNumber": 0
}
response = requests.post('https://httpbin.org/post', headers=headers, json=data)
print(response.text)
# result
{
"args": {},
"data": "{\"Fields\": [\"string\"], \"Filters\": [{\"Field\": \"Item\", \"Operator\": \"=\", \"Value\": \"119001\"}], \"PageSize\": 0, \"PageNumber\": 0}",
"files": {},
"form": {},
"headers": {
"Accept": "application/json",
"Accept-Encoding": "gzip, deflate",
"Authorization": "Basic authcode",
"Content-Length": "122",
"Content-Type": "application/json",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.22.0"
},
"json": {
"Fields": [
"string"
],
"Filters": [
{
"Field": "Item",
"Operator": "=",
"Value": "119001"
}
],
"PageNumber": 0,
"PageSize": 0
},
"origin": "83.23.32.69, 83.23.32.69",
"url": "https://httpbin.org/post"
}
There are only differences in headers: "User-Agent": "curl/7.58.0" and "User-Agent": "python-requests/2.22.0". And Python uses "Accept-Encoding": "gzip, deflate".
BTW: you can use portal curl.trillworks.com to convert curl to Python code

python requests drops "data" when use of "header" in request

I am working on REST API of a site that requires this request type when I want to upload a file:
'Authorization' and multi-part content type in header
File as binary string in form (body)
File Type in request URL
So I did this code:
import requests
url = 'http://httpbin.org/post'
parameters = {
'format': 'pdf',
}
headers = {
'Content-Type': 'multipart/form-data',
'Accept': 'application/json',
'Authorization' : 'Some authorization code'
}
data = {'file': open('1.pdf', 'rb')}
r = requests.post(url, params=parameters, headers=headers, data=data)
print(r.text)
But it seems that requests is dropping the data:
{
"args": {
"format": "pdf"
},
"data": "",
"files": {},
"form": {},
"headers": {
"Accept": "application/json",
"Accept-Encoding": "gzip, deflate",
"Authorization": "Some authorization code",
"Connection": "close",
"Content-Length": "30",
"Content-Type": "multipart/form-data",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.18.1"
},
"json": null,
"origin": "x.x.x.x",
"url": "http://httpbin.org/post?format=pdf"
}
it works when I remove 'headers' part in request:
r = requests.post(url, params=parameters, data=data)
Because response is :
{
"args": {
"format": "pdf"
},
"data": "",
"files": {},
"form": {
"fax_file": "some samplae texts\n"
},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Connection": "close",
"Content-Length": "30",
"Content-Type": "application/x-www-form-urlencoded",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.18.1"
},
"json": null,
"origin": "x.x.x.x",
"url": "http://httpbin.org/post?format=pdf"
}
I have tried prepared request too and result is same.
You are trying to post file data, so use the files option:
r = requests.post(url, params=parameters, files=data, headers=headers)
You should really not set the Content-Type header, however; it is set for you when you use the files option. The header in this case includes the field boundary, so you really want the library to take care of this for you:
headers = {
'Accept': 'application/json',
'Authorization' : 'Some authorization code'
}
If you leave the Content-Type header in, you’d have to generate the content body up front to be able to supply the required boundary info to the receiving server.
You could experiment with dropping the Accept header too; by default requests will add Accept: */* if you don't specify that header, signalling that anything is acceptable.
When using only the data argument, the parameters are encoded as application/x-www-form-urlencoded form data, which doesn't support large file data, and your Content-Type header doesn't match the actual POST body content.
See Post a Multipart-Encoded File in the requests documentation and application/x-www-form-urlencoded or multipart/form-data? here on Stack Overflow.
Demo:
>>> import requests
>>> url = 'http://httpbin.org/post'
>>> parameters = {'format': 'pdf'}
>>> headers = {
... 'Accept': 'application/json',
... 'Authorization' : 'Some authorization code',
... }
>>> data = {'file': open('1.pdf', 'rb')}
>>> r = requests.post(url, params=parameters, files=data, headers=headers)
>>> print(r.text)
{
"args": {
"format": "pdf"
},
"data": "",
"files": {
"file": "<file data as base64>"
},
"form": {},
"headers": {
"Accept": "application/json",
"Accept-Encoding": "gzip, deflate",
"Authorization": "Some authorization code",
"Cache-Control": "max-age=0",
"Connection": "close",
"Content-Length": "374751",
"Content-Type": "multipart/form-data; boundary=d4b84f8bfd464e3f97e3de584d7315fc",
"Host": "httpbin.org",
"O2Gw-Id": "03",
"User-Agent": "python-requests/2.18.4",
"X-Gateway": "wap.london.02.net"
},
"json": null,
"origin": "10.120.6.78, 82.132.221.209",
"url": "http://httpbin.org/post?format=pdf"
}
Note the multipart/form-data; boundary=d4b84f8bfd464e3f97e3de584d7315fc value for the Content-Type header!

Categories

Resources