I'm using an API from Anomali to gather an intel list, and I want to ask how I can run the code so that it outputs all the needed column headers into an Excel file.
So I created code that pulls out the needed columns:
import requests
import json
import pandas as pd
import csv
url = 'https://api.threatstream.com/api/v2/intelligence/?itype=bot_ip'
csv_columns = ['ip','source_created', 'status', 'itype', 'expiration_ts', 'is_editable', 'feed_id', 'update_id',
'value', 'ispublic', 'threat_type', 'workgroups', 'rdns', 'confidence', 'uuid', 'retina_confidence',
'trusted_circle_ids', 'id', 'source', 'owner_organization_id', 'import_session_id', 'source_modified',
'type', 'sort', 'description', 'tags', 'threatscore', 'latitude', 'modified_ts', 'org', 'asn',
'created_ts', 'tlp', 'is_anonymous', 'country', 'source_reported_confidence', 'can_add_public_tags',
'subtype', 'meta', 'resource_uri']
with open("AnomaliThreat.csv","a", newline='') as filecsv:
writer = csv.DictWriter(filecsv, fieldnames=csv_columns)
writer.writeheader()
headers = {
    'Accept': 'application/json',
    'Authorization': 'apikey testing:wdwfawaf12321rfewawafa'
}
response = requests.get(url=url, headers=headers)
json_Data = json.loads(response.content)
result = json_Data["objects"]
with open("AnomaliThreat.csv","a", newline='')as filecsv:
writer = csv.DictWriter(filecsv,fieldnames=csv_columns)
writer.writerow(result)
If I run this code, all I get is 'list' object has no attribute 'keys'. My guess is that it's because inside the response there's a list inside the list, or another structure inside the list, for example like this:
'trusted_circle_ids': [1241412, 212141241]
or this
'tags': [{'id': 'fwafwff', 'name': 'wfwafwawf'},
{'id': '31231ewfw',
'name': 'fwafwafwafaw@gmail.com.wafawfawfds.com'}],
And this is what's inside the response from Anomali:
[{'source_created': None,
'status': 'inactive',
'itype': 'bot_ip',
'expiration_ts': '',
'ip': '231.24124.1241.412',
'is_editable': False,
'feed_id': 23112231,
'update_id': 231231,
'value': '124124124141224141',
'is_public': False,
'threat_type': 'bot',
'workgroups': [],
'rdns': None,
'confidence': 12,
'uuid': '3123414124124142',
'retina_confidence': 52414,
'trusted_circle_ids': [1241412, 212141241],
'id': fwaffewaewafw1231231,
'source': 'wfawfwaefwadfwa',
'owner_organization_id': 2,
'import_session_id': None,
'source_modified': None,
'type': 'ip',
'sort': [312312424124141241, '1241414214241'],
'description': None,
'tags': [{'id': 'fwafwff', 'name': 'wfwafwawf'},
{'id': '31231ewfw',
'name': 'fwafwafwafaw@gmail.com.wafawfawfds.com'}],
'threatscore': 412,
'latitude': wafefwaf,
'modified_ts': 'wawafwadfd',
'org': 'fawfwafawe',
'asn': 'fwafwa2131231',
'created_ts': '41241241241241',
'tlp': None,
'is_anonymous': False,
'country': 'fwafw',
'source_reported_confidence': 21,
'can_add_public_tags': False,
'longitude': --321412,
'subtype': None,
'meta': {'detail2': 'bi2141412412342424',
'severity': '3123124r3'},
'resource_uri': '/api/v2/intelligence/241fsdfsf241325/'},
{... a second object with the same fields as the first ...}]
I'm open to any suggestions on how to make it so that the results can be written into an Excel file.
Problem Solved!
I needed to open the output file and write the header and rows myself, so I added these lines:
data_file = open("AnomaliThreat.csv", "w", newline='')  # open the output file first
csv_writer = csv.writer(data_file)

count = 0
for res in result:
    if count == 0:
        # Write the header row from the first record's keys
        header = res.keys()
        csv_writer.writerow(header)
        count += 1
    csv_writer.writerow(res.values())

data_file.close()
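For reference, the same output can also be produced with csv.DictWriter by telling it to ignore keys that are not in csv_columns; a minimal sketch, assuming result is the list of dicts parsed from the response:

with open("AnomaliThreat.csv", "w", newline='') as filecsv:
    # extrasaction='ignore' skips keys not listed in csv_columns;
    # missing keys are filled with the default restval ('')
    writer = csv.DictWriter(filecsv, fieldnames=csv_columns, extrasaction='ignore')
    writer.writeheader()
    writer.writerows(result)  # writerows() accepts the whole list of dicts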
You can try doing something like this, if I understood correctly:
import requests
import json
import pandas as pd
import csv
url = 'https://api.threatstream.com/api/v2/intelligence/?itype=bot_ip'
csv_columns = ['ip','source_created', 'status', 'itype', 'expiration_ts', 'is_editable', 'feed_id', 'update_id',
'value', 'ispublic', 'threat_type', 'workgroups', 'rdns', 'confidence', 'uuid', 'retina_confidence',
'trusted_circle_ids', 'id', 'source', 'owner_organization_id', 'import_session_id', 'source_modified',
'type', 'sort', 'description', 'tags', 'threatscore', 'latitude', 'modified_ts', 'org', 'asn',
'created_ts', 'tlp', 'is_anonymous', 'country', 'source_reported_confidence', 'can_add_public_tags',
'subtype', 'meta', 'resource_uri']
headers = {
    'Accept': 'application/json',
    'Authorization': 'apikey testing:wdwfawaf12321rfewawafa'
}
response = requests.get(url=url, headers=headers)
json_Data = json.loads(response.content)
result = json_Data["objects"]
rows = []
for item in result:
    # Keep only the key-value pairs whose key is a wanted column
    rows.append({key: value for key, value in item.items() if key in csv_columns})
dataframe_1 = pd.DataFrame(rows)
dataframe_1.to_csv("AnomaliThreat.csv", index=False)
Something along those lines: basically iterate through the records in result, keep each key-value pair whose key is in csv_columns, build a DataFrame from those rows, and finally use dataframe.to_csv.
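If an actual Excel (.xlsx) file is needed rather than a CSV, pandas can also write Excel directly; a one-line sketch, assuming an Excel writer such as openpyxl is installed:

dataframe_1.to_excel("AnomaliThreat.xlsx", index=False)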
I'm sorry for my bad English.
Hello, I am using a brokerage firm for payment processing. The API connection is successful and I get a result, but I can't use the returned result information.
payment_card = {
    'cardHolderName': kartisim,
    'cardNumber': kartno,
    'expireMonth': kartskt_ay,
    'expireYear': '2030',
    'cvc': karcvc,
    'registerCard': '0'
}
buyer = {
    'id': adres.id,
    'name': adres.adres_uye.username,
    'surname': 'Doe',
    'gsmNumber': '+905350000000',
    'email': adres.adres_uye.email,
    'identityNumber': '74300864791',
    'lastLoginDate': '2015-10-05 12:43:35',
    'registrationDate': '2013-04-21 15:12:09',
    'registrationAddress': adres.adres_detay,
    'ip': '85.34.78.112',
    'city': 'Istanbul',
    'country': 'Turkey',
    'zipCode': '34732'
}
address = {
    'contactName': 'Jane Doe',
    'city': 'Istanbul',
    'country': 'Turkey',
    'address': 'Nidakule Göztepe, Merdivenköy Mah. Bora Sok. No:1',
    'zipCode': '34732'
}
basket_items = []
for bas in uye:
    basa = {
        'id': str(bas.id),
        'name': str(bas.sepet_urun.urun_baslik),
        'category1': str(bas.sepet_urun.urun_anakategori.anakategori_baslik),
        'category2': str(bas.sepet_urun.urun_altkategori.altkategori_baslik),
        'itemType': 'VIRTUAL',
        'price': str(bas.sepet_fiyat)
    }
    basket_items.append(basa)
print(basket_items)
request_payload = {
    'locale': 'tr',
    'conversationId': '123456789',
    'price': str(sepetf),
    'paidPrice': str(sepetf),
    'currency': 'TRY',
    'installment': '1',
    'basketId': str(sepetid),
    'paymentChannel': 'WEB',
    'paymentGroup': 'PRODUCT',
    'paymentCard': payment_card,
    'buyer': buyer,
    'shippingAddress': address,
    'billingAddress': address,
    'basketItems': basket_items
}
payment = iyzipay.Payment().create(request_payload, options)
print(payment.read().decode('utf-8'))
return HttpResponse(payment["status"])
I cannot use the returned result information. The error I get is as follows:
'HTTPResponse' object is not subscriptable
Looks like the issue is here.
return HttpResponse(payment["status"])
payment is an HTTPResponse object, so you cannot index it directly for the status. Use the .status attribute instead.
If your intention is to return JSON back as the response, you could use the JsonResponse class from the django.http module:
return JsonResponse(json.loads(payment.read().decode('utf-8')))
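For completeness, a minimal sketch of how this could look in one place (do_payment is a hypothetical helper; request_payload and options are assumed to be built as in the question):

import json
import iyzipay
from django.http import JsonResponse

def do_payment(request_payload, options):  # hypothetical helper
    payment = iyzipay.Payment().create(request_payload, options)
    body = payment.read().decode('utf-8')  # read() consumes the stream, so call it only once
    result = json.loads(body)              # parse the JSON body into a dict
    # Indexing like result['status'] now works on the parsed dict
    return JsonResponse(result)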
Here is the script:
from bs4 import BeautifulSoup as bs4
import requests
import json
from lxml import html
from pprint import pprint
import re
def get_data():
    url = 'https://sports.bovada.lv//baseball/mlb/game-lines-market-group'
    r = requests.get(url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36"})
    html_bytes = r.text
    soup = bs4(html_bytes, 'lxml')
    # res = soup.findAll('script')  # find all scripts..
    pattern = re.compile(r"swc_market_lists\s+=\s+(\{.*?\})")
    script = soup.find("script", text=pattern)
    return script.text[23:]
test1 = get_data()
data = json.loads(test1)
for item1 in data['items']:
    data1 = item1['itemList']['items']
    for item2 in data1:
        pitch_a = item2['opponentAName']
        pitch_b = item2['opponentBName']
##        group = item2['displayGroups']
##        for item3 in group:
##            new_il = item3['itemList']
##            for item4 in new_il:
##                market = item4['description']
##                oc = item4['outcomes']
        print(pitch_a, pitch_b)
##for items in data['items']:
## pos = items['itemList']['items']
## for item in pos:
## work = item['competitors']
## pitcher_a = item['opponentAName']
## pitcher_b = item['opponentBName']
## group = item['displayGroups']
## for item, item2 in zip(work,group):
## team = item['abbreviation']
## place = item['type']
## il2 = item2['itemList']
## for item in il2:
## ml = item['description']
## print(team,place,pitcher_a,pitcher_b,ml)
I have been trying to scrape:
team abbrev = ['items']['itemList']['items']['competitors']['abbreviation']
home_away = ['items']['itemList']['items']['competitors']['type']
team pitcher home = ['items']['itemList']['items']['opponentAName']
team pitcher away = ['items']['itemList']['items']['opponentBName']
moneyline american odds = ['items']['itemList']['items']['displayGroups']['itemList']['outcomes']['price']['american']
Total runs = ['items']['itemList']['items']['displayGroups']['itemList']['outcomes']['price']['handicap']
Part of the JSON, pprinted:
[{'baseLink': '/baseball/mlb/game-lines-market-group',
'defaultType': True,
'description': 'Game Lines',
'id': '136',
'itemList': {'items': [{'LIVE': True,
'atmosphereLink': '/api/atmosphere/eventNotification/events/A/3149961',
'awayTeamFirst': True,
'baseLink': '/baseball/mlb/minnesota-twins-los-angeles-angels-201805112207',
'competitionId': '24736',
'competitors': [{'abbreviation': 'LAA',
'description': 'Los Angeles Angels',
'id': '3149961-1642',
'rotationNumber': '978',
'shortName': 'Angels',
'type': 'HOME'},
{'abbreviation': 'MIN',
'description': 'Minnesota Twins',
'id': '3149961-9990',
'rotationNumber': '977',
'shortName': 'Twins',
'type': 'AWAY'}],
'denySameGame': 'NO',
'description': 'Minnesota Twins @ Los Angeles Angels',
'displayGroups': [{'baseLink': '/baseball/mlb/game-lines-market-group',
'defaultType': True,
'description': 'Game Lines',
'id': '136',
'itemList': [{'belongsToDefault': True,
'columns': 'H2Columns',
'description': 'Moneyline',
'displayGroups': '136,A-136',
'id': '46892277',
'isInRunning': True,
'mainMarketType': 'MONEYLINE',
'mainPeriod': True,
'marketTypeGroup': 'MONEY_LINE',
'notes': '',
'outcomes': [{'competitorId': '3149961-9990',
'description': 'Minnesota '
'Twins',
'id': '211933276',
'price': {'american': '-475',
'decimal': '1.210526',
'fractional': '4/19',
'id': '1033002124',
'outcomeId': '211933276'},
'status': 'OPEN',
'type': 'A'},
{'competitorId': '3149961-1642',
'description': 'Los '
'Angeles '
'Angels',
'id': '211933277',
'price': {'american': '+310',
'decimal': '4.100',
'fractional': '31/10',
'id': '1033005679',
'outcomeId': '211933277'},
'status': 'OPEN',
'type': 'H'}],
'periodType': 'Live '
'Match',
'sequence': '14',
'sportCode': 'BASE',
'status': 'OPEN',
'type': 'WW'},
{'belongsToDefault': True,
'columns': 'H2Columns',
'description': 'Runline',
'displayGroups': '136,A-136',
'id': '46892287',
'isInRunning': True,
'mainMarketType': 'SPREAD',
'mainPeriod': True,
'marketTypeGroup': 'SPREAD',
'notes': '',
'outcomes': [{'competitorId': '3149961-9990',
'description': 'Minnesota '
'Twins',
'id': '211933278',
'price': {'american': '+800',
'decimal': '9.00',
'fractional': '8/1',
'handicap': '-1.5',
'id': '1033005677',
'outcomeId': '211933278'},
'status': 'OPEN',
'type': 'A'},
{'competitorId': '3149961-1642',
'description': 'Los '
'Angeles '
'Angels',
'id': '211933279',
'price': {'american': '-2000',
'decimal': '1.050',
'fractional': '1/20',
'handicap': '1.5',
'id': '1033005678',
'outcomeId': '211933279'},
'status': 'OPEN',
'type': 'H'}],
'periodType': 'Live '
'Match',
'sequence': '14',
'sportCode': 'BASE',
'status': 'OPEN',
'type': 'SPR'}],
'link': '/baseball/mlb/game-lines-market-group'}],
'feedCode': '13625145',
'id': '3149961',
'link': '/baseball/mlb/minnesota-twins-los-angeles-angels-201805112207',
'notes': '',
'numMarkets': 2,
'opponentAId': '214704',
'opponentAName': 'Tyler Skaggs (L)',
'opponentBId': '215550',
'opponentBName': 'Lance Lynn (R)',
'sport': 'BASE',
'startTime': 1526090820000,
'status': 'O',
'type': 'MLB'},
There are a few different loops I started in the script above, but neither of them is working out the way I would like.
Eventually I would like the output to be: away team | away moneyline | away pitcher | total runs | and then the same for the home team. I can write to CSV once it is parsed the proper way.
Thank you for the fresh set of eyes; I've been working on this for the better part of a day trying to figure out the best way to access the content I would like. If JSON is not the best way and bs4 works better, I would love to hear your opinion.
There's no simple answer to your problem. Scraping data requires you to carefully assess the data you are dealing with, work out where the parts you want to extract are located, and figure out how to effectively store the data you extract.
Try printing the data in your loops to visualise what is happening in your code (or try debugging). From there it's easy to figure out whether you're iterating over what you expect. Look for patterns throughout the input data to help organise the data you extract.
To help yourself, you should give your variables descriptive names, separate your code into logical chunks, and add comments when it starts to get complicated.
Here's some working code, but I encourage you to try what I told you above, then if you're still stuck look below for guidance.
output = {}
root = data['items'][0]
for game_line in root['itemList']['items']:
    # Create a temporary dict to store the data for this gameline
    team_data = {}
    # Get competitors
    competitors = game_line['competitors']
    for team in competitors:
        team_type = team['type']  # either HOME or AWAY
        # Create a new dict to store data for each team
        team_data[team_type] = {}
        team_data[team_type]['abbreviation'] = team['abbreviation']
        team_data[team_type]['name'] = team['description']
    # Get MoneyLine and Total Runs
    for item in game_line['displayGroups'][0]['itemList']:
        for outcome in item['outcomes']:
            team_type = outcome['type']  # either A or H
            team_type = 'HOME' if team_type == 'H' else 'AWAY'
            if item['mainMarketType'] == 'MONEYLINE':
                team_data[team_type]['moneyline'] = outcome['price']['american']
            elif item['mainMarketType'] == 'SPREAD':
                team_data[team_type]['total runs'] = outcome['price']['handicap']
    # Get the pitchers
    team_data['HOME']['pitcher'] = game_line['opponentAName']
    team_data['AWAY']['pitcher'] = game_line['opponentBName']
    # For each gameline, add the team data we gathered to the output dict
    output[game_line['description']] = team_data
This produces output like:
{
'Atlanta Braves @ Miami Marlins': {
'AWAY': {
'abbreviation': 'ATL',
'moneyline': '-130',
'name': 'Atlanta Braves',
'pitcher': 'Mike Soroka (R)',
'total runs': '-1.5'
},
'HOME': {
'abbreviation': 'MIA',
'moneyline': '+110',
'name': 'Miami Marlins',
'pitcher': 'Jarlin Garcia (L)',
'total runs': '1.5'
}
},
'Boston Red Sox @ Toronto Blue Jays': {
'AWAY': {
'abbreviation': 'BOS',
'moneyline': '-133',
'name': 'Boston Red Sox',
'pitcher': 'David Price (L)',
'total runs': '-1.5'
},
'HOME': {
'abbreviation': 'TOR',
'moneyline': '+113',
'name': 'Toronto Blue Jays',
'pitcher': 'Marco Estrada (R)',
'total runs': '1.5'
}
},
}
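From there, the flat row layout you described falls out directly; a minimal sketch, assuming output is the dict built above ('gamelines.csv' is a hypothetical filename):

import csv

with open('gamelines.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['game', 'away team', 'away moneyline', 'away pitcher', 'away total runs',
                     'home team', 'home moneyline', 'home pitcher', 'home total runs'])
    for game, sides in output.items():
        away, home = sides['AWAY'], sides['HOME']
        writer.writerow([game,
                         away['abbreviation'], away['moneyline'], away['pitcher'], away['total runs'],
                         home['abbreviation'], home['moneyline'], home['pitcher'], home['total runs']])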
I have a text file filled with place data provided by the Twitter API. Here is a sample of 2 lines:
{'country': 'United Kingdom', 'full_name': 'Dorridge, England', 'id': '31fe56e2e7d5792a', 'country_code': 'GB', 'name': 'Dorridge', 'attributes': {}, 'contained_within': [], 'place_type': 'city', 'bounding_box': {'coordinates': [[[-1.7718518, 52.3635912], [-1.7266702, 52.3635912], [-1.7266702, 52.4091167], [-1.7718518, 52.4091167]]], 'type': 'Polygon'}, 'url': 'https://api.twitter.com/1.1/geo/id/31fe56e2e7d5792a.json'}
{'country': 'India', 'full_name': 'New Delhi, India', 'id': '317fcc4b21a604d5', 'country_code': 'IN', 'name': 'New Delhi', 'attributes': {}, 'contained_within': [], 'place_type': 'city', 'bounding_box': {'coordinates': [[[76.84252, 28.397657], [77.347652, 28.397657], [77.347652, 28.879322], [76.84252, 28.879322]]], 'type': 'Polygon'}, 'url': 'https://api.twitter.com/1.1/geo/id/317fcc4b21a604d5.json'}
I want the 'country', 'name' and 'coordinates' fields of each line. In order to do this we need to iterate over the entire file line by line, so I append each line to a list:
data = []
with open('place.txt', 'r') as f:
    for line in f:
        data.append(line)
When I checked the data type, it shows as 'str' instead of 'dict':
type(data[0])
str
data[0].keys()
AttributeError: 'str' object has no attribute 'keys'
How do I fix this so that the data can be saved as a list of dictionaries?
Originally, the tweets were encoded and decoded by the following code:
f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\n') #encoded and saved to a .txt file
tweets.append(jsonpickle.decode(line)) # decoding
And the place data file is saved by the following code:
fName = "place.txt"
newLine = "\n"
with open(fName, 'a', encoding='utf-8') as f:
for i in range(len(tweets)):
f.write('{}'.format(tweets[i]['place']) +'\n')
In your case you should use json to do the data parsing. But if you have a problem with json (which is almost impossible, since we are talking about an API), then in general to convert from a string to a dictionary you can do:
>>> import ast
>>> x = "{'country': 'United Kingdom', 'full_name': 'Dorridge, England', 'id': '31fe56e2e7d5792a', 'country_code': 'GB', 'name': 'Dorridge', 'attributes': {}, 'contained_within': [], 'place_type': 'city', 'bounding_box': {'coordinates': [[[-1.7718518, 52.3635912], [-1.7266702, 52.3635912], [-1.7266702, 52.4091167], [-1.7718518, 52.4091167]]], 'type': 'Polygon'}, 'url': 'https://api.twitter.com/1.1/geo/id/31fe56e2e7d5792a.json'}
"
>>> d = ast.literal_eval(x)
>>> d
d is now a dictionary instead of a string.
But again, if your data are in JSON format, Python has a built-in lib to handle JSON, and it is better and safer to use json than ast.
For example, if you get a response, let's say resp, you could simply do:
response = json.loads(resp)
and now you could parse response as a dictionary.
Note: Single quotes are not valid JSON.
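Applied to the loop from the question, a minimal sketch (assuming each line of place.txt is one Python-literal dict, as in the samples above):

import ast

data = []
with open('place.txt', 'r') as f:
    for line in f:
        line = line.strip()
        if line:  # skip any blank lines
            data.append(ast.literal_eval(line))

print(type(data[0]))  # <class 'dict'>
print(data[0]['country'], data[0]['name'])
print(data[0]['bounding_box']['coordinates'])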
I have never tried the Twitter API. It looks like your data are not valid JSON. Here is a simple preprocessing method to replace ' (single quote) with " (double quote):
data = "{'country': 'United Kingdom', ... }"
json_data = data.replace('\'', '\"')
dict_data = json.loads(json_data)
dict_data.keys()
# [u'full_name', u'url', u'country', ... ]
You should use the Python json library for parsing and getting the values. In Python it's quite easy:
import json
x = '{"country": "United Kingdom", "full_name": "Dorridge, England", "id": "31fe56e2e7d5792a", "country_code": "GB", "name": "Dorridg", "attributes": {}, "contained_within": [], "place_type": "city", "bounding_box": {"coordinates": [[[-1.7718518, 52.3635912], [-1.7266702, 52.3635912], [-1.7266702, 52.4091167], [-1.7718518, 52.4091167]]], "type": "Polygon"}, "url": "https://api.twitter.com/1.1/geo/id/31fe56e2e7d5792a.json"}'
y = json.loads(x)
print(y["country"],y["name"],y["bounding_box"]["coordinates"])
You can use a list like this:
mlist = list()
for i in ndata.keys():
    mlist.append(i)
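Since dict keys are already iterable, the same thing can be written in one line:

mlist = list(ndata.keys())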
My code is:
from ipwhois import IPWhois
import pprint
obj = IPWhois('74.125.227.206')
results = obj.lookup_rws()
pprint.pprint(results)
It returns:
{'asn': '15169',
'asn_cidr': '74.125.227.0/24',
'asn_country_code': 'US',
'asn_date': '2007-03-13',
'asn_registry': 'arin',
'nets': [{'abuse_emails': 'arin-contact@google.com',
'address': '1600 Amphitheatre Parkway',
'cidr': '74.125.0.0/16',
'city': 'Mountain View',
'country': 'US',
'created': '2007-03-13T12:09:54-04:00',
'description': 'Google Inc.',
'handle': u'NET-74-125-0-0-1',
'misc_emails': None,
'name': 'GOOGLE',
'postal_code': '94043',
'range': u'74.125.0.0 - 74.125.255.255',
'state': 'CA',
'tech_emails': 'arin-contact@google.com',
'updated': '2012-02-24T09:44:34-05:00'}],
'query': '74.125.227.206',
'raw': None}
What is the best or easiest way in Python to print a single line from the output? For example:
'name': 'GOOGLE',
or
'abuse_emails': 'arin-contact@google.com',
Any help would be appreciated!
results is already a dictionary, so just get the keys and values that you want.
from ipwhois import IPWhois
obj = IPWhois('74.125.227.206')
results = obj.lookup_rws()
print(results['nets'][0]['name'])
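The abuse email from the question follows the same pattern of indexing into the nets list:

print(results['nets'][0]['abuse_emails'])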
The lookup_rws() function is deprecated. Instead, use:
from ipwhois import IPWhois
obj = IPWhois('74.125.227.206')  # substitute the IP you want to look up
result = obj.lookup_rdap(depth=1)
print(result['network']['name'])