Issue: The for loop for this function is not iterating the over all elements. Its stopping at 1. I used some diagnostic print statements to count the number of loops and its stopping at 1. I have reviewed the indentiation and the loop but cannot seem to find the issue.
def process_data(data):
"""Analyzes the data, looking for maximums.
Returns a list of lines that summarize the information.
"""
loop_count = 0
year_by_sales = dict()
max_revenue = {"revenue": 0}
# ----------->This is where the Loop Issue Exists <-----
for item in data:
item_price = locale.atof(item["price"].strip("$"))
item_revenue = item["total_sales"] * item_price
if item["car"]["car_year"] not in year_by_sales.keys():
year_by_sales[item["car"]["car_year"]] = item["total_sales"]
loop_count += 1
if item_revenue > max_revenue["revenue"]:
item["revenue"] = item_revenue
max_revenue = item
most_sold_model = item['car']['car_model']
highest_total_sales = item["total_sales"]
else:
year_by_sales[item["car"]["car_year"]] += item["total_sales"]
loop_count +=1
most_popular_year = max(year_by_sales, key=year_by_sales.get)
summary = [
"The {} generated the most revenue: ${}".format(
format_car(max_revenue["car"]), max_revenue["revenue"]
),
f"The {most_sold_model} had the most sales: {highest_total_sales}",
f"The most popular year was {most_popular_year} with {highest_total_sales} sales.",
]
print(loop_count)
print(year_by_sales)
return summary
Input Data
[{
"id": 1,
"car": {
"car_make": "Ford",
"car_model": "Club Wagon",
"car_year": 1997
},
"price": "$5179.39",
"total_sales": 446
},
{
"id": 2,
"car": {
"car_make": "Acura",
"car_model": "TL",
"car_year": 2005
},
"price": "$14558.19",
"total_sales": 589
},
{
"id": 3,
"car": {
"car_make": "Volkswagen",
"car_model": "Jetta",
"car_year": 2009
},
"price": "$14879.11",
"total_sales": 825
}]
The entire codebase for this script is https://replit.com/join/dkuzpdujne-terry-brooksjr
Actually problem is that your return statement is inside the for loop so you return after the first iteration itself,
it should run just fine if you move it outside something like below:
def process_data(data):
"""Analyzes the data, looking for maximums.
Returns a list of lines that summarize the information.
"""
loop_count = 0
year_by_sales = dict()
max_revenue = {"revenue": 0}
# ----------->This is where the Loop Issue Exists <-----
for item in data:
item_price = locale.atof(item["price"].strip("$"))
item_revenue = item["total_sales"] * item_price
if item["car"]["car_year"] not in year_by_sales.keys():
year_by_sales[item["car"]["car_year"]] = item["total_sales"]
loop_count += 1
if item_revenue > max_revenue["revenue"]:
item["revenue"] = item_revenue
max_revenue = item
most_sold_model = item['car']['car_model']
highest_total_sales = item["total_sales"]
else:
year_by_sales[item["car"]["car_year"]] += item["total_sales"]
loop_count +=1
most_popular_year = max(year_by_sales, key=year_by_sales.get)
summary = "1"
print(loop_count)
print(year_by_sales)
return summary # move this out of for loop
Related
I have a JSON log file and want to print and count the number of times a URL(requestURL) has been hit by an IP in the same log file. The output should be like the below:
IP(remoteIp): URL1-(Count), URL2-(Count), URL3...
127.0.0.1: http://www.google.com - 12, www.bing.com/servlet-server.jsp - 2, etc..
The Sample of the Logfile is like below
"insertId": "kdkddkdmdkd",
"jsonPayload": {
"#type": "type.googleapis.com/google.cloud.loadbalancing.type.LoadBalancerLogEntry",
"enforcedSecurityPolicy": {
"configuredAction": "DENY",
"outcome": "DENY",
"preconfiguredExprIds": [
"owasp-crs-v030001-id942220-sqli"
],
"name": "shbdbbddjdjdjd",
"priority": 2000
},
"statusDetails": "body_denied_by_security_policy"
},
"httpRequest": {
"requestMethod": "POST",
"requestUrl": "https://dknnkkdkddkd/token",
"requestSize": "3004",
"status": 403,
"responseSize": "274",
"userAgent": "okhttp/3.12.2",
"remoteIp": "127.0.0.1",
"serverIp": "123.123.33.31",
"latency": "0.018728s"
}
The solution that I am using is below. I am able to get the total hits per IP or how many total times a URL has been hit etc.
import json
from collections import Counter
unique_ip = {}
request_url = {}
def getAndSaveValueSafely(freqTable, searchDict, key):
try:
tmp = searchDict['httpRequest'][key]
if tmp in freqTable:
freqTable[tmp] += 1
else:
freqTable[tmp] = 1
except KeyError:
if 'not_present' in freqTable:
freqTable['not_present'] += 1
else:
freqTable['not_present'] = 1
with open("threat_intel_1.json") as file:
data = json.load(file)
for d2 in data:
getAndSaveValueSafely(unique_ip, d2, 'remoteIp')
getAndSaveValueSafely(request_url, d2, 'requestUrl')
mc_unique_ip = (dict(Counter(unique_ip).most_common()))
mc_request_url = (dict(Counter(request_url).most_common()))
def printing():
a = str(len(unique_ip))
b = str(len(request_url))
with open("output.txt", "w") as f1:
print(
f' Start Time of log = {minTs}'
f' \n\n End Time of log = {maxTs} \n\n\n {a} Unique IP List = {mc_unique_ip} \n\n\n {b} Unique URL = {mc_request_url},file=f1)
I dont think you need to use counter and are unlikely to see any benifit
from collections import defaultdict
result = {} # start empty
with open("threat_intel_1.json") as file:
data = json.load(file)
for d2 in data:
req = d2.get('httpRequest',None)
if not req:
continue
url = req['requestUrl']
ip = req['remoteIp']
result.setdefault(url,defaultdict(int))[ip] += 1
print(result)
# {"/endpoint.html": {"127.2.3.4":15,"222.11.31.22":2}}
if instead you want it the other way thats easy also
for d2 in data:
req = d2.get('httpRequest',None)
if not req:
continue
url = req['requestUrl']
ip = req['remoteIp']
result.setdefault(ip,defaultdict(int))[url] += 1
#{"127.1.2.3",{"/endpoint1.html":15,"/endpoint2.php":1},"33.44.55.66":{"/endpoint1.html":5}, ...}
instead of using defaultdict you could add a line
# result.setdefault(ip,defaultdict(int))[url] += 1
result.setdefault(ip,{})
result[ip][url] = result[ip].get(url,0) + 1
which arguably is more readable anyway...
I'm trying to return only a specific value from the "data" key in this response that I'm currently working with:
{
"dataset": {
"id": 49333506,
"dataset_code": "YMAB",
"database_code": "QOR",
"name": "Y-mAbs Therapeutics Inc. (YMAB) Option Earnings Crush, Liquidity, and Volatility Ratings",
"description": "Option Earnings Crush, Liquidity, and Volatility Ratings for Y-mAbs Therapeutics Inc. (YMAB). All time periods are measured in calendar days. See documentation for methodology.",
"refreshed_at": "2022-08-05 21:20:34 UTC",
"newest_available_date": "2022-08-05",
"oldest_available_date": "2020-02-12",
"column_names": [
"Date",
"EarningsCrushRate",
"CalendarDaysUntilEarnings",
"TradingDaysUntilEarnings",
"LiquidityRating",
"HasLeapOptions",
"HasWeeklyOptions",
"Iv30Rank",
"Iv30Percentile",
"Iv30Rating",
"Iv60Rank",
"Iv60Percentile",
"Iv60Rating",
"Iv90Rank",
"Iv90Percentile",
"Iv90Rating",
"Iv360Rank",
"Iv360Percentile",
"Iv360Rating"
],
"frequency": "daily",
"type": "Time Series",
"premium": true,
"limit": null,
"transform": null,
"column_index": null,
"start_date": "2020-02-12",
"end_date": "2022-08-05",
"data": [
[
"2022-08-05",
null,
null,
null,
2.0,
0.0,
0.0,
0.1437,
0.4286,
0.3706,
0.1686,
0.4762,
0.3936,
0.1379,
0.4502,
0.4129,
0.107,
0.5152,
0.4657
],
I only want to return the date, and a single value at a time from the "data": [ key that's within "dataset": {.
Here's the code I have so far, but am stuck as to make this happen:
r = requests.get(url=f"https://data.nasdaq.com/api/v3/datasets/QOR/{symbol}/data.json?api_key={apikey}")
d = r.json()
dataset = d['dataset_data']
data = dataset['data']
column_names = dataset['column_names']
date = column_names[0]
ercrush = column_names[1]
calendar = column_names[2]
tradingdays = column_names[3]
liquidity = column_names[4]
leaps = column_names[5]
weeklies = column_names[6]
ivrank30 = column_names[7]
ivper30 = column_names[8]
ivrate30 = column_names[9]
ivrank60 = column_names[10]
ivper60 = column_names[11]
ivrate60 =column_names[12]
ivrank90 = column_names[13]
ivper90 = column_names[14]
ivrank90 = column_names[15]
ivrank360= column_names[16]
ivper360 = column_names[17]
ivrank360 = column_names[18]
values = data[0]
For example - I'm only trying to return the Date, defined as column_names[0] paired with the value of "2022-08-05" that's within "data": [ , etc.
How would I go about doing this?
Thanks so much for any help.
I figured out the issue!
I created another variable called results = values and now I can pick the values I want and easily match them with the column_names!
Awesome!
The finished code that works:
r = requests.get(url=f"https://data.nasdaq.com/api/v3/datasets/QOR/{symbol}/data.json?api_key=KyVWdRX_o26L5XNUkgqN")
d = r.json()
dataset = d['dataset_data']
data = dataset['data']
column_names = dataset['column_names']
Date = column_names[0]
ercrush = column_names[1]
calendar = column_names[2]
tradingdays = column_names[3]
liquidity = column_names[4]
leaps = column_names[5]
weeklies = column_names[6]
ivrank30 = column_names[7]
ivper30 = column_names[8]
ivrate30 = column_names[9]
ivrank60 = column_names[10]
ivper60 = column_names[11]
ivrate60 =column_names[12]
ivrank90 = column_names[13]
ivper90 = column_names[14]
ivrank90 = column_names[15]
ivrank360= column_names[16]
ivper360 = column_names[17]
ivrank360 = column_names[18]
values = data[0]
results = values[2] #the correction
print(results)
I am working on a project for a python class, where we have to create a vin number look up tool. This code below works so far for country and year made. However I am having an issue with how I am indexing the user input.
Sometimes a country is the first two characters, and other times the country code is only the first character. I do not how to get around this problem, I tried using an if else kind of method while iterating through the dictionary, but it did not work.
class vinfo():
def __init__(self):
""" Get user input """
vin = input("Enter Vin: ")
""" Stupidity check """
unaccepted_chars = [
"-","/",".",","," ","?",
"^","$","*","(",")","[",
"]","{","}","#","!","_",
"+","'","=","|","#","<",
">","`","~","&"
]
for char in vin:
if char in unaccepted_chars:
vin = vin.replace(char, "")
else:
print("", end = "")
""" Length check and index """
if len(vin) == 17:
self.country_filt1 = vin[0]
self.country_filt2 = vin[0:2]
self.engine_filt = vin[7]
self.year_filt = vin[9]
self.manufact_filt = vin[1:3]
self.manufact_filt1 = vin[1:4]
else:
print("You must've entered something really stupid.\n(must be 17 characters, letters are uppercase)")
""" Vin Code Data """
# each manufacturer seems to have multiple vins for different car styles
# and for different countries
manufact_codes = {
"1G1":"Chevy",
"":"",
}
year_codes = {
"M":"2021","L":"2020","K":"2019",
"J":"2018","H":"2017","G":"2016",
"F":"2015","E":"2014","D":"2013",
"C":"2012","B":"2011","A":"2010",
"9":"2009","8":"2008","7":"2007",
"6":"2006","5":"2005","4":"2004",
"3":"2003","2":"2002","1":"2001",
"Y":"2000","X":"1999","W":"1998",
"V":"1997","T":"1996","S":"1995",
"R":"1994","P":"1993","N":"1992",
"M":"1991","L":"1990","K":"1989",
"J":"1988","H":"1987","G":"1986",
"F":"1985","E":"1984","D":"1983",
"C":"1982","B":"1981","A":"1980"
}
country_codes = {
"1": "USA",
"4": "USA",
"5": "USA",
"7F": "USA",
"3X": "Mexico",
"37": "Mexico",
"3A": "Canada",
"3W": "Canada",
"W": "Germany",
"SA": "United Kingdom",
"SM": "United Kingdom",
"J": "Japan",
"KL": "Korea",
"KR": "Korea"
}
engine_codes = {
}
""" Define the vehicles attributes using the find() function below """
self.year = self.find(self.year_filt, year_codes)[0]
# self.country = self.find(self.country_filt, country_codes)[0]
# self.manufact = self.find(self.manufact_filt, manufact_codes)
self.engine = self.find(self.engine_filt, engine_codes)
""" Find country (different lengths of country codes) """
for key, value in country_codes.items():
if key == self.country_filt1:
country = value
elif key == self.country_filt2:
country = value
else:
country = "Unsupported code"
""" Same for manufacturer """
for key, value in manufact_codes.items():
if key == self.manufact_filt:
manufact = value
elif key == self.manufact_filt1:
manufact = value
else:
manufact = "Unsupported code"
self.info = print(f"Year: {self.year}\nManufacturer: {manufact}\nCountry: {country}\nEngine: {self.engine}")
""" search through the dictionaries """
def find(self, filt, dict_of_codes):
try:
info = [value for key, value in dict_of_codes.items() if key == filt]
except:
info = "Unsupported"
if len(info) > 1:
info += " (Could be any of these)"
return info
I have this list of hierarchical URLs:
data = ["https://python-rq.org/","https://python-rq.org/a","https://python-rq.org/a/b","https://python-rq.org/c"]
And I want to dynamically make a nested dictionary for every URL for which there exists another URL that is a subdomain/subfolder of it.
I already tried the follwoing but it is not returning what I expect:
result = []
for key,d in enumerate(data):
form_dict = {}
r_pattern = re.search(r"(http(s)?://(.*?)/)(.*)",d)
r = r_pattern.group(4)
if r == "":
parent_url = r_pattern.group(3)
else:
parent_url = r_pattern.group(3) + "/"+r
print(parent_url)
temp_list = data.copy()
temp_list.pop(key)
form_dict["name"] = parent_url
form_dict["children"] = []
for t in temp_list:
child_dict = {}
if parent_url in t:
child_dict["name"] = t
form_dict["children"].append(child_dict.copy())
result.append(form_dict)
This is the expected output.
{
"name":"https://python-rq.org/",
"children":[
{
"name":"https://python-rq.org/a",
"children":[
{
"name":"https://python-rq.org/a/b",
"children":[
]
}
]
},
{
"name":"https://python-rq.org/c",
"children":[
]
}
]
}
Any advice?
This was a nice problem. I tried going on with your regex method but got stuck and found out that split was actually appropriate for this case. The following works:
data = ["https://python-rq.org/","https://python-rq.org/a","https://python-rq.org/a/b","https://python-rq.org/c"]
temp_list = data.copy()
# This removes the last "/" if any URL ends with one. It makes it a lot easier
# to match the URLs and is not necessary to have a correct link.
data = [x[:-1] if x[-1]=="/" else x for x in data]
print(data)
result = []
# To find a matching parent
def find_match(d, res):
for t in res:
if d == t["name"]:
return t
elif ( len(t["children"])>0 ):
temp = find_match(d, t["children"])
if (temp):
return temp
return None
while len(data) > 0:
d = data[0]
form_dict = {}
l = d.split("/")
# I removed regex as matching the last parentheses wasn't working out
# split does just what you need however
parent = "/".join(l[:-1])
data.pop(0)
form_dict["name"] = d
form_dict["children"] = []
option = find_match(parent, result)
if (option):
option["children"].append(form_dict)
else:
result.append(form_dict)
print(result)
[{'name': 'https://python-rq.org', 'children': [{'name': 'https://python-rq.org/a', 'children': [{'name': 'https://python-rq.org/a/b', 'children': []}]}, {'name': 'https://python-rq.org/c', 'children': []}]}]
I try to increment a list at each iteration of a loop :
ads = []
page = {}
page['titre'] = "Title here"
page['nombre_pages'] = 396
i = 1
total = 3
while i <= total:
print(i)
page['id'] = i
ads.append(page)
i += 1
this return
[{'titre': 'Title here', 'nombre_pages': 396, 'id': 3}, {'titre': 'Title here', 'nombre_pages': 396, 'id': 3}, {'titre': 'Title here', 'nombre_pages': 396, 'id': 3}]
I don't understand why the same id 3 times and not id:1, id:2, id:3
When print page['id'] is ok (increment), ads.append(page['id']) is available too.
Can you help ?
Thanks
you're only creating a single "page" object, i.e. by doing:
page = {}
and referring to it from several index locations in ads. you probably want to be doing something closer to:
ads = []
i = 1
total = 3
while i <= total:
print(i)
page = {}
page['titre'] = "Title here"
page['nombre_pages'] = 396
page['id'] = i
ads.append(page)
i += 1
or slightly more idiomatically:
ads = []
total = 3
for i in range(total):
ads.append({
'nombre_pages': 396,
'titre': "Title here",
'id': i,
})