Modify a loop based on key and value in dictionary - python

I'm new to Python. I wrote the code below to search in a dictionary, do something with each entry, clear the old items, and update the dictionary with new keys and values, breaking once there is nothing left to add (the dictionary is empty). How can I modify my code to do this?
# since_id - Returns results with an ID greater than
# (that is, more recent than) the specified ID. There are limits to the
# number of Tweets which can be accessed through the API.
# If the limit of Tweets has occurred since the since_id,
# the since_id will be forced to the oldest ID available.
# max_id - Returns results with an ID less than (that is, older than)
# or equal to the specified ID.
import tweepy
import pandas as pd
from tqdm import tqdm
from pandas.io.json import json_normalize

# api is an authenticated tweepy.API object
Dict2 = {'#TweeetLorraine': 1392217841680764931}
d2 = {}
rep = []
for key, value in tqdm(Dict2.items()):
    for i in tweepy.Cursor(api.search,
                           q='to:{} -filter:retweets'.format(key), lang="en",
                           since_id=value, tweet_mode='extended',
                           wait_on_rate_limit=True,
                           wait_on_rate_limit_notify=True).items(50):
        if i.in_reply_to_status_id == value:
            rep.append(i)

dfflat = pd.DataFrame()
for tweet in rep:
    df_for_tweet = json_normalize(tweet._json)
    dfflat = dfflat.append(df_for_tweet, ignore_index=True, sort=True)
d2.update(zip(dfflat["user.screen_name"].tolist(), dfflat["id"].tolist()))
d2

You can use a while loop for that:
For your use case, here is roughly the code that does what you describe. There are better ways to do it using map; I'll let you search for that if you want to know more.
Also, I'm not sure whether you want to completely clear the dict or only remove the current item, but you should be able to adapt the following snippet to your actual needs:
mydict = initial_dict  # e.g. Dict2 from the question
replies3 = []
# while there is something in the dictionary
while mydict:
    value_searched = None
    for key, value in mydict.items():
        for i in tweepy.Cursor(api.search,
                               q='to:{} -filter:retweets'.format(key), lang="en",
                               since_id=value, tweet_mode='extended',
                               wait_on_rate_limit=True,
                               wait_on_rate_limit_notify=True).items(50):
            if i.in_reply_to_status_id == value:
                replies3.append(i)
                value_searched = i
                break
        break
    # create a new dict from the value retrieved; leave it empty
    # (ending the while loop) when nothing was found
    mydict = {}
    if value_searched is not None:
        mydict = {"#" + value_searched.user.screen_name: value_searched.id_str}
Edit 2:
Using recursion:
def tweepy_stub(key, value):
    if key == "TweeetLorraine" and value == 1392217841680764931:
        return [
            ("AlexBC997", 1392385334155956226),
            ("ChapinDolores", 1392432099945238529),
        ]
    elif key == "AlexBC997" and value == 1392385334155956226:
        return [("test", 139238533415595852)]
    elif key == "ChapinDolores" and value == 1392432099945238529:
        return []

def recursive(list_values, nb_recursion):
    mydict = {}
    if list_values is None or nb_recursion == 0:
        return mydict
    else:
        for name_user, tweet_id in list_values:
            mydict[(name_user, tweet_id)] = recursive(
                retrieve_direct_reply_stub(name_user, tweet_id), nb_recursion - 1
            )
        return mydict

class stub_tweepy_answer:
    def __init__(self, status_id) -> None:
        self.in_reply_to_status_id = status_id

def retrieve_direct_reply_stub(name_user, tweepy_id):
    return tweepy_stub(name_user, tweepy_id)

def retrieve_direct_reply(name_user, tweet_id):
    rep = []
    d2 = []
    for i in tweepy_stub(name_user, tweet_id):
        if i.in_reply_to_status_id == tweet_id:
            rep.append(i)
    from pandas.io.json import json_normalize
    dfflat = pd.DataFrame()
    for tweet in rep:
        df_for_tweet = json_normalize(tweet._json)
        dfflat = dfflat.append(df_for_tweet, ignore_index=True, sort=True)
    d2.append(zip(dfflat["user.screen_name"].tolist(), dfflat["id"].tolist()))
    return d2

# print(retrieve_direct_reply_stub("TweeetLorraine", 1392217841680764931))
elem = [("TweeetLorraine", 1392217841680764931)]
print(recursive(elem, 3))
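For reference, with the stub above the call prints one nesting level per recursion step:
{('TweeetLorraine', 1392217841680764931): {('AlexBC997', 1392385334155956226): {('test', 139238533415595852): {}}, ('ChapinDolores', 1392432099945238529): {}}}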

Related

Python (requests): How to parse only certain JSON data, and how to make this chunk of code loop (not as important)

Figured it out thanks to Loki!
The script logs the data from print(get_rates()) to a .txt file on every cycle, using an infinite while loop (suggested by Loki):
import requests
from time import sleep
from datetime import datetime, date

old_print = print
def tstamped_print(*args, **kwargs):
    old_print(datetime.now(), *args, **kwargs)
print = tstamped_print

# Getting rates
def get_rates():
    PREEVURL = requests.get('http://preev.com/pulse/units:btc+usd/sources:bitstamp+kraken')
    DATA = PREEVURL.json()
    RESULT = {}
    for key, value in DATA["btc"]["usd"].items():
        RESULT[key] = value['last']
    return RESULT

# Infinite while loop
a = 1
while a < 10:
    print(get_rates(), file=open("btc_price1.txt", "a"))
    print(get_rates())
    sleep(2)
    a = a - 0  # a never changes, so the loop never ends
else:
    print("loop end")  # never reached while the loop is infinite
If I understand well, you want to do two things:

- Extract a bitcoin rate from this API response
- Loop the requests and store each result in a file to save the history

Let's see how to do this.
Call the API and store the result
If you call it in a Python shell you can look at the data:
>>> import requests
>>>
>>> url_resp = requests.get('http://preev.com/pulse/units:btc+usd/sources:bitstamp+kraken') # (your url)
>>> data = url_resp.json()
>>> data
{'btc': {'usd': {'bitstamp': {'last': '9503.05', 'volume': '6.27734e+7'}, 'kraken': {'last': '9509.10', 'volume': '4.08549e+7'}}}, 'other': {'slot': None, 'ver': 'b'}}
To get the value, simply access each dictionary item by its key:
# Convert the string to a float number.
bitstamp_rate = float(data['btc']['usd']['bitstamp']['last'])
kraken_rate = float(data['btc']['usd']['kraken']['last'])
Let's loop over each exchange and put it in a function:
def get_rates():
    url_resp = requests.get('http://preev.com/pulse/units:btc+usd/sources:bitstamp+kraken')  # (your url)
    data = url_resp.json()
    result = {}
    for exchange, rates in data['btc']['usd'].items():
        result[exchange] = float(rates['last'])
    return result
Use a while loop to call the function multiple times
I'm letting you decide how to store the data; you might also want to record the time when the function was called.
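As a minimal sketch (the file name and polling interval are placeholders, not from the original answer), you could append one timestamped line per call:
import json
from time import sleep
from datetime import datetime

# Minimal sketch: poll get_rates() forever and append timestamped results
# to a log file ("rates_history.txt" and the 60s interval are placeholders).
while True:
    rates = get_rates()
    with open("rates_history.txt", "a") as f:
        f.write("{} {}\n".format(datetime.now().isoformat(), json.dumps(rates)))
    sleep(60)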
The problem is that your initial data.json file is empty. The easiest way out is to place an empty dictionary in your data.json file; just make sure data.json is not an empty file.
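For example, a one-off sketch to seed the file (assuming data.json is meant to hold a JSON object):
import json

# Write an empty JSON object so that later json.load() calls succeed.
with open("data.json", "w") as f:
    json.dump({}, f)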
Figured it out thanks to Loki!
This version logs the data from each print(get_rates()) call to a .txt file, again using an infinite while loop (suggested by Loki):
import time
import requests
from time import sleep
from datetime import datetime, date

'''
print('Enter correct username and password combo to continue')
count = 0
username = '420'
while count < 10:
    username = input('login code: ')
    if username == '420':
        print('Access granted')
        count = 11
        break
    else:
        print('Access denied. Try again.')
        count =- 1
print('====> LOGGED IN', '\n')
'''

today = time.strftime("(%Y-%m-%d %I:%M%p)")
old_print = print
def tstamped_print(*args, **kwargs):
    old_print(today, *args, **kwargs)
print = tstamped_print

# Getting rates
def get_rates():
    PREEVURL = requests.get('http://preev.com/pulse/units:btc+usd/sources:bitstamp+kraken')
    DATA = PREEVURL.json()
    RESULT = {}
    for key, value in DATA["btc"]["usd"].items():
        RESULT[key] = value['last']
    return RESULT

# Infinite while loop
a = 1
PREEVURL = requests.get('http://preev.com/pulse/units:btc+usd/sources:bitstamp+kraken')
DATA = PREEVURL.json()
while a < 10:
    print(get_rates(), file=open("btc_price1.txt", "a"))
    print(get_rates())
    sleep(1)
    a = a - 0  # a never changes, so the loop never ends
else:
    print("loop end")  # never reached while the loop is infinite

How to initialize a map with datetime.date as key and list of strings as value?

I tried:
date_dict = {}
date_dict = defaultdict(datetime.date, list)
but neither seems to work.
I need to add values such that:
{
    datetime.date(2018, 1, 2): ['name-20180102_000012', 'name-20180102'],
    datetime.date(2020, 2, 4): ['test'],
    datetime.date(2018, 1, 1): ['name-20180101_000012', 'name-20180101_000016'],
    datetime.date(2018, 2, 13): ['name-20180213'],
}
via:
from datetime import datetime, date

def _get_dates(name):
    DATE_FORMATS = ['%Y%m%d', '%Y%m%d_%H%M%S']
    for date_format in DATE_FORMATS:
        try:
            date_of_index = datetime.strptime(name.split('-')[-1], date_format).date()
        except ValueError:
            pass
        else:
            break
    else:
        date_of_index = date.today()
    return date_of_index

date_dict = {}
index_list = ['test',
              'name-20180213',
              'name-20180102_000012',
              'name-20180102',
              'name-20180101_000012',
              'name-20180101_000016']
for name in index_list:
    date_dict[_get_dates(name)].append(name)

excluded_names = ['name-20180102_000012']
import itertools

def _get_list_to_delete_from(date_dict, index_to_keep):
    keys_to_delete = sorted(date_dict, reverse=True)[index_to_keep:]  # I need to keep the dates
    return list(itertools.chain.from_iterable([date_dict[key] for key in keys_to_delete]))

if excluded_names in _get_list_to_delete_from(date_dict, 2):
    for values in date_dict.values():
        for v in values:
            if v in excluded_names:
                values.remove(v)  # not working with defaultdict(list) or defaultdict([]:lambda)
No need to use the .append method.
Here's a quick fix:
for name in index_list:
    date_dict[_get_dates(name)] = name
Python is dynamically typed. You can just create the defaultdict and start using datetime.date values as keys and lists as values. Your code isn't working because you .append to a value even if it doesn't exist yet; start the dict as defaultdict(list), which initialises new items as an empty list.
So, your line:
date_dict = {}
should be:
date_dict = defaultdict(list)
(I had lambda: [] previously instead of list, but the type itself works just as well as an initialiser, since list() also returns a new, empty list.)
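A quick demonstration of why defaultdict(list) makes the .append work:
from collections import defaultdict

d = defaultdict(list)
d['missing'].append(1)  # no KeyError: 'missing' is created as [] first
print(d)                # defaultdict(<class 'list'>, {'missing': [1]})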
Also, your _get_dates only gets a single date and discards the time, so it may as well ignore it. Here's a working example:
from collections import defaultdict
from datetime import datetime, date

def _get_date(name):
    try:
        _ = name.index('-')
        return datetime.strptime(name.split('-')[-1][:8], '%Y%m%d').date()
    except ValueError:
        return None

def main():
    date_dict = defaultdict(list)
    index_list = [
        'test',
        'name-20180213',
        'name-20180102_000012',
        'name-20180102',
        'name-20180101_000012',
        'name-20180101_000016'
    ]
    for name in index_list:
        date_dict[_get_date(name)].append(name)
    print(date_dict)

main()
If you're looking to exclude some names, you could just have the loop be like this:
for name in index_list:
    if name not in excluded_names:
        date_dict[_get_date(name)].append(name)
Or if you need to remove them later:
for name in excluded_names:
    date_dict[_get_date(name)].remove(name)
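Keep in mind that list.remove raises a ValueError if the name was never stored under that date; a defensive sketch:
# Guarded removal: skip names that are not actually stored under that date.
for name in excluded_names:
    names = date_dict.get(_get_date(name), [])
    if name in names:
        names.remove(name)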

Retrieve bulk data from YAML using Python

I have a yaml file of the form below:
Solution:
- number of solutions: 1
  number of solutions displayed: 1
- Gap: None
  Status: optimal
  Message: bonmin\x3a Optimal
  Objective:
    objective:
      Value: 0.010981105395
  Variable:
    battery_E[b1,1,1]:
      Value: 0.25
    battery_E[b1,1,2]:
      Value: 0.259912707017
    battery_E[b1,2,1]:
      Value: 0.120758408109
    battery_E[b2,1,1]:
      Value: 0.0899999972181
    battery_E[b2,2,3]:
      Value: 0.198967393893
    windfarm_L[w1,2,3]:
      Value: 1
    windfarm_L[w1,3,1]:
      Value: 1
    windfarm_L[w1,3,2]:
      Value: 1
Using Python 2.7, I would like to import all battery_E values from this YAML file. I know I can iterate over the keys of the battery_E dictionary to retrieve them one by one (I am already doing that using PyYAML), but I would like to avoid iterating and do it in one go!
It's not possible "in one go" - there will still be some kind of iteration either way, and that's completely OK.
However, if memory is a concern, you can load only the values of the keys of interest during YAML loading:
from __future__ import print_function
import yaml

KEY = 'battery_E'

class Loader(yaml.SafeLoader):
    def __init__(self, stream):
        super(Loader, self).__init__(stream)
        self.values = []

    def compose_mapping_node(self, anchor):
        start_event = self.get_event()
        tag = start_event.tag
        if tag is None or tag == '!':
            tag = self.resolve(yaml.MappingNode, None, start_event.implicit)
        node = yaml.MappingNode(tag, [],
                                start_event.start_mark, None,
                                flow_style=start_event.flow_style)
        if anchor is not None:
            self.anchors[anchor] = node
        while not self.check_event(yaml.MappingEndEvent):
            item_key = self.compose_node(node, None)
            item_value = self.compose_node(node, item_key)
            if (isinstance(item_key, yaml.ScalarNode)
                    and item_key.value.startswith(KEY)
                    and item_key.value[len(KEY)] == '['):
                self.values.append(self.construct_object(item_value, deep=True))
            else:
                node.value.append((item_key, item_value))
        end_event = self.get_event()
        node.end_mark = end_event.end_mark
        return node

with open('test.yaml') as f:
    loader = Loader(f)
    try:
        loader.get_single_data()
    finally:
        loader.dispose()
print(loader.values)
Note, however, that this code does not assume anything about the position of the battery_E keys in the tree inside the YAML file; it will simply load all of their values.
There is no need to retrieve each entry separately using PyYAML: you can load the data once and then use a Python dict comprehension to select the key-value pairs, in the following two lines:
data = yaml.safe_load(open('input.yaml'))
kv = {k: v['Value'] for k, v in data['Solution'][1]['Variable'].items() if k.startswith('battery_E')}
after that, kv contains:
{'battery_E[b2,2,3]': 0.198967393893, 'battery_E[b1,1,1]': 0.25, 'battery_E[b1,1,2]': 0.259912707017, 'battery_E[b2,1,1]': 0.0899999972181, 'battery_E[b1,2,1]': 0.120758408109}

Python - Getting Attributes From A File of Constants

I have a file of constant variables that I need to query and I am not sure how to go about it.
I have a database query which is returning user names and I need to find the matching user name in the file of constant variables.
The file looks like this:
SALES_MANAGER_01 = {"user_name": "BO01", "password": "password", "attend_password": "BO001",
"csm_password": "SM001", "employee_num": "BOSM001"}
There is just a bunch of users just like the one above.
My function looks like this:
#attr("user_test")
def test_get_user_for_login(self):
application_code = 'BO'
user_from_view = self.select_user_for_login(application_code=application_code)
users = [d['USER'] for d in user_from_view]
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
password = ""
global_users = dir(gum)
for item in global_users:
if user_wo_ent not in item.__getattr__("user_name"):
user_with_ent = choice(users)
user_wo_ent = user_with_ent[-4:]
else:
password = item.__getattr__("password")
print(user_wo_ent, password)
global_users = dir(gum), where gum is my file of constants. I know I am doing something wrong, since I am getting AttributeError: 'str' object has no attribute '__getattr__'; I am just not sure how to go about resolving it.
You should reverse your looping, as you want to compare each item against your match condition. Also, you have a dictionary, so use it to do the heavy lifting.
You need to add some imports:
import re
from ast import literal_eval
I've changed the dir(gum) bit to be this function.
def get_global_users(filename):
    gusers = {}  # create a global users dict
    p_key = re.compile(ur'\b\w*\b')  # regex to get first part, e.g. SALES_MANAGER_01
    p_value = re.compile(ur'\{.*\}')  # regex to grab everything in {}
    with open(filename) as f:  # open the file and work through it
        for line in f:  # for each line
            gum_key = p_key.match(line)  # pull out the key
            gum_value = p_value.search(line)  # pull out the value
            ''' Here is the real action: update the dictionary
            with the match of gum_key and the match of gum_value '''
            gusers[gum_key.group()] = literal_eval(gum_value.group())
    return gusers  # return the dictionary
The bottom of your existing code is replaced with this:
global_users = get_global_users(gum.__file__)  # parse the constants file into a dict
for key, value in global_users.iteritems():  # walk through all key, value pairs
    if value['user_name'] != user_wo_ent:
        user_with_ent = choice(users)
        user_wo_ent = user_with_ent[-4:]
    else:
        password = value['password']
So a very simple answer was to get the dir of the constants file and then parse over it like so:
global_users = dir(gum)
for item in global_users:
    o = gum.__dict__[item]
    if type(o) is not dict:
        continue
    if o.get("user_name") == user_wo_ent:
        print(user_wo_ent, o.get("password"))
    else:
        print("User was not in global_user_mappings")
I was able to find the answer by doing the following:
def get_user_for_login(application_code='BO'):
    user_from_view = BaseServiceTest().select_user_for_login(application_code=application_code)
    users = [d['USER'] for d in user_from_view]
    user_with_ent = choice(users)
    user_wo_ent = user_with_ent[4:]
    global_users = dir(gum)
    user_dict = {'user_name': '', 'password': ''}
    for item in global_users:
        o = gum.__dict__[item]
        if type(o) is not dict:
            continue
        if user_wo_ent == o.get("user_name"):
            user_dict['user_name'] = user_wo_ent
            user_dict['password'] = o.get("password")
    return user_dict
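As a side note, a slightly more compact sketch of the same constants scan uses vars() (gum again stands for the imported constants module):
# Sketch: scan the constants module for a dict whose user_name matches.
# vars(gum) maps attribute names to their values directly.
def find_user(user_wo_ent):
    for name, obj in vars(gum).items():
        if isinstance(obj, dict) and obj.get("user_name") == user_wo_ent:
            return {'user_name': user_wo_ent, 'password': obj.get("password")}
    return None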

Python multiprocessing: Reading a file and updating a dictionary

Let's assume that I have a text file with only 2 rows, as follows:
File.txt:
100022441 #DavidBartonWB Guarding Constitution
100022441 RT #frankgaffney 2nd Amendment Guy.
The first column is a user id and the second column is the user's tweet. I'd like to read the above text file and update the following dictionary:
d = {'100022441': {'#frankgaffney': 0, '#DavidBartonWB': 0}}
Here is my code:
from multiprocessing import Pool

def f(line):
    data = line.split('\t')
    uid = data[0]
    tweet = data[1]
    if uid in d.keys():
        for gn in d[uid].keys():
            if gn in tweet:
                return uid, gn, 1
            else:
                return uid, gn, 0

p = Pool(4)
with open('~/File.txt') as source_file:
    for uid, gn, r in p.map(f, source_file):
        d[uid][gn] += r
So basically I need to read each line of the file and determine whether the user is in my dictionary, and if so, whether the tweet contains any of that user's keys (e.g. '#frankgaffney' and '#DavidBartonWB'). Based on the two lines I wrote above, the code should produce:
d = {'100022441': {'#frankgaffney': 1, '#DavidBartonWB': 1}}
But it gives:
d = {'100022441': {'#frankgaffney': 1, '#DavidBartonWB': 0}}
For some reason the code always loses one of the keys for all users. Any idea what is wrong in my code?
Your file is tab delimited, and you are always checking the third column for the mention; it works correctly for the first mention because you are passing in the entire file to the function, not each line. So effectively you are doing this:
>>> s = '100022441\t#DavidBartonWB Guarding Constitution\n100022441\tRT#frankgaffney 2nd Amendment Guy.'
>>> s.split('\t')
['100022441', '#DavidBartonWB Guarding Constitution\n100022441', 'RT#frankgaffney 2nd Amendment Guy.']
I recommend two approaches:

- Map your function to each line in the file.
- Use regular expressions for a more robust search.
Try this version:
import re

d = {'100022441': {'#frankgaffney': 0, '#DavidBartonWB': 0}}
e = r'(#\w+)'

def parser(line):
    key, tweet = line.split('\t')
    data = d.get(key)
    if data:
        mentions = re.findall(e, tweet)
        for mention in mentions:
            if mention in data.keys():
                d[key][mention] += 1

with open('~/File.txt') as f:
    for line in f:
        parser(line)

print(d)
# -> {'100022441': {'#frankgaffney': 1, '#DavidBartonWB': 1}}
Once you've confirmed it's working correctly, you can multi-process it:
import itertools, re
from multiprocessing import Process, Manager

def parse(queue, d, m):
    while True:
        line = queue.get()
        if line is None:
            return  # we are done with this thread
        key, tweet = line.split('\t')
        data = d.get(key)
        e = r'(#\w+)'
        if data:
            mentions = re.findall(e, tweet)
            for mention in mentions:
                if mention in data:
                    if mention not in m:
                        m[mention] = 1
                    else:
                        m[mention] += 1

if __name__ == '__main__':
    workers = 2
    manager = Manager()
    d = manager.dict()
    d2 = manager.dict()
    d = {'100022441': ['#frankgaffney', '#DavidBartonWB']}
    queue = manager.Queue(workers)
    worker_pool = []
    for i in range(workers):
        p = Process(target=parse, args=(queue, d, d2))
        p.start()
        worker_pool.append(p)

    # Fill the queue with data for the workers
    with open(r'tweets2.txt') as f:
        iters = itertools.chain(f, (None,) * workers)
        for line in iters:
            queue.put(line)

    for p in worker_pool:
        p.join()

    for i, data in d.iteritems():
        print('For ID: {}'.format(i))
        for key in data:
            print(' {} - {}'.format(key, d2[key]))
- the second column is data[1], not data[2]
- the fact that data[2] works means that you are splitting into words, not columns
- if you want to search for the user key as a separate word (as opposed to a substring), you need tweet = data[1:]
- if you want to search for a substring, you need to split into exactly two pieces: uid, tweet = line.split(None, 1)
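A quick demonstration of the difference (using a made-up sample line):
line = '100022441\t#DavidBartonWB Guarding Constitution'

# Splitting on whitespace gives words, not columns:
print(line.split())     # ['100022441', '#DavidBartonWB', 'Guarding', 'Constitution']

# Splitting into exactly two pieces keeps the whole tweet together:
uid, tweet = line.split(None, 1)
print(uid, '|', tweet)  # 100022441 | #DavidBartonWB Guarding Constitution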
