My task is to cout all top senders and top recievers of user's email.
So the plan is to get all user id's, put them in a dictionary, count their amount and print.
I tried this but it doesn't work very well with INBOX label (10 000+ messages):
import base64
import email
import re
import operator
from googleapiclient import errors
from quickstart import service
def find(st):
for i in range(0,len(st)):
tmp = str(st[i])
for j in range(0,len(tmp)):
if tmp[j] == 'T' and tmp[j+1] == 'o' and tmp[j-1] == "'" and tmp[j+2] == "'":
return i
pass
def getTop(n):
try:
if n == 1:
label_ids = "INBOX"
else:
label_ids = "SENT"
user_id = "me"
topers = service.users().labels().get(userId = user_id,id = label_ids).execute()
count = topers['messagesTotal']
print(count)
topers = service.users().messages().list(userId = user_id, labelIds = label_ids).execute()
arrId = []
for i in range(0,count):
arrId.append(topers['messages'][i]['id'])
st = []
for i in range(0,count):
message = service.users().messages().get(userId=user_id,
id=arrId[i],
format = 'metadata').execute()
head = message['payload']['headers']
index = find(head)
obval = head[index]['value']
tmp = str(obval)
tmp =tmp.split('<', 1)[-1]
tmp = tmp.replace('>',"")
st.append(tmp)
cnt = 0
mvalues = {}
for mail in st:
if not mail in mvalues:
mvalues[mail] = 1
else:
mvalues[mail]+= 1
sorted_values = sorted(mvalues.items(),key= operator.itemgetter(1))
ln = len(sorted_values)
for j in range(1,6):
print(sorted_values[-j])
pass
except errors.HttpError as error:
print('An error occurred: %s' % error)
My question is: what is the fastest and the most correct way to get all these user emails?
If I have a lot of messages, using a while and make a request every time is not the best way I guess. I'm trying to figure this out for about 4 days. Help
Related
My ideia is to find every email in a sentence and replace it for a different random email (anonymization). But I can't get the result I want. Every email is replaced for the same one or I get an error (list index out of range)
input:
email = "daniel#hotmail.com sent it to ana#gmail.com"
output I want
email = "albert#hotmail.com sent it to john#gmail.com"
random_emails = ["albert", "john", "mary"]
def find_email(email: str):
result = email
i = 0
email_address = r"\S+#"
for text in email:
result = re.sub(email_address, random_emails[i] + "#", result)
i += 1
return result
print(find_email(email))
I found a solution, but note that identical emails will be anonymized in the same way. I let you try this :
import re
email = "daniel#hotmail.com sent it to ana#gmail.com"
random_emails = ["albert", "john", "mary"]
def find_email(email: str):
result = email
i = 0
email_address = r"\S+#"
regex_matches = re.findall(email_address, email)
for match in regex_matches:
result = result.replace(match, random_emails[i] + "#")
i += 1
return result
print(find_email(email))
You dont need for loop, and I think your RegExr can be improved
def find_email(email):
result = email
email_address = r"(\w+#)(\w+.* )(\w+#)(\w+.*)"
a='AAAAA#'
b='BBBBB#'
result = re.sub(email_address, rf'{a}\2{b}\4', result)
return result
email = "daniel#hotmail.com sent it to ana#gmail.com"
print(find_email(email))
Explaining:
You can create substitution groups:
1º = 1º email 2º = server and texts 3º = 2º email 4º = server.com
And now, you just need to replace \1 and \2 with everythink you want
example2: Your new routine
import re
from random import seed
from random import randint
random_emails = ["albert", "john", "mary"]
def find_email(email):
result = email
email_address = r"(\w+#)(\w+.* )(\w+#)(\w+.*)"
first = randint(0, 2)
second = randint(0, 2)
while first == second:
second = randint(0, 2)
result = re.sub(email_address, rf'{random_emails[first]}#\2{random_emails[second]}#\4', result)
return result
email = "daniel#hotmail.com sent it to ana#gmail.com"
print(find_email(email))
I used random to generate an random number to got emails from list.
And "while first == second:" just to not repeat first and second
emails
I'm extracting data using Spotify API wrapper. The access token (which is global variable) is valid for only 1 hour so I need to update it during the for loop in some defined function. I tried to update it using try/except, but I got the following error:
UnboundLocalError: local variable 'spotify' referenced before assignment.
Here is the relevant code:
token = credentials.get_access_token()
spotify = spotipy.Spotify(auth = token)
...
def main():
...
df_af = generate_audio_features_df(track_ids)
...
def generate_audio_features_df(track_ids):
col_list = ['id', 'danceability']
result = []
count = 0
for j in track_ids:
try:
r = spotify.audio_features(j)[0]
features_list = [r['id'], r['danceability']]
result.append(features_list)
#display progress
count += 1
print("Added ", count, " track")
except spotipy.client.SpotifyException:
token = credentials.get_access_token()
spotify = spotipy.Spotify(auth = token)
df = pd.DataFrame(data = result, columns = col_list)
return df
if __name__ == "__init__":
main()
I would like the code to update a token and get back to the loop.
I've been using python for a little while and have made some improvements but this a new error to me. I'm trying to learn social media analysis for my career and that's why I am trying out this set of code here.
I've de bugged one error but this one, which appears at line 81, has got me stumped as I can't see why the function "def get_user_objects(follower_ids):" returns none and what i'd need to change it in accordance with previous advice on other questions here.
Here's script to that point for simplicity. All help appreciated.
The error, to repeat is TypeError: object of type 'NoneType' has no len()
from tweepy import OAuthHandler
from tweepy import API
from collections import Counter
from datetime import datetime, date, time, timedelta
import sys
import json
import os
import io
import re
import time
# Helper functions to load and save intermediate steps
def save_json(variable, filename):
with io.open(filename, "w", encoding="utf-8") as f:
f.write(str(json.dumps(variable, indent=4, ensure_ascii=False)))
def load_json(filename):
ret = None
if os.path.exists(filename):
try:
with io.open(filename, "r", encoding="utf-8") as f:
ret = json.load(f)
except:
pass
return ret
def try_load_or_process(filename, processor_fn, function_arg):
load_fn = None
save_fn = None
if filename.endswith("json"):
load_fn = load_json
save_fn = save_json
else:
load_fn = load_bin
save_fn = save_bin
if os.path.exists(filename):
print("Loading " + filename)
return load_fn(filename)
else:
ret = processor_fn(function_arg)
print("Saving " + filename)
save_fn(ret, filename)
return ret
# Some helper functions to convert between different time formats and
perform date calculations
def twitter_time_to_object(time_string):
twitter_format = "%a %b %d %H:%M:%S %Y"
match_expression = "^(.+)\s(\+[0-9][0-9][0-9][0-9])\s([0-9][0-9][0-9]
[09])$"
match = re.search(match_expression, time_string)
if match is not None:
first_bit = match.group(1)
second_bit = match.group(2)
last_bit = match.group(3)
new_string = first_bit + " " + last_bit
date_object = datetime.strptime(new_string, twitter_format)
return date_object
def time_object_to_unix(time_object):
return int(time_object.strftime("%s"))
def twitter_time_to_unix(time_string):
return time_object_to_unix(twitter_time_to_object(time_string))
def seconds_since_twitter_time(time_string):
input_time_unix = int(twitter_time_to_unix(time_string))
current_time_unix = int(get_utc_unix_time())
return current_time_unix - input_time_unix
def get_utc_unix_time():
dts = datetime.utcnow()
return time.mktime(dts.timetuple())
# Get a list of follower ids for the target account
def get_follower_ids(target):
return auth_api.followers_ids(target)
# Twitter API allows us to batch query 100 accounts at a time
# So we'll create batches of 100 follower ids and gather Twitter User
objects for each batch
def get_user_objects(follower_ids):
batch_len = 100
num_batches = len(follower_ids)/100
batches = (follower_ids[i:i+batch_len] for i in range(0,
len(follower_ids), batch_len))
all_data = []
for batch_count, batch in enumerate(batches):
sys.stdout.write("\r")
sys.stdout.flush()
sys.stdout.write("Fetching batch: " + str(batch_count) + "/" +
str(num_batches))
sys.stdout.flush()
users_list = auth_api.lookup_users(user_ids=batch)
users_json = (map(lambda t: t._json, users_list))
all_data += users_json
return all_data
# Creates one week length ranges and finds items that fit into those range
boundaries
def make_ranges(user_data, num_ranges=20):
range_max = 604800 * num_ranges
range_step = range_max/num_ranges
# We create ranges and labels first and then iterate these when going
through the whole list
# of user data, to speed things up
ranges = {}
labels = {}
for x in range(num_ranges):
start_range = x * range_step
end_range = x * range_step + range_step
label = "%02d" % x + " - " + "%02d" % (x+1) + " weeks"
labels[label] = []
ranges[label] = {}
ranges[label]["start"] = start_range
ranges[label]["end"] = end_range
for user in user_data:
if "created_at" in user:
account_age = seconds_since_twitter_time(user["created_at"])
for label, timestamps in ranges.iteritems():
if account_age > timestamps["start"] and account_age <
timestamps["end"]:
entry = {}
id_str = user["id_str"]
entry[id_str] = {}
fields = ["screen_name", "name", "created_at",
"friends_count", "followers_count", "favourites_count", "statuses_count"]
for f in fields:
if f in user:
entry[id_str][f] = user[f]
labels[label].append(entry)
return labels
if __name__ == "__main__":
account_list = []
if (len(sys.argv) > 1):
account_list = sys.argv[1:]
if len(account_list) < 1:
print("No parameters supplied. Exiting.")
sys.exit(0)
consumer_key="XXXXXXX"
consumer_secret="XXXXXX"
access_token="XXXXXXX"
access_token_secret="XXXXXXXX"
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
auth_api = API(auth)
for target in account_list:
print("Processing target: " + target)
# Get a list of Twitter ids for followers of target account and save it
filename = target + "_follower_ids.json"
follower_ids = try_load_or_process(filename, get_follower_ids,
target)
# Fetch Twitter User objects from each Twitter id found and save the data
filename = target + "_followers.json"
user_objects = try_load_or_process(filename, get_user_objects,
follower_ids)
total_objects = len(user_objects)
# Record a few details about each account that falls between specified age
ranges
ranges = make_ranges(user_objects)
filename = target + "_ranges.json"
save_json(ranges, filename)
# Print a few summaries
print
print("\t\tFollower age ranges")
print("\t\t===================")
total = 0
following_counter = Counter()
for label, entries in sorted(ranges.iteritems()):
print("\t\t" + str(len(entries)) + " accounts were created
within " + label)
total += len(entries)
for entry in entries:
for id_str, values in entry.iteritems():
if "friends_count" in values:
following_counter[values["friends_count"]] += 1
print("\t\tTotal: " + str(total) + "/" + str(total_objects))
print
print("\t\tMost common friends counts")
print("\t\t==========================")
total = 0
for num, count in following_counter.most_common(20):
total += count
print("\t\t" + str(count) + " accounts are following " +
str(num) + " accounts")
print("\t\tTotal: " + str(total) + "/" + str(total_objects))
print
print
The immediate problem is in load_json: you assume its return value is a list or dict, or something that can be passed to len. However, it can return None in a number of circumstances:
The file to read from isn't found
There is some error reading from the file
There is a problem decoding the contents of the file
The file contains just the JSON value null.
At no point after you call load_json do you check its return value.
Worse, you catch and ignore any exception that might occur in load_json, causing it to silently return None with no indication that something went wrong.
The function would be better written like
def load_json(filename):
with io.open(filename, "r", encoding="utf-8") as f:
return json.load(f)
At least now, any errors will raise an uncaught exception, making it more obvious that there was a problem and providing a clue as to what the problem was. The golden rule of exception handling is to only catch the exceptions you can do something about, and if you can't do anything about a caught exception, re-raise it.
You could check for the resultant value and follow accordingly:
# Fetch Twitter User objects from each Twitter id found and save the data
filename = target + "_followers.json"
res_get_user_objects = get_user_objects()
if res_get_user_objects is not None:
user_objects = try_load_or_process(filename, get_user_objects,
follower_ids)
total_objects = len(user_objects)
else:
# handle it otherwise
I have a question about SAP silent logon which I implemented using win32com this way
from win32com.client import Dispatch
R3 = Dispatch("SAP.Functions")
R3.Conn.System = 'xxx'
R3.Conn.Client = '100'
# other values needed to pass to R3.Conn
R3.Conn.logon #here is the problem
In VB i can use R3.Conn.Logon(1, True) to make logon siliencely. But in Python Logon seems not to be a method and do not allow me to pass parameters to it.
I tried using R3.Conn.Logon(1, True) in Python, but it returned an error
Logon was not callable.
How should I call silent logon in Python?
Thanks
This works for me.
Still experimenting, I want to add field selection and of course a filter to the RFC_READ_TABLE. But the connection works.
from win32com.client import Dispatch
Functions = Dispatch("SAP.Functions")
Functions.Connection.Client = "000"
Functions.Connection.ApplicationServer = "your server"
Functions.Connection.Language = "EN"
Functions.Connection.User = "you"
Functions.Connection.Password = "your pass"
Functions.Connection.SystemNumber = "00"
Functions.Connection.UseSAPLogonIni = False
if (Functions.Connection.Logon (0,True) == True):
print("Logon OK")
RfcCallTransaction = Functions.Add("RFC_READ_TABLE")
strExport1 = RfcCallTransaction.exports("QUERY_TABLE")
strExport2 = RfcCallTransaction.exports("DELIMITER")
strExport3 = RfcCallTransaction.exports("ROWSKIPS")
strExport4 = RfcCallTransaction.exports("ROWCOUNT")
tblOptions = RfcCallTransaction.Tables("OPTIONS")
#RETURNED DATA
tblData = RfcCallTransaction.Tables("DATA")
tblFields = RfcCallTransaction.Tables("FIELDS")
strExport1.Value = 'AGR_DEFINE'
strExport2.Value = ";"
strExport3.Value = 0
strExport4.Value = 10
if RfcCallTransaction.Call == True:
print ("Function call successful")
#print (tblData.RowCount)
j = 1
while j < tblData.RowCount:
print (tblData(j,"WA"))
j = j + 1
I am using Blockchain.info's API to send multiple payments. I believe I have everything how it should be however when I run the code I get the following Error: RuntimeError: ERROR: Invalid Recipients JSON. Please make sure it is url encoded and consult the docs. The docs can be found here: https://blockchain.info/api/blockchain_wallet_api
The Python library I am using can be found here: https://github.com/p4u/blockchain.py/blob/master/blockchain.py
The only other post on this issue is posted by the original creator of the library, he said the problem was that the amounts cannot be a decimal, mine are not however.Post can be found here: https://bitcointalk.org/index.php?topic=600870.0
Here is my code:
from __future__ import print_function
from itertools import islice, imap
import csv, requests, json, math
from collections import defaultdict
import requests
import urllib
import json
from os.path import expanduser
import configparser
class Wallet:
guid = 'g'
isAccount = 0
isKey = 0
password1 = 'x'
password2 = 'y'
url = ''
def __init__(self, guid = 'g', password1 = 'x', password2 = 'y'):
if guid.count('-') > 0:
self.isAccount = 1
if password1 == '': # wallet guid's contain -
raise ValueError('No password with guid.')
else:
self.isKey = 1
self.guid = guid
self.url = 'https://blockchain.info/merchant/' + guid + '/'
self.password1 = password1
self.password2 = password2
r = requests.get('http://api.blockcypher.com/v1/btc/main/addrs/A/balance')
balance = r.json()['balance']
with open("Entries#x1.csv") as f,open("winningnumbers.csv") as nums:
nums = set(imap(str.rstrip, nums))
r = csv.reader(f)
results = defaultdict(list)
for row in r:
results[sum(n in nums for n in islice(row, 1, None))].append(row[0])
self.number_matched_0 = results[0]
self.number_matched_1 = results[1]
self.number_matched_2 = results[2]
self.number_matched_3 = results[3]
self.number_matched_4 = results[4]
self.number_matched_5 = results[5]
self.number_matched_5_json = json.dumps(self.number_matched_5, sort_keys = True, indent = 4)
print(self.number_matched_5_json)
if len(self.number_matched_3) == 0:
print('Nobody matched 3 numbers')
else:
self.tx_amount_3 = int((balance*0.001)/ len(self.number_matched_3))
if len(self.number_matched_4) == 0:
print('Nobody matched 4 numbers')
else:
self.tx_amount_4 = int((balance*0.1)/ len(self.number_matched_4))
if len(self.number_matched_5) == 0:
print('Nobody matched 3 numbers')
else:
self.tx_amount_5 = int((balance*0.4)/ len(self.number_matched_5))
self.d = {el: self.tx_amount_5 for el in json.loads(self.number_matched_5_json)}
print(self.d)
self.d_url_enc = urllib.urlencode(self.d)
def Call(self, method, data = {}):
if self.password1 != '':
data['password'] = self.password1
if self.password2 != '':
data['second_password'] = self.password2
response = requests.post(self.url + method,params=data)
json = response.json()
if 'error' in json:
raise RuntimeError('ERROR: ' + json['error'])
return json
def SendPayment(self, toaddr, amount, fromaddr = 'A', shared = 0, fee = 0.0001, note = True):
data = {}
data['to'] = toaddr
data['amount'] = self.tx_amount_5
data['fee'] = fee
data['recipients'] = self.d_url_enc
if fromaddr:
data['from'] = fromaddr
if shared:
data['shared'] = 'true'
if note:
data['note'] = 'n'
response = self.Call('payment',data)
def SendManyPayment(self, fromaddr = True, shared = False, fee = 0.0001, note = True):
data = {}
recipients = self.d_url_enc
data['recipients'] = recipients.__str__().replace("'",'"')
data['fee'] = str(fee)
if fromaddr:
data['from'] = 'A'
if shared:
data['shared'] = 'true'
else:
data['shared'] = 'false'
if note:
data['note'] = 'n'
response = self.Call('sendmany',data)
return response
print(Wallet().SendManyPayment())
Complete runtime error: Traceback (most recent call last):
File "D:\Documents\B\Code\A\jsontest.py", line 125, in <module>
print(Wallet().SendManyPayment())
File "D:\Documents\B\Code\A\jsontest.py", line 121, in SendManyPayment
response = self.Call('sendmany',data)
File "D:\Documents\B\Code\A\jsontest.py", line 86, in Call
raise RuntimeError('ERROR: ' + json['error'])
RuntimeError: ERROR: Invalid Recipients JSON. Please make sure it is url encoded and consult the docs.
What does data['recipients'] contain inside of your SendManyPayment() function? It looks like you are trying to do some manual encoding instead of using json.dumps(recipients)
The docs say it should look like this:
{
"1JzSZFs2DQke2B3S4pBxaNaMzzVZaG4Cqh": 100000000,
"12Cf6nCcRtKERh9cQm3Z29c9MWvQuFSxvT": 1500000000,
"1dice6YgEVBf88erBFra9BHf6ZMoyvG88": 200000000
}
Try this out for send many:
def SendManyPayment(self, fromaddr = True, shared = False, fee = 0.0001, note = True):
data = {}
recipients = self.d_url_enc
# recipients should be a json string FIRST!
data['recipients'] = json.dumps(recipients)
data['fee'] = str(fee)
if fromaddr:
data['from'] = 'A'
if shared:
data['shared'] = 'true'
else:
data['shared'] = 'false'
if note:
data['note'] = 'n'
response = self.Call('sendmany',data)
return response