I created the next part of my program to extract a specific attachment from my active Outlook session:
import win32com.client as win32
import win32com
from tabulate import tabulate
import os

def findFolder(folderName, searchIn):
    try:
        lowerAccount = searchIn.Folders
        for x in lowerAccount:
            if x.Name == folderName:
                objective = x
                return objective
        return None
    except Exception as error:
        print("Looks like we had an issue accessing the searchIn object")
        print(error)
        return None
outlook = win32com.client.Dispatch("Outlook.Application")
ons = outlook.GetNamespace("MAPI")
one = 'email#email.nl'
Folder1 = findFolder(one, ons)
inbox = findFolder('Postvak IN', Folder1)
messages = inbox.Items
ma = [["Subject", "Sender", "Attachment", "Saved?"]]
PathName = "C:\\Temp\\CV_BU_SQ"
os.chdir(PathName)
for msg in messages:
    if msg.Class == 43 and "TLS NL_Commvault" in msg.SenderName and len(msg.Attachments) == 1:
        CV_file = str(msg.Attachments.Item(1))
        CV_pf = os.path.join(os.getcwd() + '\\' + CV_file)
        res = "Yes!"
        try:
            msg.Attachments.SaveAsFile(os.getcwd() + '\\' + CV_file)
        except Exception as e:
            res = "No, error: " + str(e)
        ma.append([msg.Subject[:30], msg.SenderName[:30], CV_file, res])
print(tabulate(ma, headers="firstrow"))
The output is:
Subject Sender Attachment Saved?
------------------------- ---------------- ---------------------------------------------------- -------------------------------
Backup Job Summary Report TLS NL_Commvault Commvault***DagelijkseBackup_2021-07-23-08-00-13.csv No, error: <unknown>.SaveAsFile
Or the raw error:
Traceback (most recent call last):
  File "C:/Users/id983857/PycharmProjects/CheckCVMail/main.py", line 37, in <module>
    msg.Attachments.SaveAsFile(os.getcwd() + '\\' + CV_file)
  File "C:\Users\id983857\CheckCVMail\lib\site-packages\win32com\client\dynamic.py", line 527, in __getattr__
    raise AttributeError("%s.%s" % (self._username_, attr))
AttributeError: <unknown>.SaveAsFile
This Outlook environment is based on Office 365, enterprise edition ...
Demo editions of e-mail extraction software cannot be tested here, as this is an enterprise edition.
I do NOT need a token to read my mail via O365 or HTML, just my user account.
I don't have access to a non-enterprise O365.
What could be the reason for this error?
Any value passed to SaveAsFile(...) results in the same error.
I hope somebody has an idea of how to fix this.
Change the line
CV_file = str(msg.Attachments.Item(1))
to
CV_file = msg.Attachments.Item(1).FileName
and
msg.Attachments.SaveAsFile(os.getcwd() + '\\' + CV_file)
to
msg.Attachments.Item(1).SaveAsFile(os.getcwd() + '\\' + CV_file)
The Attachments property returns a collection of attachments; SaveAsFile is defined on the individual Attachment object, not on the collection, which is why the call raises AttributeError. If you need to access the first attachment from the collection, you can use the Item method:
msg.Attachments.Item(1)
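Put together, a minimal sketch of the corrected loop body (keeping the filters and working directory from the question) might look like this:

attachment = msg.Attachments.Item(1)  # the Attachment object itself
CV_file = attachment.FileName  # the attachment's real file name, not str() of the COM object
attachment.SaveAsFile(os.path.join(os.getcwd(), CV_file))  # SaveAsFile is a method of Attachment, not of the Attachments collection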
I am iterating through a list of urls from a csv file, trying to locate their sitemaps; however, I am getting a weird leading-space issue that causes an error when requests processes each url. I'm trying to figure out what's causing this space to be generated and what type of space it is. I believe something funky is happening with strip(), because the code runs fine when I copy and paste a url directly into requests. I am just not sure what type of space this is and what's causing it to occur.
Wondering if anyone else is having or had this issue?
So far I have tried to solve this using the following methods:
replace()
"".join(split())
regex
Here is my code:
with open('links.csv') as f:
    for line in f:
        strdomain = line.strip()
        if strdomain:
            domain = strdomain
            fix_domain = domain.replace('https://', '').replace('www', '').replace('/', '').replace('.', '').replace(' ', '')
            ofile = fix_domain + '.txt'  # args.ofile
            domain_rem = domain
            map = find_sitemap.get_sitemap(domain_rem+"sitemap.xml")
            url_info = find_sitemap.parse_sitemap(map)
            print("Found {0} urls".format(len(url_info)))
            new_urls = []
            for u in url_info:
                new_urls.append(u)
                print(u)
links.csv looks like the following, with just one column:
https://site1.com/
https://site2.com/
https://site3.com/
I printed domain and strdomain and even added the word "this" next to the variable domain so you can see the space being produced clearly:
Here is the error I receive in full when running (you will notice there is no leading space within the url after I've copied and pasted it from the terminal into here; however, I provide an image of my terminal below so you can see it):
Traceback (most recent call last):
  File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 358, in <module>
    main()
  File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/map_website.py", line 318, in main
    map = find_sitemap.get_sitemap(domain_rem+"sitemap.xml")
  File "/Users/natehurwitz/PROJECTS/axis/axis/apps/axisDataFinder/find_sitemap.py", line 5, in get_sitemap
    get_url = requests.get(url)
  File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 72, in get
    return request('get', url, params=params, **kwargs)
  File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/api.py", line 58, in request
    return session.request(method=method, url=url, **kwargs)
  File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 522, in request
    resp = self.send(prep, **send_kwargs)
  File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 636, in send
    adapter = self.get_adapter(url=request.url)
  File "/Users/natehurwitz/Library/Caches/pypoetry/virtualenvs/axis-eSvach19-py3.9/lib/python3.9/site-packages/requests/sessions.py", line 727, in get_adapter
    raise InvalidSchema("No connection adapters were found for '%s'" % url)
requests.exceptions.InvalidSchema: No connection adapters were found for 'https://blkgrn.com/sitemap.xml'
Here is where you can see the leading space that occurs
Here is the code for "find_sitemap.py":
from bs4 import BeautifulSoup
import requests

def get_sitemap(url):
    get_url = requests.get(url)
    if get_url.status_code == 200:
        return get_url.text
    else:
        print('Unable to fetch sitemap: %s.' % url)

def process_sitemap(s):
    soup = BeautifulSoup(s, "lxml")
    result = []
    for loc in soup.findAll('loc'):
        item = {}
        item['loc'] = loc.text
        item['tag'] = loc.parent.name
        if loc.parent.lastmod is not None:
            item['lastmod'] = loc.parent.lastmod.text
        if loc.parent.changeFreq is not None:
            item['changeFreq'] = loc.parent.changeFreq.text
        if loc.parent.priority is not None:
            item['priority'] = loc.parent.priority.text
        result.append(item)
    return result

def is_sub_sitemap(s):
    if s['loc'].endswith('.xml') and s['tag'] == 'sitemap':
        return True
    else:
        return False

def parse_sitemap(s):
    sitemap = process_sitemap(s)
    result = []
    while sitemap:
        candidate = sitemap.pop()
        if is_sub_sitemap(candidate):
            sub_sitemap = get_sitemap(candidate['loc'])
            for i in process_sitemap(sub_sitemap):
                sitemap.append(i)
        else:
            result.append(candidate)
    return result
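For anyone chasing the same thing, here is a minimal diagnostic sketch (standard library only) that names every code point on each line of links.csv, so an invisible character such as a BOM or zero-width space shows up by name rather than as a blank:

import unicodedata

with open('links.csv') as f:
    for line in f:
        # Name each character; invisible ones get a distinct Unicode name
        for ch in line.rstrip('\n'):
            print(repr(ch), hex(ord(ch)), unicodedata.name(ch, 'UNKNOWN'))
        print('---')

If this prints ZERO WIDTH NO-BREAK SPACE ('\ufeff', a UTF-8 BOM), note that strip() will not remove it because it is not classified as whitespace; reopening the file with open('links.csv', encoding='utf-8-sig') consumes a leading BOM, and lstrip('\ufeff') handles stray ones.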
I am working with a Star Wars API from http://swapi.co/api/. I can connect to it just fine and my project is coming along fine. However, I am running into the following error: IndexError: list index out of range. Looking at other Stack Overflow questions, it appears this could be an off-by-one error. I am not sure how to fix it in my program. Here is the code:
url = ('http://swapi.co/api/' + str(view))
# Setting up a get request to pull the data from the URL
r = requests.get(url)
if r.status_code == 200:
    print("status_code", r.status_code)
else:
    print("Sorry it appears your connection failed!")

# Storing the API response in a variable
response_dict = r.json()
print("There are currently", response_dict['count'], "items to view")

repo_dicts = response_dict['results']

num = 0
while num < response_dict['count']:
    if view == 'films':
        repo_dict = repo_dicts[num]['title']
        print(str(num) + " " + repo_dict)
    elif view == 'starships':
        repo_dict = repo_dicts[num]['name']
        print(str(num) + " " + repo_dict)
    num += 1
The line giving me the problem is in that elif view == 'starships' branch. If you go to the API you can see categories like films, people, starships, etc. All of the categories except films have more than 10 items in them. I also notice that if I go to http://swapi.co/api/starships/4/ there is no detail found. Could the fact that some of the categories have no data be causing my problem? Thank you for any insight!
Here is the traceback error message:
Traceback (most recent call last):
  File "main.py", line 102, in <module>
    main()
  File "main.py", line 98, in main
    began()
  File "main.py", line 87, in began
    connect(view)
  File "main.py", line 31, in connect
    repo_dict = repo_dicts[num]['name']
IndexError: list index out of range
Iterate through the results you actually have, using a for-each loop (with enumerate to keep the index), like this:
for num, item in enumerate(repo_dicts):
    if view == 'films':
        repo_dict = item['title']
        print(str(num) + " " + repo_dict)
    elif view == 'starships':
        repo_dict = item['name']
        print(str(num) + " " + repo_dict)
The reason is that the API returns only 10 items in response_dict['results'], while response_dict['count'] is 37, so indexing past the tenth item fails. Consult the API documentation on why this happens; my guess is that this is pagination.
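If you want all of the items rather than just the first page, here is a minimal sketch of following the pagination (assuming, as SWAPI's responses do, that each page carries a 'next' URL that is null on the last page):

import requests

def fetch_all(view):
    url = 'http://swapi.co/api/' + str(view)
    results = []
    while url:
        page = requests.get(url).json()
        results.extend(page['results'])  # accumulate this page's items
        url = page.get('next')           # None once the last page is reached
    return results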
I'm trying to build a simple Python script to count how many notes each user has entered in a Highrise CRM system in the last 365 days. I have created a script that works for a tiny data set (a Highrise system with only 10 notes), but it times out on larger data sets (I assume because my script is horribly inefficient, due to my lack of Python skills).
I am working on this using Nitrous.io for the environment, with Python 3.3.
I'm using the Highton wrapper for the Highrise API calls. (I haven't figured out how to read the API key in from a file successfully, but I can get it to work by typing the API key and username in directly; tips here would be useful, but my big focus is getting the script to run on a production-size Highrise environment.)
Can anyone offer recommendations on how to do this more elegantly/correctly?
My Python script is:
# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command: source py3env/bin/activate
# Purpose: Count activity by Highrise CRM user in the last 365 days
from highton import Highton
from datetime import date, datetime, timedelta

# Initialize Highrise instance
#keyfile = open('highrisekeys.txt', 'r')
#highrise_key = keyfile.readline()
#highrise_user = keyfile.readline()
#print('api key = ', api_key, 'user = ', api_user)
high = Highton(
    api_key = 'THIS_IS_A_SECRET',
    user = 'SECRET'
)

users = high.get_users()
#print('users is type: ', type(users))
#for user in users:
#    print('Users: ', user.name)

people = high.get_people()
#print('people is type: ', type(people))

notes = []
tmp_notes = []
for person in people:
    #print('Person: ', person.first_name, person.last_name)
    #person_highrise_id = person.highrise_id
    #print(person.last_name)
    tmp_notes = high.get_person_notes(person.highrise_id)
    if (type(tmp_notes) is list):
        notes.extend(high.get_person_notes(person.highrise_id))  # No quotes for person_highrise_id in ()'s
        #print('Notes is type ', type(notes), ' for ', person.first_name, ' ', person.last_name)
#print('total number of notes is ', len(notes))

for user in users:
    #print(user.name, ' has ', notes.author_id.count(user.highrise_id), ' activities')
    counter = 0
    for note in notes:
        if (note.author_id == user.highrise_id) and (note.created_at > datetime.utcnow() + timedelta(days = -365)):
            counter += 1
    print(user.name, ' has performed ', counter, ' activities')
The error message I got was:
Traceback (most recent call last):
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen
    body=body, headers=headers)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request
    self._validate_conn(conn)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn
    conn.connect()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect
    conn = self._new_conn()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
socket.gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 370, in send
    timeout=timeout
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 597, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/retry.py", line 245, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/packages/six.py", line 309, in reraise
    raise value.with_traceback(tb)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen
    body=body, headers=headers)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request
    self._validate_conn(conn)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn
    conn.connect()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect
    conn = self._new_conn()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
requests.packages.urllib3.exceptions.ProtocolError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "highrise-analysis.py", line 35, in <module>
    tmp_notes = high.get_person_notes(person.highrise_id)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 436, in get_person_notes
    return self._get_notes(subject_id, 'people')
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 433, in _get_notes
    highrise_type, subject_id)), Note)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 115, in _get_data
    content = self._get_request(endpoint, params).content
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 44, in _get_request
    params=params,
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 69, in get
    return request('get', url, params=params, **kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 50, in request
    response = session.request(method=method, url=url, **kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 465, in request
    resp = self.send(prep, **send_kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 573, in send
    r = adapter.send(request, **kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 415, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))
Problem solved: the Highrise API is rate-limited to 500 requests per 10-second period from the same IP address for the same account, and I was exceeding that while extracting the data. To resolve this, I added a time.sleep(.5) call to pause between each note data-pull per person, to stay under the rate-limit threshold.
In addition, I broke the code into 2 separate functions:
1. Extract the users, people, and notes data and store them as local files with pickle, so I don't need to pull the data each time I want to do some analysis
2. Perform analysis on the extracted pickle files
I also needed to add a try / except KeyError conditional, as some notes were created by Highrise users who are no longer active (people who left the company).
Here's the revised code:
# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command: source py3env/bin/activate
# Purpose: Count activity by Highrise CRM user in the last 365 days
from highton import Highton
from datetime import date, datetime, timedelta
import time
import pickle

# ===================================================================
def Create_Notes_Backup(highrise_key, highrise_user, notesfile, userfile, peoplefile, trailing_days = 365):
    # Function to create new Notes backup file of Highrise instance (this can take a while)
    print('Entered Create_Notes_Backup function')
    high = Highton(api_key = highrise_key, user = highrise_user)  # Connect to API
    print('Connected to Highrise')
    users = high.get_users()
    print('Pulled ', len(users), ' users')
    people = high.get_people()
    print('Pulled ', len(people), ' people')
    notes = []
    tmp_notes = []
    print('Started creating notes array')
    for person in people:
        tmp_notes = high.get_person_notes(person.highrise_id)
        time.sleep(.5)  # Pause per API limits https://github.com/basecamp/highrise-api
        if (type(tmp_notes) is list):
            print('Pulled ', len(tmp_notes), ' notes for ', person.first_name, ' ', person.last_name)
            if tmp_notes[0].created_at > datetime.utcnow() + timedelta(days = -trailing_days):
                notes.extend(tmp_notes)  # Reuse the list fetched above rather than calling the API a second time
    print('Finished creating notes array')
    # Final Step: Export lists into pickle files
    with open(notesfile, 'wb') as f:
        pickle.dump(notes, f)
    with open(userfile, 'wb') as g:
        pickle.dump(users, g)
    with open(peoplefile, 'wb') as h:
        pickle.dump(people, h)
    print('Exported lists to *.bak files')
# ===================================================================
def Analyze_Notes_Backup(notesfile, userfile, peoplefile, trailing_days = 365):
    # Function to analyze notes backup:
    # 1. Count number of activities in last trailing_days days
    # 2. Identify date of last note update
    print('Entered Analyze_Notes_Backup function')
    notes = []
    users = []
    people = []
    # Load the lists
    with open(notesfile, 'rb') as a:
        notes = pickle.load(a)
    with open(userfile, 'rb') as b:
        users = pickle.load(b)
    with open(peoplefile, 'rb') as c:
        people = pickle.load(c)
    # Start counting
    user_activity_count = {}
    last_user_update = {}
    for user in users:
        user_activity_count[user.highrise_id] = 0
        last_user_update[user.highrise_id] = date(1901, 1, 1)
    print('Started counting user activity by note')
    for note in notes:
        if note.created_at > datetime.utcnow() + timedelta(days = -trailing_days):
            #print('Note created ', note.created_at, ' by ', note.author_id, ' regarding ', note.body)
            try:
                user_activity_count[note.author_id] += 1
            except KeyError:
                print('User no longer exists')
            try:
                if (note.created_at.date() > last_user_update[note.author_id]):
                    last_user_update[note.author_id] = note.created_at.date()
            except KeyError:
                print('...')
    print('Finished counting user activity by note')
    print('=======================================')
    f = open('highrise-analysis-output.txt', 'w')
    f.write('Report run on ')
    f.write(str(date.today()))
    f.write('\n Highrise People Count: ')
    f.write(str(len(people)))
    f.write('\n ============================ \n')
    for user in users:
        print(user.name, ' has performed ', user_activity_count[user.highrise_id], ' activities')
        f.write(str.join(' ', (user.name, ', ', str(user_activity_count[user.highrise_id]))))
        if last_user_update[user.highrise_id] == date(1901, 1, 1):
            print(user.name, ' has not updated Highrise in the last 365 days')
            f.write(', NO_UPDATES\n')
        else:
            print(user.name, ' last updated Highrise ', last_user_update[user.highrise_id])
            f.write(str.join(' ', (', ', str(last_user_update[user.highrise_id]), '\n')))
    all_done = time.time()
    f.close()
# ===================================================================
if __name__ == "__main__":
    trailing_days = 365  # Number of days back to monitor
    # Production Environment Analysis
    Create_Notes_Backup(MY_API_KEY, MY_HIGHRISE_USERID, 'highrise-production-notes.bak', 'highrise-production-users.bak', 'highrise-production-people.bak', trailing_days = 365)  # Production Environment
    Analyze_Notes_Backup('highrise-production-notes.bak', 'highrise-production-users.bak', 'highrise-production-people.bak', trailing_days = 365)
Mike,
What you are doing is going through all the users and, for each user, going through all of the notes. Once you have the user, there should be a way to query for just the notes that belong to that user; you can probably include the date range in the query and just do a .count to see how many records match.
If you can't search notes by user, then go through the notes once and store each userId and the count of that user's notes that match your criteria in a dictionary, as sketched below. Then you can match the userIds up with the users table.
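A minimal sketch of that single-pass tally (assuming the users and notes lists already pulled in your script):

from collections import Counter
from datetime import datetime, timedelta

cutoff = datetime.utcnow() - timedelta(days=365)
# One pass over the notes: tally notes newer than the cutoff, keyed by author_id
activity = Counter(note.author_id for note in notes if note.created_at > cutoff)
for user in users:
    # Counter returns 0 for users with no matching notes
    print(user.name, 'has performed', activity[user.highrise_id], 'activities')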
Good luck
I have a simple script using requests to validate a list of emails. Relevant code:
def ___process_email(email, output_file=None):
    profile = request(email)
    if profile and profile.success != 'nothing_useful':
        logger.info('Found match for {0}'.format(email))
        print(profile)
        if output_file:
            output_file.write(str(profile) + '\n')
    else:
        print("No information found\n")
This ran through 5 loops successfully, then threw:
Traceback (most recent call last):
  File "app.py", line 147, in <module>
    main()
  File "app.py", line 141, in main
    ___process_email(arg, output)
  File "app.py", line 107, in ___process_email
    if profile and profile.success != 'nothing_useful':
AttributeError: 'unicode' object has no attribute 'success'
Here's the model:
from textwrap import dedent

class Profile(object):
    def __init__(self, person):
        if person:
            self.name = person.get('name')
            self.jobinfo = [
                (occupation.get('job_title'), occupation.get('company'))
                for occupation in person.get('occupations', [])
            ]
            self.memberships = [
                (membership.get('site_name'), membership.get('profile_url'))
                for membership in person.get('memberships', [])
            ]
            self.success = person.get('success')

    def __str__(self):
        return dedent("""
            Name: {0}
            {1}
            {2}
        """).format(
            self.name,
            "\n".join(
                "{0} {1}".format(title, company)
                for title, company in self.jobinfo),
            "\n".join(
                "\t{0} {1}".format(site_name, url)
                for site_name, url in self.memberships)
        )
Request:
import requests

def request(email):
    status_url = STATUS_URL.format(email)
    response = requests.get(status_url).json()
    session_token = response.get('session_token')
    # fail gracefully if there is an error
    if 'error' in response:
        return response['error']
    elif response['status'] == 200 and session_token:
        logger.debug('Session token: {0}'.format(session_token))
        url = URL.format(email)
        headers = {'X-Session-Token': session_token}
        response = requests.get(url, headers=headers).json()
        if response.get('success') != 'nothing_useful':
            return Profile(response.get('contact'))
    return {}
Does anyone see why my strings are unicode? Thanks.
If there is an error in the response, you return the error string:
if 'error' in response:
    return response['error']
That's your unicode value right there. Note that the same function returns either the 'error' value, a new Profile() instance, or an empty dictionary. You may want to make this more consistent: return only Profile() instances, and None for failures.
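For instance, a minimal sketch of that consistent variant inside request() (the warning log is illustrative, not from the original code):

if 'error' in response:
    logger.warning('Lookup failed for {0}: {1}'.format(email, response['error']))
    return None  # callers can now rely on getting a Profile or None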
Alternatively, instead of returning the error string, raise an exception and handle it in your ___process_email() function:
class EmailValidationError(Exception):
    pass
and in your request() function:
if 'error' in response:
    raise EmailValidationError(response['error'])
then handle this in ___process_email() with something like:
try:
    profile = request(email)
    if profile and profile.success != 'nothing_useful':
        logger.info('Found match for {0}'.format(email))
        print(profile)
        if output_file:
            output_file.write(str(profile) + '\n')
    else:
        print("No information found\n")
except EmailValidationError:
    # Do something here
    pass