pyperclip not working for large texts - python

I am using Python 3.5 on macOS Sierra. I am working through the course Automate the Boring Stuff with Python and am having a problem with pyperclip. The code below works when I copy only 4 lines of the PDF; however, when I copy all of the text, I get the error message shown below.
Could someone help me? Is it a problem with pyperclip? My code? My computer?
Error message:
Traceback (most recent call last):
File "/Users/ericgolden/Documents/MyPythonScripts/phoneAndEmail.py", line 35, in <module>
text = pyperclip.paste()
File "/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/pyperclip/clipboards.py", line 22, in paste_osx
return stdout.decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xd5 in position 79: invalid continuation byte
Here is my code:
#! python3
import re, pyperclip
# Create a regex for phone numbers.
# findall() returns one tuple per match; element 0 is the outermost group,
# i.e. the whole matched phone number.
phoneRegex = re.compile(r'''
    # 415-555-0000, 555-0000, (415) 555-0000, 555-0000 ext 12345, ext. 12345, x12345
    (
    ((\d\d\d) | (\(\d\d\d\)))?   # area code optional
    (\s|-)                       # first separator
    \d\d\d                       # first 3 digits
    -                            # separator
    \d\d\d\d                     # last 4 digits
    (((ext(\.)?\s)|x)            # extension word-part optional
    (\d{2,5}))?                  # extension number-part optional
    )
    ''', re.VERBOSE)

# Create a regex for email addresses.
# The '@' must appear as a literal in the pattern: in VERBOSE mode a '#'
# starts a comment, so a line such as "# # # symbol" matches nothing.
# There are no groups here, so findall() returns plain strings.
emailRegex = re.compile(r'''
    # some.+_thing@example.com
    [a-zA-Z0-9_.+]+      # name part
    @                    # @ symbol
    [a-zA-Z0-9_.+]+      # domain name part
    ''', re.VERBOSE)

# Get the text off the clipboard.
# NOTE(review): the UnicodeDecodeError in the traceback is raised inside
# pyperclip's paste_osx() while it decodes the clipboard bytes as UTF-8, i.e.
# before any of the code below sees the text. Presumably the clipboard tool
# emitted non-UTF-8 bytes (locale setting?) - confirm.
text = pyperclip.paste()

# Extract the email/phone from this text.
extractedPhone = phoneRegex.findall(text)
extractedEmail = emailRegex.findall(text)
allPhoneNumbers = [groups[0] for groups in extractedPhone]

# Copy the extracted email/phone to the clipboard.
results = '\n'.join(allPhoneNumbers) + '\n' + '\n'.join(extractedEmail)
pyperclip.copy(results)

can you try to change the import to: import pyperclip, re ?
Also, here is just an example of my code if it helps, since I am using the same book.
#! python3
# phoneAndEmail.py - Finds phone numbers and email addresses on the clipboard.
import pyperclip, re
# Phone numbers. findall() yields 9-tuples: index 0 is the whole match,
# 1 the area code, 3 the first three digits, 5 the last four digits and
# 8 the extension digits (empty string when absent).
phoneRegex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?                # area code
    (\s|-|\.)?                        # separator
    (\d{3})                           # first 3 digits
    (\s|-|\.)                         # separator
    (\d{4})                           # last 4 digits
    (\s*(ext|x|ext\.)\s*(\d{2,5}))?   # extension ("ext." needs an escaped dot)
    )''', re.VERBOSE)

# Create email regex.
# The '@' must be a literal in the pattern; a bare '#' would start a
# VERBOSE-mode comment and drop the symbol from the regex entirely.
emailRegex = re.compile(r'''(
    [a-zA-Z0-9._%+-]+      # username
    @                      # @ symbol
    [a-zA-Z0-9.-]+         # domain name
    (\.[a-zA-Z]{2,4})      # dot-something
    )''', re.VERBOSE)

# Find matches in clipboard text.
text = str(pyperclip.paste())
matches = []
for groups in phoneRegex.findall(text):
    # Normalise every number to AAA-BBB-CCCC regardless of the separators used.
    phoneNum = '-'.join([groups[1], groups[3], groups[5]])
    if groups[8] != '':
        phoneNum += ' x' + groups[8]
    matches.append(phoneNum)
# Index 0 of each email tuple is the full address.
matches.extend(groups[0] for groups in emailRegex.findall(text))

# Copy results to the clipboard.
if matches:
    pyperclip.copy('\n'.join(matches))
    print('Copied to clipboard:')
    print('\n'.join(matches))
else:
    print('No phone numbers or email addresses found.')

Related

Tune a Regex to match a string *only* at the beginning of a message

I have the following code, which matches every rdar://problem ID (one or more) in commit_msg. I only want to match the IDs at the beginning of the message; note that there can be more than one rdar at the beginning. How can I change the regex to do that?
# -*- coding: utf-8 -*-
import re
# Sample commit message: two rdar IDs in the subject lines and a third one
# further down in the body.
commit_msg = """
<rdar://problem/19391231> This is the subject line1
<rdar://problem/11121314> This is the subject line2
[Problem]
The Problem description
[Solution]
This is the Solutions section
[Recommended Tests]
This is the Recommended Tests <rdar://problem/12345678> Text
Change-Id: Ibbafa780adb2502d470f12d0280ddb0049c727c4
Reviewed-on: https://tech-gerrit.sd.company.com/17954
Tested-by: Username1 <username1@company.com>
Build-watchOS: service account <serviceaccount@company.com>
Reviewed-by: username2 <username2@company.com>
"""
# Raw string so that \S, \/ and \d reach the regex engine untouched instead of
# being treated as (invalid) string escapes. The pattern itself is unchanged:
# it collects the 8-digit ID of every rdar link on a line that does not
# mention "Revert"/"revert" later on.
m = re.findall(r"(?!.*(?:Revert|revert))[\S]*(?:rdar:\/\/problem\/)(\d{8,8})", commit_msg)
print(m)
CURRENT OUTPUT:-
['19391231', '11121314', '12345678']
EXPECTED OUTPUT:-
['19391231', '11121314']
Going off your conversation with @ShadowRanger below, how about this?
import re
# Sample commit message: two rdar IDs in the subject lines and a third one
# further down in the body.
commit_msg = """
<rdar://problem/19391231> This is the subject line1
<rdar://problem/11121314> This is the subject line2
[Problem]
The Problem description
[Solution]
This is the Solutions section
[Recommended Tests]
This is the Recommended Tests <rdar://problem/12345678> Text
Change-Id: Ibbafa780adb2502d470f12d0280ddb0049c727c4
Reviewed-on: https://tech-gerrit.sd.company.com/17954
Tested-by: Username1 <username1@company.com>
Build-watchOS: service account <serviceaccount@company.com>
Reviewed-by: username2 <username2@company.com>
"""
# Only search the text before the first '[' (the start of the "[Problem]"
# section), so rdar links in the body are never seen by the regex.
# Raw string so that \S, \/ and \d reach the regex engine untouched.
m = re.findall(r"(?!.*(?:Revert|revert))[\S]*(?:rdar:\/\/problem\/)(\d{8,8})", commit_msg.split('[')[0])
print(m)

Why is this code not working? (Python)

The code works fine when the text on the clipboard contains no email address or phone number, i.e., when the expected result is "Nothing Found".
In the other case it does not work. It shows this error:
AttributeError: 'str' object has no attribute 'matches'
#! python3
# contactDetails.py - Finds email and phone number from a page
import pyperclip, re
# Matches a literal '+', two digits, a hyphen and ten digits; the single
# group means findall() returns a list of strings.
phoneRegex = re.compile(r'(\+\d{2}-\d{10})') # Phone Number Regex
# email Regex
# NOTE(review): in VERBOSE mode '#' starts a comment, so the "# # # symbol"
# line adds nothing to the pattern; presumably an '@' was lost here - confirm.
# NOTE(review): the ']' after {2,4} is outside any character class, so the
# pattern requires a literal ']' at that position.
emailRegex = re.compile(r'''(
[a-zA-Z0-9._]+ # username
# # # symbol
[a-zA-Z0-9._]+ # domain name
(\.[a-zA-Z]{2,4}])# dot-something
)''', re.VERBOSE)
text = str(pyperclip.paste())
matches = []
# NOTE(review): indentation was lost in this listing; the statements after
# each for/if/else line are presumably the bodies of those statements.
# The loop variable 'groups' is not used: findall() is simply run again and
# its whole result (a list) is appended, so 'matches' receives lists rather
# than strings.
for groups in phoneRegex.findall(text):
phoneNum=phoneRegex.findall(text)
matches.append(phoneNum)
# emailRegex has two groups, so each item is a tuple; index 0 is the outer group.
for groups in emailRegex.findall(text):
matches.append(groups[0])
if len(matches) >0:
# '\n'.matches is an attribute lookup on a str object, which is what raises
# "AttributeError: 'str' object has no attribute 'matches'"; compare with the
# '\n'.join(matches) call two lines below.
pyperclip.copy('\n'.matches)
print('Copied to Clipboard:')
print('\n'.join(matches))
else:
print('Nothing Found')
As was mentioned in the comment by Wiktor Stribiżew, the problem is in this line
pyperclip.copy('\n'.matches)
In particular, it is here
'\n'.matches
The first item, '\n', is a string object, and it has no attribute called matches. What you want is to call .join, as you did two lines later, i.e.
pyperclip.copy('\n'.join(matches))

line.replace is replacing with value even for a portion of key match instead of entire key match

#!/usr/bin/python
import socket
import subprocess
# Resolve this machine's hostname to an IP address string.
ip=socket.gethostbyname(socket.gethostname())
# Maps a config key to the complete replacement line for that key.
# Note that the second key, 'baseUrl', is a substring of the first one.
reps= {'application.baseUrl': 'application.baseUrl="http://'+ip+':9000"',
'baseUrl': 'baseUrl="http://'+ip+':9000"'
}
# 'r+' opens the file for reading and writing: all lines are read into
# memory, then the file is rewound and emptied so it can be rewritten.
f = open('/opt/presentation/conf/application.conf','r+')
lines = f.readlines()
f.seek(0)
f.truncate()
# NOTE(review): indentation was lost in this listing; the loop nesting below
# is presumably for-line > for-key > if > replace, with the write once per
# line - confirm.
for line in lines:
for key in reps.keys():
# 'key in line' is a substring test, so 'baseUrl' is also found in a line
# that contains 'application.baseUrl'.
if key in line:
# Replacing 'line' within itself swaps the entire line (including its
# trailing newline) for the replacement text.
line = line.replace(line, reps[key])
# Lines returned by readlines() keep their trailing '\n', so lines that were
# not replaced are written with an additional newline here.
f.write(line+'\n')
f.close()
Issue: it replaces application.baseUrl with baseUrl="http://'+ip+':9000" instead of application.baseUrl="http://'+ip+':9000", because baseUrl is contained in application.baseUrl.
How do I replace a key only if it matches the entire key and not just a portion of it?
file name : abc.config
application.baseUrl="http://ip:9000"
baseUrl="http://ip:9000"
remote {
log-received-messages = on
netty.tcp {
hostname = "ip"
port = 9999
send-buffer-size = 512000b
receive-buffer-size = 512000b
maximum-frame-size = 512000b
server-socket-worker-pool {
pool-size-factor = 4.0
pool-size-max = 64
}
client-socket-worker-pool {
pool-size-factor = 4.0
pool-size-max = 64
}
}
}
Since you want an exact matching instead of checking:
if key in line:
you should do:
if key == line[0:len(key)]:
or better yet, as Adam suggested in the comments below:
if line.startswith(key):
You could use regular expressions instead:
# re.sub() needs the text to operate on as its third argument and returns the
# new string. NOTE: only the key itself is matched, so whatever follows the
# key on the line is left in place after the inserted text.
line = re.sub(r"\b((?:application\.)?baseUrl)\b", r"\1=http://{}:9000".format(ip), line)
This will match application.baseUrl, replacing with application.baseUrl=http://IP_ADDRESS_HERE:9000, and baseUrl, replacing with baseUrl=http://IP_ADDRESS_HERE:9000
Regular expression explained:
re.compile(r"""
\b # a word boundary
( # begin capturing group 1
(?: # begin non-capturing group
application\. # application and a literal dot
)? # end non-capturing group and allow 1 or 0 occurrences
baseUrl # literal baseUrl
) # end capturing group 1
\b # a word boundary""", re.X)
and the replacement
re.compile(r"""
\1 # the contents of capturing group 1
=http:// # literal
{} # these are just brackets for the string formatter
:9000 # literal""".format(ip), re.X)
# resulting in `r"\1=http://" + ip + ":9000"` precisely.

Find strings that begin with a '#' and create a link

I want to check whether a string (a tweet) begins with a '#' (i.e. is a hashtag) or not, and if so create a link.
Below is what I've tried so far but it doesn't work (error on the last line).
How can I fix this and will the code work for the purpose?
# NOTE(review): the square brackets make the whole pattern one character
# class, so it matches a single character (a backspace, '#', a word character
# or '+') rather than a '#' followed by a word. Inside a class the '#' is
# literal; the second '#' starts a VERBOSE-mode comment.
tag_regex = re.compile(r"""
[\b#\w\w+] # hashtag found!""", re.VERBOSE)
message = raw_message
# NOTE(review): indentation was lost in this listing; the replace() line is
# presumably the loop body. 'url' is not defined anywhere in this snippet and
# the loop variable 'tag' is never used.
for tag in tag_regex.findall(raw_message):
message = message.replace(url, '' + message + '')
>>> msg = '#my_tag the rest of my tweet'
>>> re.sub('^#(\w+) (.*)', r'\2', msg)
'the rest of my tweet'
>>>

search and replace text inline in file in Python

I am trying to convert a file which contains ip address in the traditional format to a file which contains ip address in the binary format.
The file contents are as follows.
src-ip{ 192.168.64.54 }
dst-ip{ 192.168.43.87 }
The code I have is as follows.
import re
from decimal import *
# Input file with dotted-decimal addresses. 'output' is opened for writing
# but nothing in this snippet writes to it.
filter = open("filter.txt", "r")
output = open("format.txt", "w")
# NOTE(review): indentation was lost in this listing; the statements below
# are presumably nested inside the for loops - confirm.
for line in filter:
# Regex source for one octet; four copies joined by an escaped dot form the
# address pattern, which has four capturing groups.
bytePattern = "([01]?\d\d?|2[0-4]\d|25[0-5])"
regObj = re.compile("\.".join([bytePattern]*4))
for match in regObj.finditer(line):
# The four captured octets are unpacked here but not used afterwards.
m1,m2,m3,m4 = match.groups()
# The first argument is built from the literal '123.123.123.123', not from the
# match, and the second argument is the regex source text. str.replace() only
# substitutes literal substrings.
line = line.replace((' '.join([bin(256 + int(x))[3:] for x in '123.123.123.123'.split('.')])),bytePattern)
# Python 2 print statement.
print line
The line.replace() call does not seem to work. The expression I pass as its first argument works fine on its own (i.e., it converts an IP address into binary format),
but line.replace() itself has no effect. Any help or clues as to why this happens would be appreciated.
def _octets_to_binary(token):
    """Return *token* with each octet as 8 binary digits if it is a dotted quad.

    A token that is not four dot-separated decimal numbers in the range
    0-255 (for example 'src-ip{' or '}') is returned unchanged.
    """
    octets = token.split('.')
    if len(octets) != 4 or not all(o.isdigit() and int(o) <= 255 for o in octets):
        return token
    return '.'.join(format(int(o), '08b') for o in octets)


# Convert every address on every non-blank line, wherever it appears, instead
# of relying on fixed token positions (which fails on lines that hold a
# single address). Tokens are re-joined with single spaces.
with open('filter.txt') as filter_, open('format.txt', 'w') as output:
    for line in filter_:
        if line != '\n':
            output.write(' '.join(_octets_to_binary(t) for t in line.split()) + '\n')
Why not take advantage of re.sub() instead, to both make your replacements easier and simplify your regex?
import re
from decimal import *
# Matches any run of digits and dots; the callback below decides whether a
# run really is a dotted-quad address.
pattern = re.compile(r'[\d.]+')


def convert_match_to_binary(match):
    """Return the replacement text for one match of ``pattern``.

    If the matched text is four dot-separated decimal octets (each 0-255),
    return the octets as space-separated 8-bit binary strings; any other
    match (a lone number, a stray dot, an out-of-range value) is returned
    unchanged so that unrelated digits in the line are left alone.
    """
    text = match.group(0)
    octets = text.split('.')
    if len(octets) != 4 or not all(o.isdigit() and int(o) <= 255 for o in octets):
        return text
    return ' '.join(format(int(o), '08b') for o in octets)


def main():
    """Copy filter.txt to format.txt with every IPv4 address in binary form."""
    with open("filter.txt", "r") as filter_, open("format.txt", "w") as output:
        for line in filter_:
            output.write(pattern.sub(convert_match_to_binary, line))


if __name__ == "__main__":
    main()
Your code is very odd:
line = line.replace(
(' '.join([bin(256 + int(x))[3:] for x in '123.123.123.123'.split('.')])),
bytePattern
)
The first argument is a constant that evaluates to '01111011 01111011 01111011 01111011', and bytePattern is the regex "([01]?\d\d?|2[0-4]\d|25[0-5])", so it's effectively this:
line = line.replace('01111011 01111011 01111011 01111011', "([01]?\d\d?|2[0-4]\d|25[0-5])")
This won't do anything if your file doesn't have 01111011 01111011 01111011 01111011 in it.
The .replace() method only replaces literal strings, not regexes.
If it is any help, here is my old code from DaniWeb for converting an IP number between a dotted-number string and an integer, with some error checks added.
def ipnumber(ip):
    """Convert a dotted-quad IPv4 string to its integer value.

    Trailing whitespace (such as a newline) is ignored. The first octet
    becomes the most significant byte of the result.

    Raises:
        ValueError: if the string does not contain exactly three dots, if a
            part is not an integer, or if a part is outside 0-255.
    """
    if ip.count('.') != 3:
        raise ValueError('IP string with wrong number of dots')
    octets = [int(part) for part in ip.rstrip().split('.')]
    if any(octet < 0 or octet > 255 for octet in octets):
        raise ValueError('IP part of wrong value: %s' % octets)
    number = 0
    for octet in octets:
        # Shift the accumulated value one byte left and append the next octet.
        number = (number << 8) + octet
    return number
def ipstring(ip):
    """Convert an integer in the range 0..2**32-1 to a dotted-quad string.

    Raises:
        ValueError: if ``ip`` does not fit in 32 bits or is negative. The
            check is made on the input because each remainder of
            ``divmod(ip, 256)`` is always within 0-255 and cannot reveal an
            out-of-range value.
    """
    if not 0 <= ip <= 0xFFFFFFFF:
        raise ValueError("IP number %i is not valid." % ip)
    octets = []
    for _ in range(4):
        # Peel off the least significant byte first.
        ip, octet = divmod(ip, 256)
        octets.append(str(octet))
    # Bytes were collected low-to-high; the dotted form is high-to-low.
    return '.'.join(reversed(octets))
# Demo input: the first address is deliberately invalid (544 > 255) to
# exercise the error check in ipnumber().
inp = "src-ip{ 192.168.64.544 } dst-ip{ 192.168.43.87 }"
found = ' '
while found:
    # Drop everything up to and including the next '-ip{ ' marker ...
    _, found, ip = inp.partition('-ip{ ')
    # ... then split off the address; the remainder is scanned next time.
    # 'found' becomes '' once no closing ' }' is left, which ends the loop.
    ip, found, inp = ip.partition(' }')
    if ip:
        try:
            print(ipnumber(ip))
        except ValueError as err:
            # Report the bad address and carry on with the remaining ones.
            print('skipping %r: %s' % (ip, err))

Categories

Resources