How to take an element after a re.compile? - python

Using the "re" i compile the datas of a handshake like this:
piece_request_handshake = re.compile('13426974546f7272656e742070726f746f636f6c(?P<reserved>\w{16})(?P<info_hash>\w{40})(?P<peer_id>\w{40})')
handshake = piece_request_handshake.findall(hex_data)
Then i print it
I'm unable to add image because i'm new so this is the output:
root#debian:/home/florian/Téléchargements# python script.py
[('0000000000100005', '606d4759c464c8fd0d4a5d8fc7a223ed70d31d7b', '2d5452323532302d746d6e6a657a307a6d687932')]
My question is, how can i take only the second piece of this data that is to say the "hash_info" (the "606d47...") ?
I already tried with the group of re with the following line:
print handshake.group('info_hash')
But the result is an error (sorry again i can't show the screen...):
*root#debian:/home/florian/Téléchargements# python script.py
Exception in thread Thread-1:
Traceback (most recent call last):
File "/usr/lib/python2.7/threading.py", line 552, in __bootstrap_inner
self.run()
File "script.py", line 122, in run
self.p.dispatch(0, PieceRequestSniffer.cb)
File "script.py", line 82, in cb
print handshake.group('info_hash')
AttributeError: 'list' object has no attribute 'group'*
This is the start of my full code for the curious:
import pcapy
import dpkt
from threading import Thread
import re
import binascii
import socket
import time
liste=[]
prefix = '13426974546f7272656e742070726f746f636f6c'
hash_code = re.compile('%s(?P<reserved>\w{16})(?P<info_hash>\w{40})(?P<peer_id>\w{40})' % prefix)
match = hash_code.match()
piece_request_handshake = re.compile('13426974546f7272656e742070726f746f636f6c(?P<aaa>\w{16})(?P<bbb>\w{40})(?P<ccc>\w{40})')
piece_request_tcpclose = re.compile('(?P<start>\w{12})5011')
#-----------------------------------------------------------------INIT------------------------------------------------------------
class PieceRequestSniffer(Thread):
def __init__(self, dev='eth0'):
Thread.__init__(self)
self.expr = 'udp or tcp'
self.maxlen = 65535 # max size of packet to capture
self.promiscuous = 1 # promiscuous mode?
self.read_timeout = 100 # in milliseconds
self.max_pkts = -1 # number of packets to capture; -1 => no limit
self.active = True
self.p = pcapy.open_live(dev, self.maxlen, self.promiscuous, self.read_timeout)
self.p.setfilter(self.expr)
#staticmethod
def cb(hdr, data):
eth = dpkt.ethernet.Ethernet(str(data))
ip = eth.data
#------------------------------------------------------IPV4 AND TCP PACKETS ONLY---------------------------------------------------
#Select Ipv4 packets because of problem with the .p in Ipv6
if eth.type == dpkt.ethernet.ETH_TYPE_IP6:
return
else:
#Select only TCP protocols
if ip.p == dpkt.ip.IP_PROTO_TCP:
tcp = ip.data
src_ip = socket.inet_ntoa(ip.src)
dst_ip = socket.inet_ntoa(ip.dst)
fin_flag = ( tcp.flags & dpkt.tcp.TH_FIN ) != 0
#if fin_flag:
#print "TH_FIN src:%s dst:%s" % (src_ip,dst_ip)
try:
#Return hexadecimal representation
hex_data = binascii.hexlify(tcp.data)
except:
return
#-----------------------------------------------------------HANDSHAKE-------------------------------------------------------------
handshake = piece_request_handshake.findall(hex_data)
if handshake and (src_ip+" "+dst_ip) not in liste and (dst_ip+" "+src_ip) not in liste and handshake != '':
liste.append(src_ip+" "+dst_ip)
print match.group('info_hash')

re.findall() returns a list of tuples, each containing the matching strings that correspond to the named groups in the re pattern. This example (using a simplified pattern) demonstrates that you can access the required item with indexing:
import re
prefix = 'prefix'
pattern = re.compile('%s(?P<reserved>\w{4})(?P<info_hash>\w{10})(?P<peer_id>\w{10})' % prefix)
handshake = 'prefix12341234567890ABCDEF1234' # sniffed data
match = pattern.findall(handshake)
>>> print match
[('1234', '1234567890', 'ABCDEF1234')]
>>> info_hash = match[0][1]
>>> print info_hash
1234567890
But the point of named groups is to provide a way to access the matched values for a named group by name. You can use re.match() instead:
import re
prefix = 'prefix'
pattern = re.compile('%s(?P<reserved>\w{4})(?P<info_hash>\w{10})(?P<peer_id>\w{10})' % prefix)
handshake = 'prefix12341234567890ABCDEF1234' # sniffed data
match = pattern.match(handshake)
>>> print match
<_sre.SRE_Match object at 0x7fc201efe918>
>>> print match.group('reserved')
1234
>>> print match.group('info_hash')
1234567890
>>> print match.group('peer_id')
ABCDEF1234
The values are also available using dictionary access:
>>> d = match.groupdict()
>>> d
{'peer_id': 'ABCDEF1234', 'reserved': '1234', 'info_hash': '1234567890'}
>>> d['info_hash']
'1234567890'
Finally, if there are multiple handshake sequences in the input data, you can use re.finditer():
import re
prefix = 'prefix'
pattern = re.compile('%s(?P<reserved>\w{4})(?P<info_hash>\w{10})(?P<peer_id>\w{10})' % prefix)
handshake = 'blahprefix12341234567890ABCDEF1234|randomjunkprefix12349876543210ABCDEF1234,more random junkprefix1234hellothereABCDEF1234...' # sniffed data
for match in pattern.finditer(handshake):
print match.group('info_hash')
Output:
1234567890
9876543210
hellothere

re.findall will return a list of tuples. The group() call works on Match objects, returned by some other functions in re:
for match in re.finditer(needle, haystack):
print match.group('info_hash')
Also, you might not need findall if you're just matching a single handshake.

Related

TypeError in Python3.10 script

I am trying to do network automation task using python but ran with type error while running the below given script.
I AM STUCK AT SAME KIND OF ERROR, CAN YOU GUYS PLEASE HELP ME OUT HOW TO FIX THIS ?*
durai#durai-virtual-machine:~/Network Automation$ python3 session_details.py
devices list: ['1.1.1.1', '2.2.2.2', '6.6.6.6', '7.7.7.7', '', '']
establishing telnet session: 1.1.1.1 cisco cisco
--- connected to: 1.1.1.1
--- getting version information
Traceback (most recent call last):
File "/home/durai/Network Automation/session_details.py", line 82, in <module>
device_version = get_version_info(session)
File "/home/durai/Network Automation/session_details.py", line 65, in get_version_info
version_output_parts = version_output_lines[1].split(',')
**TypeError: a bytes-like object is required, not 'str'**
durai#durai-virtual-machine:~/Network Automation$
THE BELOW IS THE BLOCK OF CODE WHERE I AM GETTING ERROR !
#-----------------------------------------------------------------------
def get_version_info(session):
print ('--- getting version information')
session.sendline('show version | include Version')
result = session.expect(['>', pexpect.TIMEOUT])
# Extract the 'version' part of the output
version_output_lines = session.before.splitlines()
version_output_parts = version_output_lines[1].split(',')
version = version_output_parts[2].strip()
print ('--- got version: ', version)
return version
#-----------------------------------------------------------------------
FULL CODE FOR YOUR REFERENCE:
#!/usr/bin/python
import re
import pexpect
#-----------------------------------------------------------------------
def get_devices_list():
devices_list = []
file = open('devices', 'r')
for line in file:
devices_list.append( line.rstrip() )
file.close()
print ('devices list:', devices_list)
return devices_list
#-----------------------------------------------------------------------
def connect(ip_address, username, password):
print ('establishing telnet session:', ip_address, username, password)
telnet_command = 'telnet ' + ip_address
# Connect via telnet to device
session = pexpect.spawn('telnet ' + ip_address, timeout=20)
result = session.expect(['Username:', pexpect.TIMEOUT])
# Check for error, if so then print error and exit
if result != 0:
print ('!!! TELNET failed creating session for: ', ip_address)
exit()
# Enter the username, expect password prompt afterwards
session.sendline(username)
result = session.expect(['Password:', pexpect.TIMEOUT])
# Check for error, if so then print error and exit
if result != 0:
print ('!!! Username failed: ', username)
exit()
session.sendline(password)
result = session.expect(['>', pexpect.TIMEOUT])
# Check for error, if so then print error and exit
if result != 0:
print ('!!! Password failed: ', password)
exit()
print ('--- connected to: ', ip_address)
return session
#-----------------------------------------------------------------------
def get_version_info(session):
print ('--- getting version information')
session.sendline('show version | include Version')
result = session.expect(['>', pexpect.TIMEOUT])
# Extract the 'version' part of the output
version_output_lines = session.before.splitlines()
version_output_parts = version_output_lines[1].split(',')
version = version_output_parts[2].strip()
print ('--- got version: ', version)
return version
#-----------------------------------------------------------------------
devices_list = get_devices_list() # Get list of devices
version_file_out = open('version-info-out', 'w')
# Loop through all the devices in the devices list
for ip_address in devices_list:
# Connect to the device via CLI and get version information
session = connect(ip_address, 'cisco', 'cisco')
device_version = get_version_info(session)
session.close() # Close the session
version_file_out.write('IP: '+ip_address+' Version: '+device_version+'\n')
# Done with all devices and writing the file, so close
version_file_out.close()
Python has two different kinds of strings. Normal strings are Unicode, where the individual characters are several bytes long, to handle Unicode characters. Many activities (like networking) need to use byte strings, which are written b"abc".
That's the problem here. The pexpect module is returning a byte string. So, in this line:
version_output_parts = version_output_lines[1].split(',')
version_output_parts is a byte string, but ',' is a Unicode string, and you can't mix them. So, you can either keep it bytes and do this:
version_output_parts = version_output_lines[1].split(b',')
or you can convert it to a Unicode string:
version_output_parts = version_output_parts.decode('utf-8')
version_output_parts = version_output_lines[1].split(b',')
It depends on how much you need to do with the parts.
It's the part where you extract the version that is bugged.
Try using print statements as demonstrated below to check the data types of your variables at each step.
This error suggests that you are using a method that is not available for the given data type e.g. calling a string method on an array or so on.
def get_version_info(session):
print ('--- getting version information')
session.sendline('show version | include Version')
result = session.expect(['>', pexpect.TIMEOUT])
# Extract the 'version' part of the output
version_output_lines = session.before.splitlines()
# print(version_output_lines)
version_output_parts = version_output_lines[1].split(',')
# print(version_output_parts)
version = version_output_parts[2].strip()
# print(version)
print ('--- got version: ', version)
return version

decode binary from xmlrpc python

I'm new to python and xml-rpc , and I'm stuck with decoding binary data coming from a public service :
the service request response with this code is :
from xmlrpc.client import Server
import xmlrpc.client
from pprint import pprint
DEV_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxx'
logFile = open('stat.txt', 'w')
s1 = Server('http://muovi.roma.it/ws/xml/autenticazione/1')
s2 = Server('http://muovi.roma.it/ws/xml/paline/7')
token = s1.autenticazione.Accedi(DEV_KEY, '')
res = s2.paline.GetStatPassaggi(token)
pprint(res, logFile)
response :
{'id_richiesta': '257a369dbf46e41ba275f8c821c7e1e0',
'risposta': {'periodi_aggregazione': <xmlrpc.client.Binary object at 0x0000027B7D6E2588>,
'tempi_attesa_percorsi': <xmlrpc.client.Binary object at 0x0000027B7D9276D8>}}
I need to decode these two binary objects , and I'm stuck with this code :
from xmlrpc.client import Server
import xmlrpc.client
from pprint import pprint
DEV_KEY = 'xxxxxxxxxxxxxxxxxxxxxxxx'
logFile = open('stat.txt', 'w')
s1 = Server('http://muovi.roma.it/ws/xml/autenticazione/1')
s2 = Server('http://muovi.roma.it/ws/xml/paline/7')
token = s1.autenticazione.Accedi(DEV_KEY, '')
res = s2.paline.GetStatPassaggi(token)
dat = xmlrpc.client.Binary(res)
out = xmlrpc.client.Binary.decode(dat)
pprint(out, logFile)
that ends in :
Traceback (most recent call last): File "stat.py", line 18, in
dat = xmlrpc.client.Binary(res) File "C:\Users\Leonardo\AppData\Local\Programs\Python\Python35\lib\xmlrpc\client.py",
line 389, in init
data.class.name) TypeError: expected bytes or bytearray, not dict
The only doc I found for xmlrpc.client is the one at docs.python.org , but I can't figure out how I could decode these binaries
If the content of res variable (what you get from the 2nd (s2) server) is the response you pasted into the question, then you should modify the last 3 lines of your 2nd snippet to (as you already have 2 Binary objects in the res dictionary):
# ... Existing code
res = s2.paline.GetStatPassaggi(token)
answer = res.get("risposta", dict())
aggregation_periods = answer.get("periodi_aggregazione", xmlrpc.client.Binary())
timeout_paths = answer.get("tempi_attesa_percorsi", xmlrpc.client.Binary())
print(aggregation_periods.data)
print(timeout_paths.data)
Notes:
According to [Python.Docs]: xmlrpc.client - Binary Objects (emphasis is mine):
Binary objects have the following methods, supported mainly for internal use by the marshalling/unmarshalling code:
I wasn't able to connect (and this test the solution), since DEV_KEY is (obviously) fake

How do I read this stringified javascript variable into Python?

I'm trying to read _pageData from https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1 into Python 2.7.11 so that I can process it, using this code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Testing _pageData processing. """
import urllib2
import re
import ast
import json
import yaml
BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1'
def main():
""" Do the business. """
response = urllib2.urlopen(BASE_URL, None)
results = re.findall('var _pageData = \\"(.*?)\\";</script>', response.read())
first_result = results[0]
# These all fail
data = ast.literal_eval(first_result)
# data = yaml.load(first_result)
# data = json.loads(first_result)
if __name__ == '__main__':
main()
but get the following error:
Traceback (most recent call last):
File "./temp.py", line 24, in <module>
main()
File "./temp.py", line 19, in main
data = ast.literal_eval(first_result)
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 49, in literal_eval
node_or_string = parse(node_or_string, mode='eval')
File "/usr/local/Cellar/python/2.7.11/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ast.py", line 37, in parse
return compile(source, filename, mode, PyCF_ONLY_AST)
File "<unknown>", line 1
[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n
^
SyntaxError: invalid syntax
var _pageData is in this format:
"[[1,true,true,true,true,true,true,true,true,,\"at\",\"\",\"\",1450364255674,\"\",\"en_US\",false,[]\n,\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/embed?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/edit?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/thumbnail?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",,,true,\"https://www.google.com/maps/d/print?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/pdf?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",\"https://www.google.com/maps/d/viewer?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",false,false,\"/maps/d\",\"maps/sharing\",\"//www.google.com/intl/en_US/help/terms_maps.html\",true,\"https://docs.google.com/picker\",[]\n,false,true,[[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-001.png\",143,25]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-regular-2x-001.png\",286,50]\n]\n,[[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-001.png\",113,20]\n,[\"//www.gstatic.com/mapspro/images/google-my-maps-logo-small-2x-001.png\",226,40]\n]\n]\n,1,\"https://www.gstatic.com/mapspro/_/js/k\\u003dmapspro.gmeviewer.en_US.8b9lQX3ifcs.O/m\\u003dgmeviewer_base/rt\\u003dj/d\\u003d0/rs\\u003dABjfnFWonctWGGtD63MaO3UZxCxF6UPKJQ\",true,true,false,true,\"US\",false,true,true,5,false]\n,[\"mf.map\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",\"Hollywood, FL\",\"\",[-80.16005,26.01043,-80.16005,26.01043]\n,[-80.16005,26.01043,-80.16005,26.01043]\n,[[,\"zBghbRiSwHlg.kq4rrF9BNRIg\",\"Untitled layer\",\"\",[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026scale\\u003d1.0\"]\n,[]\n,1,1,[[,[26.01043,-80.16005]\n]\n,\"MDZBMzJCQjRBOTAwMDAwMQ~CjISKmdlby1tYXBzcHJvLm1hcHNob3AtbGF5ZXItNDUyOWUwMTc0YzhkNmI2ZBgAKAAwABIZACBawIJBU4Fe8v7vNSoAg0dtnhhVotEBLg\",\"vdb:\",\"zBghbRiSwHlg.kq4rrF9BNRIg\",[26.01043,-80.16005]\n,[0,-32]\n,\"06A32BB4A9000001\"]\n,[[\"Hollywood, FL\"]\n]\n,[]\n]\n]\n,,1.0,true,true,,,,[[\"zBghbRiSwHlg.kq4rrF9BNRIg\",1,,,,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\\u0026lid\\u003dzBghbRiSwHlg.kq4rrF9BNRIg\",,,,,0,2,true,[[[\"06A32BB4A9000001\",[[[26.01043,-80.16005]\n]\n]\n,[]\n,[]\n,0,[[\"name\",[\"Hollywood, FL\"]\n,1]\n,,[]\n,[]\n]\n,,0]\n]\n,[[[\"https://mt.googleapis.com/vt/icon/name\\u003dicons/onion/22-blue-dot.png\\u0026filter\\u003dff\\u0026scale\\u003d1.0\",[16,32]\n,1.0]\n,[[\"0000FF\",0.45098039215686275]\n,5000]\n,[[\"0000FF\",0.45098039215686275]\n,[\"000000\",0.25098039215686274]\n,3000]\n]\n]\n]\n]\n]\n,[]\n,,,,,1]\n]\n,[2]\n,,,\"mapspro\",\"zBghbRiSwHlg.k2ATNtn6BCk0\",,true,false,false,\"\",2,false,\"https://mapsengine.google.com/map/kml?mid\\u003dzBghbRiSwHlg.k2ATNtn6BCk0\",3807]\n]\n"
I've tried replacing the \" and \n and decoding the \uXXXX before using, without success. I've also tried replacing ,, with ,"", and ,'', without success.
Thank you.
It seems like there are three kinds of syntactic errors in your string:
, followed by ,
[ followed by ,
, followed by ]
Assuming that those are supposed to be null elements (or ''?), you can just replace those in the original string -- exactly like you did for the ,, case, but you missed the others. Also, you have to do the ,, replacement twice, otherwise you will miss cases such as ,,,,. Then, you can load the JSON string with json.loads.
>>> s = "your messed up json string"
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r",\s*,", ", null,", s)
>>> s = re.sub(r"\[\s*,", "[ null,", s)
>>> s = re.sub(r",\s*\]", ", null]", s)
>>> json.loads(s)
I started off using ast.literal.eval(...) because I was under the (mistaken?) impression that javascript arrays and Python lists were mutually compatible, so all I had to do was destringify _pageData.
However, I hadn't noticed that Python doesn't like ,, true, false or [,. Fixing them does the trick (thank you #Two-Bit Alchemist and #tobias_k)
So, the following appears to work:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Testing _pageData processing. """
import urllib2
import re
import ast
import json
import yaml
BASE_URL = 'https://www.simpliowebstudio.com/wp-content/uploads/2014/07/aWfyh1'
def main():
""" Do the business. """
response = urllib2.urlopen(BASE_URL, None)
results = re.findall('var _pageData = \\"(.*?)\\";</script>', response.read())
first_result = results[0]
first_result = first_result.replace(',,,,,,', ',None,None,None,None,None,')
first_result = first_result.replace(',,,,,', ',None,None,None,None,')
first_result = first_result.replace(',,,,', ',None,None,None,')
first_result = first_result.replace(',,,', ',None,None,')
first_result = first_result.replace(',,', ',None,')
first_result = first_result.replace('[,', '[None,')
first_result = first_result.replace('\\"', '\'')
first_result = first_result.replace('\\n', '')
first_result = first_result.replace('true', 'True')
first_result = first_result.replace('false', 'False')
data = ast.literal_eval(first_result)
for entry in data:
print entry
if __name__ == '__main__':
main()

Python Invalid Snytax

Below is the code I have been working on.
The very last line write_csv('twitter_gmail.csv', messages, append=True) throws a
[ec2-user#ip-172-31-46-164 ~]$ ./twitter_test16.sh
Traceback (most recent call last):
File "./twitter_test16.sh", line 53, in
write_csv('twitter_gmail.csv', messages, append=True)
NameError: name 'messages' is not defined
I have messages defined so I dont understand why it would do that.
import csv
import json
import oauth2 as oauth
import urllib
import sys
import requests
import time
CONSUMER_KEY = "
CONSUMER_SECRET = "
ACCESS_KEY = "
ACCESS_SECRET = "
class TwitterSearch:
def __init__(self, ckey=CONSUMER_KEY, csecret=CONSUMER_SECRET,
akey=ACCESS_KEY, asecret=ACCESS_SECRET,
query='https://api.twitter.com/1.1/search/tweets.{mode}?{query}'
):
consumer = oauth.Consumer(key=ckey, secret=csecret)
access_token = oauth.Token(key=akey, secret=asecret)
self.client = oauth.Client(consumer, access_token)
self.query = query
def search(self, q, mode='json', **queryargs):
queryargs['q'] = q
query = urllib.urlencode(queryargs)
return self.client.request(self.query.format(query=query, mode=mode))
def write_csv(fname, rows, header=None, append=False, **kwargs):
filemode = 'ab' if append else 'wb'
with open(fname, filemode) as outf:
out_csv = csv.writer(outf, **kwargs)
if header:
out_csv.writerow(header)
out_csv.writerows(rows)
def main():
ts = TwitterSearch()
response, data = ts.search('#gmail.com', result_type='recent')
js = json.loads(data)
messages = ([msg['created_at'], msg['txt'], msg['user']['id']] \
for msg in js.get('statuses', []))
write_csv('twitter_gmail.csv', messages, append=True)
The previous line is missing a parenthesis.
messages = ([msg['created_at'], msg['txt'], msg['user']['id']] for msg in js.get('statuses', [])
Should be:
messages = ([msg['created_at'], msg['txt'], msg['user']['id']] for msg in js.get('statuses', []))
I'm surprised that it works when you change to print? Are you also changing the comprehension when you do that?
You asked why the line number of the error was after the bad syntax?
Try putting this in line one of a file and running it, and note the line of the SyntaxError.
a = (]
Then try this and check out the line number:
a = (
b = "some stuff"
Finally, try this:
a = (
b = "some stuff"
Think about when you would know that the programmer had made a python-illegal typo if you were reading the code and carrying it out via pen and paper.
Basically, a SyntaxError is raised as soon as it can be unambiguously determined that invalid syntax was used, which is often immediately after a statement where a mistake was made, not immediately at.
You'll frequently get line numbers on SyntaxErrors that are a line (or several lines if there's empty lines or a corner case) below the actual typo.

How to resolve DNS in Python?

I have a DNS script which allow users to resolve DNS names by typing website names on a Windows command prompt.
I have looked through several guides on the DNS resolve but my script can't still seem to resolve the names (www.google.com) or (google.com) to IP address.
The script outputs an error of
Traceback (most recent call last):
File "C:\python\main_menu.py", line 37, in ?
execfile('C:\python\showdns.py')
File "C:\python\showdns.py", line 3, in ?
x = input ("\nPlease enter a domain name that you wish to translate: ")
File "<string>", line 0, in ?
NameError: name 'google' is not defined
The code:
import socket
x = input ("\nPlease enter a domain name that you wish to translate: ")
print ("\n\nThe IP Address of the Domain Name is: "+socket.gethostbyname_ex(x))
x = raw_input("\nSelect enter to proceed back to Main Menu\n")
if x == '1':
execfile('C:\python\main_menu.py')
Please give advice on the codes. Thanks!
input() is the wrong function to use here. It actually evaluates the string that the user entered.
Also gethostbyname_ex returns more than just a string. So your print statement would also have failed.
In your case this code should work:
import socket
x = raw_input ("\nPlease enter a domain name that you wish to translate: ")
data = socket.gethostbyname_ex(x)
print ("\n\nThe IP Address of the Domain Name is: "+repr(data))
x = raw_input("\nSelect enter to proceed back to Main Menu\n")
if x == '1':
execfile('C:\python\main_menu.py')
#!/user/bin/env python
"""
Resolve the DNS/IP address of a given domain
data returned is in the format:
(name, aliaslist, addresslist)
#filename resolveDNS.py
#version 1.01 (python ver 2.7.3)
#author LoanWolffe
"""
import socket
def getIP(d):
"""
This method returns the first IP address string
that responds as the given domain name
"""
try:
data = socket.gethostbyname(d)
ip = repr(data)
return ip
except Exception:
# fail gracefully!
return False
#
def getIPx(d):
"""
This method returns an array containing
one or more IP address strings that respond
as the given domain name
"""
try:
data = socket.gethostbyname_ex(d)
ipx = repr(data[2])
return ipx
except Exception:
# fail gracefully!
return False
#
def getHost(ip):
"""
This method returns the 'True Host' name for a
given IP address
"""
try:
data = socket.gethostbyaddr(ip)
host = repr(data[0])
return host
except Exception:
# fail gracefully
return False
#
def getAlias(d):
"""
This method returns an array containing
a list of aliases for the given domain
"""
try:
data = socket.gethostbyname_ex(d)
alias = repr(data[1])
#print repr(data)
return alias
except Exception:
# fail gracefully
return False
#
# test it
x = raw_input("Domain name or IP address? > ")
a = getIP(x)
b = getIPx(x)
c = getHost(x)
d = getAlias(x)
print " IP ", a
print " IPx ", b
print " Host ", c
print " Alias ", d
Use raw_input instead of input.

Categories

Resources