python pyinsane script fails - python

I need some help with a python script. The script is an Example Script form the Pyinsane Module for Python. (Pyinsane https://github.com/jflesch/pyinsane)
I want to write my own scanner script but to do that i should understand the example code or the output error.
Example Script.
import sys
``from PIL import Image
try:
import src.abstract as pyinsane
except ImportError:
import pyinsane.abstract as pyinsane
def set_scanner_opt(scanner, opt, value):
print("Setting %s to %s" % (opt, str(value)))
try:
scanner.options[opt].value = value
except (KeyError, pyinsane.SaneException) as exc:
print("Failed to set %s to %s: %s" % (opt, str(value), str(exc)))
if __name__ == "__main__":
steps = False
args = sys.argv[1:]
if len(args) <= 0 or args[0] == "-h" or args[0] == "--help":
print("Syntax:")
print(" %s [-s] <output file (JPG)>" % sys.argv[0])
print("")
print("Options:")
print(" -s : Generate intermediate images (may generate a lot of"
" images !)")
sys.exit(1)
for arg in args[:]:
if arg == "-s":
steps = True
args.remove(arg)
output_file = args[0]
print("Output file: %s" % output_file)
print("Looking for scanners ...")
devices = pyinsane.get_devices()
if (len(devices) <= 0):
print("No scanner detected !")
sys.exit(1)
print("Devices detected:")
print("- " + "\n- ".join([str(d) for d in devices]))
print("")
device = devices[0]
print("Will use: %s" % str(device))
print("")
source = 'Auto'
if (device.options['source'].constraint_type
== pyinsane.SaneConstraintType.STRING_LIST):
if 'Auto' in device.options['source'].constraint:
source = 'Auto'
elif 'FlatBed' in device.options['source'].constraint:
source = 'FlatBed'
else:
print("Warning: Unknown constraint type on the source: %d"
% device.options['source'].constraint_type)
set_scanner_opt(device, 'resolution', 300)
set_scanner_opt(device, 'source', source)
set_scanner_opt(device, 'mode', 'Color')
print("")
print("Scanning ... ")
scan_session = device.scan(multiple=False)
if steps and scan_session.scan.expected_size[1] < 0:
print("Warning: requested step by step scan images, but"
" scanner didn't report the expected number of lines"
" in the final image --> can't do")
print("Step by step scan images won't be recorded")
steps = False
if steps:
last_line = 0
expected_size = scan_session.scan.expected_size
img = Image.new("RGB", expected_size, "#ff00ff")
sp = output_file.split(".")
steps_filename = (".".join(sp[:-1]), sp[-1])
try:
PROGRESSION_INDICATOR = ['|', '/', '-', '\\']
i = -1
while True:
i += 1
i %= len(PROGRESSION_INDICATOR)
sys.stdout.write("\b%s" % PROGRESSION_INDICATOR[i])
sys.stdout.flush()
scan_session.scan.read()
if steps:
next_line = scan_session.scan.available_lines[1]
if (next_line > last_line):
subimg = scan_session.scan.get_image(last_line, next_line)
img.paste(subimg, (0, last_line))
img.save("%s-%05d.%s" % (steps_filename[0], last_line,
steps_filename[1]), "JPEG")
last_line = next_line
except EOFError:
pass
print("\b ")
print("Writing output file ...")
img = scan_session.images[0]
img.save(output_file, "JPEG")
print("Done")
Now the Output:
sudo python /home/pi/Desktop/scantest.py -s 1
Output file: 1
Looking for scanners ...
Devices detected:
- Scanner 'genesys:libusb:001:006' (Canon, LiDE 110, flatbed scanner)
Will use: Scanner 'genesys:libusb:001:006' (Canon, LiDE 110, flatbed scanner)
Setting resolution to 300
Setting source to Auto
Failed to set source to Auto: <class 'pyinsane.rawapi.SaneStatus'> : Data is invalid (4)
Setting mode to Color
Scanning ...
|Traceback (most recent call last):
File "/home/pi/Desktop/scantest.py", line 107, in <module>
steps_filename[1]), "JPEG")
File "/usr/local/lib/python2.7/dist-packages/PIL/Image.py", line 1439, in save
save_handler(self, fp, filename)
File "/usr/local/lib/python2.7/dist-packages/PIL/PngImagePlugin.py", line 572, in _save
ImageFile._save(im, _idat(fp, chunk), [("zip", (0,0)+im.size, 0, rawmode)])
File "/usr/local/lib/python2.7/dist-packages/PIL/ImageFile.py", line 481, in _save
e = Image._getencoder(im.mode, e, a, im.encoderconfig)
File "/usr/local/lib/python2.7/dist-packages/PIL/Image.py", line 399, in _getencoder
return apply(encoder, (mode,) + args + extra)
TypeError: an integer is required
Please Can anybody explain the example code or help me with the output error?
Thanks a lot
laurin

Is your PIL / Pillow library compiled with JPEG support ?
If not, try replacing:
img.save("%s-%05d.%s" % (steps_filename[0], last_line,
steps_filename[1]), "JPEG")
by
img.save("%s-%05d.%s" % (steps_filename[0], last_line,
steps_filename[1]), "PNG")
And
img.save(output_file, "JPEG")
by
img.save(output_file, "PNG")

Related

How can i run sphinxtrain script?

I'm using sphinx to build my own model for vocal recognition , i've followed the tutorial step by step and all works fine until that point when i should run the python script of sphinixtrain (whose role is to execute a set of perl files throw terminal in normal case) but for me, the program only opens the files one by one with a chosen editor without executing them !(watching other tutorials videos, the code bellow works normal)
The code of trainer :
#!/usr/bin/python
from __future__ import print_function
import getopt, sys, os
training_basedir = ""
sphinxbinpath = ""
sphinxpath = ""
def find_paths():
global training_basedir
global sphinxbinpath
global sphinxpath
# Find the location of the files, it can be libexec or lib or lib64
currentpath = os.path.dirname(os.path.realpath(__file__))
sphinxbinpath = os.path.realpath(currentpath + "/../libexec/sphinxtrain")
if os.path.exists(currentpath + "/../lib/sphinxtrain/bw"):
sphinxbinpath = os.path.realpath(currentpath + "/../lib/sphinxtrain/bw")
if os.path.exists(currentpath + "/../bin/Release/Win32"):
sphinxbinpath = os.path.realpath(currentpath + "/../bin/Release/Win32")
# Find the location for the libraries
sphinxpath = os.path.realpath(currentpath + "/../lib/sphinxtrain")
if os.path.exists(currentpath + "/../lib64/sphinxtrain/scripts/00.verify"):
sphinxpath = os.path.realpath(currentpath + "/../lib64/sphinxtrain")
if os.path.exists(currentpath + "/../scripts/00.verify"):
sphinxpath = os.path.realpath(currentpath + "/..")
if not (os.path.exists(sphinxbinpath + "/bw") or os.path.exists(sphinxbinpath + "/bw.exe")):
print("Failed to find sphinxtrain binaries. Check your installation")
exit(1)
# Perl script want forward slashes
training_basedir = os.getcwd().replace('\\', '/');
sphinxpath = sphinxpath.replace('\\','/')
sphinxbinpath = sphinxbinpath.replace('\\','/')
print("Sphinxtrain path:", sphinxpath)
print("Sphinxtrain binaries path:", sphinxbinpath)
def setup(task):
if not os.path.exists("etc"):
os.mkdir("etc")
print("Setting up the database " + task)
out_cfg = open("./etc/sphinx_train.cfg", "w")
for line in open(sphinxpath + "/etc/sphinx_train.cfg", "r"):
line = line.replace("___DB_NAME___", task)
line = line.replace("___BASE_DIR___", training_basedir)
line = line.replace("___SPHINXTRAIN_DIR___", sphinxpath)
line = line.replace("___SPHINXTRAIN_BIN_DIR___", sphinxbinpath)
out_cfg.write(line)
out_cfg.close()
out_cfg = open("etc/feat.params", "w")
for line in open(sphinxpath + "/etc/feat.params", "r"):
out_cfg.write(line)
out_cfg.close()
steps = [
"000.comp_feat/slave_feat.pl",
"00.verify/verify_all.pl",
"0000.g2p_train/g2p_train.pl",
"01.lda_train/slave_lda.pl",
"02.mllt_train/slave_mllt.pl",
"05.vector_quantize/slave.VQ.pl",
"10.falign_ci_hmm/slave_convg.pl",
"11.force_align/slave_align.pl",
"12.vtln_align/slave_align.pl",
"20.ci_hmm/slave_convg.pl",
"30.cd_hmm_untied/slave_convg.pl",
"40.buildtrees/slave.treebuilder.pl",
"45.prunetree/slave.state-tying.pl",
"50.cd_hmm_tied/slave_convg.pl",
"60.lattice_generation/slave_genlat.pl",
"61.lattice_pruning/slave_prune.pl",
"62.lattice_conversion/slave_conv.pl",
"65.mmie_train/slave_convg.pl",
"90.deleted_interpolation/deleted_interpolation.pl",
"decode/slave.pl",
]
def run_stages(stages):
for stage in stages.split(","):
for step in steps:
name = step.split("/")[0].split(".")[-1]
if name == stage:
ret = os.system(sphinxpath + "/scripts/" + step)
if ret != 0:
exit(ret)
def run_from(stage):
found = False
for step in steps:
name = step.split("/")[0].split(".")[-1]
if name == stage or found:
found = True
ret = os.system(sphinxpath + "/scripts/" + step)
if ret != 0:
exit(ret)
def run():
print("Running the training")
for step in steps:
ret = os.system(sphinxpath + "/scripts/" + step)
if ret != 0:
exit(ret)
def usage():
print ("")
print ("Sphinxtrain processes the audio files and creates and acoustic model ")
print ("for CMUSphinx toolkit. The data needs to have a certain layout ")
print ("See the tutorial http://cmusphinx.sourceforge.net/wiki/tutorialam ")
print ("for details")
print ("")
print ("Usage: sphinxtrain [options] <command>")
print ("")
print ("Commands:")
print (" -t <task> setup - copy configuration into database")
print (" [-s <stage1,stage2,stage3>] [-f <stage>] run - run the training or just selected
stages")
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], "ht:s:f:", ["help", "task", "stages", "from"])
except getopt.GetoptError as err:
print(str(err))
usage()
sys.exit(-1)
task = None
stages = None
from_stage = None
for o, a in opts:
if o in ("-t", "--task"):
task = a
if o in ("-f", "--from"):
from_stage = a
if o in ("-s", "--stages"):
stages = a
if o in ("-h", "--help"):
usage()
if len(args) == 0:
usage()
sys.exit(-1)
command = args[0]
find_paths()
if command == "setup":
if task == None:
print("No task name defined")
sys.exit(-1)
setup(task)
elif command == "run":
if stages != None:
run_stages(stages)
elif from_stage != None:
run_from(from_stage)
else:
run()
else:
run()
if __name__ == "__main__":
main()
Another way to solve this is to be explicit about calling the Perl interpreter in your os.system call
i.e.
ret = os.system('perl ' + sphinxpath + "/scripts/" + step)
Solved by associating perl files ( with pl extension) to perl.exe

python imap email collector errorso

I have a script which is supposed to login via imap, and scan my mailbox for email addresses i have sent mail to. It seems there is an error in the code below. Would it be best to just scan named boxes or is there another fix?
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# imap-email-address-collector
#
import sys
import re
import csv
import getpass
import imaplib
import argparse
from email.parser import HeaderParser
RE_EMAIL = re.compile(r'^[a-z0-9._%+-]+\#[a-z0-9.-]+\.[a-z]{2,}$')
RE_QUOTES = re.compile(r'[\'\"]')
RE_SPACES = re.compile(r'[\n\t\s]+')
NOSSL_PROMT = 'LETTHEWOLDREADALLMESSAGES'
results = {}
unmatched = set()
def matchAndAdd(email, name=''):
email = email.lower()
if RE_EMAIL.match(email):
if email not in results or len(name) > len(results[email]): # only overwrite with longer name
results[email] = name.strip()
else:
unmatched.add(email)
def grabAddress(address):
address = str(address).strip()
address = RE_QUOTES.sub('', address)
address = RE_SPACES.sub(' ', address)
if address.startswith('<'): # No name, just an email address
address = address[1:]
if address.endswith('>'):
address = address[:-1]
matchAndAdd(address)
else: # Name and email address
try:
name, email = address.split('<')
except Exception:
matchAndAdd(address)
else:
matchAndAdd(email[:-1], name)
def listBoxes(imap):
boxes = imap.list()
if boxes[0] == 'OK' and len(boxes) > 1:
for box in boxes[1]:
box = re.split(r'\) \".\" ', box, maxsplit=1)
if len(box) == 2:
yield box[1]
else:
print 'No folders found.'
sys.exit(0)
def main(args):
if args.password:
password = args.password
else:
password = getpass.getpass('Password: ')
if args.nossl:
print 'Connecting to %s:%s without SSL...' % (args.host, args.port)
if not args.donotannoyme:
confirmation = raw_input('Please type %s:' % NOSSL_PROMT)
if not confirmation == NOSSL_PROMT:
print 'Good choice ;) (Disable with --donotannoyme)'
sys.exit(1)
imap = imaplib.IMAP4(args.host, args.port)
else:
print 'Connecting to %s:%s over SSL...' % (args.host, args.port)
imap = imaplib.IMAP4_SSL(args.host, args.port)
try:
imap.login(args.user, password)
except imaplib.IMAP4.error:
print "Login failed."
sys.exit(1)
else:
print 'Logged in as %s' % args.user
print 'Collecting email addresses from all messages...'
for box in listBoxes(imap):
print 'Scanning %s' % box
imap.select(box, readonly=True)
typ, data = imap.search(None, 'Inbox')
count = 0
for num in data[0].split():
typ, data = imap.fetch(num, '(BODY[HEADER.FIELDS (TO FROM)])')
headers = headerParser.parsestr(data[0][1])
for h in ('From', 'To'):
if headers[h]:
for address in headers[h].split(','):
grabAddress(address)
count += 1
sys.stdout.flush()
sys.stdout.write('\rScanned %s messages' % count)
if count > 0:
print ''
imap.close()
imap.logout()
if len(results) > 0:
print 'Found %s addresses' % len(results)
if not args.csv:
toStdout = True
else:
try:
outFile = open(args.csv, 'wb')
except Exception:
toStdout = True
print 'Cannot write to %s, dumping out here.' % args.csv
else:
toStdout = False
if toStdout:
outFile = sys.stdout
print '======================================================='
else:
print 'Writing to %s' % args.csv
writer = csv.writer(outFile)
writer.writerows(list(results.items()))
if toStdout:
print '======================================================='
else:
outFile.close()
if len(unmatched) > 0:
print 'Could not interpret %s address(es): %s' % (len(unmatched), "'"+("', '".join(unmatched))+"'")
else:
print 'No addresses found'
if __name__ == '__main__':
headerParser = HeaderParser()
argParser = argparse.ArgumentParser()
argParser.add_argument('--host', help='imap host address', required=True)
argParser.add_argument('--user', help='login username', required=True)
argParser.add_argument('--csv', help='(optional) output csv filepath')
argParser.add_argument('--nossl', help='(optional) do not use ssl', action='store_true')
argParser.add_argument('--donotannoyme', help='(optional) do not complain about non-ssl connections', action='store_true')
argParser.add_argument('--password', help='(optional) login password (will be prompted otherwise)')
argParser.add_argument('--port', help='(optional) imap host port, defaults to 993', type=int, default=993)
main(argParser.parse_args())
However, when i execute the script i get the error below. What may be the solution to this?
Traceback (most recent call last):
File "./imap.py", line 169, in <module>
main(argParser.parse_args())
File "./imap.py", line 99, in main
typ, data = imap.search(None, 'Inbox')
File "/usr/lib64/python2.7/imaplib.py", line 640, in search
typ, dat = self._simple_command(name, *criteria)
File "/usr/lib64/python2.7/imaplib.py", line 1083, in _simple_command
return self._command_complete(name, self._command(name, *args))
File "/usr/lib64/python2.7/imaplib.py", line 918, in _command_complete
raise self.error('%s command error: %s %s' % (name, typ, data))
imaplib.error: SEARCH command error: BAD ['Error in IMAP command SEARCH: Unknown argument INBOX (0.001 + 0.000 secs).']

Unable to start capture: Invalid argument

I am new in python. Please help me solve the below problem.
What the meaning of "Invalid argument"?
Below code all are work well but when I add a code publish the live stream through dataplicity. There will occur error "Unable to start capture: Invalid argument i: Error grabbing frames". After the error to publish the live stream, those function below will proceed while motion detection.
The code I add in the top of the def is_person(image) caused error:
os.system('sudo ./mjpg_streamer -i "./input_uvc.so -f 10 -r 640x320 -n -y" -o "./output_http.so -w ./www -p 80"')
def is_person(image):
det = Detector(image)
faces = len(det.face())
print ("FACE: "), det.drawColors[det.drawn-1 % len(det.drawColors)], faces
uppers = len(det.upper_body())
print ("UPPR: "), det.drawColors[det.drawn-1 % len(det.drawColors)], uppers
fulls = len(det.full_body())
print ("FULL: "), det.drawColors[det.drawn-1 % len(det.drawColors)], fulls
peds = len(det.pedestrian())
print ("PEDS: "), det.drawColors[det.drawn-1 % len(det.drawColors)], peds
det.draw()
det.overlay()
return faces + uppers + fulls + peds
# return len(det.face()) or len(det.full_body()) or len(det.upper_body()) # or len(det.pedestrian())
def processImage(imgFile):
global connection
if is_person(imgFile):
print ("True")
imgFile = datetime.datetime.now() .strftime ("%Y-%m-%d-%H.%M.%S.jpg")
cam.capture (imgFile)
#with open(imgFile, "rb") as image_file:
# encoded_string = base64.b64encode(image_file.read())
else: # Not a person
print ("False")
os.remove(imgFile)
sys.exit(0)
try:
while True:
previous_state = current_state
current_state = GPIO.input(sensor)
if current_state != previous_state:
new_state = "HIGH" if current_state else "LOW"
if current_state: # Motion is Detected
lock.acquire()
cam.start_preview() # Comment in future
cam.preview_fullscreen = False
cam.preview_window = (10,10, 320,240)
print('Motion Detected')
for i in range(imgCount):
curTime = (time.strftime("%I:%M:%S")) + ".jpg"
cam.capture(curTime, resize=(320,240))
t = threading.Thread(target=processImage, args = (curTime,))
t.daemon = True
t.start()
time.sleep(frameSleep)
cam.stop_preview()
lock.release()
time.sleep(camSleep)
except KeyboardInterrupt:
cam.stop_preview()
sys.exit(0)
Thank you in advance.
You have an issue with mjpg_streamer configuration.
Check comment #274 here.

Get the data from scapy command to textfile?

I am trying to create an ARPscanner based on scapy. I found code from the Internet and I need to modify it to save the results to a .txt file. Can anyone help me to do that?
lena = int(raw_input("Enter Number : "))
print(lena)
logging.basicConfig(format='%(asctime)s %(levelname)-5s %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.DEBUG)
logger = logging.getLogger(__name__)
def long2net(arg):
if (arg <= 0 or arg >= 0xFFFFFFFF):
raise ValueError("illegal netmask value", hex(arg))
return 32 - int(round(math.log(0xFFFFFFFF - arg, 2)))
def to_CIDR_notation(bytes_network, bytes_netmask):
network = scapy.utils.ltoa(bytes_network)
netmask = long2net(bytes_netmask)
net = "%s/%s" % (network, netmask)
if netmask < 16:
logger.warn("%s is too big. skipping" % net)
return None
return net
def scan_and_print_neighbors(net, interface, timeout=1):
logger.info("arping %s on %s" % (net, interface))
try:
ans, unans = scapy.layers.l2.arping(net, iface=interface, timeout=timeout, verbose=True)
for s, r in ans.res:
line = r.sprintf("%Ether.src% %ARP.psrc%")
try:
hostname = socket.gethostbyaddr(r.psrc)
line += " " + hostname[0]
except socket.herror:
# failed to resolve
pass
logger.info(line)
except socket.error as e:
if e.errno == errno.EPERM: # Operation not permitted
logger.error("%s. Did you run as root?", e.strerror)
else:
raise
if __name__ == "__main__":
if lena == 1:
for network, netmask, _, interface, address in scapy.config.conf.route.routes:
# skip loopback network and default gw
if network == 0 or interface == 'lo' or address == '127.0.0.1' or address == '0.0.0.0':
continue
if netmask <= 0 or netmask == 0xFFFFFFFF:
continue
net = to_CIDR_notation(network, netmask)
if interface != scapy.config.conf.iface:
# see http://trac.secdev.org/scapy/ticket/537
logger.warn("skipping %s because scapy currently doesn't support arping on non-primary network interfaces", net)
#continue
if net:
scan_and_print_neighbors(net, interface)
repr(network)
text_file = open("Output.txt", "w")
text_file.write(repr(network))
elif lena == 3 :
print("Bye Bye ")
I added this code to my script:
repr(network)
text_file = open("Output.txt", "w")
text_file.write(repr(network))
but it does not work, I just got a blank file.
solved :
if __name__ == "__main__":
import sys
sys.stdout = open('textout.txt', 'w')
... rest of your code here ...
sys.stdout.close()

Error in Python Script?

I keep getting an error when I run this python program,
It says I don't have any file or directory at '/path/to/times-testing.log'.
I don't seem to understand, can anyone help me in fixing this problem.
Thank you in advance!
Heres the code:
import urllib2
import json
import datetime
import time
import sys, os
import logging
from urllib2 import HTTPError
from ConfigParser import SafeConfigParser
# helper function to iterate through dates
def daterange( start_date, end_date ):
if start_date <= end_date:
for n in range( ( end_date - start_date ).days + 1 ):
yield start_date + datetime.timedelta( n )
else:
for n in range( ( start_date - end_date ).days + 1 ):
yield start_date - datetime.timedelta( n )
# helper function to get json into a form I can work with
def convert(input):
if isinstance(input, dict):
return {convert(key): convert(value) for key, value in input.iteritems()}
elif isinstance(input, list):
return [convert(element) for element in input]
elif isinstance(input, unicode):
return input.encode('utf-8')
else:
return input
# helpful function to figure out what to name individual JSON files
def getJsonFileName(date, page, json_file_path):
json_file_name = ".".join([date,str(page),'json'])
json_file_name = "".join([json_file_path,json_file_name])
return json_file_name
# helpful function for processing keywords, mostly
def getMultiples(items, key):
values_list = ""
if len(items) > 0:
num_keys = 0
for item in items:
if num_keys == 0:
values_list = item[key]
else:
values_list = "; ".join([values_list,item[key]])
num_keys += 1
return values_list
# get the articles from the NYTimes Article API
def getArticles(date, query, api_key, json_file_path):
# LOOP THROUGH THE 101 PAGES NYTIMES ALLOWS FOR THAT DATE
for page in range(101):
for n in range(5): # 5 tries
try:
request_string = "http://api.nytimes.com/svc/search/v2/articlesearch.json?begin_date=" + date + "&end_date=" + date + "&page=" + str(page) + "&api-key=" + api_key
response = urllib2.urlopen(request_string)
content = response.read()
if content:
articles = convert(json.loads(content))
# if there are articles here
if len(articles["response"]["docs"]) >= 1:
json_file_name = getJsonFileName(date, page, json_file_path)
json_file = open(json_file_name, 'w')
json_file.write(content)
json_file.close()
# if no more articles, go to next date
else:
return
time.sleep(3) # wait so we don't overwhelm the API
except HTTPError as e:
logging.error("HTTPError on page %s on %s (err no. %s: %s) Here's the URL of the call: %s", page, date, e.code, e.reason, request_string)
if e.code == 403:
print "Script hit a snag and got an HTTPError 403. Check your log file for more info."
return
if e.code == 429:
print "Waiting. You've probably reached an API limit."
time.sleep(30) # wait 30 seconds and try again
except:
logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0])
continue
# parse the JSON files you stored into a tab-delimited file
def parseArticles(date, tsv_file_name, json_file_path):
for file_number in range(101):
# get the articles and put them into a dictionary
try:
file_name = getJsonFileName(date,file_number, json_file_path)
if os.path.isfile(file_name):
in_file = open(file_name, 'r')
articles = convert(json.loads(in_file.read()))
in_file.close()
else:
break
except IOError as e:
logging.error("IOError in %s page %s: %s %s", date, file_number, e.errno, e.strerror)
continue
# if there are articles in that document, parse them
if len(articles["response"]["docs"]) >= 1:
# open the tsv for appending
try:
out_file = open(tsv_file_name, 'ab')
except IOError as e:
logging.error("IOError: %s %s %s %s", date, file_number, e.errno, e.strerror)
continue
# loop through the articles putting what we need in a tsv
try:
for article in articles["response"]["docs"]:
# if (article["source"] == "The New York Times" and article["document_type"] == "article"):
keywords = ""
keywords = getMultiples(article["keywords"],"value")
# should probably pull these if/else checks into a module
variables = [
article["pub_date"],
keywords,
str(article["headline"]["main"]).decode("utf8").replace("\n","") if "main" in article["headline"].keys() else "",
str(article["source"]).decode("utf8") if "source" in article.keys() else "",
str(article["document_type"]).decode("utf8") if "document_type" in article.keys() else "",
article["web_url"] if "web_url" in article.keys() else "",
str(article["news_desk"]).decode("utf8") if "news_desk" in article.keys() else "",
str(article["section_name"]).decode("utf8") if "section_name" in article.keys() else "",
str(article["snippet"]).decode("utf8").replace("\n","") if "snippet" in article.keys() else "",
str(article["lead_paragraph"]).decode("utf8").replace("\n","") if "lead_paragraph" in article.keys() else "",
]
line = "\t".join(variables)
out_file.write(line.encode("utf8")+"\n")
except KeyError as e:
logging.error("KeyError in %s page %s: %s %s", date, file_number, e.errno, e.strerror)
continue
except (KeyboardInterrupt, SystemExit):
raise
except:
logging.error("Error on %s page %s: %s", date, file_number, sys.exc_info()[0])
continue
out_file.close()
else:
break
# Main function where stuff gets done
def main():
config = SafeConfigParser()
script_dir = os.path.dirname(__file__)
config_file = os.path.join(script_dir, 'config/settings.cfg')
config.read(config_file)
json_file_path = config.get('files','json_folder')
tsv_file_name = config.get('files','tsv_file')
log_file = config.get('files','logfile')
api_key = config.get('nytimes','api_key')
start = datetime.date( year = int(config.get('nytimes','start_year')), month = int(config.get('nytimes','start_month')), day = int(config.get('nytimes','start_day')) )
end = datetime.date( year = int(config.get('nytimes','end_year')), month = int(config.get('nytimes','end_month')), day = int(config.get('nytimes','end_day')) )
query = config.get('nytimes','query')
logging.basicConfig(filename=log_file, level=logging.INFO)
logging.info("Getting started.")
try:
# LOOP THROUGH THE SPECIFIED DATES
for date in daterange( start, end ):
date = date.strftime("%Y%m%d")
logging.info("Working on %s." % date)
getArticles(date, query, api_key, json_file_path)
parseArticles(date, tsv_file_name, json_file_path)
except:
logging.error("Unexpected error: %s", str(sys.exc_info()[0]))
finally:
logging.info("Finished.")
if __name__ == '__main__' :
main()
It generates the following error when compiling it:
Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$ python getTimesArticles.py
Traceback (most recent call last):
File "getTimesArticles.py", line 180, in <module>
main()
File "getTimesArticles.py", line 164, in main
logging.basicConfig(filename=log_file, level=logging.INFO)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 1545, in basicConfig
hdlr = FileHandler(filename, mode)
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 911, in __init__
StreamHandler.__init__(self, self._open())
File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/logging/__init__.py", line 941, in _open
stream = open(self.baseFilename, self.mode)
IOError: [Errno 2] No such file or directory: '/path/to/times-testing.log'
Rakeshs-MacBook-Air:get-nytimes-articles-master niharika$
Your main() function:
def main():
config = SafeConfigParser()
script_dir = os.path.dirname(__file__)
config_file = os.path.join(script_dir, 'config/settings.cfg')
config.read(config_file)
...
log_file = config.get('files','logfile')
...
logging.basicConfig(filename=log_file, level=logging.INFO)
opens the config/settings.cfg file and fetches the name of the log file, which seems to be /path/to/times-testing.log. You either need to create that folder (probably not the best idea) or configure it to point to the correct file.

Categories

Resources