Python script run via cron does not execute occasionally - python

I have a simple Python script for fetching tweets and caching them to disk, configured to run every two minutes via cron:
*/2 * * * * (date ; /usr/bin/python /path/get_tweets.py) >> /path/log/get_tweets.log 2>&1
The script runs successfully most of the time. However, every so often it doesn't execute. In addition to other logging, I added a simple print statement above the meat of the script, and on the failed runs nothing except the output of the initial date command makes it to the log:
#!/usr/bin/python
# Script for fetching tweets and then storing them as an HTML snippet for inclusion using SSI

print "Starting get_tweets.py"

import simplejson as json
import urllib2
import httplib
import re
import calendar
import codecs
import os
import rfc822
from datetime import datetime
import time
import sys
import pprint

debug = True
now = datetime.today()
template = u'<p class="tweet">%s <span class="date">on %s</span></p>'
html_snippet = u''
timelineUrl = 'http://api.twitter.com/1/statuses/user_timeline.json?screen_name=gcorne&count=7'
tweetFilePath = '/path/server-generated-includes/tweets.html'

if(debug): print "[%s] Fetching tweets from %s." % (now, timelineUrl)

def getTweets():
    request = urllib2.Request(timelineUrl)
    opener = urllib2.build_opener()
    try:
        tweets = opener.open(request)
    except:
        print "[%s] HTTP Request %s failed." % (now, timelineUrl)
        exitScript()
    tweets = tweets.read()
    return tweets

def exitScript():
    print "[%s] Script failed." % (now)
    sys.exit(0)

tweets = getTweets()
now = datetime.today()
if(debug): print "[%s] Tweets retrieved." % (now)
tweets = json.loads(tweets)

for tweet in tweets:
    text = tweet['text'] + ' '
    when = tweet['created_at']
    when = re.match(r'(\w+\s){3}', when).group(0).rstrip()
    # print GetRelativeCreatedAt(when)
    # convert links
    text = re.sub(r'(http://.*?)\s', r'<a href="\1">\1</a> ', text).rstrip()
    # convert hashtags
    text = re.sub(r'#(\w+)', r'<a href="http://twitter.com/search?q=%23\1">#\1</a>', text)
    # convert @ replies
    text = re.sub(r'@(\w+)', r'<a href="http://twitter.com/\1">@\1</a>', text)
    html_snippet += template % (text, when) + "\n"
# print html_snippet

now = datetime.today()
if(debug): print "[%s] Opening file %s." % (now, tweetFilePath)
try:
    file = codecs.open(tweetFilePath, 'w', 'utf_8')
except:
    print "[%s] File %s could not be opened." % (now, tweetFilePath)
    exitScript()

now = datetime.today()
if(debug): print "[%s] Writing %s to disk." % (now, tweetFilePath)
file.write(html_snippet)
now = datetime.today()
if(debug): print "[%s] Finished writing %s to disk." % (now, tweetFilePath)
file.close()
sys.exit(0)
Any ideas? The system is a VPS running CentOS 5.3 with Python 2.4.
Update: I have added the entire script to avoid any confusion.

The most likely explanation is that once in a while the script takes more than two minutes (maybe the system is occasionally very busy, or the script has to wait on an external site that is occasionally slow), and your cron is a sensible one that skips starting a job whose previous run hasn't yet terminated. By logging the start and end times of your script, you'll be able to double-check whether that is the case. What to do in such circumstances is up to you (I recommend skipping an occasional run to avoid further overloading a very busy system, whether your own or the remote one you're fetching data from).
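To check for overlap along those lines, one option is to log start and end times and skip a run when the previous one is still active. A minimal sketch using fcntl file locking, in Python 2 syntax to match the script above (the lock-file path is hypothetical):

import fcntl
import sys
import time

# Hypothetical lock file; any writable path works.
lock = open('/tmp/get_tweets.lock', 'w')
try:
    # Non-blocking exclusive lock: fails immediately if another run holds it.
    fcntl.flock(lock, fcntl.LOCK_EX | fcntl.LOCK_NB)
except IOError:
    print "[%s] Previous run still active, skipping." % time.ctime()
    sys.exit(0)

print "[%s] Run started." % time.ctime()
# ... fetch and cache the tweets ...
print "[%s] Run finished." % time.ctime()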

I just had a problem with a Python script which sometimes wouldn't run from crontab, but always ran from the command line. It turned out I had to redirect the output to /dev/null: otherwise the standard output seems to fill up, the program just stops, and the process is killed off. With /dev/null soaking up the output, everything's fine.
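If that is the cause here, the crontab entry with the output discarded instead of appended to a log would look like this (same schedule as above):

*/2 * * * * /usr/bin/python /path/get_tweets.py > /dev/null 2>&1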

Related

python checking file changes without reading the full file

I have a web app (in the backend) where I am using pysondb (https://github.com/pysonDB/pysonDB) to upload some tasks, which are then executed by another program (sniffer).
The sniffer program (a completely separate program) checks the database in an infinite loop for any new unfinished uploaded tasks, executes them, and updates the database.
I don't want to read the database repeatedly; instead, I want to watch the database file (db.json) for changes and only read the database then. I have looked into watchdog, but I was looking for something lightweight and modern to suit my needs.
# infinite loop
import pysondb
import time
from datetime import datetime
# calling aligner with os.system
import os
import subprocess
from pathlib import Path

while True:
    # always alive
    time.sleep(2)
    try:
        # process files
        db = pysondb.getDb("../tasks_db.json")
        tasks = db.getBy({"task_status": "uploaded"})
        for task in tasks:
            try:
                task_path = task["task_path"]
                cost = task["cost"]
                corpus_folder = task_path
                get_output = subprocess.Popen(f"mfa validate {corpus_folder} english english", shell=True, stdout=subprocess.PIPE).stdout
                res = get_output.read().decode("utf-8")
                # print(type(res))
                if "ERROR - There was an error in the run, please see the log." in res:
                    # log errors
                    f = open("sniffer_log.error", "a+")
                    f.write(f"{datetime.now()} :: {str(res)}\n")
                    f.close()
                else:
                    align_folder = f"{corpus_folder}_aligned"
                    Path(align_folder).mkdir(parents=True, exist_ok=True)
                    o = subprocess.Popen(f"mfa align {corpus_folder} english english {align_folder}", shell=True, stdout=subprocess.PIPE).stdout.read().decode("utf-8")
                    # success
            except subprocess.CalledProcessError:
                # mfa align ~/mfa_data/my_corpus english english ~/mfa_data/my_corpus_aligned
                # log errors
                f = open("sniffer_log.error", "a+")
                f.write(f"{datetime.now()} :: Files not in right format\n")
                f.close()
    except Exception as e:
        # log errors
        f = open("sniffer_log.error", "a+")
        f.write(f"{datetime.now()} :: {e}\n")
        f.close()
Using python-rq would be a much more efficient way of doing this, and it wouldn't need a database. It has no requirements other than a Redis install. From there, you could just move all of that into a function:
def task(task_path, cost):
    corpus_folder = task_path
    get_output = subprocess.Popen(f"mfa validate {corpus_folder} english english", shell=True, stdout=subprocess.PIPE).stdout
    res = get_output.read().decode("utf-8")
    # print(type(res))
    if "ERROR - There was an error in the run, please see the log." in res:
        # log errors
        f = open("sniffer_log.error", "a+")
        f.write(f"{datetime.now()} :: {str(res)}\n")
    ... # etc
Obviously you would rename that function and put the try-except statement back, but then you can just call it through RQ:
# ... where you want to call the function
from wherever.you.put.your.task.function import task

result = your_redis_queue.enqueue(task, "whatever", "arguments")
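For completeness, a minimal sketch of the RQ wiring under the answer's assumptions (the queue name, module path, and arguments are hypothetical):

from redis import Redis
from rq import Queue

from sniffer_tasks import task  # hypothetical module holding the task() above

# Connect a named queue to a local Redis and enqueue one alignment job.
q = Queue("alignments", connection=Redis())
job = q.enqueue(task, "/path/to/corpus", 10)

A worker started in another shell with `rq worker alignments` then picks the jobs up, so both the infinite polling loop and the JSON task database go away.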

nagios core external agent using python scripting

I have a bash script for performing passive checks, i.e., an external agent/application. I tried converting the bash script into Python, but when I execute the file I don't see any kind of response on my Nagios Core interface regarding my passive check result.
import os
import datetime
CommandFile='/usr/local/nagios/var/rw/nagios.cmd'
datetime = datetime.datetime.now()
os.stat(CommandFile)
f = open(CommandFile, 'w')
f.write("/bin/echo " + str(datetime) + " PROCESS_SERVICE_CHECK_RESULT;compute-1;python dummy;0;I am dummy python")
f.close()
My bash script code is:
#!/bin/sh
# Write a command to the Nagios command file to cause
# it to process a service check result
echocmd="/bin/echo"
CommandFile="/usr/local/nagios/var/rw/nagios.cmd"
# get the current date/time in seconds since UNIX epoch
datetime=`date +%s`
# create the command line to add to the command file
cmdline="[$datetime] PROCESS_SERVICE_CHECK_RESULT;host-name;dummy bash;0;I am dummy bash"
# append the command to the end of the command file
`$echocmd $cmdline >> $CommandFile`
I changed my code and now it's working perfectly fine; I can see the response in the Nagios interface. (The original attempt wrote the literal string "/bin/echo" into the command file, used a human-readable datetime instead of a bracketed epoch timestamp, and omitted the trailing newline, so Nagios could not parse the command.)
import time
import sys

HOSTNAME = "compute-1"
service = "python dummy"
return_code = "0"
text = "python dummy is working .....I am python dummy"
timestamp = int(time.time())

nagios_cmd = open("/usr/local/nagios/var/rw/nagios.cmd", "w")
nagios_cmd.write("[{timestamp}] PROCESS_SERVICE_CHECK_RESULT;{hostname};{service};{return_code};{text}\n".format(
    timestamp=timestamp,
    hostname=HOSTNAME,
    service=service,
    return_code=return_code,
    text=text))
nagios_cmd.close()

How do I count the number of lines in an FTP file without downloading it locally while using Python

So I need to be able to read and count the number of lines in a file on an FTP server WITHOUT downloading it to my local machine, using Python.
I know the code to connect to the server:
ftp = ftplib.FTP('example.com')                 # object ftp set to server address
ftp.login('username', 'password')               # login info
ftp.retrlines('LIST')                           # list file directories
ftp.cwd('/parent folder/another folder/file/')  # change working directory
I also know the basic code to count the number of lines if the file is already downloaded/stored locally:
with open('file') as f:
    count = sum(1 for line in f)
    print(count)
I just need to know how to connect these 2 pieces of code without having to download the file to my local system.
Any help is appreciated.
Thank You
As far as I know, FTP doesn't provide any functionality to read a file's content without actually downloading it. However, you could try something like the approach in "Is it possible to read FTP files without writing them using Python?". (You haven't specified which Python you are using.)
#!/usr/bin/env python
from ftplib import FTP

# Note: retrbinary invokes the callback once per downloaded chunk, so for a
# large file this prints a count per chunk rather than a single total.
def countLines(s):
    print len(s.split('\n'))

ftp = FTP('ftp.kernel.org')
ftp.login()
ftp.retrbinary('RETR /pub/README_ABOUT_BZ2_FILES', countLines)
Please take this code as a reference only
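Building on the same idea, here is a small Python 3 sketch that accumulates a single total instead of printing per chunk. It counts b'\n' bytes, which assumes the file ends with a newline; the host and path are the same illustrative ones as above:

from ftplib import FTP

class LineCounter:
    """Accumulate newline counts across the chunks retrbinary delivers."""
    def __init__(self):
        self.count = 0

    def __call__(self, chunk):
        self.count += chunk.count(b'\n')

counter = LineCounter()
ftp = FTP('ftp.kernel.org')
ftp.login()
ftp.retrbinary('RETR /pub/README_ABOUT_BZ2_FILES', counter)
ftp.quit()
print(counter.count)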
There is a way: I adapted a piece of code that I created for processing CSV files "on the fly". It is implemented with the producer-consumer approach. Applying this pattern lets us assign each task to a thread (or process) and show partial results for huge remote files. You can adapt it for FTP requests.
The download stream is saved in a queue and consumed "on the fly". No extra HDD space is needed, and it is memory efficient. Tested in Python 3.5.2 (vanilla) on Fedora Core 25 x86_64.
This is the source adapted for FTP (over HTTP) retrieval:
from threading import Thread, Event
from queue import Queue, Empty
import urllib.request, sys, csv, io, os, time
import argparse

FILE_URL = 'http://cdiac.ornl.gov/ftp/ndp030/CSV-FILES/nation.1751_2010.csv'

def download_task(url, chunk_queue, event):
    CHUNK = 1*1024
    response = urllib.request.urlopen(url)
    event.clear()
    print('%% - Starting Download - %%')
    print('%% - ------------------ - %%')
    # VT100 control codes.
    CURSOR_UP_ONE = '\x1b[1A'
    ERASE_LINE = '\x1b[2K'
    while True:
        chunk = response.read(CHUNK)
        if not chunk:
            print('%% - Download completed - %%')
            event.set()
            break
        chunk_queue.put(chunk)

def count_task(chunk_queue, event):
    part = False
    time.sleep(5)  # give the producer some time
    M = 0
    contador = 0
    # VT100 control codes.
    CURSOR_UP_ONE = '\x1b[1A'
    ERASE_LINE = '\x1b[2K'
    while True:
        try:
            # queue.get() blocks by default when the queue is empty. With
            # block=False it raises a queue.Empty exception instead, which is
            # caught below to show a partial result of the process.
            chunk = chunk_queue.get(block=False)
            for line in chunk.splitlines(True):
                if line.endswith(b'\n'):
                    if part:  # the last line of the previous chunk was partial
                        line = linepart + line
                        part = False
                    M += 1
                else:
                    # A line without '\n' is the last line of the chunk: a
                    # partial line that is completed in the next iteration
                    # over the next chunk.
                    part = True
                    linepart = line
        except Empty:
            # QUEUE EMPTY
            print(CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE)
            print(CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE)
            print('Downloading records ...')
            if M > 0:
                print('Partial result: Lines: %d ' % M)  # note: M includes the header line
            if event.is_set():  # THE END: queue empty and download finished (event is set)
                print(CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE)
                print(CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE)
                print(CURSOR_UP_ONE + ERASE_LINE + CURSOR_UP_ONE)
                print('The consumer has waited %s times' % str(contador))
                print('RECORDS = ', M)
                break
            contador += 1
            time.sleep(1)  # give some time for loading more records

def main():
    chunk_queue = Queue()
    event = Event()
    args = parse_args()
    url = args.url
    p1 = Thread(target=download_task, args=(url, chunk_queue, event,))
    p1.start()
    p2 = Thread(target=count_task, args=(chunk_queue, event,))
    p2.start()
    p1.join()
    p2.join()

# The user of this module can customize one parameter:
# + URL where the remote file can be found.
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', default=FILE_URL,
                        help='remote-csv-file URL')
    return parser.parse_args()

if __name__ == '__main__':
    main()
Usage:
$ python ftp-data.py -u <ftp-file>
Example:
$ python ftp-data.py -u 'http://cdiac.ornl.gov/ftp/ndp030/CSV-FILES/nation.1751_2010.csv'
The consumer has waited 0 times
RECORDS = 16327
CSV version on GitHub: https://github.com/AALVAREZG/csv-data-onthefly

Python pass a file as argument

I've been working on a Python problem for some time now. I'm trying to use the Echoprint API to sort out my music, so I'm writing some code that does that for me.
This is how the API works:
1. It takes in a song name as a command-line argument.
2. It gives the appropriate result.
But I'm writing a script that has to perform this "internally": the script should take the files, perform the lookup, and output the results to the terminal (basically, no command-line arguments supplied).
So is there any way to pass files into a function?
I know this sounds silly, but it's a problem I'm not able to solve. If I use os.walk() etc., it returns a str object to my lookup function as a parameter. I want the audio file to be passed as a parameter.
Here's the code which takes in the song as a command-line arg:
import sys
import os
import pyechonest.config as config
import pyechonest.song as song

config.CODEGEN_BINARY_OVERRIDE = os.path.abspath("/Users/******/python/minger/echoprint-codegen-master/echoprint-codegen")
config.ECHO_NEST_API_KEY = '*****'

def lookup(file):
    # Note that song.identify reads just the first 30 seconds of the file
    fp = song.util.codegen(file)
    if len(fp) and "code" in fp[0]:
        # The version parameter to song/identify indicates the use of echoprint
        result = song.identify(query_obj=fp, version="4.11")
        print "Got result:", result
        print result[0]
        if len(result):
            print "Artist: %s (%s)" % (result[0].artist_name, result[0].artist_id)
            print "Song: %s (%s)" % (result[0].title, result[0].id)
        else:
            print "No match. This track may not be in the database yet."
    else:
        print "Couldn't decode", file

if __name__ == "__main__":
    if len(sys.argv) < 2:
        print >>sys.stderr, "Usage: %s <audio file>" % sys.argv[0]
        sys.exit(1)
    lookup(sys.argv[1])
From the documentation at http://echonest.github.io/remix/apidocs/pyechonest.util-module.html#codegen, the method you use has the signature
codegen(filename, start=0, duration=30)
so it is the filename that has to be passed as an argument... not the file itself.
Example use here: http://nullege.com/codes/show/src#p#y#pyechonest-7.1.0#pyechonest#song.py/371/util.codegen
if filename:
    if os.path.exists(filename):
        query_obj = util.codegen(filename, start=codegen_start, duration=codegen_duration)
        if query_obj is None:
            raise Exception("The filename specified: %s could not be decoded." % filename)
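Since lookup() already takes a filename string, the str objects that os.walk() yields are exactly what it needs. A minimal sketch (the extension filter and the music folder path are hypothetical):

import os

def walk_and_lookup(root):
    # os.walk yields (dirpath, dirnames, filenames); join them to get full
    # paths, then hand each path string to lookup() just like sys.argv[1].
    for dirpath, dirnames, filenames in os.walk(root):
        for name in filenames:
            if name.lower().endswith(('.mp3', '.m4a', '.ogg')):
                lookup(os.path.join(dirpath, name))

walk_and_lookup("/path/to/music")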

Python GI Notify: How can I call the Gtk.main()?

I'm trying to create a Python notification application. To make it short, here is what I want to do:
1. Check my Gmail account
2. Display a notification with the number of unread mails
3. Display a button that lets me open Chromium (using a system call)
For now everything looks just fine. The mail-checking part was kind of easy. I serialised my unread mail count so that the notification doesn't show up every single minute; it only displays if I have new mail.
Where I'm stuck is that I don't know how to create the main GTK loop so that I can handle the button signal.
Here is my code:
#!/usr/bin/python
from gi.repository import Notify, Gtk, GLib
from urllib.request import FancyURLopener
from datetime import datetime, date, time, timedelta
import os.path, sys, getopt
from subprocess import call

serialisedvalue = 0
serialiseddate = 0

def callback():
    call(["chromium", "gmail.com"])

def serialise(unread):
    try:
        f = open("mailcount", "w")
        try:
            f.write(unread + "\n")  # write a string to a file
            f.write(datetime.now().strftime('%b %d %Y %I:%M%p'))
        finally:
            f.close()
    except IOError:
        pass

def deserialise():
    global serialisedvalue
    global serialiseddate
    try:
        f = open("mailcount", "r")
        try:
            serialisedvalue = f.readline().rstrip()
            serialiseddate = datetime.strptime(f.readline(), '%b %d %Y %I:%M%p')
        finally:
            f.close()
    except IOError:
        pass

def notif(unread):
    Notify.init("New Mail")
    if unread != "1":
        Hello = Notify.Notification.new("New mail", "You have " + unread + " unread mails", "/usr/share/icons/Faenza/actions/96/mail-forward.png")
    else:
        Hello = Notify.Notification.new("New mail", "You have " + unread + " unread mails", "/usr/share/icons/Faenza/actions/96/mail-forward.png")
    Hello.add_action('action', 'Read', callback, None, None)
    Hello.show()

def main(argv):
    notify = 0
    forced = 0
    try:
        opts, args = getopt.getopt(argv, "nf", ['notify', 'force-notify'])
    except getopt.GetoptError:
        print("unreadgmail.py [-n --notify] [-f --force-notify]")
        sys.exit(2)
    for opt, args in opts:
        if opt in ("-n", "--notify"):
            notify = 1
        elif opt in ("-f", "--force-notify"):
            forced = 1
    url = 'https://%s:%s@mail.google.com/mail/feed/atom' % ("myaccount", "mypassword")
    opener = FancyURLopener()
    page = opener.open(url)
    contents = page.read().decode('utf-8')
    ifrom = contents.index('<fullcount>') + 11
    ito = contents.index('</fullcount>')
    unread = contents[ifrom:ito]
    print("Unread messages: " + unread)
    if notify == 1 and forced == 0:
        if os.path.exists("mailcount"):
            deserialise()
        else:
            serialise(unread)
            deserialise()
        if unread != "0":
            if unread != serialisedvalue:
                notif(unread)
                serialise(unread)
            elif (datetime.now() - serialiseddate) > timedelta(hours=1):
                notif(unread)
    if forced == 1:
        notif(unread)
    GLib.MainLoop().run()

if __name__ == "__main__":
    main(sys.argv[1:])
My notifications used to work fine with pygtk and pynotify, but I want to update my code, and since I lost the old version I don't have a clue how. Calling Gtk.main() in my main just blocks the program until I kill it.
I'm using GNOME 3.6, Arch Linux and Python 3.3.
So does anyone know how to "wait" for the button to be clicked before the program ends? It runs fine, but the script just ends when the notification is displayed and doesn't wait for the signal.
Thanks a lot :)
EDIT: A bit more detail on my problem: as you can see, the program has already ended and is not waiting for a signal. That's what I'm trying to solve right now.
If you're not using GTK+ (and as far as I can tell, you aren't), you could probably call GLib.MainLoop().run() at the end of your main function to keep your program running.
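A minimal sketch of that approach: keep a module-level loop so the action callback can quit it once the click has been handled. The callback signature below follows the usual GI libnotify convention of (notification, action, user_data); treat that as an assumption and adjust to taste.

from gi.repository import GLib
from subprocess import call

loop = GLib.MainLoop()

def callback(notification, action, data=None):
    # Open the mailbox, then stop the loop so the script can exit.
    call(["chromium", "gmail.com"])
    loop.quit()

# ... build and show the notification as in notif(), then:
loop.run()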
