I am very new to Python and am working on a script that retrieves data from multiple FTP sites and downloads yesterday's data, organized by date directory, into a local folder. The problem is that if a transfer fails on some day, that day's records are never picked up later; the script simply moves on to the next day. I want it to sync the files so that anything missed on a particular day is still copied to the local folder on a later run. I have looked at rsync, but I need help using it from the script. This is my script:
import datetime
import os
import re
import sys
import time

import nprint  # site-local helper module that provides celDb()

MAX_CHILDREN = 16
ftp_site_prog = "/usr/local/bin/ftp_site.py"
class SpawnQ:
def __init__(self, max_pids):
self.max_pids = max_pids
self.queue = []
tmp = re.split("/", ftp_site_prog)
self.my_name = tmp[-1]
def addQ(self, site_id):
self.queue.append(site_id)
return
def runQ(self):
while (len(self.queue) != 0):
# Check how many sessions are running
cmd = """ps -ef | grep "%s" | grep -v grep""" % self.my_name
num_pids = 0
for line in os.popen(cmd).readlines():
num_pids = num_pids + 1
if (num_pids < self.max_pids):
site_id = self.queue.pop()
# print site_id
# print "Forking........"
fpid = os.fork()
if fpid:
# print "Created child: ", fpid
os.waitpid(fpid, os.WNOHANG)
else:
# print "This is the Child"
# Exec the ftp_site
arg_string = "%s" % site_id
args = [arg_string]
os.execvp(ftp_site_prog, (ftp_site_prog,) + tuple(args))
# How do I call rsync here instead? e.g. os.system("rsync ...") -- this is the part I need help with
sys.exit(0)
else:
# print "Waiting for a spare process...."
time.sleep(10)
return
# Get a list of the sites
db_obj = nprint.celDb()
site_list = db_obj.get_all_site_ids()
myQ = SpawnQ(MAX_CHILDREN)
for site_id in site_list:
myQ.addQ(site_id)
myQ.runQ()
# Wait until we only have the parent left
# Check how many sessions are running
tmp = re.split("/",ftp_site_prog)
ftp_name = tmp[-1]
cmd = """ps -ef | grep "%s" | grep -v grep""" % ftp_name
num_pids = MAX_CHILDREN
while (num_pids > 0):
num_pids = 0
for line in os.popen(cmd).readlines():
num_pids = num_pids + 1
time.sleep(60)
today = datetime.date.today()
daydelta = datetime.timedelta(days=1)
yesterday = today - daydelta
Much of this can be accomplished with the ftplib module for the retrieval of files from standard FTP servers. If you are dealing with SFTP servers you can use the paramiko library.
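For example, here is a minimal sketch of how the per-day download could be made idempotent with ftplib, so that a rerun picks up anything a failed day left behind (the host/credentials and the YYYYMMDD directory layout are assumptions, not taken from your script):
import datetime
import ftplib
import os

def sync_yesterday(host, user, password, remote_root, local_root):
    """Download yesterday's date directory, skipping files that already exist locally."""
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    day = yesterday.strftime("%Y%m%d")          # assumed directory naming
    remote_dir = "%s/%s" % (remote_root, day)
    local_dir = os.path.join(local_root, day)
    if not os.path.isdir(local_dir):
        os.makedirs(local_dir)
    ftp = ftplib.FTP(host)
    ftp.login(user, password)
    ftp.cwd(remote_dir)
    for name in ftp.nlst():
        local_path = os.path.join(local_dir, name)
        if os.path.exists(local_path):          # already synced on an earlier run
            continue
        with open(local_path, "wb") as fh:
            ftp.retrbinary("RETR " + name, fh.write)
    ftp.quit()
Because existing files are skipped, re-running the script after a failed day simply fills in what is missing. If you do want rsync instead, you can call it from the script with something like subprocess.call(["rsync", "-a", src, dst]), provided the remote side is reachable over ssh/rsync rather than plain FTP.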
I want to get the friendly name of the running application given its process name, e.g. pycharm64.exe --> PyCharm or chrome.exe --> Google Chrome.
Here's what I have achieved so far:
import time
import wmi
import win32gui
import win32process

c = wmi.WMI()

def get_process_name(hwnd):
    """Get the application filename for a given hwnd."""
try:
_, pid = win32process.GetWindowThreadProcessId(hwnd)
for p in c.query('SELECT Name FROM Win32_Process WHERE ProcessId = %s' % str(pid)):
exe = p.Name
# print(GetFileVersionInfo(exe))
break
return exe
except:
return None
while True :
time.sleep(3)
print(get_process_name(win32gui.GetForegroundWindow()))
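To turn the executable name into a friendly display name (pycharm64.exe --> PyCharm), one option is to read the FileDescription field from the executable's version resource. A rough sketch, assuming pywin32's win32api version-info calls and the Win32_Process ExecutablePath property (some processes have no version resource, so the fallback matters):
import win32api
import wmi

c = wmi.WMI()

def get_display_name(pid):
    """Return the FileDescription (e.g. 'PyCharm') of the process executable, or None."""
    for p in c.query('SELECT ExecutablePath FROM Win32_Process WHERE ProcessId = %s' % pid):
        exe_path = p.ExecutablePath
        if not exe_path:
            return None
        try:
            # Pick the first language/codepage pair and read its FileDescription string.
            lang, codepage = win32api.GetFileVersionInfo(exe_path, '\\VarFileInfo\\Translation')[0]
            key = u'\\StringFileInfo\\%04x%04x\\FileDescription' % (lang, codepage)
            return win32api.GetFileVersionInfo(exe_path, key)
        except Exception:
            return None
    return None
You could call it with the pid you already get from win32process.GetWindowThreadProcessId, and fall back to the bare exe name when it returns None.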
import subprocess as sub
import re
import os
from datetime import datetime as influx_timestap
from influxdb import InfluxDBClient
from collections import OrderedDict
insert_json = []
hostname = str(sub.check_output('hostname')).strip()
location = str(sub.check_output(['ps -ef | grep mgr'], shell=True)).split()
current_dir = os.getcwd()
print("script executed")
gg_location_pattern = re.compile(r'mgr\.prm$')
gg_process_pattertn = re.compile(r'^REPLICAT|^EXTRACT')
for index in location:
if gg_location_pattern.search(index) != None:
gg_location = index[:-14]
os.chdir(gg_location)
print("checkpoint1")
get_lag = sub.check_output(str(current_dir) + '/ggsci_test.sh', shell=True)
print("checkpoint2")
processes = get_lag.split("\n")
for process in processes:
if gg_process_pattertn.search(process) != None:
                lag_at_chkpnt = int(process.split()[3].split(":")[0]) * 3600 + int(process.split()[3].split(":")[1]) * 60 + int(process.split()[3].split(":")[2])
                time_since_chkpnt = int(process.split()[4].split(":")[0]) * 3600 + int(process.split()[4].split(":")[1]) * 60 + int(process.split()[4].split(":")[2])
process_dict = OrderedDict({"measurement": "GoldenGate_Mon_" + str(hostname) + "_Graph",
"tags": {"hostname": hostname, "process_name": process.split()[2]},
"time": influx_timestap.now().isoformat('T'),
"fields": {"process_type": process.split()[0], "process_status": process.split()[1],
"lag_at_chkpnt": lag_at_chkpnt, "time_since_chkpnt": time_since_chkpnt}})
insert_json.append(process_dict)
host = 'xxxxxxxx'
port = 'x'
user = 'x'
password = 'x'
dbname = 'x'
print("before client")
client = InfluxDBClient(host, port, user, password, dbname)
client.write_points(insert_json)
print("after client")
This code works perfectly when run manually, but it does not work from crontab. After searching on the internet I found advice to change or set the PATH variable in the crontab; I changed the PATH variable and it is still not working.
The crontab log file shows "checkpoint1" and nothing after that, so the line that is not working is get_lag = sub.check_output(str(current_dir) + '/ggsci_test.sh', shell=True).
What can I do from here?
Take care,
It looks like your external script (ggsci_test.sh) has an issue with its paths, or fails in general, when it is launched from cron.
From the Python subprocess documentation about subprocess.check_output:
If the return code was non-zero it raises a CalledProcessError. The
CalledProcessError object will have the return code in the returncode
attribute and any output in the output attribute.
So that's the reason why you see the error when catching it, but cannot continue.
You should therefore check whether your shell script has any issues that need to be solved first.
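One way to see what actually goes wrong under cron is to catch the CalledProcessError and log the script's own output, and to give the call an explicit working directory and PATH instead of relying on cron's environment. A sketch with placeholder paths (current_dir and gg_location come from your script):
import os
import subprocess as sub

cmd = os.path.join(current_dir, 'ggsci_test.sh')
try:
    # stderr is folded into the output so the shell script's error messages are captured too
    get_lag = sub.check_output(cmd, shell=True, stderr=sub.STDOUT,
                               cwd=gg_location,
                               env=dict(os.environ, PATH='/usr/local/bin:/usr/bin:/bin'))  # placeholder PATH
except sub.CalledProcessError as exc:
    with open('/tmp/ggsci_cron.log', 'a') as log:   # placeholder log file
        log.write('ggsci_test.sh exited %s:\n%s\n' % (exc.returncode, exc.output))
    raise
Whatever ends up in that log (missing binaries, unset GoldenGate/Oracle environment variables, relative paths) is what needs fixing in ggsci_test.sh or in the crontab entry.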
I am trying to import the snmpSessionBaseClass python module in a script I am running, but I do not have the module installed and I can't seem to find where to download it. Does anyone know the pip or yum command to download and install this module? Thanks!
import netsnmp
sys.path.insert(1, os.path.join(sys.path[0], os.pardir))
from snmpSessionBaseClass import add_common_options, get_common_options, verify_host, get_data
from pynag.Plugins import PluginHelper,ok,critical
The following code needs to be added to a file called snmpSessionBaseClass.py, and that file needs to be placed in a directory that is on Python's path.
#!/usr/bin/env python
# Copyright (C) 2016 rsmuc <rsmuc@mailbox.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with health_monitoring_plugins. If not, see <http://www.gnu.org/licenses/>.
import pynag
import netsnmp
import os
import sys
dev_null = os.open(os.devnull, os.O_WRONLY)
tmp_stdout = os.dup(sys.stdout.fileno())
def dev_null_wrapper(func, *a, **kwargs):
"""
Temporarily swap stdout with /dev/null, and execute given function while stdout goes to /dev/null.
This is useful because netsnmp writes to stdout and disturbs the Icinga result in some cases.
"""
os.dup2(dev_null, sys.stdout.fileno())
return_object = func(*a, **kwargs)
sys.stdout.flush()
os.dup2(tmp_stdout, sys.stdout.fileno())
return return_object
def add_common_options(helper):
# Define the common command line parameters
helper.parser.add_option('-H', help="Hostname or ip address", dest="hostname")
helper.parser.add_option('-C', '--community', dest='community', help='SNMP community of the SNMP service on target host.', default='public')
helper.parser.add_option('-V', '--snmpversion', dest='version', help='SNMP version. (1 or 2)', default=2, type='int')
def get_common_options(helper):
# get the common options
host = helper.options.hostname
version = helper.options.version
community = helper.options.community
return host, version, community
def verify_host(host, helper):
if host == "" or host is None:
helper.exit(summary="Hostname must be specified"
, exit_code=pynag.Plugins.unknown
, perfdata='')
netsnmp_session = dev_null_wrapper(netsnmp.Session,
DestHost=helper.options.hostname,
Community=helper.options.community,
Version=helper.options.version)
try:
# Works around lacking error handling in netsnmp package.
if netsnmp_session.sess_ptr == 0:
helper.exit(summary="SNMP connection failed"
, exit_code=pynag.Plugins.unknown
, perfdata='')
except ValueError as error:
helper.exit(summary=str(error)
, exit_code=pynag.Plugins.unknown
, perfdata='')
# make a snmp get, if it fails (or returns nothing) exit the plugin
def get_data(session, oid, helper, empty_allowed=False):
var = netsnmp.Varbind(oid)
varl = netsnmp.VarList(var)
data = session.get(varl)
value = data[0]
if value is None:
helper.exit(summary="snmpget failed - no data for host "
+ session.DestHost + " OID: " +oid
, exit_code=pynag.Plugins.unknown
, perfdata='')
if not empty_allowed and not value:
helper.exit(summary="snmpget failed - no data for host "
+ session.DestHost + " OID: " +oid
, exit_code=pynag.Plugins.unknown
, perfdata='')
return value
# make a snmp get, but do not exit the plugin, if it returns nothing
# be careful! This function does not exit the plugin if snmp get fails!
def attempt_get_data(session, oid):
var = netsnmp.Varbind(oid)
varl = netsnmp.VarList(var)
data = session.get(varl)
value = data[0]
return value
# make a snmp walk, if it fails (or returns nothing) exit the plugin
def walk_data(session, oid, helper):
tag = []
var = netsnmp.Varbind(oid)
varl = netsnmp.VarList(var)
data = list(session.walk(varl))
if len(data) == 0:
helper.exit(summary="snmpwalk failed - no data for host " + session.DestHost
+ " OID: " +oid
, exit_code=pynag.Plugins.unknown
, perfdata='')
for x in range(0, len(data)):
tag.append(varl[x].tag)
return data, tag
# make a snmp walk, but do not exit the plugin, if it returns nothing
# be careful! This function does not exit the plugin, if snmp walk fails!
def attempt_walk_data(session, oid):
tag = []
var = netsnmp.Varbind(oid)
varl = netsnmp.VarList(var)
data = list(session.walk(varl))
for x in range(0, len(data)):
tag.append(varl[x].tag)
return data, tag
def state_summary(value, name, state_list, helper, ok_value = 'ok', info = None):
"""
Always add the status to the long output, and if the status is not ok (or ok_value),
we show it in the summary and set the status to critical
"""
# translate the value (integer) we receive to a human readable value (e.g. ok, critical etc.) with the given state_list
state_value = state_list[int(value)]
summary_output = ''
long_output = ''
if not info:
info = ''
if state_value != ok_value:
summary_output += ('%s status: %s %s ' % (name, state_value, info))
helper.status(pynag.Plugins.critical)
long_output += ('%s status: %s %s\n' % (name, state_value, info))
return (summary_output, long_output)
def add_output(summary_output, long_output, helper):
"""
if the summary output is empty, we don't add it as summary, otherwise we would have empty spaces (e.g.: '. . . . .') in our summary report
"""
if summary_output != '':
helper.add_summary(summary_output)
helper.add_long_output(long_output)
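Once snmpSessionBaseClass.py is importable, a minimal plugin built on top of it might look roughly like this (the OID and the overall PluginHelper flow are my own illustration, not part of the module):
#!/usr/bin/env python
import netsnmp
from pynag.Plugins import PluginHelper, ok
from snmpSessionBaseClass import add_common_options, get_common_options, verify_host, get_data

helper = PluginHelper()
add_common_options(helper)                 # adds -H, -C and -V
helper.parse_arguments()

host, version, community = get_common_options(helper)
verify_host(host, helper)                  # exits UNKNOWN if the host is missing or unreachable

session = netsnmp.Session(DestHost=host, Community=community, Version=version)
sys_descr = get_data(session, '.1.3.6.1.2.1.1.1.0', helper)   # sysDescr.0, just as an example

helper.add_summary("sysDescr: %s" % sys_descr)
helper.status(ok)
helper.exit()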
I'm trying to run my code with a multiprocessing function, but Mongo keeps returning
"MongoClient opened before fork. Create MongoClient with
connect=False, or create client after forking."
I really don't understand how I can adapt my code to this.
Basically the structure is:
db = MongoClient().database
db.authenticate('user', 'password', mechanism='SCRAM-SHA-1')
collectionW = db['words']
collectionT = db['sinMemo']
collectionL = db['sinLogic']
def findW(word):
    rows = collectionW.find({"word": word})
ind = 0
for row in rows:
ind += 1
id = row["_id"]
if ind == 0:
a = ind
else:
a = id
return a
def trainAI(stri):
...
if findW(word) == 0:
_id = db['words'].insert(
{"_id": getNextSequence(db.counters, "nodeid"), "word": word})
story = _id
else:
story = findW(word)
...
def train(index):
# searching progress
progFile = "./train/progress{0}.txt".format(index)
trainFile = "./train/small_file_{0}".format(index)
if os.path.exists(progFile):
f = open(progFile, "r")
ind = f.read().strip()
if ind != "":
pprint(ind)
i = int(ind)
else:
pprint("No progress saved or progress lost!")
i = 0
f.close()
else:
i = 0
#get the number of line of the file
rangeC = rawbigcount(trainFile)
#fix unicode
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
files = io.open(trainFile, "r", encoding="utf8")
str1 = ""
str2 = ""
filex = open(progFile, "w")
with progressbar.ProgressBar(max_value=rangeC) as bar:
for line in files:
line = line.replace("\n", "")
if i % 2 == 0:
str1 = line.translate(non_bmp_map)
else:
str2 = line.translate(non_bmp_map)
bar.update(i)
trainAI(str1 + " " + str2)
filex.seek(0)
filex.truncate()
filex.write(str(i))
i += 1
#multiprocessing function
maxProcess = 3
def f(l, i):
l.acquire()
train(i + 1)
l.release()
if __name__ == '__main__':
lock = Lock()
for num in range(maxProcess):
pprint("start " + str(num))
Process(target=f, args=(lock, num)).start()
This code is meant to read 4 different files in 4 different processes and insert the data into the database at the same time.
I copied only part of the code to help you understand its structure.
I've tried adding connect=False to this code, but nothing changed...
db = MongoClient(connect=False).database
db.authenticate('user', 'password', mechanism='SCRAM-SHA-1')
collectionW = db['words']
collectionT = db['sinMemo']
collectionL = db['sinLogic']
Then I tried to move it into the f function (right before train()), but then the program doesn't find collectionW, collectionT and collectionL.
I'm not very experienced with Python or MongoDB, so I hope this is not a silly question.
The code is running under Ubuntu 16.04.2 with Python 2.7.12.
db.authenticate has to connect to the mongo server, so it will try to open a connection. Even though connect=False is used, db.authenticate still requires a connection to be open.
Why don't you create the mongo client instance after the fork? That looks like the easiest solution.
Since db.authenticate must open the MongoClient and connect to the server, it creates connections which won't work in the forked subprocess. Hence, the error message. Try this instead:
db = MongoClient('mongodb://user:password@localhost', connect=False).database
Also, delete the Lock l. Acquiring a lock in one subprocess has no effect on other subprocesses.
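Applied to the structure in the question, that means building the client and the collection handles inside the function that runs in each child process, instead of at module level. A rough sketch (names reused from the question; the URI is a placeholder, and train would need to accept the collections as arguments instead of using globals):
from multiprocessing import Process
from pymongo import MongoClient

def get_db():
    # The client is created inside the child, after the fork, so it is safe to connect here.
    client = MongoClient('mongodb://user:password@localhost')   # placeholder URI with credentials
    return client.database

def f(i):
    db = get_db()
    collectionW = db['words']
    collectionT = db['sinMemo']
    collectionL = db['sinLogic']
    train(i + 1, collectionW, collectionT, collectionL)

if __name__ == '__main__':
    maxProcess = 3
    for num in range(maxProcess):
        Process(target=f, args=(num,)).start()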
Here is how I did it for my problem:
import pathos.pools as pp
import time
import db_access
class MultiprocessingTest(object):
def __init__(self):
pass
def test_mp(self):
data = [[form,'form_number','client_id'] for form in range(5000)]
pool = pp.ProcessPool(4)
pool.map(db_access.insertData, data)
if __name__ == '__main__':
time_i = time.time()
mp = MultiprocessingTest()
mp.test_mp()
time_f = time.time()
print 'Time Taken: ', time_f - time_i
Here is db_access.py:
from pymongo import MongoClient
def insertData(form):
client = MongoClient()
db = client['TEST_001']
db.initialization.insert({
"form": form[0],
"form_number": form[1],
"client_id": form[2]
})
This is happening in your code because you are initiating MongoClient() once for all the sub-processes. MongoClient is not fork-safe, so initiating it inside each function works; let me know if there are other solutions.
I want to get CPU information programmatically. I already wrote a solution for Linux, but I need a similar solution for SunOS. Does anyone have any idea?
def getCpusInfo():
cpuinfos = []
cpuinfo = {}
for line in open('/proc/cpuinfo').readlines():
line = line.strip()
dual = line.split(':')
key = dual[0].replace('\t', '')
if (key == 'processor'):
cpuinfo = {}
cpuinfos.append(cpuinfo)
elif (len(dual)>1):
            cpuinfo[key] = dual[1].strip()
return cpuinfos
cpusinfo = getCpusInfo()
print "Model = " + cpusinfo[0]['model name']
print "Quantity = " + str(len(cpusinfo))
print "Cores = " + cpusinfo[0]['cpu cores']
print "Threads = " + cpusinfo[0]['siblings']
I found a solution in bash, so it is easily portable to Python (a Python port is sketched after the shell snippet).
Thanks for the help in the comments.
CORES_F=`/usr/sbin/psrinfo -p`
PROCESSOR=`/usr/sbin/psrinfo -vp | head -1 | awk '{print $5}'`
MODEL=`/usr/sbin/psrinfo -pv | tail -1 | awk '{print $1}'`
echo "Model = " $MODEL
echo "Cores = " $CORES_F
echo "Threads = " $PROCESSOR
It is still missing the CPU quantity, but in my case I no longer need to save that information.
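For completeness, a rough Python port of that shell snippet, assuming /usr/sbin/psrinfo is available on the SunOS host (the head/tail/awk field positions are carried over unchanged):
import subprocess

def psrinfo(flags):
    """Run /usr/sbin/psrinfo with the given flags and return its output lines."""
    out = subprocess.check_output(['/usr/sbin/psrinfo'] + flags)
    if isinstance(out, bytes):          # Python 3 returns bytes
        out = out.decode()
    return out.splitlines()

cores = psrinfo(['-p'])[0].strip()
threads = psrinfo(['-vp'])[0].split()[4]   # 5th field of the first line, as in the awk version
model = psrinfo(['-pv'])[-1].split()[0]    # 1st field of the last line, as in the awk version

print("Model = " + model)
print("Cores = " + cores)
print("Threads = " + threads)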