I have written a script that is supposed to automate taking a data frame with pandas and interacting with it, then putting it to another part of the network as a reference, and then it interacts with SmartSheets. It uses a very basic API and most of the functions and logic work well, but the only issue is the try except block in the end that is supposed to be scheduled out.
There is no error thrown, it just sits in a blank terminal; this is odd because if I stack the functions I wrote one after another, they work with no issue.
This try except block is so that It can help write out errors on the machine and ultimately put into a scheduling function later in the script.
This script is on a VM that runs Ubuntu 18.04. It is written in Python 2.
I have researched online, and I cannot find a situation where the try except block does not error out and does not execute. I made sure my indentation on the IDE (Atom) is correct (4 spaces). I can line up the 4 functions themselves and they execute fine with no errors.
import os
import sys
import datetime
import tempfile
import smartsheet
import glob
import warnings
import platform
import pandas as pd
import math
from apscheduler.schedulers.blocking import BlockingScheduler
#variables
warnings.simplefilter(action='ignore', category=FutureWarning)
now = datetime.datetime.now()
PATH = "/mnt/cifs/rxlog.csv"
csvpath = "/home/ajskrilla/csvtest.csv"
End = '.csv'
today = now.strftime("%Y-%m-%d %H:%M")
path1 = "/mnt/cifs1"+"/Reports"+ now.strftime("%Y-%m-%d")+"/log Import"
path_glob = "/mnt/cifs1"+"/Reports"+ now.strftime("%Y-%m-%d")+"/log Import"+now.strftime("%Y-%m-%d")
Files_to_compare = glob.glob('/mnt/cifs1'+"/Reports"+ now.strftime("%Y-%m-%d")+'/log Import'+now.strftime("%Y-%m-%d")+'*.csv')
Fpath = path1 + now.strftime("%Y-%m-%d %H:%M") + End
SSName = 'Call Sheet/NDC ' + now.strftime("%Y-%m-%d %H:%M") + End
list_of_files = Files_to_compare
sched = BlockingScheduler()
#start of process
def Import_csv():
data_file = pd.read_csv(PATH, error_bad_lines=False, sep="|", parse_dates=True, low_memory=False, quotechar=None, quoting=3)
data_file.to_csv(csvpath)
def Clean_CSV():
file_path_directory = "/mnt/cifs1/"+"Reports" + now.strftime("%Y-%m-%d")
if not os.path.exists(file_path_directory):
os.makedirs(file_path_directory)
fields=['RXNBR', 'RX STOREID', 'FILLDATE', 'PATNAMELAST', 'PATNAMEFIRST', 'NH NBR', 'RX HOLD STATUS', 'RX HOLD REASON']
df = pd.read_csv(csvpath, skipinitialspace=True, usecols=fields, low_memory=False)
df.columns = ['RXNBR','RX_STOREID', 'FILLDATE', 'PATNAMELAST', 'PATNAMEFIRST', 'NH_NBR', 'RX_HOLD_STATUS', 'RX_HOLD_REASON']
nf = df[df.NH_NBR == 0][df.RX_HOLD_STATUS != 'Online Queued']
with tempfile.NamedTemporaryFile(delete=False) as temp:
nf.to_csv(Fpath, index=False)
def Compare_files():
if platform.system() == 'Linux':
if len(list_of_files) > 2:
latest_file = min(list_of_files, key=os.stat)
first_file = max(list_of_files, key=os.stat)
one_file= pd.read_csv(first_file)
two_file= pd.read_csv(latest_file)
out = one_file.append(two_file)
out.drop_duplicates('RXNBR', inplace=True)
with tempfile.NamedTemporaryFile(delete=False) as temp:
out.to_csv(Fpath, index=False)
for file in list_of_files:
if file != latest_file:
for files in list_of_files:
os.remove(files)
else:
pass
#delete the old file
def SS_import():
ss_client = smartsheet.Smartsheet("BANNERID#")
ss_client.errors_as_exceptions(True)
imported_sheet = ss_client.Workspaces.import_csv_sheet(
# need to change this based upon workspace ID
xxxxxxxxxxxxxxxx, #WS ID
Fpath,
SSName,
header_row_index=0
)
def SS_delete():
ss_client = smartsheet.Smartsheet("BANNNERID#")
action = ss_client.Sheets.list_sheets(include_all=True)
for single_sheet in action.data:
Sheetid= single_sheet.id
ss_client.Sheets.delete_sheet(
single_sheet.id)
######################################################################################################################################
#this is where the issue arises, during this function
#full fnct with error handling
def NDC_import():
try:
Import_csv()
except Exception as E:
Import_error_file = open('/mnt/Error Files/IE Error' + now.strftime("%Y-%m-%d %H:%M") + '.txt', 'w+')
for line in Import_error_file:
line.write(E)
line.close()
sys.exit()
try:
Clean_CSV()
except Exception as E:
Clean_CSV_error = open('/mnt/Error Files/CC Error' + now.strftime("%Y-%m-%d %H:%M") + '.txt', 'w+')
for line in Clean_CSV_error:
line.write(E)
line.close()
sys.exit()
try:
Compare_files()
except Exception as E:
Compare_files_error = open('/mnt/Error Files/CF Error' + now.strftime("%Y-%m-%d %H:%M") + '.txt', 'w+')
for line in Compare_files_error:
line.write(E)
line.close()
sys.exit()
try:
SS_import()
except Exception as E:
SS_import_error = open('/mnt/Error Files/SSI Error' + now.strftime("%Y-%m-%d %H:%M") + '.txt', 'w+')
for line in SS_import_error:
line.write(E)
line.close()
sys.exit()
NDC_import()
When the script is run for the NDC_import() function, it won't execute at all. if the 4 functions Import_csv(), Clean_CSV(), Compare_files(), and SS_import() are just by themselves, they execute normally.
Why can't it run in that try except block I wrote? It does not even throw an error and the terminal is blank.
I think the primary issue revolves around your exception handling. In particular, you're opening the files as w+ mode and then iterating through the opened file -- not exactly a syntax error but it's functionally broken. It's also worth noting that if there are no errors, you should not expect to see any printed output.
I would suggest to take a different direction with handling errors entirely and consider using the fabulous logging library.
An example of how you could use it would be
# ... other imports
import logging
# ... your function definitions
def NDC_Import():
try:
Import_csv()
except Exception as E:
logging.exception('error during Import_csv()')
return
# and so on ...
That logging.exception() line will print out the whole traceback to whatever logger is configured, and you can configure it to log to a file.
You're opening the file in w+ mode. Example:
SS_import_error = open('/mnt/Error Files/SSI Error' + now.strftime("%Y-%m-%d %H:%M") + '.txt', 'w+')
w+ means it will create the file if it doesn't exist, or overwrite it if it does. That means that SS_import_error will always point to an empty file after this line. Then you immediately try to loop over the lines of that file:
for line in SS_import_error:
line.write(E)
line.close()
sys.exit()
But attempting to loop over the lines of an empty file will end immediately, without ever executing the code within the block. The end result is that the except block runs, but it doesn't actually do anything (other than potentially wipe a file).
I don't know exactly what you intended the for loops to do, so I can't suggest an fix.
Related
I am working on locating a file which has hyphens(eg., Hours-2021.xml).When I perform the character replacement, I then get an error that the file cannot be found. If I simply use a filname without hyphens it works as expected. I had found on another thread a solution to reformat the filename and it doesnt appear to work. Most likely it is a simple fix that is eluding me. Here is a sample of my code...
import os
import os.path
import win32com.client
import pandas as pd
in_file = input('Enter filename to use:')
for file in os.listdir():
if file.startswith(in_file):
new_fn=file.replace('-','')
new_1 = os.rename(file, new_fn)
try:
xlApp = win32com.client.Dispatch("Excel.Application")
xlWbk = xlApp.Workbooks.Open(new_1)
xlWbk.SaveAs(r"hours_conv.xlsx", 51)
xlWbk.Close(True)
xlApp.Quit()
except Exception as e:
print(e)
finally:
xlWbk = None; xlApp = None
del xlWbk; del xlApp
# READ EXCEL FILE
output_df = pd.read_excel(r"hours_conv.xlsx", skiprows = 3)
print(output_df)
Everything before the try: I can get an output that I expect (eg., Hours2021). Then Further I get the error that in this case ""Sorry, we couldn't find Hours2021.xml ..."
Without delving too deep in your code, it looks like you have an indentation problem. Your try-except-finally block should probably be indented to be under the if file.startswith line.
Plus, you should probably check that new_fn is not the same as new_1 before doing the os.rename(). Or alternatively you can make the call conditional, like changing:
if file.startswith(in_file):
to:
if file.startswith(in_file) and '-' in file:
or something along those lines.
Also, os.rename() does not return a value. So new_1 gets set to None.
Lastly, keep in mind that each time you run your program, it is renaming files. So you may have to rename them back before each run.
Also, keep in mind that your:
xlWbk = xlApp.Workbooks.Open(new_1)
will probably always fail, since new_1 is None.
Assuming I understand what you're trying to do, here is a working version of your code. In addition to the above comments, it also addresses path issues, since Excel wants to know the full path of the doc to work with. The working code is as follows:
import os.path
import win32com.client
import pandas as pd
cwd = os.getcwd()
print(f"{cwd=}")
out_filename = "hours_conv.xlsx"
in_file = input('Enter filename (starting string) to use: ')
for file in os.listdir():
if file.startswith(in_file):
new_fn = file.replace('-','')
if new_fn != file:
os.rename(file, new_fn)
# indent
try:
xlApp = win32com.client.Dispatch("Excel.Application")
xlWbk = xlApp.Workbooks.Open(f"{cwd}/{new_fn}") # mod to be new_fn
xlWbk.SaveAs(f"{cwd}/{out_filename}", 51)
xlWbk.Close(True)
xlApp.Quit()
except Exception as e:
print(e)
finally:
xlWbk = xlApp = None
del xlWbk; del xlApp
# READ EXCEL FILE
if os.path.exists(f"{cwd}/{out_filename}"):
output_df = pd.read_excel(f"{cwd}/{out_filename}", skiprows = 3)
print(output_df)
else:
print(f"Output file {cwd}/{out_filename} Not Found")
I'm a beginner at Python and this site. Sorry if this might be simple.
I have modified a python script that calculates the amount of words in a pdf file "Master.pdf" an writes the time and date plus the amount of words to a .txt file.
I have Python2.7 installed, I have installed Anancoda and I am using the PyCharm editor. When I open my PyCharm editor and run this script, no problems arise, the script executes and everything works.
As I would like this script to run every 15 minutes, I have made it a task using Task Scheduler. The task is "Start a program" the program is:
- C:\Users\alkare\AppData\Local\Continuum\anaconda2\python.exe - and the argument is - "C:/Users/alkare/Desktop/Report/WordCount.py" -.
whenever it runs I see the command prompt open, some text fly across my screen and then the command line terminal closes, BUT no changes are done to my .txt file.
here is the code I am using saved as "WordCount.py":
#!/usr/bin/env python2.7
import os
import sys
import re
import datetime
import PyPDF2
def getPageCount(pdf_file):
pdfFileObj = open(pdf_file, 'rb')
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
pages = pdfReader.numPages
return pages
def extractData(pdf_file, page):
pdfFileObj = open(pdf_file, 'rb')
pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
pageObj = pdfReader.getPage(page)
data = pageObj.extractText()
return data
def getWordCount(data):
data = data.split()
return len(data)
def main():
pdfFile = 'Master.pdf'
# get the word count in the pdf file
totalWords = 0
numPages = getPageCount(pdfFile)
for i in range(numPages):
text = extractData(pdfFile, i)
totalWords += getWordCount(text)
Now = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
f = open("TrackingTimeData.txt", "a")
f.write(Now[0:4] + "\t" + Now[4:6] + "/" + Now[6:8] + "\t" + Now[9:11] + ":" + Now[11:13] + "\t" + str(totalWords) + "\n")
f.close()
if __name__ == '__main__':
main()
The problem is that you are allowing the program to fail without providing you any meaningful output (it sounds like it hits an exception and closes).
Instead of just calling main() without guarding it in a try block:
if __name__ == '__main__':
main()
give yourself some slack here to gather information:
if __name__ == '__main__':
try:
main()
except Exception as e:
print("Error {}".format(e))
# drop into a command-prompt debugger:
import pdb
pdb.set_trace()
# slightly more old-school, pause the window to read the exception:
import time
time.sleep(15)
# throwback to DOS windows
import os
os.system('pause')
# read the error, come back to stackoverflow and describe the problem more, etc.
For example, mixing this with task scheduler, you'd want to right-click on your python.exe in Windows, go to properties, set "Run as Administrator" because maybe you're getting an access denied trying to read/write to a .PDF in some special directory. This is just an example of the many guesses people could throw in to randomly help you solve an issue versus knowing exactly what the error is.
I have a Python that is going to read every x seconds a CSV file.
What I do is:
Open the file, read the info as CSV, loop every entry
This is done in this Python file:
import csv
import time
import datetime
CSV_PLAN = "./XoceKochPlan.csv"
chargePlanFile = open(CSV_PLAN, 'rt')
def loopMe():
try:
for eachRow in reader:
print (eachRow)
except Exception, ex:
print ("Error processFileing the Thread" + str(ex))
print ("opening file " + str(CSV_PLAN))
now = datetime.datetime.utcnow().strftime("%a %b %d %H %M %S %Z %Y")
print ("Now " + str(now))
reader = csv.reader(chargePlanFile)
loopMe()
The output is so far so good.
But if I do:
loopMe()
time.sleep(10)
loopMe()
then the file is only printed once!
The question is Why?
What am I missing? What is getting internally consumed, or is the reader just empty after the first loop?
In python the file io handler has an internal pointer. After reading the file it will be at the end of the csv file. Ensure you call the chargePlanFile.close() method and reopen the file before calling the loopme() function. Or use the chargePlanFile.seek(0) to reset the position of the internal pointer.
When you start the second loop, your reader is already at the last line. You should reassign the reader. You should do it inside your loopMe function at the beginning.
def loopMe():
reader = csv.reader(chargePlanFile)
try:
for eachRow in reader:
print (eachRow)
except Exception, ex:
print ("Error processFileing the Thread" + str(ex))
If you would keep the same code, just add reader.seek(0) in the first line of loopMe
I am getting a TypeError: object of type file' has no len()
I have traced down the issue to the path established upon execution.
What am I missing to correct this error found within the "savePath" deceleration or usage within the "temp = os.path.join(savePath, files)"?
def printTime(time):
savePath = "C:\Users\Nicholas\Documents"
files = open("LogInLog.txt", "a")
temp = os.path.join(savePath, files)
files.write("A LogIn occured.")
files.write(time)
print files.read
files.close
main()
The whole program is below for reference:
from time import strftime
import os.path
def main():
getTime()
def getTime():
time = strftime("%Y-%m-%d %I:%M:%S")
printTime(time)
def printTime(time):
savePath = "C:\Users\Nicholas\Documents"
files = open("LogInLog.txt", "a")
temp = os.path.join(savePath, files)
files.write("A LogIn occured.")
files.write(time)
print files.read
files.close
main()
Here's a working version:
from time import strftime
import os.path
def main():
getTime()
def getTime():
time = strftime("%Y-%m-%d %I:%M:%S")
printTime(time)
def printTime(time):
savePath = "C:\Users\Nicholas\Documents"
logFile = "LogInLog.txt"
files = open(os.path.join(savePath, logFile), "a+")
openPosition = files.tell()
files.write("A LogIn occured.")
files.write(time)
files.seek(openPosition)
print(files.read())
files.close()
if __name__ == '__main__':
main()
There were a few problems with the code snippet posted in the question:
Two import statements were concatenated together. Each should be on a separate line.
The os.path.join function doesn't work on an open filehandle.
The read() and close() methods were missing parens.
If the intent is to read what is written in append mode, it's necessary to get the current file position via tell() and seek() to that position after writing to the file.
While it's legal to call main() without any conditional check, it's usually best to make sure the module is being called as a script as opposed to being imported.
I want to create a logfile that adds every time an error occurs a new line to a textfile log.txt. I am pretty new to python, so maybe I miss something...but everytime an error occurs, the log.txt is overwritten and only the current error message is displayed although the error message is different every time (due to timestamp) and I added a \n.
Thats my code so far:
import os
import sys
import time
import datetime
try:
path = sys.argv[1]
ts = time.time()
sttime = datetime.datetime.fromtimestamp(ts).strftime('%Y%m%d_%H:%M:%S - ')
#some more things but nothing else of interest for here
except:
error = "ERROR! No file 'bla' found!"
log = 'log.txt'
logfile = file(log, "w")
logfile.write(sttime + error + '\n')
logfile.close()
sys.exit(0)
Maybe you can help me out here. Do I need a loop somewhere? I tried to create an empty string (error = "") that adds the error message to log.txt with += each time an error occurs, but that didn't work at all :-/
Thank you!
Open the file in append mode as 'w' mode will truncate the file each time., i.e
logfile = open(log, "a")
And you should use with:
with open(log, 'a') as logfile:
logfile.write(sttime + error + '\n')
No need to close the file, this will happen automatically.
Note that if the exception is raised at path = sys.argv[1], the timestamp might not be set when you try to log. It would be better to get the timestamp in the logging code.
Also, you should not use a bare except clause, but at least catch the exception and report it.
from datetime import datetime
except Exception, exc:
sttime = datetime.now().strftime('%Y%m%d_%H:%M:%S - ')
error = "ERROR! {}".format(exc)
log = 'log.txt'
with open(log, 'a') as logfile:
logfile.write(sttime + error + '\n')
raise
# sys.exit(0)
When you do file(log, 'W'). The file log will become empty. If you want to add something you should use a instead of w:
open(log, "a")
class Logger(object):
def __init__(self, n):
self.n = n
self.count = 0
self.log = open('log.txt', 'a')
def write(self, message):
self.count+=1
if self.count<self.n:
self.log.write("%s %s"% (time,message))
self.log.flush()
import sys
sys.stdout= Logger()
time -- is time string formatted the way you want.
Now regular print function will write to file.