Why is Python running a different script? - python

I'm in Windows and I have a script called csv.py, I recently installed Pandas, and created anotherscript.py. The only code I have under anotherscript.py is import pandas.
When I run py anotherscript.py all it is doing is running csv.py. I have renamed csv.py to something else and it is still getting called.
If I remove import pandas, it works. If I move anotherscript.py to a different folder, it works fine. It looks like something is cached.
What am I missing???
anotherscript.py
import pandas
cmd call and output
C:\Users\*****>py anotherscript.py
0 634
1 Saturday, January 8, 2022
2 15:00 EST
.
.
.
<cal file created and uploaded>
csv.py
This script scrapes a webpage and creates a calendar file
import openpyxl
from openpyxl import load_workbook
from ics import Calendar, Event
from datetime import datetime
import pytz
from ftplib import FTP
import ftplib
import urllib.request
import requests
# Download the schedule workbook and save it locally as u7.xlsx.
response = requests.post("urlRetrated")
with open('u7.xlsx', 'wb') as s:
s.write(response.content)
# Re-open the saved workbook and work on its active sheet.
wb_obj = openpyxl.load_workbook('u7.xlsx')
worksheet = wb_obj.active
data = []
c = Calendar()
EST = pytz.timezone('US/Eastern')
# NOTE(review): indentation was lost in this listing. `data` is reset at the
# end and the sample output prints one numbered list per game, so the
# statements down to `data = []` presumably all belong to this row loop — confirm.
# Iteration starts at sheet row 2, presumably to skip a header row — confirm.
for count, row_cells in enumerate(worksheet.iter_rows(min_row=2,values_only=True)):
# The inner loop reuses the name `count`; neither counter is read afterwards.
for count, cell in enumerate(row_cells):
data.append(cell)
# str.strip('EST ') removes any of the characters E, S, T and space from both
# ends of the string; it does not remove the literal text "EST".
date_and_time = data[1] + " " + data[2].strip('EST ')
game_datetime = datetime.strptime(date_and_time, '%A, %B %d, %Y %H:%M')
# Columns 3 and 5 hold the two team names; any name containing 'SoccerTeam'
# is replaced by 'William'.
if 'SoccerTeam' in data[3]:
data[3] = 'William'
if 'SoccerTeam' in data[5]:
data[5] = 'William'
game_title = data[3] + " Vs " + data[5]
game_location = data[6]
# Build one calendar event per game.
e = Event()
e.name = game_title
# NOTE(review): pytz zones are normally attached with EST.localize(...);
# replace(tzinfo=...) may select an unexpected UTC offset — confirm.
e.begin = game_datetime.replace(tzinfo=EST)
e.location = game_location
e.created = datetime.today()
c.events.add(e)
# Serialise the whole calendar to marcos.ics.
with open('marcos.ics', 'w', newline='') as f:
f.write(str(c))
# Redundant: the with statement already closes the file.
f.close()
# Echo the row that was just processed, one "index value" pair per line.
for index, value in enumerate(data):
print(index, value)
# Start the next row with an empty list.
data = []
user = '****'
pas = '*****'
# Upload the calendar file into public_html over FTP.
try:
ftp = ftplib.FTP('*****', user, pas)
print(ftp.getwelcome())
ftp.cwd('public_html')
# NOTE(review): the calendar above is written to marcos.ics but the upload
# sends will.ics — confirm which file name is intended.
file = open('will.ics','rb')
ftp.storbinary('STOR will.ics', file)
file.close()
ftp.quit()
# Only ftplib.error_perm is handled, and it is always reported as a login failure.
except ftplib.error_perm as error:
if error:
print ('Login Failed')
Thanks

Related

Why do these buttons open the same file regardless of which one I select?

I'm making a pdf 'date checker' in Python which tells me if every page of the pdf has tomorrows date at the top (for checking newspapers as part of my job).
So far so good until I attempted to put it all into a GUI: the buttons display the correct filenames, but they only open and check the last file in the list the buttons were generated from ('files[i]').
Can anybody figure out from my horrible nooby code why this is happening? please excuse the mess (I'm new) :)
Here is my ugly code :) I think the issue is either where I open the file using
'with open(files[i])' or 3rd line from the bottom where the buttons are created.
Any help would be greatly appreciated, thank you.
import os, glob
import fileinput
import tkinter as tk
import dateutil
import datetime
from dateutil.relativedelta import *
from dateutil.easter import *
from dateutil.parser import *
from dateutil.rrule import *
import PyPDF2
from PyPDF2 import PdfReader
from datetime import datetime, timedelta
from tkinter import *
# Intended to read every PDF in folder_path and print its name and length.
# NOTE(review): `files` and `i` are only assigned further down in this listing,
# and the `filename` produced by glob is never used — confirm the intent.
folder_path = 'C:users/axlra/documents/datechecker'
for filename in glob.glob(os.path.join(folder_path, '*.pdf')):
# Opened in text mode ('r') although the files are PDFs.
with open(files[i], 'r') as f:
text = f.read()
print (files[i])
print (len(text))
# Button callback: reports, per page, whether tomorrow's date (dd-mm-YYYY)
# matches the date parsed from the text extracted from the PDF files[i].
# `files` and `i` are module-level names that are looked up when the function
# runs, so every button using this callback sees the value `i` has at click time.
def checknow():
tomorrow = (datetime.now() + timedelta(days=1)).strftime("%d-%m-%Y")
# The handle is opened here, but PdfReader below is given the path instead.
file = open(files[i], 'rb')
reader = PdfReader(files[i])
total = len(reader.pages)
for x in range(total+1):
# range(total+1) never yields a value greater than total, so this close()
# is never reached.
if x > total: file.close()
# Always reads the first page; x is only used in the printed messages.
page = reader.pages[0]
found = False
text = (page.extract_text())
parts = []
# Visitor passed to extract_text: keeps only fragments whose tm[5] value lies
# strictly between 1600 and 10000 — presumably the vertical position of the
# page header; confirm.
def visitor_body(text, cm, tm, fontDict, fontSize):
y = tm[5]
if y > 1600 and y < 10000:
parts.append(text)
page.extract_text(visitor_text=visitor_body)
text_body = "".join(parts)
#print(text_body)
word = text_body
# Drops the first 22 characters and the last one before parsing the date —
# presumably to remove the masthead text; confirm.
word=word[22:-1]
#print(word)
prodate = parse(word)
str_date = prodate.strftime("%d-%m-%Y")
print(str_date)
print(files[i])
if tomorrow in str_date:
found = True
if found:
#print(x)
print("Tomorrow's date was found on page"+ " "+str(x))
else:
#print(x)
print("Tomorrow's date was NOT found on page"+ " "+str(x))
# Scan the working directory for PDFs, then create one Tk button per file.
location = os.getcwd() # get present working directory location here
counter = 0 #keep a count of all files found
files = [] #list to store all pdf files found at location
for file in os.listdir(location):
try:
if file.endswith(".pdf"):
print ("pdf file found:\t", file)
files.append(str(file))
# Assigns counter to itself, so counter stays 0.
counter = counter
except Exception as e:
raise e
# NOTE(review): if this line sits inside the except block it can never run,
# because it follows the raise — confirm the original indentation.
print ("No files found here!")
root = Tk()
btn = [] #creates list to store the buttons in
# Because counter is still 0, this loop adds nothing to files.
for i in range(counter): #this just populates a list as a replacement for the actual inputs for troubleshooting purposes
files.append(str(i))
for i in range(len(files)): #this says for *counter* in *however many elements there are in the list files*
#the below line creates a button and stores it in a list we can refer to later
# Every button gets the same callback object; nothing ties a button to its
# own index, and `i` keeps its final value after this loop ends.
btn.append(Button(root, text=files[i], command=checknow))
btn[i].pack() #this packs the buttons
root.mainloop()
Based off the given solutions, this is the working code, the solution was to completely get rid of the 'i list' and just use file_path:
import os
import tkinter as tk
from tkinter import messagebox
import os, glob
import fileinput
import tkinter as tk
import dateutil
import datetime
from dateutil.relativedelta import *
from dateutil.easter import *
from dateutil.parser import *
from dateutil.rrule import *
import PyPDF2
from PyPDF2 import PdfReader
from datetime import datetime, timedelta
from tkinter import *
import re
# Collect the PDF files found in the current working directory.
location = os.getcwd()
files = [str(name) for name in os.listdir(location) if name.endswith(".pdf")]
for name in files:
    print("pdf file found:\t", name)
# Number of PDFs found, kept in step with the list.
counter = len(files)
if not files:
    print("No files found here!")

# NOTE(review): despite the "tomorrow" names the offset is -1 day, i.e.
# yesterday; kept exactly as written — confirm the intended direction.
_target = datetime.now() + timedelta(days=-1)
# strftime's "%e" (day of month without a leading zero) is a platform
# extension that is not available everywhere (e.g. Windows), so the day
# number is taken from the date object instead.
_day = str(_target.day)

# The same date in the different layouts used by the checked publications,
# with all spaces removed so they can be matched against space-stripped text.
# Each variant is now built from its own format; before, variants 2-5 were
# all overwritten with the value of `tomorrow`.
tomorrow = (_target.strftime("%A,%B") + _day).replace(" ", "")
tomorrow2 = _target.strftime("%d.%m.%Y").replace(" ", "")
tomorrow3 = (_target.strftime("%A") + _day + _target.strftime("%B%Y")).replace(" ", "")
tomorrow4 = (_target.strftime("%A,%B") + _day).replace(" ", "")
tomorrow5 = (_target.strftime("%A,") + _day + _target.strftime("%B")).replace(" ", "")
def open_pdf(file_path):
    """Report, for every page of a PDF, how often the target date appears.

    The text of each page is extracted, all spaces are removed, and the
    module-level string ``tomorrow`` is searched for. One line is printed
    per page saying whether the date appears twice (or more), once, or not
    at all. Page numbers in the messages are zero-based.

    Args:
        file_path: Path of the PDF file to check.
    """
    # The with block guarantees the file is closed; the original close()
    # sat behind a condition (`x > x`) that could never be true.
    with open(file_path, 'rb') as file:
        reader = PdfReader(file)
        for x, page in enumerate(reader.pages):
            text = page.extract_text().replace(" ", "")
            first_index = text.find(tomorrow)
            if first_index == -1:
                print("Tomorrows date " + tomorrow + " does not appear on page" + " " + str(x) + " ---")
                continue
            # Look for a second occurrence after the end of the first one.
            second_index = text.find(tomorrow, first_index + len(tomorrow))
            if second_index != -1:
                print("Tomorrows date " + tomorrow + " appears twice on page" + " " + str(x))
            else:
                print("Tomorrows date " + tomorrow + " appears only once on page" + " " + str(x) + " -")
def create_buttons(directory):
    """Add one button to the global ``root`` window for each PDF in *directory*.

    Every button is labelled with the file name and, when clicked, calls
    ``open_pdf`` with the full path of that file.
    """
    pdf_names = (entry for entry in os.listdir(directory) if entry.endswith(".pdf"))
    for pdf_name in pdf_names:
        full_path = os.path.join(directory, pdf_name)
        # The path is bound as a default argument so that each button keeps
        # its own file instead of sharing the loop variable.
        pdf_button = tk.Button(root, text=pdf_name, command=lambda f=full_path: open_pdf(f))
        pdf_button.pack()


root = tk.Tk()
create_buttons(os.getcwd())
root.mainloop()
The basic answer is that when for i in range(len(files)) finishes, i does not go out of scope as it would in some other languages; it keeps the last value it was assigned. A simple test: the following prints 2.
# After the loop finishes, `i` still exists and holds the last value it took.
for i in range(3):
    pass
print(i)  # prints 2
So when you call checknow() the referenced file would be the last file in files since your i doesn't change after the loop.
Something I've done in the past is create a class encompassing it so that each one holds to their own references. I did it without subclassing the tkinter class, but you could. A sample for an idea of what I did is
class FileButton:
    """A Tk button bound to a single file name.

    Each instance stores its own file name, so the callback of every button
    refers to the file that button was created for rather than to a shared
    loop variable.
    """

    def __init__(self, root, file_name):
        """Create the button inside *root*, label it *file_name* and pack it."""
        # Imported here so the class works however tkinter is imported at
        # file level (`import tkinter as tk`, `from tkinter import *`, ...).
        import tkinter

        self._root = root
        self._file_name = file_name
        self._button = tkinter.Button(root, text=file_name, command=self.checknow)
        self._button.pack()

    def checknow(self):
        """Button callback; a placeholder showing how to reach the file name."""
        # as an example of how you can reference the file_name.
        # you can also do this by doing self._button.cget("text") and not
        # have to store file_name
        file_name = self._file_name
# One FileButton per file; each instance remembers its own file name.
for file_name in files:
    btn.append(FileButton(root, file_name))
I haven't tested this particular code, and my previous uses were more for labels and entries, but the principle of it was the same and I can confirm that using the callback in this manner worked. Also, if you don't need to reference the buttons anymore you don't have to append them to the btn list either.

Python Progress Bar in nested loop

I'm translating some Linux log data to a CSV for data analytics. Some of the instructions take some time, so I thought I would put in a progress bar for each file that is being translated. However, when I add a progress bar with either progressbar2 or tqdm, my pandas dataframes are empty. There's no data at all. When I remove the progress bar, everything works as it should.
Here is my CSV translating function:
import pandas as pd
from dateutil import parser
from tqdm import trange
import os
import glob
import csv
import socket
# Translates every *.txt log under LOGS_FILE_PATH into rows of
# [date, ip, url] written to CSV_FILE_PATH.
def logsToCSV():
print("[+] Translating log to CSV")
log_file = open(CSV_FILE_PATH, "w", newline='')
csv_w = csv.writer(log_file)
for filename in glob.glob(os.path.join(LOGS_FILE_PATH, '*.txt')): # Find all files in path with .txt
data_file = open(filename, "r")
# readlines() reads the file to its end, so iterating data_file afterwards
# yields no further lines.
file_length = len(data_file.readlines())
for i in trange(file_length, desc='loop', leave=False): # Progress Bar Via TQDM
for new_line in data_file:
# NOTE(review): `line` is not defined in this version; the loop variable is
# `new_line` — confirm.
new_line = line.strip().split(" ")
# The first three space-separated fields form the timestamp.
date = str("%s %s %s" % (new_line[0], new_line[1], new_line[2])).strip()
date = parser.parse(date)
# Field 5 up to the first "/" is kept as the IP address.
ip =str(new_line[5]).partition("/")
ip = str(ip[0]).strip()
# Field 7 is optional; None is written when it is missing.
try:
url = str(new_line[7]).strip()
except:
url = None
csv_w.writerow([date,ip,url])
TQDM is breaking something or I am implementing it incorrectly.
EDIT 1:
I figured it out. I was exhausting the file read during my readlines() to get the length. This works:
def logsToCSV():
    """Translate every ``*.txt`` log under LOGS_FILE_PATH into CSV_FILE_PATH.

    Each log line is split on single spaces. The first three fields are
    parsed as the timestamp, field 5 (up to the first "/") is taken as the
    IP address and field 7, when present, as the URL. One CSV row
    ``[date, ip, url]`` is written per log line, and a tqdm progress bar is
    shown for each file. Blank lines are skipped.
    """
    # Local import: only `trange` is imported from tqdm at file level.
    from tqdm import tqdm

    print("[+] Translating log to CSV")
    with open(CSV_FILE_PATH, "w", newline='') as log_file:
        csv_w = csv.writer(log_file)
        path, dirs, files = next(os.walk(LOGS_FILE_PATH))
        log_num = len(files)
        print(log_num)
        for filename in glob.glob(os.path.join(LOGS_FILE_PATH, '*.txt')):  # Find all files in path with .txt
            # Count the lines with a separate handle so that the handle used
            # for processing below still starts at the beginning of the file.
            with open(filename, "r") as f:
                file_length = sum(1 for _ in f)
            with open(filename, "r") as data_file, tqdm(total=file_length) as pbar:
                for line in data_file:
                    pbar.update(1)
                    if not line.strip():
                        continue
                    new_line = line.strip().split(" ")
                    date = ("%s %s %s" % (new_line[0], new_line[1], new_line[2])).strip()
                    date = parser.parse(date)
                    ip = str(new_line[5]).partition("/")[0].strip()
                    # The URL field is optional.
                    try:
                        url = str(new_line[7]).strip()
                    except IndexError:
                        url = None
                    csv_w.writerow([date, ip, url])
you can apply tqdm for your main loop :
from tqdm import tqdm
# `condition` stands for whatever iterable the main loop walks over.
for i in tqdm(condition):
    ...  # loop body goes here

How to get the yahoo finance csv directly into python

Does anybody know how to get yahoo finance csv directly into python?
The problem is that when i try to get the data with this (example) link:
http://real-chart.finance.yahoo.com/table.csv?s=WU&a=4&b=20&c=2015&d=05&e=21&f=2016&g=d&ignore=.csv'
It gives a pop-up asking if I want to download the csv file. This causes an error when I try to read it into Python. My script is:
today = datetime.date.today()


def get_url(stock='GOOG',
            START_date=(str(today.year - 1) + '-' + str(today.month - 1)
                        + '-' + str(today.day - 1)),
            END_date=str(today)):
    """Build the Yahoo Finance CSV download URL for one ticker and date range.

    Args:
        stock: Ticker symbol placed in the ``s=`` query parameter. The
            argument is now honoured; previously it was overwritten with
            the hard-coded ticker ``WU``.
        START_date: Start of the range as ``'YYYY-M-D'``.
        END_date: End of the range as ``'YYYY-MM-DD'``.

    Returns:
        The complete URL as a string.

    The defaults are computed once, when the function is defined, from the
    module-level ``today``.
    NOTE(review): the default start subtracts one from the year, the month
    and the day, so the month is 0 in January and the day is 0 on the first
    of a month — confirm this is what the service expects.
    """
    baseurl = 'http://real-chart.finance.yahoo.com/table.csv?'
    start = START_date.split('-')
    end = END_date.split('-')
    # a/b/c carry start month/day/year, d/e/f carry end month/day/year.
    FROM_date = '&a=' + start[1] + '&b=' + start[2] + '&c=' + start[0]
    TO_date = '&d=' + end[1] + '&e=' + end[2] + '&f=' + end[0]
    return baseurl + 's=' + stock + FROM_date + TO_date + '&g=d&ignore=.csv'
# Collect every CSV row in `rawdata` (the list was created under the
# misspelled name `rawdate`, so the append below raised NameError).
rawdata = []
# NOTE: the built-in open() only reads local files; passing it a URL is the
# problem this question asks about.
with open(get_url()) as csvfile:
    reader = csv.reader(csvfile, delimiter=",")
    for row in reader:
        rawdata.append(row)
If i download the csv first i can read it into python, but I want to get to access the csv file directly without having to download it first. Is this possible? alternatively have the csv as temp.
Thanks!
I would recommend that you use pandas. Here is a link.
# NOTE(review): pandas.io.data is believed to have been moved out of pandas
# into the separate pandas-datareader package — confirm for the pandas
# version in use.
import pandas.io.data as web
import datetime
# Date range to download.
start = datetime.datetime(2010, 1, 1)
end = datetime.datetime(2013, 1, 27)
# Fetch the price history for ticker "F" from the 'yahoo' source.
f = web.DataReader("F", 'yahoo', start, end)
# Look up the row for one trading day by its date label.
# NOTE(review): the .ix indexer is believed to be removed in current pandas
# (use .loc) — confirm.
f.ix['2010-01-04']
Out[6]:
Open 1.017000e+01
High 1.028000e+01
Low 1.005000e+01
Close 1.028000e+01
Volume 6.085580e+07
Adj Close 8.755953e+00
Name: 2010-01-04 00:00:00, dtype: float64
Try it this way.
in this file "C:/Users/your_path/Desktop/symbols/tickers.txt"
you have the following tickers
ibm
sbux
msft
"""
import urllib
import re
import json
# Python 3 location of urlopen (Python 2's urllib.urlopen no longer exists).
import urllib.request

# Read the ticker symbols, one per line.
with open("C:/Users/rshuell001/Desktop/symbols/tickers.txt") as tickers_file:
    symbolslist = tickers_file.read().split("\n")

for symbol in symbolslist:
    # A trailing newline in tickers.txt produces an empty entry; skip it.
    if not symbol.strip():
        continue
    # Fetch the intraday data points for this ticker as JSON.
    with urllib.request.urlopen(
            "http://www.bloomberg.com/markets/chart/data/1D/" + symbol + ":US") as htmltext:
        data = json.load(htmltext)
    datapoints = data["data_values"]
    # Write "symbol,x,y" lines to <symbol>.txt, replacing any previous file.
    with open("C:/Users/rshuell001/Desktop/symbols/" + symbol + ".txt", "w") as myfile:
        for point in datapoints:
            myfile.write(str(symbol + "," + str(point[0]) + "," + str(point[1]) + "\n"))
That should give you what you want.

Python Win32Com - ExportAsFixedFormat - MS Excel 2010 file

I have spent the day trying to figure out how to export out a MS Excel File as a PDF. I am in desperate need of someone smarter than I:
Here is what I have so far and the error I get:
import os
import win32com.client
import win32com.client.dynamic
import datetime
import time
#
#Path to Read from where you want all the files read from
# Walks InputWkbkPath, opens every workbook in Excel through COM, reads the
# participant details from column C of the first sheet and exports the
# workbook under a name built from those details.
InputWkbkPath = "O:/MIS/Reporting/Field Reports/2014_Template_Files/w_code/"
# NOTE(review): this Outlook object is not used anywhere in the listing — confirm it is needed.
obj = win32com.client.Dispatch("Outlook.Application")
xlApp = win32com.client.DispatchEx('Excel.Application')
OutputWkbkPath ='O:/MIS/Reporting/Field Reports/2015_Template_Files/Directors /Templates/20150123_Archive/'
for subdir, dirs, files in os.walk(InputWkbkPath):
for file in files:
#print os.path.join(subdir, file)
ip= os.path.join(subdir, file)
xlwb= xlApp.Workbooks.Open(ip)
#print xlwb
# Today's date formatted as Month-DD-YYYY for the output file name.
currentyear = datetime.date.today().strftime("%Y")
currentmonth = datetime.date.today().strftime("%B")
currentday = datetime.date.today().strftime("%d")
currentdate = currentmonth+"-"+currentday+"-"+currentyear
# Cells C4..C13 of the first worksheet hold the participant details.
participant = xlwb.Worksheets(1).Range("C4").Value
title = xlwb.Worksheets(1).Range("C5").Value
StaffCode = xlwb.Worksheets(1).Range("C6").Value
OfficeName = xlwb.Worksheets(1).Range("C7").Value
LOCode = xlwb.Worksheets(1).Range("C8").Value
Region = xlwb.Worksheets(1).Range("C9").Value
ESN = str(xlwb.Worksheets(1).Range("C10").Value)
ParticipantEmail= xlwb.Worksheets(1).Range("C11").Value
MDEmail= xlwb.Worksheets(1).Range("C12").Value
RVPEmail = xlwb.Worksheets(1).Range("C13").Value
# NOTE(review): the Director branch builds an ".xlsx" name although the file
# is exported as PDF below — confirm the intended extension.
if title == "Director" or title == "DIRECTOR":
FileName = LOCode+"_"+participant+"_"+ESN+"_Comp_Model_"+currentdate+".xlsx"
#print FileName
else:
FileName = Region+"_"+LOCode+"_"+participant+"_"+ESN+"_Comp_Model_"+currentdate+".pdf"
OutputFile=OutputWkbkPath+FileName
# NOTE(review): Activate is referenced without parentheses — confirm that it
# is actually invoked.
xlwb.Worksheets(1).Activate
#print OutputFile
ws=xlwb.Worksheets(1)
ws.Visible = 1
# A positional argument (OutputFile) follows a keyword argument (Type=...),
# which Python rejects with a SyntaxError.
xlwb.ExportAsFixedFormat(Type="xlTypePDF",OutputFile)
xlwb.Close(True)
I get the following error:
C:\Python27\python.exe C:/Users/username/PycharmProjects/File_Names/Loop_Throug_Open.py
File "C:/Users/username/PycharmProjects/File_Names/Loop_Throug_Open.py", line 55
xlwb.ExportAsFixedFormat(Type="xlTypePDF",OutputFile)
SyntaxError: non-keyword arg after keyword arg
Process finished with exit code 1
Please help. I cannot find anything on it in the groups.
Thank you ahead of time.
Robert
The problem was in the ExportAsFixedFormat method:
I changed to the following:
xlwb.ExportAsFixedFormat(0, OutputFile)
I also had to write the path with doubled backslashes. So the OutputWkbkPath looks like the following:
OutputWkbkPath ='O:\\MIS/Reporting\\Field Bonus Plan Reports\\2015__Files\\ DirectorsTemplates\\20150123_Archive\\'
I hope this helps someone else. The following post actually got me there:
.xlsx and xls(Latest Versions) to pdf using python
It is not the same package/module but that part works.

Python FTP get the most recent file by date

I am using ftplib to connect to an ftp site. I want to get the most recently uploaded file and download it. I am able to connect to the ftp server and list the files, I also have put them in a list and got the datefield converted. Is there any function/module which can get the recent date and output the whole line from the list?
#!/usr/bin/env python
import ftplib
import os
import socket
import sys
HOST = 'test'
# Connects to HOST, logs in anonymously, lists the remote directory and
# parses the date/time columns of each listing line. (Python 2 syntax.)
def main():
# Step 1: open the connection; give up if the host cannot be reached.
try:
f = ftplib.FTP(HOST)
except (socket.error, socket.gaierror), e:
print 'cannot reach to %s' % HOST
return
print "Connect to ftp server"
# Step 2: anonymous login; the connection is closed again on refusal.
try:
f.login('anonymous','al#ge.com')
except ftplib.error_perm:
print 'cannot login anonymously'
f.quit()
return
print "logged on to the ftp server"
# Step 3: collect the raw listing lines.
data = []
f.dir(data.append)
for line in data:
# The first two whitespace-separated columns are taken as date and time.
datestr = ' '.join(line.split()[0:2])
# NOTE(review): `orig-date` is not a valid identifier (it parses as a
# subtraction), and `time` is not among the imports shown for this script — confirm.
orig-date = time.strptime(datestr, '%d-%m-%y %H:%M%p')
f.quit()
return
if __name__ == '__main__':
main()
RESOLVED:
data = []
f.dir(data.append)
datelist = []
filelist = []
for line in data:
col = line.split()
datestr = ' '.join(line.split()[0:2])
date = time.strptime(datestr, '%m-%d-%y %H:%M%p')
datelist.append(date)
filelist.append(col[3])
combo = zip(datelist,filelist)
who = dict(combo)
for key in sorted(who.iterkeys(), reverse=True):
print "%s: %s" % (key,who[key])
filename = who[key]
print "file to download is %s" % filename
try:
f.retrbinary('RETR %s' % filename, open(filename, 'wb').write)
except ftplib.err_perm:
print "Error: cannot read file %s" % filename
os.unlink(filename)
else:
print "***Downloaded*** %s " % filename
return
f.quit()
return
One problem, is it possible to retrieve the first element from the dictionary? what I did here is that the for loop runs only once and exits thereby giving me the first sorted value which is fine, but I don't think it is a good practice to do it in this way..
For those looking for a full solution for finding the latest file in a folder:
MLSD
If your FTP server supports MLSD command, a solution is easy:
# Order the MLSD entries newest-first by their "modify" fact; the name of
# the first entry is then the most recently modified file.
entries = sorted(ftp.mlsd(), key=lambda entry: entry[1]['modify'], reverse=True)
latest_name = entries[0][0]
print(latest_name)
LIST
If you need to rely on an obsolete LIST command, you have to parse a proprietary listing it returns.
Common *nix listing is like:
-rw-r--r-- 1 user group 4467 Mar 27 2018 file1.zip
-rw-r--r-- 1 user group 124529 Jun 18 15:31 file2.zip
With a listing like this, this code will do:
from dateutil import parser
# ...
lines = []
ftp.dir("", lines.append)

latest_time = None
latest_name = None

for line in lines:
    # maxsplit=8 leaves everything after the eighth field in tokens[8], so a
    # file name that contains spaces stays in one piece.
    tokens = line.split(maxsplit=8)
    # Skip lines that are not file entries (for example a "total N" header).
    if len(tokens) < 9:
        continue
    # Fields 5-7 hold month, day and either the year or the time of day.
    time_str = " ".join(tokens[5:8])
    timestamp = parser.parse(time_str)
    if (latest_time is None) or (timestamp > latest_time):
        latest_name = tokens[8]
        latest_time = timestamp

print(latest_name)
This is a rather fragile approach.
MDTM
A more reliable, but far less efficient, approach is to use the MDTM command to retrieve the timestamps of individual files/folders:
# Ask the server for the MDTM reply of every listed name and remember the
# name with the largest reply. The replies are compared as the raw strings
# returned by voidcmd — presumably "213 YYYYMMDDHHMMSS", which sorts
# chronologically; confirm against the server.
latest_time = latest_name = None
for candidate in ftp.nlst():
    reply = ftp.voidcmd("MDTM " + candidate)
    if latest_time is not None and reply <= latest_time:
        continue
    latest_name, latest_time = candidate, reply

print(latest_name)
For an alternative version of the code, see the answer by @Paulo.
Non-standard -t switch
Some FTP servers support a proprietary non-standard -t switch for NLST (or LIST) command.
lines = ftp.nlst("-t")
latest_name = lines[-1]
See How to get files in FTP folder sorted by modification time.
Downloading found file
No matter what approach you use, once you have the latest_name, you download it as any other file:
with open(latest_name, 'wb') as f:
ftp.retrbinary('RETR '+ latest_name, f.write)
See also
Get the latest FTP folder name in Python
How to get FTP file's modify time using Python ftplib
Why don't you use the following dir option?
ftp.dir('-t',data.append)
With this option the file listing is time ordered from newest to oldest. Then just retrieve the first file in the list to download it.
With NLST, like shown in Martin Prikryl's response,
you should use sorted method:
ftp = FTP(host="127.0.0.1", user="u",passwd="p")
ftp.cwd("/data")
file_name = sorted(ftp.nlst(), key=lambda x: ftp.voidcmd(f"MDTM {x}"))[-1]
If you have all the dates in time.struct_time (strptime will give you this) in a list then all you have to do is sort the list.
Here's an example :
#!/usr/bin/python
import time
# Sample timestamps in "Mon DD HH:MM YYYY" form, deliberately out of order.
dates = [
    "Jan 16 18:35 2012",
    "Aug 16 21:14 2012",
    "Dec 05 22:27 2012",
    "Jan 22 19:42 2012",
    "Jan 24 00:49 2012",
    "Dec 15 22:41 2012",
    "Dec 13 01:41 2012",
    "Dec 24 01:23 2012",
    "Jan 21 00:35 2012",
    "Jan 16 18:35 2012",
]


def main():
    """Parse the sample dates and print them before and after sorting.

    time.strptime returns time.struct_time values, which compare field by
    field (year, month, day, ...), so a plain list sort orders them
    chronologically.
    """
    datelist = [time.strptime(date, '%b %d %H:%M %Y') for date in dates]
    print(datelist)
    datelist.sort()
    print(datelist)


if __name__ == '__main__':
    main()
I don't know how your FTP server formats its listing, but your example was not working for me. I changed some lines related to the date sorting part:
import sys
from ftplib import FTP
import os
import socket
import time
# Connects to the ftp
# Needed for ftplib.error_perm below; only the FTP class is imported above.
import ftplib

# Connects to the ftp
ftp = FTP(ftpHost)
ftp.login(yourUserName, yourPassword)

data = []
ftp.dir(data.append)

# (timestamp, name) pairs; a list keeps files that share a timestamp, which
# a dict keyed by timestamp would silently drop.
entries = []
for line in data:
    # maxsplit=8 keeps a file name that contains spaces in one piece.
    col = line.split(None, 8)
    # Skip lines that are not file entries (for example a "total N" header).
    if len(col) < 9:
        continue
    # Columns 5-7 hold month, day and time of day in a Unix-style listing.
    # NOTE(review): entries that show a year instead of a time will not match
    # this format — confirm what the server returns for older files.
    datestr = ' '.join(col[5:8])
    date = time.strptime(datestr, '%b %d %H:%M')
    entries.append((date, col[8]))

# Walk the files from newest to oldest and download each one. Add a `break`
# after the first successful download if only the newest file is wanted.
for date, filename in sorted(entries, key=lambda entry: entry[0], reverse=True):
    print("%s: %s" % (date, filename))
    print("file to download is %s" % filename)
    try:
        with open(filename, 'wb') as local_file:
            ftp.retrbinary('RETR %s' % filename, local_file.write)
    except ftplib.error_perm:  # the exception class is error_perm, not err_perm
        print("Error: cannot read file %s" % filename)
        # Remove the empty or partial local file left by the failed download.
        os.unlink(filename)
    else:
        print("***Downloaded*** %s " % filename)

ftp.quit()

Categories

Resources