Python 3: urlextract package, PermissionError - python

I am using Windows 10 x64, with Python 3.6.1 x86.
I have this script from a few months ago which was working fine, but right now it gives me a weird error. The script is a simple one that extracts URLs from tweets saved in .csv files.
This is the script:
import datetime
from urlextract import URLExtract
# Names of the tweet files to process; each one is passed to url_extract().
twitter_files_list = ['File1.csv', 'File2.csv', 'File3.csv']
# Prefix prepended to every file name when it is opened.
# NOTE: `my_path` is a placeholder that is not defined in this snippet.
input_path = my_path
# Find domain of URL
def find_domain(url):
    """Return the host portion of ``url``.

    The string is split on ``//`` and only the last piece is kept (this drops
    a leading ``http:`` / ``https:`` scheme); that piece is then cut at its
    first ``/`` so any path is discarded. A string without ``//`` or ``/`` is
    returned unchanged.
    """
    after_scheme = url.split("//")[-1]
    return after_scheme.split("/")[0]
# Clean domain from useless chars
def clean_domain(domain):
    """Return ``domain`` with every ``[``, ``]`` and ``'`` character removed.

    These characters are left over when a list of URLs is converted with
    ``str()`` before the domain is cut out of it.
    """
    for unwanted in ("[", "]", "'"):
        domain = domain.replace(unwanted, "")
    return domain
# Extract URLs from Tweets
def url_extract(filename):
    """Append the URL domains found in the tweets of ``filename`` to a file.

    ``input_path + filename`` is read as a tab-separated file; the fifth
    column (index 4) of each line is taken as the tweet text. For every tweet
    in which ``URLExtract`` finds at least one URL, one domain is appended to
    ``extracted_urls/urls_<filename>`` (that directory must already exist).

    Lines with fewer than five columns are skipped.

    Returns the number of domains written.
    """
    print('\n' + filename + ':')

    # Search for contents of column "text"
    with open(input_path + filename, "r", encoding="utf8") as f:
        rows = (line.split('\t') for line in f)
        text = [fields[4] for fields in rows if len(fields) > 4]

    extractor = URLExtract()
    url_counter = 0
    with open('extracted_urls/urls_' + filename, 'a') as url_file:
        for tweet in text:
            # No try/except here: a handler must name an exception class, and
            # it is not known which errors find_urls() can raise, so they are
            # allowed to propagate instead of being hidden.
            urls = extractor.find_urls(tweet)
            if not urls:  # no URL in this tweet
                continue
            # str(urls) is the list repr, e.g. "['http://a.b/c']": find_domain
            # keeps the host after the last "//" and clean_domain strips the
            # leftover brackets and quotes.
            domain = find_domain(str(urls))
            if " " not in domain:
                url_file.write(clean_domain(domain) + "\n")
                url_counter += 1
    return url_counter
# Main
if __name__ == '__main__':
    print('\nURL Characterization:\n')
    # Start timer
    start = datetime.datetime.now()
    # Extract the URL domains from every file
    for twitter_file in twitter_files_list:
        print('Searching ' + str(twitter_file) + '...')
        url_extract(twitter_file)
    # End timer
    end = datetime.datetime.now()
    # Print results
    print("\nProcess finished")
    print("Total time: " + str(end - start))
This gives me the following error:
Traceback (most recent call last):
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 77, in <module>
url_extract(twitter_file)
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 50, in url_extract
extractor = URLExtract()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 65, in __init__
if not self._download_tlds_list():
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 114, in _download_tlds_list
with open(self._tld_list_path, 'w') as ftld:
PermissionError: [Errno 13] Permission denied: 'C:\\Program Files (x86)\\Python36-32\\lib\\site-packages\\.tlds'
I have no idea how to interpret this.

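You can try running the script as administrator. The traceback shows urlextract trying to write its `.tlds` file into `C:\Program Files (x86)\Python36-32\lib\site-packages`, a location that normally cannot be written to without elevated privileges.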

Related

KeyError: 'pdf' showing now; code was working previously - why?

Getting the following error:
Traceback (most recent call last):
File "test.gyp", line 37, in <module>
for x in url_list["pdf"]:
KeyError: 'pdf'
When previously code was working fine. Aside from shifting what directory the actual .gyp file was in temporarily, I did not alter code. Any clues as to why this has suddenly become an issue?
#!/usr/bin/env python3
import os
import glob
import pdfx
import wget
import urllib.parse
import requests
## Accessing and Creating Six Digit File Code
pdf_dir = "./"
pdf_files = glob.glob("%s/*.pdf" % pdf_dir)
for file in pdf_files:
    ## Identify File Name and Limit to Digits
    filename = os.path.basename(file)
    newname = filename[0:6]
    ## Run PDFX to identify and download links
    pdf = pdfx.PDFx(filename)
    url_list = pdf.get_references_as_dict()
    attachment_counter = 1
    # The "url" and "pdf" reference lists are downloaded the same way, in
    # that order, sharing one attachment counter per source PDF.
    # NOTE(review): url_list[ref_type] raises KeyError when the dictionary
    # returned for a given PDF has no such key; url_list.get(ref_type, [])
    # would skip the missing category instead.
    for ref_type in ("url", "pdf"):
        for x in url_list[ref_type]:
            if x[0:4] == "http":
                extension = os.path.splitext(x)[1]
                r = requests.get(x)
                with open('temporary', 'wb') as f:
                    f.write(r.content)
                ## Concatenate File Name Once Downloaded
                os.rename('./temporary', str(newname) + '_attach' + str(attachment_counter) + str(extension))
                ## Increase Attachment Count
                attachment_counter += 1
Here is one little snippet from when I had it print out my overall url_list, and you can see that it IS adding items to the dictionary (edited here for privacy) flagged as 'pdf' - so I'm truly at a loss as to why it eventually gives me the error.
'pdf': ['URLSHOWSHERE.pdf']}
You are getting this error because, at the moment the exception is raised, your dictionary url_list does not have a key named 'pdf'. Please check the dictionary, at least by explicitly printing it for every file, to get a glimpse of its content.

"[Errno 2] No such file or directory" Issue [duplicate]

This question already has answers here:
Trying to use open(filename, 'w' ) gives IOError: [Errno 2] No such file or directory if directory doesn't exist
(3 answers)
Closed 6 months ago.
So my prof. gave me this code as the solution to my homework, but when I run it, it gives me an error. Can you please help me out? I guess I didn't specify the location of the file, but I'm not sure if that's the case. The objective of this question is to generate and read files that contain a list of random numbers.
import random
import os
import time
def fillFile(fileSize, fileName):
    """Write ``fileSize`` random integers to ``fileName``, one per line.

    Each value is drawn with ``random.randint(0, fileSize + 1000)``. An
    existing file is overwritten. The directory that will contain the file
    must already exist, otherwise ``open`` raises ``FileNotFoundError``.
    """
    # Mode "w" creates the file or truncates an existing one, so no explicit
    # delete is needed; the with-block closes the file even if a write fails.
    with open(fileName, "w") as out:
        for _ in range(fileSize):
            r = random.randint(0, fileSize + 1000)
            out.write(str(r) + "\n")
def readFile(fileName):
    """Return the integers stored one per line in ``fileName`` as a list.

    If the file does not exist, a message is printed and the program exits
    by raising ``SystemExit``.
    """
    import sys  # local import: sys.exit is used instead of the site-only exit()

    if not os.path.exists(fileName):
        print(fileName + " does not exist!")
        sys.exit()
    # The with-block closes the file even if a line fails to parse.
    with open(fileName, "r") as src:
        return [int(line) for line in src]
def mainForFiles():
    """Time writing and reading random-number files of several sizes.

    For each size in ``fileSizes`` a file is written with ``fillFile`` and
    later read back with ``readFile``. The elapsed times are printed and also
    recorded in ``fileStats.txt``: one line starting with ``fillFile`` and one
    starting with ``readFile``, each followed by the space-separated times.

    NOTE: the data files are created inside ``dirName``; that directory must
    already exist or ``fillFile`` fails with ``FileNotFoundError``.
    """
    # Create the files
    fileSizes = [1000, 5000, 10000, 25000, 50000, 100000, 200000]
    dirName = ".\\filesForAssignment1\\"
    statFileName = "fileStats.txt"
    # Mode "w" truncates an existing stat file; the with-block closes it even
    # when one of the timed operations raises.
    with open(statFileName, "w") as statFile:
        statFile.write("fillFile")
        print("WRITING TO FILES")
        for i in fileSizes:
            start = time.time()
            fillFile(i, dirName + "file" + str(i))
            finish = time.time()
            statFile.write(" " + str(finish - start))
            print("File Size = " + str(i) + " Write Time = " + str(finish - start))
        statFile.write("\n")
        print("READING FILES")
        statFile.write("readFile")
        for i in fileSizes:
            fileName = dirName + "file" + str(i)
            # Read the file; start is taken before the read and finish after
            # it, so the reported duration is not negative.
            start = time.time()
            readFile(fileName)
            finish = time.time()
            statFile.write(" " + str(finish - start))
            print ("File Size = " + str(i)+ " Dosya Okuma Zamanı = " + str(finish-start))
        statFile.write("\n")


# Run only when executed as a script, so the module can be imported safely.
if __name__ == "__main__":
    mainForFiles()
File "C:/Users/emrea/PycharmProjects/helloworld/hello2.py", line 84, in
<module>
mainForFiles()
File "C:/Users/emrea/PycharmProjects/helloworld/hello2.py", line 57, in mainForFiles
fillFile(i, dirName+"file"+str(i))
File "C:/Users/emrea/PycharmProjects/helloworld/hello2.py", line 12, in fillFile
FILE = open(fileName, "w")
FileNotFoundError: [Errno 2] No such file or directory: '.\\filesForAssignment1\\file1000'
FileNotFoundError: [Errno 2] No such file or directory: '.\\filesForAssignment1\\file1000'
The w mode causes the file to be created if it doesn't exist (and truncated if it does so the os.remove is not actually useful there), however it does expect intermediate directories to exist.
This means you should ensure that the directory which will contain the file ('.\\filesForAssignment1') exists before trying to create the file.
# exist_ok=True: do not raise if the directory is already there
os.makedirs(os.path.dirname(fileName), exist_ok=True)
should do the trick, or
# parents=True creates missing intermediate directories; exist_ok=True keeps
# an already existing directory from raising
pathlib.Path(fileName).parent.mkdir(parents=True, exist_ok=True)
for a somewhat more modern take on it.
There's a bunch of other minor issues in the script:
the main function should generally be "gated" so modules can be imported without running them
explicitly closing files has fallen out of favor as it's unreliable
when opening files in "text" mode (the default) you should always provide an encoding
pathlib is fun, also that way you should not have to deal with path separators and all that crap
unless it's required to handle that case, I'd just let open(fname, 'r') error out if the file doesn't exist
Here's a version I think should be slightly improved:
import pathlib
import random
import os
import time
def fillFile(fileSize, fileName):
    """Write ``fileSize`` random integers to ``fileName``, one per line.

    ``fileName`` must provide an ``open(mode, encoding=...)`` method (the
    caller passes ``pathlib.Path`` objects). Values are drawn with
    ``random.randint(0, fileSize + 1000)``; an existing file is overwritten.
    """
    with fileName.open('w', encoding='utf-8') as f:
        for _ in range(fileSize):
            r = random.randint(0, fileSize + 1000)
            f.write(f"{r}\n")
def readFile(fileName):
    """Return the integers stored one per line in ``fileName`` as a list.

    ``fileName`` must provide an ``open(encoding=...)`` method (the caller
    passes ``pathlib.Path`` objects). A missing file is not handled here; the
    error raised by ``open`` propagates to the caller.
    """
    with fileName.open(encoding='utf-8') as f:
        return [int(line) for line in f]
# Directory (under the current working directory) that holds the data files.
OUT_DIR = pathlib.Path.cwd().joinpath("filesForAssignment1")
# Number of random integers written to each data file; one file per entry.
FILE_SIZES = [1000, 5000, 10000, 25000, 50000, 100000, 200000]
def mainForFiles():
    """Time writing and reading one random-number file per ``FILE_SIZES`` entry.

    The files are created in ``OUT_DIR`` (created first if missing). Elapsed
    times are printed and recorded in ``fileStats.txt``: one line starting
    with ``fillFile`` and one starting with ``readFile``, each followed by the
    space-separated times.
    """
    # Create the files
    OUT_DIR.mkdir(parents=True, exist_ok=True)  # make sure the directory exists
    statFilePath = pathlib.Path("fileStats.txt")
    with statFilePath.open('w', encoding='utf-8') as statFile:
        statFile.write("fillFile")
        print("WRITING TO FILES")
        for i in FILE_SIZES:
            start = time.time()
            fillFile(i, OUT_DIR.joinpath(f'file{i}'))
            finish = time.time()
            statFile.write(f" {finish-start}")
            print(f"File Size = {i} Write Time = {finish-start}")
        statFile.write("\n")
        print("READING FILES")
        statFile.write("readFile")
        for i in FILE_SIZES:
            f = OUT_DIR.joinpath(f'file{i}')
            # Read the file
            start = time.time()
            readFile(f)
            finish = time.time()
            statFile.write(f" {finish-start}")
            print (f"File Size = {i} Dosya Okuma Zamanı = {finish-start}")
        statFile.write("\n")


# Run only when executed as a script, so the module can be imported safely.
if __name__ == '__main__':
    mainForFiles()
exit() is not doing what you want, it continues with the code.
def readFile(fileName):
    """Return the integers stored one per line in ``fileName`` as a list.

    If the file does not exist, a message is printed and ``None`` is returned
    so the caller can decide how to continue.
    """
    # Open file
    if not os.path.exists(fileName):
        print(fileName + " does not exist!")
        return None
    # Read File; the with-block closes it even if a line fails to parse
    with open(fileName, "r") as FILE:
        return [int(line) for line in FILE]

IOError: [Errno 2] No such file or directory while using pysftp

I am trying to do a script to find files that contain a text string. The files are on a remote host but when I run it, I get an error
Traceback (most recent call last): File "dirsearch.py", line 55, in
fo = open(search_path + fname) IOError: [Errno 2] No such file or directory: u'/home/black/white/goto/reports/dbs-01-Apr-2017.log'
The script I used is below.
from __future__ import unicode_literals
from __future__ import division
from __future__ import absolute_import
from builtins import input
from builtins import open
from future import standard_library
standard_library.install_aliases()
import pysftp
#connect to sftp server
srv = pysftp.Connection(host="host", username="username",
                        password="password")
try:
    #access remote directory on server
    search_path = ('/home/black/white/goto/reports/')
    file_type = '.log'
    search_str = input("Enter the search string : ")
    for fname in srv.listdir(search_path):
        # Apply file type filter
        if not fname.endswith(file_type):
            continue
        # NOTE(review): fname comes from the remote listing (srv.listdir), but
        # open() below is the ordinary file open and reads the LOCAL
        # filesystem, so it fails when the path exists only on the server.
        # Presumably the file should be opened through the SFTP connection
        # (e.g. srv.open(...)) - confirm against the pysftp documentation.
        with open(search_path + fname) as fo:
            # Line numbers start at 1; the loop ends at EOF
            for line_no, line in enumerate(fo, 1):
                # Search for string in line
                index = line.find(search_str)
                if index != -1:
                    print(fname, "[", line_no, ",", index, "] ", line, sep="")
finally:
    # Close the connection even if the search fails
    srv.close()

tarfile.open overwrites the file everytime

I am trying to create python script to archive and compress one year old data in datewise tar file. Script is also generating log file of the archived files. I am using python 2.6 on linux.
Here is my code :
# For every date, archive the files recorded under that date into a gzipped
# tarball and append their names to a per-date log file.
for search_date in dd_list:
    tar_file = "/files/yearly_archive/nas_archive_" + search_date + ".tgz"
    log_file = "/files/yearly_archive/archive_log_" + search_date
    fcount = 0
    # NOTE: mode "w:gz" always starts a new archive, so an existing tarball
    # for this date is overwritten, while the log (mode 'ab+') is appended to.
    mytar = tarfile.open(tar_file,"w:gz")
    try:
        f = open(log_file,'ab+')
        try:
            for f_name, d_date in date_file_dict.iteritems():
                if d_date == search_date:
                    fcount += 1
                    mytar.add(f_name)
                    f.write(f_name + '\n')
        finally:
            f.close()
    finally:
        # Close the archive even if adding a file fails
        mytar.close()
    date_occur_dict[search_date] = fcount
Here the log file is appended to if it exists, but the tar file is overwritten every time I run the script. Is there a way to make sure the tar file is appended to if it exists, and created otherwise?
Edit :
I tried to add code for ungzipping and adding but it is not working.
# Second attempt: try to reopen an existing tarball through gzip before
# adding files. Indentation was lost in this listing; nesting is not shown.
for search_date in dd_list:
tar_file = "/files/yearly_archive/nas_archive_" + search_date + ".tgz"
# Flag: 1 = open a new gzipped tar (the "w:gz" branch below), 0 = use d_tar.
# NOTE(review): the name `zip` shadows the builtin of the same name.
zip = 1
try:
# NOTE(review): mode 'ab+' creates the file when it is missing, so this open
# presumably does not fail for a missing archive - confirm.
with open(tar_file,'ab+'):
import gzip
# NOTE(review): mode 'wb' truncates the existing file rather than appending.
d_tar = gzip.open(tar_file,'wb')
zip = 0
except IOError:
print "Creating new tar file"
if zip == 1:
mytar = tarfile.open(tar_file,"w:gz")
else:
# d_tar is a file object, but it is passed as the first (name) argument;
# the traceback below shows tarfile handing it to open(), hence the TypeError.
mytar = tarfile.open(d_tar,"w")
log_file = "/files/yearly_archive/archive_log_" + search_date
# Number of files archived for this date.
fcount = 0
#print tar_file
#print log_file
# The log is opened in append mode, so earlier entries are kept.
f = open(log_file,'ab+')
for f_name, d_date in date_file_dict.iteritems():
if d_date == search_date:
#print f_name
fcount += 1
mytar.add(f_name)
f.write(f_name + '\n')
date_occur_dict[search_date] = fcount
mytar.close()
f.close()
I am getting following error:
Traceback (most recent call last):
File "sort_archive.py", line 63, in <module>
mytar = tarfile.open(d_tar,"w")
File "/usr/lib64/python2.6/tarfile.py", line 1687, in open
return cls.taropen(name, mode, fileobj, **kwargs)
File "/usr/lib64/python2.6/tarfile.py", line 1697, in taropen
return cls(name, mode, fileobj, **kwargs)
File "/usr/lib64/python2.6/tarfile.py", line 1518, in __init__
fileobj = bltn_open(name, self._mode)
TypeError: coercing to Unicode: need string or buffer, instance found
You cannot use tarfile to append to compressed tarballs. Either perform the decompress/compress steps separately, or don't use compression in the first place.
< strikethrough >
Did you try to change the mode ? I see w, which clearly overwrites the file. Try with a, or w+.
mytar = tarfile.open(tar_file,"w+:gz")
or
mytar = tarfile.open(tar_file,"a:gz")
< / strikethrough >

Python ioerror errno 2 no such file or directory pilutil

I'm having trouble running a python script using python 2.7.3 on lubuntu 12.10 run through virtual box on an older mac book (summer 2009, model 5,2 perhaps) with an intel processor.
The script seems to detect the video file I specify through the command line and I'm not exactly sure what is going wrong. Here are the traceback and the code which causes the error.
1
# Command-line arguments: source directory, output directory, bounding-box
# directory (presumably, from the name `bb_dir` - confirm) and video file name.
src_dir = sys.argv[1]
out_dir = sys.argv[2]
bb_dir = sys.argv[3]
vid_name = sys.argv[4]
# Paths are built by plain string concatenation, so the separator has to be
# supplied by the arguments (the sample invocation passes "/v_PushUps_...").
vid_src_name = src_dir + vid_name
vid_dest_name = out_dir + vid_name
vid_bb_name = bb_dir + vid_name
# `down_factor` is defined outside this excerpt.
vid=video.asvideo(vid_src_name,down_factor)
2
# Excerpt from the body of video.asvideo (per the traceback); indentation was
# lost in this listing.
# Objects exposing `shape` and `dtype` are wrapped in a Video directly.
if hasattr(video_source, 'shape') and hasattr(video_source, 'dtype'):
# A 3-element shape gets a trailing axis of length 1 appended.
if len(video_source.shape) == 3:
video_source.shape = video_source.shape + (1,)
vshape = video_source.shape
vdtype = video_source.dtype
vid = Video(frames=vshape[0], rows=vshape[1], columns=vshape[2], bands=vshape[3], dtype=vdtype, initialise=False)
vid.V = video_source
return vid
# Otherwise video_source is used as a path and must exist on disk.
if not os.path.exists(video_source):
raise IOError(video_source + ' not found')
# mkstemp returns (file handle, path); only the path is used afterwards.
(unusedfh,sample_filename) = tempfile.mkstemp()
# Ask ffmpeg for a single frame (-vframes 1); its output is discarded.
# NOTE(review): as pasted there is a space before '%d', so the temp path and
# the '%d' + image_ext pattern become separate arguments, while the file read
# below is sample_filename + '1' + image_ext - confirm against the real source.
cmd = 'ffmpeg -i "' + video_source + '" -vframes 1 ' + sample_filename+' %d' + image_ext + ' 2> /dev/null > /dev/null'
sample_filename = sample_filename + '1' + image_ext
# The return code of the call is not checked, so a failed ffmpeg run only
# shows up when the sample image is read on the next line.
subp.call(cmd, shell = True)
sample_img = sp.misc.imread(sample_filename)
3.
# Line from scipy/misc/pilutil.py imread (per the traceback): the name is
# handed to PIL's Image.open.
im = Image.open(name)
4.
# Excerpt from PIL/Image.py open (per the traceback); indentation was lost.
# Only mode "r" is accepted.
if mode != "r":
raise ValueError("bad mode")
# A string argument is treated as a file name and opened in binary mode; this
# is the open() call that raises the IOError shown in the traceback.
if isStringType(fp):
import __builtin__
filename = fp
fp = __builtin__.open(fp, "rb")
Output:
cody#cody-VirtualBox:~/Dropbox/annotation/Template_Extractor$ python template_extract_new.py "/home/cody/Work/annotate/UCF50/video/PushUps" "/home/cody/Dropbox/annotation/template/Pushups" "/home/cody/Dropbox/annotation/bb/PushUps" "/v_PushUps_g01_c01.avi"
Traceback (most recent call last):
File "template_extract_new.py", line 192, in <module>
vid=video.asvideo(vid_src_name,down_factor)
File "/home/cody/Dropbox/annotation/Template_Extractor/istare/video.py", line 135, in asvideo
sample_img = sp.misc.imread(sample_filename)
File "/usr/lib/python2.7/dist-packages/scipy/misc/pilutil.py", line 102, in imread
im = Image.open(name)
File "/usr/lib/python2.7/dist-packages/PIL/Image.py", line 1952, in open
fp = __builtin__.open(fp, "rb")
IOError: [Errno 2] No such file or directory: '/tmp/tmpklcTe61.bmp'
The file indicated in the error changes every time and is not there before running but is there after running the command.
I've tried running from uterm, uxterm, and lxterminal (knowing it wouldn't do anything but trying anyways, plus no copy and paste in uterm).

Categories

Resources