Getting data from the url using python and unzipped

Getting data from the url using python and unzipped - python

Problem: I want to download and unzip the data from the URL below, but I get the following error message.
I was wondering if you could guide me to fix my error. I appreciate your time!
import requests
import os
# Maps a quarter label (used as folder and file name) to the archive URL.
urls = {'1Q16':'https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q1_2016.zip'}

# Create the top-level download folder on first run.
if not os.path.isdir('data'):
    os.system('mkdir data')

for file in urls.keys():
    # One sub-folder per quarter. The exit status of os.system() is not
    # checked, so a failed mkdir goes unnoticed until open() below raises
    # FileNotFoundError.
    if not os.path.exists('data/' + file):
        os.system('mkdir ./data/' + file)

    print('Requesting response from: ' + urls[file])
    req = requests.get(urls[file])

    print('Writing response to: /data/' + file + '/' + file + '.zip')
    with open('data/' + file + '/' + file + '.zip', 'wb') as f:
        f.write(req.content)

    # Extract next to the archive, then delete the archive itself.
    os.system('unzip ' + 'data/' + file + '/' + file + '.zip -d data/' + file + '/')
    print('Unzipping data...')
    os.system('rm ' + 'data/' + file + '/' + file + '.zip')

    print(file + ' complete.')
    print('------------------------------------------------------------------------------- \n')
Error message
Requesting response from: https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q1_2016.zip
Writing response to: /data/1Q16/1Q16.zip
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-9-251ee1e9c629> in <module>
9 req = requests.get(urls[file])
10 print('Writing response to: /data/' + file + '/' + file + '.zip')
---> 11 with open('data/' + file + '/' + file + '.zip', 'wb') as f:
12 f.write(req.content)
13
FileNotFoundError: [Errno 2] No such file or directory: 'data/1Q16/1Q16.zip'

The problem is that the directory data/<file> is not being created, so open() cannot open the file because part of the path you provided does not exist. To create the directories reliably and keep the paths portable, use os.makedirs() together with os.path.join(). For you, this would be:
import requests
import os
# Maps a quarter label (used as folder and file name) to the archive URL.
urls = {'1Q16':'https://f001.backblazeb2.com/file/Backblaze-Hard-Drive-Data/data_Q1_2016.zip'}

for file in urls:
    # makedirs() creates 'data' and 'data/<file>' in one call; exist_ok=True
    # makes the call a no-op when the folders are already there.
    target_dir = os.path.join("data", file)
    os.makedirs(target_dir, exist_ok=True)

    print('Requesting response from: ' + urls[file])
    req = requests.get(urls[file])

    print('Writing response to: /data/' + file + '/' + file + '.zip')
    # Build the archive path with os.path.join so the separator matches the OS.
    with open(os.path.join(target_dir, file + '.zip'), 'wb') as f:
        f.write(req.content)

Related

I receive an "OSError: [Errno 22] Invalid argument" when I'm trying to change a time format

When I use %H:%M:%S I get this error. If I use dots instead of colons, everything is fine. Can someone explain why, and how I can use colons?
import os, time, zipfile
# Directories to back up and the drive folder that receives the archives.
source = ["C:\\Users\\Daniel\\Documents\\Projects", "C:\\Users\\Daniel\\Pictures\\pics"]
target_dir = 'D:\\Backup'

# One sub-folder per day; the archive name is built from the time of day.
today = target_dir + os.sep + time.strftime('%Y.%m.%d')
comment = input('Enter a comment: ')
com = comment.replace(' ', '_') + '.zip'
# The ':' separators produced here end up in the archive file name below.
now = time.strftime('%Y.%m.%d. %H:%M:%S')

if not os.path.exists(today):
    os.mkdir(today)
    print('Folder made successfully')

# Append the (underscore-joined) comment to the file name when one was given.
if len(comment) == 0:
    target = today + os.sep + now + '.zip'
else:
    target = today + os.sep + now + '_' + com

z = zipfile.ZipFile(target, 'w' )


def zip(c):
    """Add every file found under each directory in *c* to the open archive ``z``."""
    for a in c:
        for folder, subfolder, files in os.walk(a):
            for file in files:
                z.write(os.path.join(folder, file))


zip(source)
z.close()
Traceback (most recent call last):
File "c:\Users\Daniel\Documents\Programming\backupscript.py", line 38, in <module>
z = zipfile.ZipFile(target, 'w' )
File "C:\Users\Daniel\AppData\Local\Programs\Python\Python310\lib\zipfile.py", line 1249, in __init__
self.fp = io.open(file, filemode)
OSError: [Errno 22] Invalid argument: 'D:\Backup\2022.11.04\2022.11.04. 16:59:39_wa_w.zip'

The use of : on Windows is restricted. Also, the following are reserved characters:
< (less than)
> (greater than)
" (double quote)
/ (forward slash)
\ (backslash)
| (vertical bar or pipe)
? (question mark)
* (asterisk)
You can read more here

How to show python execution progress

I am trying to show the progress of a CSV download, but I have no idea how to do it with the code below:
# Download each URL to its own dated CSV file.
for url, filename in zip(urls, filenames):
    # stream=True defers the body so it can be read chunk by chunk below.
    r = requests.get(url, stream=True,verify=False)
    with open(r'C:\Users\Lucas\output\ ' + filename + ' - ' + dia.zfill(2) + '_' + mes.zfill(2) + '_' + ano + '.csv', 'wb') as fd:
        # Write the response to disk 256 bytes at a time.
        for chunk in r.iter_content(chunk_size=256):
            fd.write(chunk)
How could I make that? Thanks for helping

what is proper syntax for this if statement?

I’m having a syntax error with an if statement. It was working correctly with a different version, but I’m writing to an output log and I didn’t like how it would output for every file it checked, I want it to only write once if the file exists or not.
The first code block below is the one that is not working; it says that the third `file` is an undefined variable (fifth line of the code).
The second code block is how it was working before.
Anyone know how to structure this?
# `file` is bound only inside the generator expression on the next line, so
# the os.startfile() call in the branch below cannot see it; that is the
# NameError shown in the traceback.
if any(file.endswith('.ppt') for file in os.listdir(scanDestinationPath)):
    os.startfile(machineFolderDir + machineType + '\\' +
                 partNumber + ' REV ' + revisionNumber + '\\' +
                 file, 'print')
    # Record the successful print once.
    errorLog = open(logBookDir + 'log.txt', 'a+')
    errorLog.write('\nA setup sheet called PROG' + programNumber +
                   ' ' + partNumber + ' ' + revisionNumber +
                   '.ppt was printed.\n')
    errorLog.close()
else:
    # No .ppt file in the folder: log it and carry on.
    errorLog = open(logBookDir + 'log.txt', 'a+')
    m = ('The exception occurred in printDecoSetupSheet().There does not appear '
         f'to be a .ppt setup sheet file in folder {partNumber} {revisionNumber} '
         f'under {machineType}. Moving on...\n')
    errorLog.write(m)
    errorLog.close()
Second code block:
# `file` is not assigned in this fragment; it comes from surrounding code
# that is not shown.
if file.endswith(".ppt"):
    os.startfile(machineFolderDir + machineType + '\\' +
                 partNumber + ' REV ' + revisionNumber + '\\' +
                 file, 'print')
    # Record the successful print.
    errorLog = open(logBookDir + 'log.txt', 'a+')
    errorLog.write('\nA setup sheet called PROG' + programNumber +
                   ' ' + partNumber + ' ' + revisionNumber +
                   '.ppt was printed.\n')
    errorLog.close()
else:
    # Not a .ppt file: log it and carry on.
    errorLog = open(logBookDir + 'log.txt', 'a+')
    m = ('The exception occurred in printDecoSetupSheet().There does not appear '
         f'to be a .ppt setup sheet file in folder {partNumber} {revisionNumber} '
         f'under {machineType}. Moving on...\n')
    errorLog.write(m)
    errorLog.close()
The traceback is:
Exception has occurred: NameError name 'file' is not defined
File "C:\Users\MacalusoC\Desktop\Technical Docs\TLC_Program_Release\Scripts\Program_Release_v4.py", line 348, in printDecoSetupSheet
file, 'print')
File "C:\Users\MacalusoC\Desktop\Technical Docs\TLC_Program_Release\Scripts\Program_Release_v4.py", line 835, in main
printDecoSetupSheet(scanDestinationPath)
File "C:\Users\MacalusoC\Desktop\Technical Docs\TLC_Program_Release\Scripts\Program_Release_v4.py", line 869, in <module>
main()

Python script to run FME workbench

I have more than 500 XML files, and each one has to be processed individually in an FME workbench (one run of the workbench per XML file).
For this purpose I run a Python file (loop.py) that calls the FME workbench once for each XML file.
The whole process used to work on another PC without any problem. Now, when I run the module, I get the following error:
Traceback (most recent call last):E:\XML_Data
File "E:\XML_Data\process\01_XML_Tile_1.py", line 28, in <module>
if "Translation was SUCCESSFUL" in open(path_log + "\\" + data + ".log").read():
IOError: [Errno 2] No such file or directory: 'E:\XML_Data\data_out\log_01\re_3385-5275.xml.log'
Attached the python code(loop.py).
Any help is greatly appreciated.
import os
import time
# Mainpath and Working Folder:
#path_main = r"E:\XML_Data"
path_main = r"E:\XML_Data"
teil = str("01")

# variables
path_in = path_main + r"\data_in\03_Places\teil_" + teil  # "Source folder of XML files"
path_in_tile10 = path_main + r"\data_in\01_Tiling\10x10.shp"  # "Source folder of Grid shapefile"
path_in_commu = path_main + r"\data_in\02_Communities\Communities.shp"  # "Source folder of Communities shapefile"
path_out = path_main + r"\data_out\teil_" + teil  # "Output folder of shapefiles that resulted from XML files (tile_01 folder)"
path_log = path_main + r"\data_out\log_" + teil  # "Output folder of log files for each run(log_01 folder)"
path_fme = r"%FME_EXE_2015%"  # "C:\Program Files\FME2015\fme.exe"
path_fme_workbench = path_main + r"\process\PY_FME2015.fmw"  # "path of FME workbench"

datalists = os.listdir(path_in)
count = 0

# loop each file individually in FME
for data in datalists:
    # Skip directory entries whose name does not contain ".xml".
    if data.find(".xml") == -1:
        continue

    count += 1
    print ("Run-No." + str(count) + ": with data " + data)

    # FME is asked to write one log file per input; it is inspected below.
    log_file = path_log + "\\" + data + ".log"
    command = " ".join([
        path_fme,
        path_fme_workbench,
        "--SourceDataset_XML", path_in + "\\" + data,
        "--SourceDataset_SHAPE", path_in_tile10,
        "--SourceDataset_SHAPE_COMU", path_in_commu,
        "--DestDataset_SHAPE", path_out,
        "LOG_FILENAME", log_file,
    ])
    os.system(command)
    print ("Data processed: " + data)

    shape = str(data[19:28]) + "_POPINT_CENTR_UTM32N.shp"
    print ("ResultsFileName: " + shape)

    # open() raises if the command above did not produce the log file.
    with open(log_file) as log:
        translated = "Translation was SUCCESSFUL" in log.read()

    if translated:
        if os.path.isfile(path_out + "\\" + shape):
            # Translation was successful and SHP file exists:
            with open(path_out + "\\" + "result_xml.log", "a") as write_log:
                write_log.write(time.asctime(time.localtime()) + " " + shape + "\n")
            print("Everything ok")
        else:
            # Translation was successful, but SHP file does not exist:
            with open(path_out + "\\" + "error_xml.log", "a") as write_log:
                write_log.write(time.asctime(time.localtime()) + " Data: " + shape + " unavailable.\n")
    else:
        # Translation was not successful. The loop variable is `data`; the
        # original wrote `Data`, which raises NameError on this path.
        with open(path_out + "\\" + "error_xml.log", "a") as write_log:
            write_log.write(time.asctime(time.localtime()) + " Translation " + data + " not successful.\n")

print ("Number of calculated files: " + str(count))

Most likely, the script failed at the os.system line, so the log file was not created from the command. Since you mentioned a different computer, it could be caused by many reasons, such as a different version of FME (so the environment variable %FME_EXE_2015% would not exist).

Use a workspace runner transformer to do this.

The FME version is outdated, so first check whether the version is what is causing the problem.

# Each argument is its own list element; without the comma after
# "/fmefile.fmw" Python would join it with "LOG_FILENAME" into one string.
subprocess.call(["C:/Program Files/fme/FMEStarter/FMEStarter.exe", "C:/Program Files/fme/fme20238/fme.exe", "/fmefile.fmw", "LOG_FILENAME", "logfile"], stdin=None, stdout=None, stderr=None, shell=True, timeout=None)

Python 3: urlextract package, PermissionError

I am using Windows 10 x64, with Python 3.6.1 x86.
I have this script from a few months ago which was working fine, but right now it gives me a weird error. The script is a simple one that extracts URLs from tweets saved in .csv files.
This is the script:
import datetime
from urlextract import URLExtract
# Names of the tweet files to scan; each one is opened as input_path + name.
twitter_files_list = ['File1.csv', 'File2.csv', 'File3.csv']
# NOTE(review): my_path is not defined anywhere in this snippet; presumably a
# placeholder for the input directory (including a trailing separator). Confirm.
input_path = my_path
# Find domain of URL
def find_domain(url):
    """Return the host part of *url*.

    The host is the text after the last ``//`` (or the whole string when
    there is none) up to, but not including, the next ``/``.
    """
    return url.split("//")[-1].split("/")[0]
# Clean domain from useless chars
def clean_domain(domain):
    """Return *domain* with every ``[``, ``]`` and ``'`` character removed."""
    for unwanted in ("[", "]", "'"):
        domain = domain.replace(unwanted, "")
    return domain
# Extract URLs from Tweets
def url_extract(filename):
    """Append the domains of the URLs found in one tweet file to its output file.

    Reads ``input_path + filename``, takes the fifth tab-separated field of
    every line as the tweet text, and appends one cleaned domain per matching
    line to ``extracted_urls/urls_<filename>``.

    The original ``except 'Not Found':`` handler was removed: a string is not
    an exception class, so in Python 3 it could never catch anything and
    raised ``TypeError`` as soon as an exception reached it. Errors raised
    while extracting now propagate unchanged.
    """
    print('\n' + filename + ':')

    # The output file is opened first, as before, so it exists even when the
    # input turns out to be unreadable; `with` closes it on every exit path.
    with open('extracted_urls/urls_' + filename, 'a') as url_file:
        # Collect the contents of column "text" (fifth tab-separated field).
        with open(input_path + filename, "r", encoding="utf8") as f:
            text = [line.split('\t')[4] for line in f]

        extractor = URLExtract()
        for tweet in text:
            urls = extractor.find_urls(tweet)
            if not urls:
                continue
            # str(urls) is the list's repr; clean_domain() strips the
            # brackets and quotes that can survive find_domain().
            domain = find_domain(str(urls))
            if " " not in domain:
                url_file.write(str(clean_domain(domain)) + "\n")
# Main
if __name__ == '__main__':
    print('\nURL Characterization:\n')

    # Start timer
    start = datetime.datetime.now()

    # Extract the URL domains from every file
    for twitter_file in twitter_files_list:
        print('Searching ' + str(twitter_file) + '...')
        url_extract(twitter_file)

    # End timer
    end = datetime.datetime.now()

    # Print results
    print("\nProcess finished")
    print("Total time: " + str(end - start))
This gives me the following error:
Traceback (most recent call last):
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 77, in <module>
url_extract(twitter_file)
File "C:/Users/Aventinus/url_analysis/url_extractor.py", line 50, in url_extract
extractor = URLExtract()
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 65, in __init__
if not self._download_tlds_list():
File "C:\Program Files (x86)\Python36-32\lib\site-packages\urlextract.py", line 114, in _download_tlds_list
with open(self._tld_list_path, 'w') as ftld:
PermissionError: [Errno 13] Permission denied: 'C:\\Program Files (x86)\\Python36-32\\lib\\site-packages\\.tlds'
I have no idea how to interpret this.

You can try running the script as administrator.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Getting data from the url using python and unzipped - python

Related

I receive an "OSError: [Errno 22] Invalid argument" when I'm trying to change a time format

How to show python execution progress

what is proper syntax for this if statement?

Python script to run FME workbench

Python 3: urlextract package, PermissionError

Categories

Resources