I am trying to write my output into a file what my code is doing is that its looking for matched file names and storing it into a file similary for unmatched files but the problem is when i use write it overwrites the file and when i use append on every run it keeps on appending the file matched file names. What i need is that it refresh te file whenever the script is run and loads it with current data only.
import re
import sys
import os
import glob
import pandas as pd
import logging
try:
for file in glob.glob('*.csv'):
r = re.search(r'abc_sales_(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}.csv', file)
if r:
#matched=file
print(f'File matched:{file}')
fp=open('bad_lines.txt', 'r+')
sys.stdout = fp
else:
path=f'File not matched:{file}'
f=open('filenotmatched.txt','a')
f.seek(0)
f.truncate()
f.write(path+'\n')
f.close()
except Exception as e:
pass
Suggested changes to your code.
import re
import sys
import os
import glob
import pandas as pd
import logging
# We create new 'bad_lines.txt' and
# 'filenotmatched.txt' for each run
with open('bad_lines.txt', 'w') as f_badlines, open('filenotmatched.txt','w') as f_notmatched:
try:
for file in glob.glob('*.csv'):
r = re.search(r'abc_sales_(20[0-9][0-9])-([1-9]|1[0-2]|0[0-9])-([1-9]|1[0-9]|2[0-9]|3[0-1]|0[0-9])-[0-9]{2}_[a-z0-9]{3,5}.csv', file)
if r:
#matched=file
#print(f'File matched:{file}')
#fp=open('bad_lines.txt', 'r+')
# ** Not clear why you redirected
# ** standard out to a file
# ** rather than writing to file directly
#sys.stdout = fp
f_badlines.write(f'File matched:{file}\n')
else:
path=f'File not matched:{file}'
#f=open('filenotmatched.txt','a')
#f.seek(0)
#f.truncate()
#f.write(path+'\n')
#f.close()
f_notmatched.write(path + '\n')
except Exception as e:
pass
Related
I was trying to make a script that gets a .txt from a websites, pastes the code into a python executable temp file but its not working. Here is the code:
from urllib.request import urlopen as urlopen
import os
import subprocess
import os
import tempfile
filename = urlopen("https://randomsiteeeee.000webhostapp.com/script.txt")
temp = open(filename)
temp.close()
# Clean up the temporary file yourself
os.remove(filename)
temp = tempfile.TemporaryFile()
temp.close()
If you know a fix to this please let me know. The error is :
File "test.py", line 9, in <module>
temp = open(filename)
TypeError: expected str, bytes or os.PathLike object, not HTTPResponse
I tried everything such as a request to the url and pasting it but didnt work as well. I tried the code that i pasted here and didnt work as well.
And as i said, i was expecting it getting the code from the .txt from the website, and making it a temp executable python script
you are missing a read:
from urllib.request import urlopen as urlopen
import os
import subprocess
import os
import tempfile
filename = urlopen("https://randomsiteeeee.000webhostapp.com/script.txt").read() # <-- here
temp = open(filename)
temp.close()
# Clean up the temporary file yourself
os.remove(filename)
temp = tempfile.TemporaryFile()
temp.close()
But if the script.txt contains the script and not the filename, you need to create a temporary file and write the content:
from urllib.request import urlopen as urlopen
import os
import subprocess
import os
import tempfile
content = urlopen("https://randomsiteeeee.000webhostapp.com/script.txt").read() #
with tempfile.TemporaryFile() as fp:
name = fp.name
fp.write(content)
If you want to execute the code you fetch from the url, you may also use exec or eval instead of writing a new script file.
eval and exec are EVIL, they should only be used if you 100% trust the input and there is no other way!
EDIT: How do i use exec?
Using exec, you could do something like this (also, I use requests instead of urllib here. If you prefer urllib, you can do this too):
import requests
exec(requests.get("https://randomsiteeeee.000webhostapp.com/script.txt").text)
Your trying to open a file that is named "the content of a website".
filename = "path/to/my/output/file.txt"
httpresponse = urlopen("https://randomsiteeeee.000webhostapp.com/script.txt").read()
temp = open(filename)
temp.write(httpresponse)
temp.close()
Is probably more like what you are intending
I was able to run the regex on multiple files, I want to save the output of this like name_of_file_clean.txt.
Trying to find the best way.
import os, re
import glob
pattern = re.compile(r'(?<=CN=)(.*?)(?=,)')
for file in glob.glob('*.txt'):
with open(file) as fp:
for result in pattern.findall(fp.read()):
print(result)
We'll just open the output file and use the print functions file keyword argument to write to the file
import os, re
import glob
pattern = re.compile(r'(?<=CN=)(.*?)(?=,)')
for file in glob.glob('*.txt'):
with open(file) as fp:
with open(file[:-4] + '_clean.txt', 'w') as outfile:
for result in pattern.findall(fp.read()):
print(result, file=outfile)
I've been trying to make my python code to fill a form in word with data that i scraped off the Internet. I wrote the data in a txt file and are now trying to fill the word file with this code:
import zipfile
import os
import tempfile
import shutil
import codecs
def getXml(docxFilename,ReplaceText):
zip = zipfile.ZipFile(open(docxFilename,"rb"))
xmlString= zip.read("word/document.xml")
for key in ReplaceText.keys():
xmlString = xmlString.replace(str(key), str(ReplaceText.get(key)))
return xmlString
def createNewDocx(originalDocx,xmlString,newFilename):
tmpDir = tempfile.mkdtemp()
zip = zipfile.ZipFile(open(originalDocx,"rb"))
zip.extractall(tmpDir)
#3tmpDir=tmpDir.decode("utf-8")
with open(os.path.join(tmpDir,"word/document.xml"),"w") as f:
f.write(xmlString)
filenames = zip.namelist()
zipCopyFilename = newFilename
with zipfile.ZipFile(zipCopyFilename,"w") as docx:
for filename in filenames:
docx.write(os.path.join(tmpDir,filename),filename)
shutil.rmtree(tmpDir)
f=open('test.txt', 'r',)
text=f.read().split("\n")
print text[1]
Pavarde = text[1]
Replace = {"PAVARDE1":Pavarde}
createNewDocx("test.docx",getXml("test.docx",Replace),"test2.docx")
The file is created but I cant open it.
I get the following error:
Illegal xlm character
My guess would be that theres something with the encoding but I cant find a solution.
Using Python 3 on a windows machine:
I have a function to take a list of lists and open it as a csv file using my default application (excel). Despite closing the file after writing, I get a 'locked for editing' message when excel starts.
def opencsv(data):
"""saves a list of lists as a csv and opens"""
import tempfile
import os
import csv
handle, fn = tempfile.mkstemp(suffix='.csv')
with open(fn,"w", encoding='utf8',errors='surrogateescape',\
newline='') as f:
writer=csv.writer(f)
for row in data:
try:
writer.writerow(row)
except Exception as e:
print ('Error in writing row:',e)
f.close()
url = 'file://' + fn.replace(os.path.sep, '/')
os.startfile(fn)
opencsv([['d1','d2'],['d3','d4','d5']])
How can I fix this?
Answer from swstephe's input:
The issue is that mkstemp opens the file and associates it with an os handle. In my original code I was not closing this file properly. See below for updated code.
def opencsv(data):
"""saves a list of lists as a csv and opens"""
import tempfile
import os
import csv
handle, fn = tempfile.mkstemp(suffix='.csv')
with os.fdopen(handle,"w", encoding='utf8',errors='surrogateescape',\
newline='') as f:
writer=csv.writer(f)
for row in data:
try:
writer.writerow(row)
except Exception as e:
print ('Error in writing row:',e)
print (fn)
os.startfile(fn)
here is my code, i added exception to indexerror, but its not writing to csv file.
import urllib2
import csv
import time
import requests
import os
#a = open(r"C:\Drive F data\Client\Blake\III\2.txt")
a = ['http://www.houzz.com/trk/aHR0cDovL3d3dy5SSUtCLmNvbQ/b020157b98711b4a190eee3331eb0066/ue/MjA5ODg2MQ/1b9b00b9fdc9f270f14688046ef161e2',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5nc2NhYmluZXRyeS5jb20/0323b7db059b9e0357d045685be21a6d/ue/NDY2MjE0/d8815293352eb2a6e40c95060c019697',
'http://www.houzz.com/trk/aHR0cDovL3NpY29yYS5jb20/dc807b3705b95b5da772a7aefe23a803/ue/Njc0NDA4/a73f8bdb38e10abd5899fb5c55ff3548',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5DYXNlRGVzaWduLmNvbQ/d79c6af934e3c815d602c4d79b0d6617/ue/OTY3MDg/ce9a87e31e84871a96bca7538aae9856',
'http://www.houzz.com/trk/aHR0cDovL2phcnJldHRkZXNpZ25sbGMuY29t/9d0009d3544d9c22f6058b20097321b3/ue/MzExNTk1NA/310d49732d317725364368ea3fbfd7c1',
'http://www.houzz.com/trk/aHR0cDovL3d3dy5yb2JlcnRsZWdlcmVkZXNpZ24uY29t/8ac7311be2f794654cefba71474563f7/ue/MTExNTQ4/af201ffdc62de6aba9e2de90f69a770d']
with open("C:\Drive F data\Blake\III/2.csv", "ab")as export:
names = ['source','link']
writer = csv.DictWriter(export, fieldnames=names)
writer.writeheader()
for each in a:
try:
link = urllib2.urlopen(each).geturl()
except IndexError:
pass
print each, link
writer.writerow({'source':each,'link':link})
After removing try & exception , it works fine
I think you miss escape character in your file path. "C:\\Drive F data\\Blake\\III\\2.csv"