Using Argparse to create file converter in Python - python

I have to use the command prompt and python to recieve an input in the form of a csv file, then read it and convert it into a xml file with the same name as the csv file except with .xml file extension or the user can set the ouput file name and path using the -o --output optional command line argument. Well i have searched on google for days, and so far my program allows me to input command line arguments and i can convert the csv to an xml file but it doesn't print it using the same name as the csv file or when the user sets the name. Instead it just prints out a blank file. Here is my code:
import sys, argparse
import csv
import indent
from xml.etree.ElementTree import ElementTree, Element, SubElement, Comment, tostring
parser=argparse.ArgumentParser(description='Convert wordlist text files to various formats.', prog='Text Converter')
parser.add_argument('-v','--verbose',action='store_true',dest='verbose',help='Increases messages being printed to stdout')
parser.add_argument('-c','--csv',action='store_true',dest='readcsv',help='Reads CSV file and converts to XML file with same name')
parser.add_argument('-x','--xml',action='store_true',dest='toxml',help='Convert CSV to XML with different name')
parser.add_argument('-i','--inputfile',type=argparse.FileType('r'),dest='inputfile',help='Name of file to be imported',required=True)
parser.add_argument('-o','--outputfile',type=argparse.FileType('w'),dest='outputfile',help='Output file name')
args = parser.parse_args()
def main(argv):
reader = read_csv()
if args.verbose:
print ('Verbose Selected')
if args.toxml:
if args.verbose:
print ('Convert to XML Selected')
generate_xml(reader)
if args.readcsv:
if args.verbose:
print ('Reading CSV file')
read_csv()
if not (args.toxml or args.readcsv):
parser.error('No action requested')
return 1
def read_csv():
with open ('1250_12.csv', 'r') as data:
return list(csv.reader(data))
def generate_xml(reader):
root = Element('Solution')
root.set('version','1.0')
tree = ElementTree(root)
head = SubElement(root, 'DrillHoles')
head.set('total_holes', '238')
description = SubElement(head,'description')
current_group = None
i = 0
for row in reader:
if i > 0:
x1,y1,z1,x2,y2,z2,cost = row
if current_group is None or i != current_group.text:
current_group = SubElement(description, 'hole',{'hole_id':"%s"%i})
collar = SubElement (current_group, 'collar',{'':', '.join((x1,y1,z1))}),
toe = SubElement (current_group, 'toe',{'':', '.join((x2,y2,z2))})
cost = SubElement(current_group, 'cost',{'':cost})
i+=1
indent.indent(root)
tree.write(open('hole.xml','w'))
if (__name__ == "__main__"):
sys.exit(main(sys.argv))
for the generate_xml() function, you can ignore it since it accepts csv files formatted a certain way so you might not understand it but, i think the problem lies in tree.write() since that part generates the xml file with a name that is written in the code itself and not the arguments at the command prompt.

You need to pass a file argument to generate_xml(). You appear to have the output file in args.outputfile.
generate_xml(reader, args.outputfile)
...
def generate_xml(reader, outfile):
...
tree.write(outfile)
You should probably also make use of args.inputfile:
reader = read_csv(args.inputfile)
...
def read_csv(inputfile):
return list(csv.reader(inputfile))
And this line does not do anything useful, it processes the .csv file, but doesn't do anything with the results:
read_csv()

The following code has been adapted from FB36's recipie on code.activestate.com
It will do what you need and you don't have to worry about the headers in the csv file, though there should only be one header (the first row) in the csv file. Have a look at the bottom of this page if you want to do batch conversion.
'''Convert csv to xml file
csv2xml.py takes two arguments:
1. csvFile: name of the csv file (may need to specify path to file)
2. xmlFile: name of the desired xml file (path to destination can be specified)
If only the csv file is provided, its name is used for the xml file.
Command line usage:
example1: python csv2xml.py 'fileName.csv' 'desiredName.xml'
example2: python csv2xml.py '/Documents/fileName.csv' '/NewFolder/desiredName.xml'
example3: python csv2xml.py 'fileName.csv'
This code has been adapted from: http://code.activestate.com/recipes/577423/
'''
import csv
def converter(csvFile, xmlFile):
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
# there must be only one top-level tag
xmlData.write('<csv_data>' + "\n")
rowNum = 0
for row in csvData:
if rowNum == 0:
tags = row
# replace spaces w/ underscores in tag names
for i in range(len(tags)):
tags[i] = tags[i].replace(' ', '_')
else:
xmlData.write('<row>' + "\n")
for i in range(len(tags)):
xmlData.write(' ' + '<' + tags[i] + '>' \
+ row[i] + '</' + tags[i] + '>' + "\n")
xmlData.write('</row>' + "\n")
rowNum +=1
xmlData.write('</csv_data>' + "\n")
xmlData.close()
## for using csv2xml.py from the command line
if __name__ == '__main__':
import sys
if len(sys.argv)==2:
import os
csvFile = sys.argv[1]
xmlFile = os.path.splitext(csvFile)[0] + '.xml'
converter(csvFile,xmlFile)
elif len(sys.argv)==3:
csvFile = sys.argv[1]
xmlFile = sys.argv[2]
converter(csvFile,xmlFile)
else:
print __doc__

Related

How to edit specific line for all text files in a folder by python?

Here below is my code about how to edit text file.
Since python can't just edit a line and save it at the same time,
I save the previous text file's content into a list first then write it out.
For example,if there are two text files called sample1.txt and sample2.txt in the same folder.
Sample1.txt
A for apple.
Second line.
Third line.
Sample2.txt
First line.
An apple a day.
Third line.
Execute python
import glob
import os
#search all text files which are in the same folder with python script
path = os.path.dirname(os.path.abspath(__file__))
txtlist = glob.glob(path + '\*.txt')
for file in txtlist:
fp1 = open(file, 'r+')
strings = [] #create a list to store the content
for line in fp1:
if 'apple' in line:
strings.append('banana\n') #change the content and store into list
else:
strings.append(line) #store the contents did not be changed
fp2 = open (file, 'w+') # rewrite the original text files
for line in strings:
fp2.write(line)
fp1.close()
fp2.close()
Sample1.txt
banana
Second line.
Third line.
Sample2.txt
First line.
banana
Third line.
That's how I edit specific line for text file.
My question is : Is there any method can do the same thing?
Like using the other functions or using the other data type rather than list.
Thank you everyone.
Simplify it to this:
with open(fname) as f:
content = f.readlines()
content = ['banana' if line.find('apple') != -1 else line for line in content]
and then write value of content to file back.
Instead of putting all the lines in a list and writing it, you can read it into memory, replace, and write it using same file.
def replace_word(filename):
with open(filename, 'r') as file:
data = file.read()
data = data.replace('word1', 'word2')
with open(filename, 'w') as file:
file.write(data)
Then you can loop through all of your files and apply this function
The built-in fileinput module makes this quite simple:
import fileinput
import glob
with fileinput.input(files=glob.glob('*.txt'), inplace=True) as files:
for line in files:
if 'apple' in line:
print('banana')
else:
print(line, end='')
fileinput redirects print into the active file.
import glob
import os
def replace_line(file_path, replace_table: dict) -> None:
list_lines = []
need_rewrite = False
with open(file_path, 'r') as f:
for line in f:
flag_rewrite = False
for key, new_val in replace_table.items():
if key in line:
list_lines.append(new_val+'\n')
flag_rewrite = True
need_rewrite = True
break # only replace first find the words.
if not flag_rewrite:
list_lines.append(line)
if not need_rewrite:
return
with open(file_path, 'w') as f:
[f.write(line) for line in list_lines]
if __name__ == '__main__':
work_dir = os.path.dirname(os.path.abspath(__file__))
txt_list = glob.glob(work_dir + '/*.txt')
replace_dict = dict(apple='banana', orange='grape')
for txt_path in txt_list:
replace_line(txt_path, replace_dict)

IOError: [Errno 22] invalid mode ('w') or filename

I am getting this error thrown when trying to make a file. It is being designed to take a created .csv file and put it into a plain text file.
I would like it to create a new file after it has been run with the date and time stamp but I seem to get the Errno 22 when trying to generate the file.
Any ideas?
import csv
import time
f = open(raw_input('Enter file name: '),"r")
saveFile = open ('Bursarcodes_'+time.strftime("%x")+ '_'+time.strftime("%X")+
'.txt', 'w+')
csv_f = csv.reader(f)
for row in csv_f:
saveFile.write( 'insert into bursarcode_lookup(bursarcode, note_id)' +
' values (\'' + row[0] + '\', ' + row[1] + ')\n')
f.close()
saveFile.close()
You cannot have slashes (/) and colons (:, but allowed in Unix) in your file name, but they are exactly what strftime generates in its output.
Python tries to help you, it says:
No such file or directory: 'Bursarcodes_01/09/15_19:59:24.txt'
Replace time.strftime("%x") with this:
time.strftime("%x").replace('/', '.')
...and time.strftime("%X") with this:
time.strftime("%X").replace(':', '_')
A cleaned-up and extended version:
import csv
import sys
import time
def make_output_fname():
# Thanks to #Andrew:
return time.strftime("Bursarcodes_%x_%X.txt").replace("/", "-").replace(":", "-")
def main(csv_fname=None, outfname=None, *args):
if not csv_fname:
# first arg not given - prompt for filename
csv_fname = raw_input("Enter .csv file name: ")
if not outfname:
# second arg not given - use serialized filename
outfname = make_output_fname()
with open(csv_fname) as inf, open(outfname, "w") as outf:
incsv = csv.reader(inf)
for row in incsv:
outf.write(
"insert into bursarcode_lookup(bursarcode, note_id) values ('{0}', '{1}')\n"
.format(*row)
)
if __name__=="__main__":
# pass any command-line arguments to main()
main(*sys.argv[1:])
You can now run it from the command-line as well.
Note that if any data items in your csv file contain unescaped single-quotes (') you will get invalid sql.

How to replace command line arguments sys.argv by stdin stdout?

I realize, that my question is a very simple one, but I can't find any explicit example of the implementation of the stdin stdout into a Python script.
I have a script, working perfectly well with command line arguments:
newlist = []
def f1()
....
def f2(input_file):
vol_id = sys.argv[3]
for line in input_file:
if ... :
line = line.replace('abc','def')
line = line.replace('id', 'id'+vol_id)
....
newlist.append(line)
return newlist
def main():
if len(sys.argv) < 4:
print 'usage: ./myscript.py [file_in... file_out... volume_id]'
sys.exit(1)
else:
filename = sys.argv[1]
filename_out = sys.argv[2]
tree = etree.parse(filename)
extract(tree)
input_file = open(filename, 'rU')
change_class(input_file)
file_new = open(filename_out, 'w')
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
Now I should somehow use stdin and stdout instead of my arguments in order to make my script usable within pipelines, like for example using multiple files as input:
cat input1 input1 input3 | myscript.py
Or to process its output with some UNIX tools before printing it to a file.
I tried to replace arguments in my script by sys.stdin:
filename = sys.stdin
filename_out = sys.stdout
Then I ran my script like this:
./myscript.py < inputfile > outputfile
It resulted in an empty outputfile, but didn't yeld any error messages at all.
Could you please help me with this replacement?
P.S. Then I modified my main() like this:
filename = sys.argv[1]
filename_out = sys.argv[2]
if filename == '-':
filename = sys.stdin
else:
input_file = open(filename, 'rU')
if filename_out == '-':
filename_out = sys.stdout
file_new = filename_out
else:
file_new = open(filename_out, 'w')
tree = etree.parse(filename)
extract(tree)
input_file = filename
change_class(input_file)
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
I tried to run it from the command line like this:
./myscript.py - - volumeid < filein > fileout
But I still got an empty output file :(
The common placeholder for stdin or stdout is -:
./myscript.py - - volumeid
and:
if filename == '-':
input_file = sys.stdin
else:
input_file = open(filename, 'rU')
etc.
In addition, you could default filename and filename_out to - when there are fewer than 3 command line arguments. You should consider using a dedicated command-line argument parser such as argparse, which can handle these cases for you, including defaulting to stdin and stdout, and using -.
As a side note, I'd not use print to write to a file; I'd just use:
file_new.write(x)
which removes the need to strip off the newlines as well.
You appear to read from the input file twice; once to parse the XML tree, once again to call change_class() with the open file object. What are you trying to do there? You'll have problems replicating that with sys.stdin as you cannot re-read the data from a stream the way you can from a file on disk.
You'd have to read all the data into memory first, then parse the XML from it, then read it it again for change_class(). It'd be better if you used the parsed XML tree for this instead, if possible (e.g. read the file only once, then use the parsed structure from there on out).

Python extracting first matching instance from multiple files

I have written a code which opens multiple files in a directory and prints only the first instance of match of required text from each file as output.
Now I want this output in a file. Doing it simply by putting print >> file.txt,... or .write or csv.write inside loop will not serve the purpose.
My code is:
import re, os, csv, sys
path = "D:\\"
in_files = os.listdir(path)
moldesc = ['Match1', 'Match2']
for f in in_files:
file = os.path.join(path, f)
text = open(file, "r")
for line in text:
if moldesc[0] in line:
Text1 = line.split()[-1]
if moldesc[1] in line:
Text2 = line.split()[-1]
print f, Text1, Text2 # I WANT THIS OUTPUT IN A FILE
break
text.close()
print "We are extraction done !!!"
you managed to open a file for reading, that's just one step away from opening a file for writing.
out = open(outfile, "w")
for f in in_files:
...
output_string = "{},{},{}\n".format(f, HOMO, LUMO)
out.write(output_string)

find and replace content between two words in POM

Need a bash script or python script to find and replace text between two tags?
E.g:
<start>text to find and replace with the one I give as input<end>
' text to find and replace with the one I give as input' is just an example and it could vary every time.
I want to do something like ./changetxt inputfile.xxx newtext
where changetxt has the script;
inputfile.xxx has the text that needs a change and newtext is what goes into inputfile.xxx
python:
import sys
if __name__ == "__main__":
#ajust these to your need
starttag = "<foo>"
endtag = "</foo>"
inputfilename = sys.argv[1]
outputfilename = inputfilename + ".out"
replacestr = sys.argv[2]
#open the inputfile from the first argument
inputfile = open(inputfilename, 'r')
#open an outputfile to put the result in
outputfile = open(outputfilename, 'w')
#test every line in the file for the starttag
for line in inputfile:
if starttag in line and endtag in line:
#compose a new line with the replaced string
newline = line[:line.find(starttag) + len(starttag)] + replacestr + line[line.find(endtag):]
#and write the new line to the outputfile
outputfile.write(newline)
else:
outputfile.write(line)
outputfile.close()
inputfile.close()
Save this in a replacetext.py file and run as python replacetext.py \path\to\inputfile "I want this text between the tags"
You could also use BeautifulSoup for this, from their docs:
If you set a tag’s .string attribute, the tag’s contents are replaced
with the string you give:
markup = 'I linked to <i>example.com</i>'
soup = BeautifulSoup(markup)
tag = soup.a
tag.string = "New link text."
tag
# New link text.

Categories

Resources