Simple python script to read GPX files+optparse - python

I'm new to python and trying to write a simple script to extract particular information from a GPX file. The limiting problem in my script below is that it can't seem to find the dtime in the else statement, but can see it in the first if statement. Perhaps my newness to python is escaping me, as I'm pretty sure there is an easy solution for this. Can anyone tell me how to get it to see the variable dtime or what I'm doing wrong?
Furthermore is there a better way to have it use the flags. The -D and -P will never have input by the user but the intention was to have the GPX file be read when those two flags were defined.
Here's the script below:
#!/usr/bin/env pnpython3
import gpxpy.parser
import os
def get_args () :
''' Parse input args
-x gpx filename
-d default deploy time yyyy:jjj:hh:mm:ss.sss
-p default pickup time yyyy:jjj:hh:mm:ss.sss
-D read deploy time from GPX file
-P read pickup time from GPX file
-l line number (array)
'''
global GPX, DEPLOY, PICKUP, LINE
from optparse import OptionParser
oparser = OptionParser ()
oparser.usage = "munge_wp.py -x gps_file_name -d deploy_yyyy:jjj:hh:mm:ss.sss -ppickup_yyyy:jjj:hh:mm:ss.sss -l line_number"
oparser.description = "Read a GPX way point file and produce a CSV file."
oparser.add_option ("-x", "--gpx", dest = "gpx_file",help = "GPX input file",metavar = "gpx_file")
oparser.add_option ("-d", "--deploy", dest = "deploy_time",help="Deploy time yyyy:jjj:hh:mm:ss.sss",metavar = "deploy_time")
oparser.add_option ("-p", "--pickup", dest = "pickup_time",
help="Pickup time yyyy:jjj:hh:mm:ss.sss",
metavar = "pickup_time")
oparser.add_option ("-D", "--Deploy", dest = "D_time",help="D_time will be read from GPX file and put in the following format yyyy:jjj:hh:mm:ss.sss",metavar = "D_time")
oparser.add_option ("-P", dest = "P_time", help="P_time will be read from GPX file and put
in the following format yyyy:jjj:hh:mm:ss.sss",
metavar = "P_time")
oparser.add_option ("-l", "--line", dest = "line_number",help = "The line number. Caution: Assumes that all stations in GPX file are on same line",metavar = "line_number")
options, args = oparser.parse_args()
#print options.outfile
GPX = options.gpx_file
DEPLOY = options.deploy_time
PICKUP = options.pickup_time
LINE = options.line_number
DTIME = options.D_time
PTIME = options.P_time
def open_gpx () :
fh = open (GPX)
gpx_parser = gpxpy.parser.GPXParser (fh)
gpx_parser.parse ()
fh.close ()
return gpx_parser.get_gpx ()
if __name__ == '__main__' :
get_args ()
gpx = open_gpx ()
print "#STA\tDAS\tLAT\tLON\tELEV\tDEPLOY\tPICKUP\tLINE"
for wp in gpx.waypoints :
s=wp.name
staname = s.split('-')[0]
#print staname
das = s.split('-')[1]
#print das
t=wp.time
dtme=os.popen("date -d t +%Y:%j:%H:%M:%S")
dtime=dtme.read()
if "DEPLOY" in locals() or "PICKUP" in locals():
line = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}".format (staname,
das,
wp.latitude,
wp.longitude,
wp.elevation,
DEPLOY,
PICKUP,
LINE)
print line
else:
line = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}".format (staname,
das,
wp.latitude,
wp.longitude,
wp.elevation,
dtime,
dtime,
LINE)
print line

Related

Python - Issues with code blocking in loops

I'm new to python and having some issues with blocking. I have a script that I'm calling with options. I'm able to see the arguments come in, however, I have been unable to get the program to work correctly. In the code sample below, I'm trying to grab the arguments and then run the piece of code after the "#if ip address is not defined qpid-route will not work" comment. If I change the indentation after the comment, I get expected indentation or unexpected indentation errors.
The problem is that the way the code currently is it will run the elif opt in ("-i", "--ipaddress"): code and then will continue and run the code through to the bottom and then come back and run the -s loop code and then rerun the code to the bottom.
To fix this, I tried a break or continue command and all I get is indentation errors on this no matter which level I align it with. Can someone help me format this correctly such that I can pull the ipaddress and scac values that I'm grabbing from the arguments and then run the code after the "#if ip address is not defined qpid-route will not work" comment as a separate block.
import re
import os
import sys
import getopt
import pdb
ipaddress = ""
scac = ''
def main(argv):
#print argv
ipaddress = ""
scac = ''
pdb.set_trace()
try:
opts, args = getopt.getopt(argv,"hi:s:",["ipaddress=","scac="])
if not opts: # if no option given
print 'usage test.py -i <ipaddress> -s <scac>'
sys.exit(2)
except getopt.GetoptError:
print 'test.py -i <ipaddress> -s <scac>'
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print 'FedConnectStatus.py -i <iobipaddress> -s <scac>'
sys.exit() # it should be on level of if
elif opt in ("-i", "--ipaddress"):
ipaddress = arg
#break
#continue
elif opt in ("-s", "--scac"):
scac = arg
#if ip address is not defined qpid-route will not work
if not ipaddress:
print 'ip address needed'
else:
print(os.getcwd())
#If no scac is given grab every federated connection and report
if not scac:
# open file to read
f = file('qpid.txt', 'r')
nameList = []
statusList = []
#skip first 4 lines
for i in range(3): f.next() # skip first four lines
# iterate over the lines in the file
for line in f:
# split the line into a list of column values
columns = line.split(None, 5)
# clean any whitespace off the items
columns = [col.strip() for col in columns]
# ensure the column has at least one value before printing
if columns:
#print "Name", columns[0] # print the first column
#print "Status", columns[4] # print the last column
nameList.append(columns[0])
statusList.append(columns[4])
#print name
#print status
#else:
print nameList
print statusList
#if __name__ == "__main__":
main(sys.argv[1:])
This statement if not scac: on line 40 and below has indentation different to that of the rest of the code.
You'll see line 38 also doesn't match the indenting of the above if.

comparing input with a file and minding the sequence

How can I compare the values of input parmeters with a file in such a way that the sequence of the lines in file are "respected". For example:
file sequence.txt has following enteries
aaa
bbb
ccc
ddd
and the input is coming like this (with comas):
./run.py -c migrate -s ddd,bbb
then output is like this:
bbb
ddd
Here is the script I have worked so far
#!/usr/bin/python
import sys
import getopt
import time
import os
def main(argv):
cmd = ''
schemas = ''
script_dir = os.path.dirname(__file__)
seq_file = "system/sequence.txt"
abs_file_path = os.path.join(script_dir, seq_file)
try:
opts, args = getopt.getopt(argv,"h:c:s",["cmd=","schemas="])
except getopt.GetoptError:
print './run.py -c=<command> -s=<schemas> '
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print './run.py -c=<command> -s=<schemas>'
sys.exit()
elif opt in ("-c", "--cmd"):
cmd = arg
elif opt in ("-s", "--schemas"):
schemas = arg
if cmd == "migrate" :
with open(abs_file_path) as z:
for line in z:
print line.rstrip('\n')
if __name__ == "__main__":
main(sys.argv[1:])
I know that I have to do comparisons at position print line.rstrip('\n') but I can't figure out how to do it. Any suggestions?
Also, how can I make -s switch mandatory if -c has "migrate" value?
Thanks in advance.
You need to check whether the current line of the sequence is specified with the -s flag. So you need to modify the schemas value, so that it is a list that contains all schemas and then you can check if the current line is equal to one of the schemas. As for your second question, I'm not familiar with getopt, but you could simply check whether schemas is not empty when -c is migrate and do the approriate error handling.
[...]
schemas = []
[...]
elif opt in ("-s", "--schemas"):
schemas = arg.split(',')
[...]
if cmd == 'migrate':
if not schemas: # list is empty
# do error handling
for line in z:
if line in schemas:
print line

Copy parameters into list

I am trying to copy parameters passed into a python script to a file. Here is the parameters.
["0013","1","1","\"john.dow#gmail.com\"","1","P123-ND 10Q","10Q H??C"]
I understand that there is a buffer problem and I am getting bad data into my parameters. However, I do not have control over what is being passed in. I am trying to copy, starting at the 5th parameter, the parameters into a file.
f = open(in_file_name, 'w')
for x in range(5, len(arg_list)):
f.write(arg_list[x] + '\n')
f.close()
The result of the file is below:
P123-ND 10Q
10Q H??C
Here is what it should be:
P123-ND
10Q
How can I not include the bad data? What is happening to the spaces between the valid information and the bad information?
As requested, here is the full program:
#!/bin/python
class Argument_Indices:
PRINTER_INDEX = 0
AREA_INDEX = 1
LABEL_INDEX = 2
EMAIL_INDEX = 3
RUN_TYPE_INDEX = 4
import argparse
import json
import os
from subprocess import call
import sys
from time import strftime
def _handle_args():
''' Setup and run argpars '''
parser = argparse.ArgumentParser(description='Set environment variables for and to call Program')
parser.add_argument('time_to_run', default='NOW', choices=['NOW', 'EOP'], help='when to run the report')
parser.add_argument('arguments', nargs='+', help='the remaining command line arguments')
return parser.parse_args()
def _proces_program(arg_list):
time_stamp = strftime("%d_%b_%Y_%H_%M_%S")
printer = arg_list[Argument_Indices.PRINTER_INDEX]
area = arg_list[Argument_Indices.AREA_INDEX]
label = arg_list[Argument_Indices.LABEL_INDEX]
in_file_name = "/tmp/program{0}.inp".format(time_stamp)
os.environ['INPUT_FILE'] = in_file_name
f = open(in_file_name, 'w')
for x in range(5, len(arg_list)):
f.write(arg_list[x])
f.close()
call(['./Program.bin', printer, area, label])
os.remove(in_file_name)
def main():
''' Main Function '''
arg_list = None
args = _handle_args()
if len(args.arguments) < 1:
print('Missing name of input file')
return -1
with open(args.arguments[0]) as input_file:
arg_list = json.load(input_file)
_process_program(arg_list)
return 0
if __name__ == '__main__':
if main() != 0:
print('Program run failed')
sys.exit()
For your exact case (where you're getting duplicated parameters received with some spaces in between) this would work:
received_param_list = ["0013","1","1","\"john.dow#gmail.com\"","1","P123-ND 10Q","10Q H??C"]
arg_list = [i.split(" ")[0] for i in received_param_list]
last_param = received_param_list[-1].split()[-1]
if last_param != arg_list[-1]:
arg_list.append(last_param)
for x in range(5, len(arg_list)):
print (arg_list[x])
Although there might be another simpler way

strange output when using flags in python

I'm currently writing a script in python that takes a number of flags. This is my first attempt at such a program, and I am getting an output from the bash script that I don't quite understand. For example when I run the script in the bash shell:
$ python my_script.py -f <input_file.txt> -k <test_string> -c <user_input>
I get this output before my script's output:
usage: rm [-f | -i] [-dPRrvW] file ...
unlink file
I can't seem to get rid of this, which is frustrating for the prettiness of the output. Any help would be great!
The code I'm using:
import sys, getopt, re, subprocess, collections, itertools
def find_kmers( arguments=sys.argv[1:] ):
required_opts = ['-f','-c','-k']
opts, args = getopt.getopt(arguments,'f:k:c:')
opt_dic = dict(opts)
for opt in required_opts:
if opt not in opt_dic:
return "incorrect arguments, please format as: python_script.py -f <filename> -k <kmer> -c <chromosome_name>"
def rev_comp(sequence):
reversed_dic = {'A':'T','T':'A','C':'G','G':'C'}
return ''.join(reversed_dic[_] for _ in sequence[::-1])
kmer = opt_dic['-k']
subprocess.call(['bash','-c',"grep '>' S288C_R64.fasta > grep.tmp"])
chromosomes = [_[1:].strip() for _ in open('grep.tmp')]
subprocess.call(['bash','-c','rm','grep.tmp'])
found = False
if any(opt_dic['-c']==_ for _ in chromosomes):
found = True
def get_sequence(file):
sequence = ''
for line in file:
if line.startswith('>'): break
sequence += line.strip()
return sequence.upper()
ofile = open(opt_dic['-f'])
if found == True:
for line in ofile:
if line.startswith('>'):
if line[1:].strip() == opt_dic['-c']:
sequence = get_sequence(ofile)
break
else:
return 'chromosome not found in %s. \n chromosomes in file are:%s'%(opt_dic['-f'],', '.join(str(_) for _ in chromosomes))
kmer_matches1 = re.finditer('(?=%s)'%opt_dic['-k'],sequence)
kmer_matches2 = re.finditer('(?=%s)'%opt_dic['-k'],rev_comp(sequence))
def print_statement(start,strand):
return '%s\thw1_script\tkmer=%s\t%s\t%s\t.\t%s\t.\tID=S288C;Name=S288C\n'%(opt_dic['-c'],opt_dic['-k'],start,start+len(opt_dic['-k'])-1,strand)
pos_strand = collections.deque()
neg_strand = collections.deque()
for match1,match2 in itertools.izip(kmer_matches1,kmer_matches2):
pos_strand.append(match1.start()+1)
neg_strand.append(match2.start()+1)
wfile = open('answer.gff3','w')
while len(pos_strand)>0 and len(neg_strand)>0:
if pos_strand[0]<neg_strand[0]:
start = pos_strand.popleft()
wfile.write(print_statement(start,'+'))
else:
start = neg_strand.popleft()
wfile.write(print_statement(start,'-'))
while len(pos_strand)>0:
start = pos_strand.popleft()
wfile.write(print_statement(start,'+'))
while len(neg_strand)>0:
start = neg_strand.popleft()
wfile.write(print_statement(start,'-'))
wfile.close()
return 'percent-GC = %s'%str(sum(sequence.count(gc) for gc in ["G","C"])/float(len(sequence)))
if __name__ == '__main__':
print find_kmers()
Invoking bash one-liners requires that the bash commands be a single string. Change:
subprocess.call(['bash','-c','rm','grep.tmp'])
to:
subprocess.call(['bash', '-c', 'rm grep.tmp'])
Or, more reasonably, don't use subprocesses for this, just do:
os.unlink('grep.tmp') # Or os.remove; same thing, different names
which is much faster and less error prone.
In fact, all of your subprocess usage could be replaced with real Python code, and it would improve it substantially (and much of the Python code simplifies too):
def find_kmers( arguments=sys.argv[1:] ):
required_opts = ['-f','-c','-k']
opts, args = getopt.getopt(arguments,'f:k:c:')
opt_dic = dict(opts)
for opt in required_opts:
if opt not in opt_dic:
return "incorrect arguments, please format as: python_script.py -f <filename> -k <kmer> -c <chromosome_name>"
def rev_comp(sequence):
reversed_dic = {'A':'T','T':'A','C':'G','G':'C'}
return ''.join(reversed_dic[_] for _ in sequence[::-1])
kmer = opt_dic['-k']
# Replaces grep with temp file with trivial Python equivalent
with open('S288C_R64.fasta') as f:
chromosomes = [line[1:].strip() for line in f if '>' in line]
# No need for any loop when just checking for exact value
if opt_dic['-c'] not in chromosomes:
return 'chromosome not found in %s. \n chromosomes in file are:%s'%(opt_dic['-f'],', '.join(str(_) for _ in chromosomes))
def get_sequence(file):
sequence = ''
for line in file:
if line.startswith('>'): break
sequence += line.strip()
return sequence.upper()
with open(opt_dic['-f']) as ofile:
for line in ofile:
if line.startswith('>'):
if line[1:].strip() == opt_dic['-c']:
sequence = get_sequence(ofile)
break
kmer_matches1 = re.finditer('(?=%s)'%opt_dic['-k'],sequence)
kmer_matches2 = re.finditer('(?=%s)'%opt_dic['-k'],rev_comp(sequence))
def print_statement(start,strand):
return '%s\thw1_script\tkmer=%s\t%s\t%s\t.\t%s\t.\tID=S288C;Name=S288C\n'%(opt_dic['-c'],opt_dic['-k'],start,start+len(opt_dic['-k'])-1,strand)
pos_strand = collections.deque()
neg_strand = collections.deque()
for match1,match2 in itertools.izip(kmer_matches1,kmer_matches2):
pos_strand.append(match1.start()+1)
neg_strand.append(match2.start()+1)
with open('answer.gff3','w') as wfile:
while pos_strand and neg_strand:
if pos_strand[0]<neg_strand[0]:
start = pos_strand.popleft()
wfile.write(print_statement(start,'+'))
else:
start = neg_strand.popleft()
wfile.write(print_statement(start,'-'))
for start in pos_strand:
wfile.write(print_statement(start,'+'))
for start in neg_strand:
wfile.write(print_statement(start,'-'))
return 'percent-GC = %s'%str(sum(sequence.count(gc) for gc in ["G","C"])/float(len(sequence)))

Python - py.path.new unicode error

I'm writing a program that fetches metadata from FLAC fliles and batch renames them. To do so, I'm using the py library.
Here is my code:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# This program takes the information from FLAC metadata to rename the files
# according to various naming paterns.
"""
rename-flac takes the information from FLAC metadata to batch rename
the files according to various naming paterns.
Usage:
rename-flac.py (-s | -v) <directory>
rename-flac.py (-h | --help)
rename-flac.py --version
Options:
-h --help Show the help screen
-- version Outputs version information and exits
-s Define album as single artist
-v Define album as various artist
"""
from docopt import docopt
import subprocess
import sys
import os
from py.path import local
# Dependency check
programlist = ["flac", "python-py", "python-docopt"]
for program in programlist:
pipe = subprocess.Popen(
["dpkg", "-l", program], stdout=subprocess.PIPE)
dependency, error = pipe.communicate()
if pipe.returncode:
print """
%s is not installed: this program won't run correctly.
To instal %s, run: aptitude install %s
""" % (program, program, program)
sys.exit()
else:
pass
# Defining the function that fetches metadata and formats it
def metadata(filename):
filename = str(filename).decode("utf-8")
pipe = subprocess.Popen(
["metaflac", "--show-tag=tracknumber", filename],
stdout=subprocess.PIPE)
tracknumber, error = pipe.communicate()
tracknumber = tracknumber.decode("utf-8")
tracknumber = tracknumber.replace("tracknumber=", "")
tracknumber = tracknumber.replace("TRACKNUMBER=", "")
tracknumber = tracknumber.rstrip() # Remove whitespaces
if int(tracknumber) < 10:
if "0" in tracknumber:
pass
else:
tracknumber = "0" + tracknumber
else:
pass
pipe = subprocess.Popen(
["metaflac", "--show-tag=title", filename],
stdout=subprocess.PIPE)
title, error = pipe.communicate()
title = title.decode("utf-8")
title = title.replace("TITLE=", "")
title = title.replace("title=", "")
title = title.rstrip()
pipe = subprocess.Popen(
["metaflac", "--show-tag=artist", filename],
stdout=subprocess.PIPE)
artist, error = pipe.communicate()
artist = artist.decode("utf-8")
artist = artist.replace("ARTIST=", "")
artist = artist.replace("artist=", "")
artist = artist.rstrip()
return tracknumber, title, artist
# Defining function that renames the files
def rename(root):
if output == str(filename.purebasename).decode("utf-8"):
print "%s is already named correctly\n" % (title)
else:
filename.rename(filename.new(purebasename=output))
# Importing command line arguments
args = docopt(__doc__, version="rename-flac 0.5")
for option, value in args.iteritems():
global root, choice
if option == "<directory>":
root = local(value)
elif option == "-s" and value == True:
choice = 1
elif option == "-v" and value == True:
choice = 2
else:
pass
# 1 - Single artist
# File naming partern: TRACKNUMBER - TITLE.flac
if choice == 1:
for filename in root.visit(fil="*.flac", rec=True):
tracknumber, title, artist = metadata(filename)
output = "%s - %s" % (tracknumber, title)
rename(root)
print "Files renamed"
else:
pass
# 2 - Various artists
# File naming pattern: TRACKNUMBER - ARTIST - TITLE.flac
if choice == 2:
for filename in root.visit(fil="*.flac", rec=True):
tracknumber, title, artist = metadata(filename)
output = "%s - %s - %s" % (tracknumber, artist, title)
rename(root)
print "Files renamed"
else:
pass
My code runs fine when filename has utf-8 characters, but when the path to filename has utf-8 characters it get this error message:
Traceback (most recent call last):
File "/media/Main/Programmes/Rename_FLAC/rename-flac.py", line 122, in <module>
rename(root)
File "/media/Main/Programmes/Rename_FLAC/rename-flac.py", line 97, in rename
filename.rename(filename.new(purebasename=output))
File "/usr/lib/python2.7/dist-packages/py/_path/local.py", line 273, in new
"%(dirname)s%(sep)s%(basename)s" % kw)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 28: ordinal not in range(128)
This may seem obvious to more experienced programmers, but I have been trying to resolve that error for a few hours now...
This was the problem:
filename.rename(filename.new(purebasename=output)) crashed because output was a unicode mess.
output was created by joining tracknumber, artist and title. In metadata(filename) I was reencoding them as unicode with string = string.decode("utf-8"), but they were already in unicode, resulting in output being mess-up and the crash.
So I deleted string = string.decode("utf-8") and it works.
yay.

Categories

Resources