Hot skip special character in txt file python - python

I have some issues while reading txt files. What i have to do is read files ( about 360 ) and make a plot. Everything works except when there is a special character in my file such us: "". When my reading function finds that character it crashes. Is there any way to skip it? My code:
import os
import matplotlib.pyplot as plt
import numpy as np
i = 10
j = 0
X = []
Y = []
Z = []
k = 0
A = np.zeros([360,719])
for i in range(10,360,10):
X = []
Y = []
if len(str(i)) == 2:
data = open(dir + '\\150317_ScPONd_0%s_radio.txt'%i, 'r')
else:
data = open(dir + '\\150317_ScPONd_%s_radio.txt'%i, 'r')
z = data.readlines()
data.close()
for line in z:
if not line.startswith('$'):
data_2 = line.split('\t')
X.append(data_2[0])
Y.append(data_2[1])
A[j,:] = X
A[(j+1),:] = Y
And here is how my file looks like:
Is there any way to skip those "$" lines? Sorry for that picture, I have no idea how to attache It better.

Thaks to #user1753919 I have found an answer. If someone would be still interested in this, here is working code:
for i in range(10,360,10):
X = []
Y = []
if len(str(i)) == 2:
data = np.genfromtxt(dir + '\\150317_ScPONd_0%s_radio.txt'%i,skip_header = 12)
else:
data = np.genfromtxt(dir + '\\150317_ScPONd_%s_radio.txt'%i,skip_header = 12)
for line in data:
X.append(line[0])
Y.append(line[1])
A[j,:] = X
A[(j+1),:] = Y
plt.plot(A[j,:],A[(j+1),:],label = '{} K'.format(i))
plt.hold
j = j+2

genfromtxt is overkill.
np.loadtxt(file, comments='$')

Related

Error to separate list of coordinates in Python

I have a file that contains 3 lists with pairs of coordinates. I would like to read the files and separate the first field as a name and the rest as coordinates. However, I don't know how to do this.
I am using the following code to read the txt file.
arquivo = open('dados_utm.txt', 'rt')
t = ' '
t1 = ' '
while t != '':
t = arquivo.readline()
t1 = t.split(' ')
print(t1)
Output:
['Poly', '"Pampulha"', '420545.,8039109.', '421826.,8039269.',
'424213.,8041682.', '424189.,8043000.', '424331.,8044861.',
'426457.,8047689.', '427082.,8047013.', '427713.,8044612.',
'427710.,8042703.', '428712.,8040642.', '428713.,8040196.',
'428790.,8039499.', '428356.,8038819.', '427844.,8039050.',
'426759.,8038697.', '426595.,8035314.', '427213.,8033950.',
'426558.,8030343.', '426113.,8030041.', '420041.,8030502.',
'419081.,8031438.', '419678.,8037604.', '420545.,8039109.\n']
['Poly',
'"Jacaré"', '425846.,8055763.', '424723.,8057841.',
'422398.,8058414.', '413568.,8058765.', '410307.,8060688.',
'403022.,8068114.', '402543.,8071067.', '403423.,8071846.',
'417134.,8073069.', '419408.,8074047.', '424638.,8068255.',
'429946.,8065755.', '430183.,8064351.', '433594.,8058696.',
'434290.,8058940.', '434296.,8057197.', '431016.,8051616.',
'430041.,8051612.', '428278.,8051122.\n']
['Poly', '"Patos"',
'437525.,7991091.', '439184.,7993615.', '435440.,8005422.',
'437290.,8006397.', '443981.,8000217.', '445662.,7995572.',
'448275.,7988217.', '446432.,7984918.', '438654.,7985476.',
'437525.,7991091.'] ['']
The second step is to separate the x and y coordinates for different variables. For this I am using the following code.
for i in t1[1,0]:
x = []
y = []
xy = t1.readline()
xy = xy.split(',')
x.append(float(xy[0]))
y.append(float(xy[1]))
print(x, y)
However I have the following error:
TypeError: list indices must be integers or slices, not tuple
txt file:
Poly "Pampulha" 420545.,8039109. 421826.,8039269. 424213.,8041682.
424189.,8043000. 424331.,8044861. 426457.,8047689. 427082.,8047013. 427713.,8044612. 427710.,8042703. 428712.,8040642. 428713.,8040196. 428790.,8039499. 428356.,8038819. 427844.,8039050. 426759.,8038697. 426595.,8035314. 427213.,8033950. 426558.,8030343. 426113.,8030041. 420041.,8030502. 419081.,8031438. 419678.,8037604. 420545.,8039109.
Poly "Jacaré" 425846.,8055763. 424723.,8057841. 422398.,8058414.
413568.,8058765. 410307.,8060688. 403022.,8068114. 402543.,8071067. 403423.,8071846. 417134.,8073069. 419408.,8074047. 424638.,8068255. 429946.,8065755. 430183.,8064351. 433594.,8058696. 434290.,8058940. 434296.,8057197. 431016.,8051616. 430041.,8051612. 428278.,8051122.
Poly "Patos" 437525.,7991091. 439184.,7993615. 435440.,8005422.
437290.,8006397. 443981.,8000217. 445662.,7995572. 448275.,7988217. 446432.,7984918. 438654.,7985476. 437525.,7991091.
what am I doing wrong?
You need more than one list because you're overwriting t1, you got that error from having [1, 0] after t1 in your for loop, t1 is a list so readline() won't work.
This should work and put coords as lists of tuples into dict t2 with the names as keys:
arquivo = open('dados_utm.txt', 'rt')
t = None
t1 = []
while t != '':
t = arquivo.readline()
t1.append(t.split(' '))
t2 = {}
for a in t1:
name = a.pop(0) + ' ' + a.pop(1)
t2[name] = []
for ele in a:
xy = ele.split(',')
x, y = float(xy[0]), float(xy[1])
t2[name].append((x, y))
print(t2)
You might want to think about pandas its a good library.
text = open('untitled.txt', 'rt').read()
lst = [item for item in text.split('\n') if item]
lst = [item.split(' ') for item in lst]
t2 = {}
for itr in lst:
name = ''.join(itr[0:2]).replace('"',' ')
t2[name] = {}
df = pd.DataFrame(map(lambda x: x.split(','),itr[2:]),columns=["X","Y"])
t2[name] = {
"X": df["X"].to_list(),
"Y": df["Y"].to_list()
}
print(t2)

How do I get this code to add to the salary of the employees as part of a list

Here is my code:
inputFile = open("Employees.txt", "r").read()
inputList = inputFile.split("\n")
fList = []
def listString(s):
string = ""
return (string.join(s))
for i in inputList:
for x in i.split(","):
fList.append(x)
for y in range (len(fList)):
**if fList[y] == "90000":
fList[y] = str(90000 * 1.05) + "\n"
elif fList[y] == "75000":
fList[y] = str(75000 * 1.05) + "\n"
elif fList[y] == "110000":
fList[y] = str(110000 * 1.05) + "\n"
else:
fList[y] = fList[y] + ","**
print(listString(fList))
file = open("Emp_Bonus.txt", "a")
file.write(listString(fList))
Employees.txt contains the following:
Adam Lee,Programmer,90000
Morris Heather,DA,75000
John Lee,PM,110000
I am trying to get the following output:
Adam Lee,Programmer,94500
Morris Heather,DA,78750
John Lee,PM,115500
The part of the code that is in bold is the problem, The input salaries need to be able to be different values instead of the code only working for the sample input. The input salaries have to be multiplied by 1.05. How should I go about doing this? Thanks!
Another way without any library. Just read lines of the file as a list using readlines() and then iterate each line. Only modify the last part after splitting it using split(',') e.g salary of each line and finally create the new file as per the requirements.
multiply, final_result = 1.05, []
with open('Employees.txt', 'r') as f:
fList = f.readlines()
if fList:
for line in fList:
employee_info = line.split(',')
name = employee_info[0]
designation = employee_info[2]
salary = float(employee_info[2].replace('\n','').strip()) * multiply
final_result.append(f"{name},{employee_info[1]},{salary}")
if final_result:
with open('Emp_Bonus.txt', 'w') as f:
f.write('\n'.join(final_result))
Output:
Adam Lee,Programmer,94500.0
Morris Heather,DA,78750.0
John Lee,PM,115500.0
I will like to use Pandas:
import pandas as pd
df = pd.read_csv("Employees.txt",header=None)
df[2] = df.loc[df[2].isin([90000,75000,110000]),2]*1.05
df[2] = df[2].astype(int)
df.to_csv("Emp_Bonus.txt",mode="a",header=None)

Pandas read_csv not reading the file (while-loop)

I am trying to read several (~30) csv-sheets i have stored on my PC.
i=2
Liste1 = []
Liste2 = []
x = 0
while i < 32:
string = str(i)
if i < 10:
try:
Name = 'D:\\FTPDaten\\2020\\Alle\\2020010'+string+'.csv'
Tabelle = pd.read_csv(Name, sep=';', decimal=",", header=0, usecols=[7, 20])
Tabelle.columns = ['AC', 'DC']
neueTabelle1 = Tabelle['AC']
neueTabelle = Tabelle['DC']
Schleifenlaenge = len(neueTabelle)
j = 0
del(Tabelle)
while j < Schleifenlaenge:
Datenwert1 = neueTabelle.iloc[j]
Datenwert2 = neueTabelle1.iloc[j]
Liste1.append(Datenwert1)
Liste2.append(Datenwert2)
j = j + 1
except FileNotFoundError:
i=i+1
elif i >= 10 and i < 32:
try:
Name = 'D:\\FTPDaten\\2020\\Alle\\202001' + string + '.csv'
Tabelle = pd.read_csv(Name, sep=';', decimal=",", header=0, usecols=[7, 20])
Tabelle.columns = ['AC', 'DC']
neueTabelle1 = Tabelle['AC']
neueTabelle = Tabelle['DC']
Schleifenlaenge = len(neueTabelle)
j = 0
while j < Schleifenlaenge:
Datenwert1 = neueTabelle1.iloc[j]
Datenwert2 = neueTabelle.iloc[j]
Liste1.append(Datenwert1)
Liste2.append(Datenwert2)
j = j + 1
except FileNotFoundError:
i = i+1
i = i + 1
When the while-loop is running for the first time everything works fine. When it comes into the first iteration, the read_csv does not read the file like it did before. I would expect to get a DataFrame where the 7th and 20th column is stored. But i do get a DataFrame with no content at all - just the header.
I tried a lot, but certainly i can´t fix it.
The issue was with how it was reading in the ';' in the other files (except the first one). If you open them in excel, you might be able to see what I'm talking about. So what you'll need to do is skip those rows at the beginning of the file.
import pandas as pd
Liste1 = []
Liste2 = []
for i in range(2,32):
skipRows = 7
if i != 2:
skipRows += 1
if i < 10:
try:
Name = 'D:\\FTPDaten\\2020\\Alle\\2020010{string}.csv'.format(string=i)
Tabelle = pd.read_csv(Name, sep=';', decimal=",", header=0, usecols=[7, 20], skiprows=skipRows)
Tabelle.columns = ['AC', 'DC']
if i < 10:
Datenwert1 = list(Tabelle['DC'])
Datenwert2 = list(Tabelle['AC'])
elif i >= 10 and i < 32:
Datenwert1 = list(Tabelle['AC'])
Datenwert2 = list(Tabelle['DC'])
Liste1 += Datenwert1
Liste2 += Datenwert2
except FileNotFoundError as e:
print(e)
df = pd.DataFrame({'col1':Datenwert1, #<-- change 'col1', 'col2' to whatever you want to name them
'col2':Datenwert2})
Try creating a new dataframe here instead of iterate over the existing one
cols = ['AC', 'DC']
new_Tabelle = pd.DataFrame(columns = cols)
new_Tabelle['AC']=Tabelle['AC']
new_Tabelle['DC']=Tabelle['DC']
I don't have 30 semi-colon delimited files. However this can be so easily simplified to only pick up files that exist and match a pattern using glob
import pandas as pd
from pathlib import Path
import random
for i in range(30):
with open(f"2020010_os_{i}.csv", "w") as fp: fp.write(f"id;val\n{i};{random.randint(10,20)}\n")
pd.concat([pd.read_csv(fn, sep=";") for fn in Path().cwd().glob("2020010*.csv")])

Reconciling an array slicer

I've built a function to cut the extraneous garbage out of text entries. It uses an array slicer. I now need to reconcile the lines that've been removed by my cleanup function so all the lines_lost + lines_kept = total lines. Source code below:
def header_cleanup(entry_chunk):
# Removes duplicate headers due to page-continuations
entry_chunk = entry_chunk.replace("\r\n\r\n","\r\n")
header = lines[1:5]
lines[:] = [x for x in lines if not any(header == x for header in headers)]
lines = headers + lines
return("\n".join(lines))
How could I count the lines that do not show up in lines after the slice/mutation, i.e:
original_length = len(lines)
lines = lines.remove_garbage
garbage = lines.garbage_only_plz
if len(lines) + len(garbage) == original_length:
print("Good!")
else:
print("Bad! ;(")
Final answer ended up looking like this:
def header_cleanup(entry_chunk):
lines = entry_chunk.replace("\r\n\r\n","\r\n")
line_length = len(lines)
headers = lines[1:5]
saved_lines = []
bad_lines = []
saved_lines[:] = [x for x in lines if not any(header == x for header in headers)]
bad_lines[:] = [x for x in lines if any(header == x for header in headers)]
total_lines = len(saved_lines) + len(bad_lines)
if total_lines == line_length:
print("Yay!")
else:
print("Boo.")
print(f"{rando_trace_info}")
sys.exit()
final_lines = headers + saved_lines
return("\n".join(final_lines))
Okokokokok - I know you're thinking: that's redundant, but it's required. Open to edits after solution for anything more pythonic. Thanks for consideration.
Don't reuse the lines variable, use a different variable, so you can get the garbage out of the original lines.
clean_lines = remove_garbage(lines)
garbage = garbage_only(lines)
if len(clean_lines) + len(garbage) == len(lines):
print("Good!")
else:
print("Bad!")
You might want to have a single function that returns both:
clean_lines, garbage = filter_garbage(lines)

I am trying to apply a python code to all the files in a directory but it gives me a error

I am trying to apply a python code to all the files in a directory but it gives me a error:
test_image = cv2.imread(sys.argv[1],0)
IndexError: list index out of range
I dont know what to change I tried few things but it does not help so if someone can help with this that would be great. And using stackoverflow for the first time, just to see how it works.
import sys
import cv2
import os
import numpy as np
from utils import pointsInsideCircle, compare, zigzag
from math import pi as PI
filepath = os.path.join("/Users/ssm/Desktop/X/1/Original Images", "*.tif")
W = 8 #block size for comparision
Dsim = 0.1 #threshold for symmetry
Nd = 25 #nearest block
quadrants_points = pointsInsideCircle(W/4) #(i,j) position of blocks which are partially/completely inside circle of radius W/2
zigzag_points = zigzag(W/2)
test_image = cv2.imread(sys.argv[1],0)
height,width = test_image.shape[:2]
#print (height,width)
vectors_list = []
for j in range(0,height-W+1):
for i in range(0,width-W+1):
block = test_image[j:j+W,i:i+W]
dct_block = cv2.dct(np.float32(block))
feature_block = [[],[],[],[]]
for index,coeff_list in enumerate(zigzag_points):
for coeff in coeff_list:
feature_block[index].append(dct_block[coeff[0],coeff[1]])
feature_block_np = np.array(feature_block)
feature_vector = []
for quadrant,points in quadrants_points.iteritems():
summ = 0
for point in points:
summ = summ + feature_block_np[point[0],point[1]]
feature_vector.append(summ/PI)
vectors_list.append(np.array(feature_vector))
vectors_list2 = cv2.sort(np.array(vectors_list),cv2.SORT_EVERY_ROW)
print "vectors calculated"
import json
with open('data.json', 'w') as outfile:
json.dump(vectors_list2.tolist(), outfile)
i=0
blocks = []
for i in range(0,len(vectors_list)):
if i%width == 0:
print i/width
posA = [i/width,i%width]
j = i+1
for j in range(i+1,len(vectors_list)):
posB = [j/width,j%width]
if compare(vectors_list[i],vectors_list[j],posA,posB,Dsim,Nd):
print (posA,posB)
blocks.append([posA,posB])
output_image = cv2.imread(sys.argv[1],1)
for block in blocks:
x1 = block[0][0]
x1_8 = block[0][0]+W
y1 = block[0][1]
y1_8 = block[0][1]+W
output_image[x1:x1_8,y1:y1_8] = [0,0,255]
x2 = block[1][0]
x2_8 = block[1][0]+W
y2 = block[1][1]
y2_8 = block[1][1]+W
output_image[x2:x2_8,y2:y2_8]=[0,255,0]
cv2.imwrite("output.jpg",output_image)
print "feature vectors extracted"
test_image = cv2.imread(sys.argv[1],0)
is checking the list provided by the commandline for a file name. For example if you invoked this script with:
$python myprog.py afilename.xxx
sys.argv would be ['myprog', 'afilename.xxx'], and this imread line would load an image from afilename.xxx.
If you don't provide that filename, sys.argv will only have the script name, and sys.argv[1] will raise this error.

Categories

Resources