Search in a csv file - python

I am writing a script that reads files from different directories and then uses each file's ID to search in a csv file. Here is the piece of code:
import os
import glob

searchfile = open("file.csv", "r")
train_file = open('train.csv', 'w')
listOfFiles = os.listdir("train")
for l in listOfFiles:
    dirList = glob.glob(('/train/%s/*.jpg') % (l))
    for d in dirList:
        id = d.split("/")
        id = id[-1].split(".")
        print id[0]  # ID
        for line in searchfile:
            if id[0] in line:  # search in csv file
                value = line.split(",")
                value = value[1] + " " + value[2] + "\n"
                train_file.write(id[0] + "," + value)  # write description
                break
searchfile.close()
train_file.close()
However, I am only able to find a couple of the IDs in the csv file. Can someone point out my mistake? (Please see the comments for a description.)
EDITED
An example line from the file:
192397335,carrello porta utensili 18x27 eh l 411 x p 572 x h 872 6 cassetti,,691.74,192397335.jpg

Your issue is that when you do for line in searchfile: you're looping over a file iterator. The file doesn't reset for every id - for example, if the first id you pass to it is found on line 50, the search for the next id will start checking at line 51.
Instead, you can read your file into a list and loop over the list instead:
import os
import glob

with open("file.csv", "r") as s:
    search_file = s.readlines()

train_file = open('train.csv', 'w')
list_of_files = os.listdir("train")
for l in list_of_files:
    dirList = glob.glob(('/train/%s/*.jpg') % (l))
    for d in dirList:
        fname = os.path.splitext(os.path.basename(d))
        print fname[0]  # ID
        for line in search_file:
            if fname[0] in line:  # search in csv file
                value = line.split(",")
                value = value[1] + " " + value[2] + "\n"
                train_file.write(fname[0] + "," + value)  # write description
                break
train_file.close()
I made a couple of other changes too - firstly, you shouldn't use the name id, as it shadows a Python built-in; I picked fname instead to indicate the file name. Secondly, I changed your camelCase names to lowercase, as is the convention. Finally, getting the file name and extension is neat and fairly consistent through a combination of os.path.splitext and os.path.basename.
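For instance, a quick sketch of how that combination behaves (the path below is made up for illustration):

import os

d = '/train/some_dir/192397335.jpg'  # hypothetical path
base = os.path.basename(d)           # '192397335.jpg'
fname = os.path.splitext(base)       # ('192397335', '.jpg')
print fname[0]                       # prints the ID: 192397335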

You need to go through the lines of searchfile for each id found, but as you open the file outside of the loop, you only read each line once over the whole run.
You should either load the whole file into a list and iterate over that list of lines inside the loop, or, if searchfile is really large and would hardly fit in memory, reopen the file inside the loop.
List version:
with open("file.csv", "r") as searchfile:
    searchlines = searchfile.readlines()

train_file = open('train.csv', 'w')
listOfFiles = os.listdir("train")
for l in listOfFiles:
    dirList = glob.glob(('/train/%s/*.jpg') % (l))
    for d in dirList:
        id = d.split("/")
        id = id[-1].split(".")
        print id[0]  # ID
        for line in searchlines:  # now a list so start at the beginning on each pass
            if id[0] in line:  # search in csv file
                value = line.split(",")
                value = value[1] + " " + value[2] + "\n"
                train_file.write(id[0] + "," + value)  # write description
                break
train_file.close()
Re-open version:
train_file = open('train.csv', 'w')
listOfFiles = os.listdir("train")
for l in listOfFiles:
    dirList = glob.glob(('/train/%s/*.jpg') % (l))
    for d in dirList:
        id = d.split("/")
        id = id[-1].split(".")
        print id[0]  # ID
        searchfile = open("file.csv", "r")
        for line in searchfile:
            if id[0] in line:  # search in csv file
                value = line.split(",")
                value = value[1] + " " + value[2] + "\n"
                train_file.write(id[0] + "," + value)  # write description
                break
        searchfile.close()
train_file.close()
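If file.csv fits comfortably in memory, another option (not part of the answers above, just a sketch based on the sample line in the question) is to build a lookup dict keyed on the first column once, so each id is found without rescanning the file:

lookup = {}
with open("file.csv", "r") as searchfile:
    for line in searchfile:
        fields = line.split(",")
        lookup[fields[0]] = fields[1] + " " + fields[2] + "\n"  # id -> description

# then, inside the directory loop, instead of scanning the file:
# if id[0] in lookup:
#     train_file.write(id[0] + "," + lookup[id[0]])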


Store the contents of text file in return variable and manipulate wherever required in python?

I have the function below, and I am trying to get/store the contents of a text file into another temp file (removing unnecessary lines) while adding a special character.
But I also want that same content with a different special character the next time, and I am not able to do that. The function below creates a temp file, so to get the desired output I have to run the same function and create the file every time, which is not a good approach. Is there anything we can do, without creating a temp/extra file, to store the contents in a return variable and add whatever special character we want, multiple times?
import os
import re

def manifest():
    pathfile = "abc_12.txt"
    with open(pathfile, 'r') as firstfile, open('temp.txt', 'w') as s:
        for line in firstfile:
            if line.strip().startswith("-") or line.startswith("<"):
                print("ignore")
            elif re.search('\\S', line):
                name = str(os.path.basename(line))
                s.write("*" + name)

def contents():
    temppath = "temp.txt"
    with open(temppath, 'r') as f:
        lines = f.readlines()
        lines += lines
    return lines

manifest()
value = contents()
File abc_12.txt:
---ABC-123
nice/abc.py
xml/abc.py
<<NOP-123
bac\nice.py
abc.py
---CDEF-345
jkl.oy
abc.py
From the abc_12.txt file, I want to get back something like this:
abc.py
abc.py
nice.py
abc.py
jkl.oy
abc.py
and then manipulate it however I want, similar to the outputs below.
Output 1:
* abc.py
* abc.py
* nice.py
* abc.py
* jkl.oy
* abc.py
Output 2:
##abc.py
##abc.py
##nice.py
##abc.py
##jkl.oy
##abc.py
Maybe first you should read the file, find the names, and keep them in a list:
import os
import re

def read_data():
    results = []
    with open("abc_12.txt") as infile:
        for line in infile:
            if line.strip().startswith(("-", "<")):  # `startswith`/`endswith` can use tuple
                print("ignore:", line)
            elif re.search('\\S', line):
                name = os.path.basename(line)
                results.append(name)
    return results
And later you can use this list to create the temp file or another file:
data = read_data()

with open('temp.txt', 'w') as outfile:
    for line in data:
        outfile.write(f'* {line}')
        #print(f'* {line}', end='')

with open('other.txt', 'w') as outfile:
    for line in data:
        outfile.write(f'##{line}')
        #print(f'##{line}', end='')
EDIT:
Minimal working code.
I used io.StringIO only to simulate file in memory - so everyone can simply copy and test it.
import os
import re
import io

text = r'''---ABC-123
nice/abc.py
xml/abc.py
<<NOP-123
bac\nice.py
abc.py
---CDEF-345
jkl.oy
abc.py
'''

def read_data():
    results = []
    with io.StringIO(text) as infile:
    #with open("abc_12.txt") as infile:
        for line in infile:
            line = line.strip()
            if line:
                if line.startswith(("-", "<")):  # `startswith`/`endswith` can use tuple
                    print("ignore:", line)
                else:
                    name = os.path.basename(line)
                    results.append(name)
    return results

data = read_data()

with open('temp.txt', 'w') as outfile:
    for line in data:
        outfile.write(f'* {line}\n')
        print(f'* {line}')

with open('other.txt', 'w') as outfile:
    for line in data:
        outfile.write(f'##{line}\n')
        print(f'##{line}')
EDIT:
If you don't want to save to a file, then you still need a for loop to build the string:
data = read_data()

string_1 = ''
for line in data:
    string_1 += f'* {line}\n'

string_2 = ''
for line in data:
    string_2 += f'##{line}\n'
or to create a new list (and eventually a string):
data = read_data()

list_1 = []
for line in data:
    list_1.append(f'* {line}')

list_2 = []
for line in data:
    list_2.append(f'##{line}')

string_1 = "\n".join(list_1)
string_2 = "\n".join(list_2)

IndexError: list index out of range - Python

I'm creating a program that should create a file (.txt) based on each line of 'clouds.txt'. This is my code:
def CreateFile():
    global file_name
    f = open(file_name, "w+")
    f.write(list_email + ":")
    f.close()

def WriteInConfig():
    f = open("config/config.txt", "a")
    f.write(list_name + "\n")
    f.close()

with open("clouds.txt", "r") as f:
    list_lines = sum(1 for line in open('clouds.txt'))
    lines = f.readline()
    for line in lines:
        first_line = f.readline().strip()
        list_email = first_line.split('|')[1]  # email
        print("Email: " + list_email)
        list_pass = first_line.split('|')[2]  # pass
        print("Pass: " + list_pass)
        list_name = first_line.split('|')[3]  # name
        print(list_name)
        global file_name
        file_name = "config/." + list_name + ".txt"
        with open('clouds.txt', 'r') as fin:
            data = fin.read().splitlines(True)
        with open('clouds.txt', 'w') as fout:
            fout.writelines(data[1:])
        CreateFile()
        WriteInConfig()
The clouds.txt file looks like this:
>|clouds.n1c0+mega01#gmail.com|cwSHklDIybllCD1OD4M|Mega01|15|39.91|FdUkLiW0ThDeDkSlqRThMQ| |x
|clouds.n1c0+mega02#gmail.com|tNFVlux4ALC|Mega02|50|49.05|lq1cTyp13Bh9-hc6cZp1RQ|xxx|x
|clouds.n1c0+mega03#gmail.com|7fe4196A4CUT3V|Mega03|50|49.94|BzW7NOGmfhQ01cy9dAdlmg|xxx|xxx >
Everything works fine until 'Mega48'. There I get "IndexError: list index out of range"
>|clouds.n1c0+mega47#gmail.com|bd61t9zxcuC1Yx|Mega47|50|10|Xjff6C8mzEqpa3VcaalUuA|xxx|x
|clouds.n1c0+mega48#gmail.com|kBdnyB6i0PUyUb|Mega48|50|0|R6YfuGP2hvE-uds0ylbQtQ|xxx|x
|clouds.n1c0+mega49#gmail.com|OcAdgpS4tmSLTO|Mega49|50|28.65|xxx| >
I checked and there are no spaces/other characters. As you can see, after creating the file the program deletes the line. After the error, if I start the program again (and it starts from 'Mega47'), it doesn't show the error and everything works as planned.
Any ideas how to fix this?
I see many mistakes in your code. First, what do you want with this list_lines = sum(1 for line in open('clouds.txt'))?
You have a problem in your for loop: because you did lines = f.readline(), lines is just the first line. Then you do for line in lines, where line will be each character of the first line, and there are more characters in the first line than there are lines left in your file to read.
[edited]
You don't need to know the number of lines in the file to do a for loop. You can just do for line in f:; then you don't need to read the line again with readline, it is already in the variable line.
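A minimal sketch of that loop (assuming the '|'-separated format shown above, and skipping lines that don't have enough fields, which is what triggers the IndexError):

with open("clouds.txt", "r") as f:
    for line in f:
        fields = line.strip().split('|')
        if len(fields) < 4:  # guard against blank or malformed lines
            continue
        list_email = fields[1]  # email
        list_pass = fields[2]   # pass
        list_name = fields[3]   # name
        print("Email: " + list_email)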

How to edit a specific line in all text files in a folder with Python?

Below is my code for editing a text file.
Since Python can't just edit a line and save the file in place at the same time,
I save the previous content of the text file into a list first, then write it out.
For example, suppose there are two text files called sample1.txt and sample2.txt in the same folder.
Sample1.txt
A for apple.
Second line.
Third line.
Sample2.txt
First line.
An apple a day.
Third line.
The Python script:
import glob
import os

# search all text files which are in the same folder as the python script
path = os.path.dirname(os.path.abspath(__file__))
txtlist = glob.glob(path + '\*.txt')

for file in txtlist:
    fp1 = open(file, 'r+')
    strings = []  # create a list to store the content
    for line in fp1:
        if 'apple' in line:
            strings.append('banana\n')  # change the content and store it into the list
        else:
            strings.append(line)  # store the contents that were not changed
    fp2 = open(file, 'w+')  # rewrite the original text files
    for line in strings:
        fp2.write(line)
    fp1.close()
    fp2.close()
Sample1.txt becomes:
banana
Second line.
Third line.
Sample2.txt becomes:
First line.
banana
Third line.
That's how I edit a specific line in a text file.
My question is: is there any other method that can do the same thing,
for example using other functions or another data type rather than a list?
Thank you everyone.
Simplify it to this:
with open(fname) as f:
    content = f.readlines()

content = ['banana\n' if 'apple' in line else line for line in content]
and then write the value of content back to the file.
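For example, the write-back step might look like this (a sketch, reusing the same fname):

with open(fname, 'w') as f:
    f.writelines(content)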
Instead of putting all the lines in a list and writing them back one by one, you can read the whole file into memory, do the replacement, and write it back to the same file:
def replace_word(filename):
    with open(filename, 'r') as file:
        data = file.read()
    data = data.replace('word1', 'word2')
    with open(filename, 'w') as file:
        file.write(data)
Then you can loop through all of your files and apply this function
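For instance, a sketch of that loop, reusing the glob idea from the question (the '*.txt' pattern and current-directory assumption are mine):

import glob

for filename in glob.glob('*.txt'):
    replace_word(filename)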
The built-in fileinput module makes this quite simple:
import fileinput
import glob

with fileinput.input(files=glob.glob('*.txt'), inplace=True) as files:
    for line in files:
        if 'apple' in line:
            print('banana')
        else:
            print(line, end='')
With inplace=True, fileinput redirects print output into the file currently being processed.
import glob
import os

def replace_line(file_path, replace_table: dict) -> None:
    list_lines = []
    need_rewrite = False
    with open(file_path, 'r') as f:
        for line in f:
            flag_rewrite = False
            for key, new_val in replace_table.items():
                if key in line:
                    list_lines.append(new_val + '\n')
                    flag_rewrite = True
                    need_rewrite = True
                    break  # only replace using the first key that matches
            if not flag_rewrite:
                list_lines.append(line)
    if not need_rewrite:
        return
    with open(file_path, 'w') as f:
        f.writelines(list_lines)

if __name__ == '__main__':
    work_dir = os.path.dirname(os.path.abspath(__file__))
    txt_list = glob.glob(work_dir + '/*.txt')
    replace_dict = dict(apple='banana', orange='grape')
    for txt_path in txt_list:
        replace_line(txt_path, replace_dict)

How to read and process data from a set of text files sequentially?

I have 50 text files (namely Force1.txt, Force2.txt, ..., Force50.txt). The files look like this:
0.0000000e+000 -1.4275799e-003
2.0000000e-002 -1.1012760e-002
4.0000000e-002 -1.0298970e-002
6.0000000e-002 -8.9733599e-003
8.0000000e-002 -9.6871497e-003
1.0000000e-001 -1.2236400e-002
1.2000000e-001 -1.4479739e-002
1.4000000e-001 -1.3052160e-002
1.6000000e-001 -1.1216700e-002
1.8000000e-001 -8.6674497e-003
2.0000000e-001 -8.6674497e-003
2.2000000e-001 -1.3358070e-002
2.4000000e-001 -1.7946720e-002
2.6000000e-001 -1.9782179e-002
I wish to read the data from Force1.txt, store the data in a list of tuples, and analyze the data (the details of the analysis are not relevant to the question). Then I have to do the same with Force2.txt, Force3.txt, and so on.
Here is my attempt:
def load_data(fn):
    with open(fn) as f:
        lines = f.readlines()
        return [tuple(map(float, x)) for x in [row.split() for row in lines]]

def display_data(lst):
    return lst.__repr__().replace('[', '').replace(']', '')

pp = []
for file in os.listdir("dir"):
    if file.endswith('.txt'):
        if file.startswith('Force'):
            print os.path.join(r"dir", file)
            with open(file) as f:
                for line in f:
                    pp.append(map(float, line.split()))
            mdb.models['Model-1'].TabularAmplitude(data=pp, name='Table', smooth=SOLVER_DEFAULT, timeSpan=STEP)
I'm getting this error:
'Table', smooth=SOLVER_DEFAULT, timeSpan=STEP):
Invalid time values, expected monotonically increasing numbers
How can I solve this issue?
This code should do it:
import os

def load_data(fn):
    with open(fn) as f:
        lines = f.readlines()
        return [tuple(map(float, x)) for x in [row.split() for row in lines]]

def display_data(lst):
    return lst.__repr__().replace('[', '').replace(']', '')

dirname = r"Your dir name goes here"
for filename in os.listdir(dirname):
    if filename.endswith('.txt'):
        if filename.startswith('Force'):
            pathfile = os.path.join(dirname, filename)
            print pathfile
            pp = load_data(pathfile)
            print display_data(pp)
            mdb.models['Model-1'].TabularAmplitude(data=pp,
                                                   name='Table',
                                                   smooth=SOLVER_DEFAULT,
                                                   timeSpan=STEP)
You just need to update dirname with the name of the directory which contains the text files. Note also that pp is rebuilt from scratch for each file instead of accumulating rows across files; appending every file's rows into one list is what produced the non-monotonic time values. I also recommend not using file as a variable identifier, because file is a built-in name in Python 2; I've used filename instead.
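For the sample rows shown in the question, load_data returns a list of (time, force) tuples, for example (assuming Force1.txt holds those rows):

pp = load_data(os.path.join(dirname, "Force1.txt"))
print pp[:3]
# [(0.0, -0.0014275799), (0.02, -0.01101276), (0.04, -0.01029897)]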

Python importing files - for loops

I have two simple files that I want to open in Python and, based on a keyword, print information from the file.
file a.txt contains:
'Final
This is ready'
file b.txt contains:
'Draft
This is not ready'
I want to read these two files in, and if 'Final' appears anywhere in the txt file, print out the rest of the text (excluding the word 'Final'). My for loop is not outputting correctly:
fileList = ['a.txt','b.txt']
firstLineCheck = 'Final\n'

for filepath in fileList:
    f = open(filepath, 'r')  #openfiles
    for line in f:
        if line == firstLineCheck:
            print line
        else:
            break
I feel like this is something simple - appreciate the help
fileList = ['a.txt', 'b.txt']
firstLineCheck = 'Final\n'

for filepath in fileList:
    with open(filepath, 'r') as f:
        line = f.readline()
        while line:
            if line == firstLineCheck:
                print f.read()
            line = f.readline()
There are three faults in your code. First, you only print lines that match; second, you only trigger on lines that contain nothing but "Final"; third, you do not exclude the line containing "Final" as specified. The fix is to use a flag that records whether "Final" has been found:
fileList = ['a.txt','b.txt']
firstLineCheck = 'Final'
firstLineFound = False

for filepath in fileList:
    f = open(filepath, 'r')  #openfiles
    for line in f:
        if firstLineFound:
            print line
        elif firstLineCheck in line:
            # print line # uncomment if you want to include the final-line
            firstLineFound = True
        else:
            break
If you wanted to trigger only on lines containing nothing but "Final", you should instead use firstLineCheck = "Final\n" and elif line == firstLineCheck.
Assuming you want to print all lines after a line that consists only of your firstLineCheck, and using your code ...
fileList = ['a.txt','b.txt']
firstLineCheck = 'Final\n'

for filepath in fileList:
    f = open(filepath, 'r')  #openfiles
    do_print = False
    for line in f:
        if line == firstLineCheck:
            do_print = True
            continue
        if do_print:
            print line
Note that break takes you out of the loop, and continue will move to the next iteration.
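A tiny illustration of that difference (hypothetical lines, just to show the control flow):

for line in ['Final\n', 'This is ready\n']:
    if line == 'Final\n':
        continue  # skip this line and move on to the next iteration; break would exit the loop entirely
    print line    # only 'This is ready' gets printed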
Assuming your keyword is on the first line of the file, you can do this. This makes more sense, as the word "Final" could appear somewhere in the content of a draft.
fileList = ['a.txt','b.txt']
firstLineCheck = 'Final\n'

for filepath in fileList:
    with open(filepath, 'r') as f:
        first_line = f.readline()  # read the first line
        if first_line == firstLineCheck:
            print f.read()
Since you wanted to check whether Final is present in the first line, you could read the file as a list and see if the first element contains Final; if so, print the entire file except the first line:
fileList = ['a.txt','b.txt']
firstLineCheck = 'Final'

for filepath in fileList:
    f = open(filepath, 'r').readlines()  #openfiles
    if firstLineCheck in f[0]:
        print "".join(f[1:])
Output:
This is ready'
