Here is my code:
filenames = []
data = []
found = False
axconfig = open('axcfgpasww.dat', "r")
dictionnary = open("hello.txt", "r")
output_value = "bonjour"

for line in axconfig:
    for word in dictionnary:
        element = word.split("=")[0]
        if element in axconfig:
            # old_value = line.split("=")
            # data.append(line.replace(old_value, output_value+'\n'))
            # found = True
            # else:
            #     data.append(line)
            print(element)

if not found:
    print('No match found')

# Create a new file, from data array
with open("olivier.txt", 'w') as outfile:
    for line in data:
        outfile.write(line)
The dynamic value is "element".
I have a .txt file that contains keywords, and I want to look up "element" against the values inside that .txt file.
But my code doesn't work.
It only writes the first line of axcfgpasww.dat to olivier.txt,
and it also prints "No match found".
Thanks guys!
Edit:
axcfgpasww.dat
T72_BANK_IDENTIFIER_CODE_3=SOAPFR22
T72_ISSUER_COUNTRY_CODE_3=FR
T72_MERCHANT_ID_3=
T72_VAS_SCHEME_IDENTIFIER_3=
hello.txt
T72_VAS_SERVICE_IDENTIFIER_7=
T72_VAS_SCHEME_IDENTIFIER_3=
The problem is with the line:
if element in axconfig:
try instead:
if element in line.split("="):
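For context, a minimal sketch of the original loop with that change applied (reading hello.txt into a list first is my own addition, since an open file object can only be iterated once):

axconfig = open('axcfgpasww.dat', "r")
with open("hello.txt", "r") as dictionnary:
    keywords = [word.split("=")[0] for word in dictionnary]  # keyword part of each hello.txt line

for line in axconfig:
    for element in keywords:
        if element in line.split("="):  # the suggested fix: compare against the split config line
            print(element)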
For my convenience, I have considered both as text files.
The code below will match each line of axconfig against hello.txt, print whatever is matched, and also store that value in a variable called "value".
filenames = []
data = []
found = False
value = ""
axconfig = open('axcfgpasww.txt', "r")
lines = axconfig.readlines()
dictionnary = open("hello.txt", "r")
output_value = "bonjour"

for line in lines:
    match = line.split("=")[0]
    for word in dictionnary:
        element = word.split("=")[0]
        if element == match:
            print(element)
            value = element
I am trying to search multiple text files for the text "1-2", "2-3", and "3-H", which occur in the last field of the lines of text that start with "play".
An example of the text file is shown below.
id,ARI201803290
version,2
info,visteam,COL
info,hometeam,ARI
info,site,PHO01
play,1,0,lemad001,22,CFBBX,HR/78/F
play,1,0,arenn001,20,BBX,S7/L+
play,1,0,stort001,12,SBCFC,K
play,1,0,gonzc001,02,SS>S,K
play,1,1,perad001,32,BTBBCX,S9/G
play,1,1,polla001,02,CSX,S7/L+.1-2
play,1,1,goldp001,32,SBFBBB,W.2-3;1-2
play,1,1,lambj001,00,X,D9/F+.3-H;2-H;1-3
play,1,1,avila001,31,BC*BBX,31/G.3-H;2-3
play,2,0,grayj003,12,CC*BS,K
play,2,1,dysoj001,31,BBCBX,43/G
play,2,1,corbp001,31,CBBBX,43/G
play,4,1,avila001,02,SC1>X,S8/L.1-2
For the text file above, I would like the output to be '4' since there are 4 occurrences of "1-2","2-3" and "3-H" in total.
The code I have so far is below; however, I'm not sure where to start with writing the code to do this.
import os

input_folder = 'files'  # path of folder containing the multiple text files

# create a list with file names
data_files = [os.path.join(input_folder, file) for file in
              os.listdir(input_folder)]

# open csv file for writing
csv = open('myoutput.csv', 'w')

def write_to_csv(line):
    print(line)
    csv.write(line)

j = 0  # initialise as 0
count_of_plate_appearances = 0  # initialise as 0

for file in data_files:
    with open(file, 'r') as f:  # use context manager to open files
        for line in f:
            lines = f.readlines()
            i = 0
            while i < len(lines):
                temp_array = lines[i].rstrip().split(",")
                if temp_array[0] == "id":
                    j = 0
                    count_of_plate_appearances = 0
                    game_id = temp_array[1]
                    awayteam = lines[i+2].rstrip().split(",")[2]
                    hometeam = lines[i+3].rstrip().split(",")[2]
                    date = lines[i+5].rstrip().split(",")[2]
                    for j in range(i+46, i+120, 1):  # only check for plate appearances when temp_array[0] == "id"
                        temp_array2 = lines[j].rstrip().split(",")  # create new array to check for plate appearances
                        if temp_array2[0] == "play" and temp_array2[2] == "1":  # plate appearance occurs when these are true
                            count_of_plate_appearances = count_of_plate_appearances + 1
                            # print(count_of_plate_appearances)
                    output_for_csv2 = (game_id, date, hometeam, awayteam, str(count_of_plate_appearances))
                    print(output_for_csv2)
                    csv.write(','.join(output_for_csv2) + '\n')
                    i = i + 1
                else:
                    i = i + 1
            j = 0
            count_of_plate_appearances = 0
    # quit()
csv.close()
Any suggestions on how I can do this? Thanks in advance!
You can use regex. I put your text in a file called file.txt.
import re

a = ['1-2', '2-3', '3-H']            # What you want to count
find_this = re.compile('|'.join(a))  # Make search string

count = 0
with open('file.txt', 'r') as f:
    for line in f.readlines():
        count += len(find_this.findall(line))  # Each findall returns the list of things found

print(count)  # 7
Or a shorter solution (credit to wjandrea for suggesting the use of a generator):
import re

a = ['1-2', '2-3', '3-H']            # What you want to count
find_this = re.compile('|'.join(a))  # Make search string

with open('file.txt', 'r') as f:
    count = sum(len(find_this.findall(line)) for line in f)

print(count)  # 7
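A side note (my own addition, not part of the original answer): if the substrings you want to count ever contain regex metacharacters, it is safer to escape each one before joining them into the pattern, for example:

import re

a = ['1-2', '2-3', '3-H']
find_this = re.compile('|'.join(re.escape(s) for s in a))  # escape each literal before joining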
I'm using this function to search a site to see if a particular item I'm interested in is on sale. It first grabs the HTML from the page, then searches for an item I'm interested in. When it finds the item, it adds a number of the following lines (dictated by rangenum) to the variable 'endresult'. It then searches for the keyword ("sale") in endresult, at which point I'd like it to notify me whether the keyword is present or not.
When I print endresult, the output contains the keyword, but the if statement at the very end of the function always prints "keyword is missing" despite this, and I can't work out why.
def bargainscraper(self, website, item, keyword, rangenum):
    request = urllib.request.Request(website)
    response = urllib.request.urlopen(request)
    data = response.read()
    html = str(data)
    data1 = html2text.html2text(html)
    fw = open('result1.txt', 'w')
    fw.write(str(data1))
    fw.close()
    with open('result1.txt', 'r') as f:
        for line in f:
            if item in line:
                for x in range(rangenum):
                    endresult = str(f.readline())
                    print(endresult)
    if keyword in endresult:
        print("keyword is present")
    else:
        print("keyword is missing")
You possibly need to concatenate endresult instead of overwriting it, with something like: endresult += str(f.readline()) (notice the "+" before the "=").
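As a rough sketch of that idea (initialising endresult to an empty string before the loop is my addition):

endresult = ""                      # start empty so the matched lines can accumulate
for x in range(rangenum):
    endresult += str(f.readline())  # append each line instead of replacing the previous one

if keyword in endresult:
    print("keyword is present")
else:
    print("keyword is missing")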
I found that writing endresult to a file within the for loop, then searching that file for the keyword outside of the loop, was the answer I was looking for:
def bargainscraper(self, website, item, keyword, rangenum):
    request = urllib.request.Request(website)
    response = urllib.request.urlopen(request)
    data = response.read()
    html = str(data)
    data1 = html2text.html2text(html)
    fw = open('result1.txt', 'w')
    fw.write(str(data1))
    fw.close()
    with open('result1.txt', 'r') as f:
        for line in f:
            if item in line:
                for x in range(rangenum):
                    endresult = str(f.readline())
                    # the 'a' switch is used to append
                    with open('result2.txt', 'a') as n:
                        n.write(endresult)
    # This is outside of the for loop as otherwise it will iterate for each line of the rangenum
    if keyword in open('result2.txt').read():
        print("keyword is present")
    else:
        print("keyword is missing")
Most of you are thinking this is a duplicate, but it is not. What I'm trying to achieve: say there is a master string like the one below, with a couple of file names mentioned in it. We need to open those files and check whether any other files are included in them; if so, we need to copy that content into the line where we fetched that particular text.
Master String:
Welcome
How are you
file.txt
everything alright
signature.txt
Thanks
file.txt
ABCD
EFGH
tele.txt
tele.txt
IJKL
signature.txt
SAK
Output:
Welcome
How are you
ABCD
EFGH
IJKL
everything alright
SAK
Thanks
for msplit in [stext.split('\n')]:
    for num, items in enumerate(stext, 1):
        if items.strip().startswith("here is") and items.strip().endswith(".txt"):
            gmsf = open(os.path.join(os.getcwd()+"\txt", items[8:]), "r")
            gmsfstr = gmsf.read()
            newline = items.replace(items, gmsfstr)
How can I join these replaced items back into the same string format?
Also, any idea how to re-run the same function until there are no more ".txt" references? Once the join is done, there might be other ".txt" entries inside a ".txt".
Thanks for your help in advance.
A recursive approach that works with any level of file name nesting:
from os import linesep

def get_text_from_file(file_path):
    with open(file_path) as f:
        text = f.read()
    return SAK_replace(text)

def SAK_replace(s):
    lines = s.splitlines()
    for index, l in enumerate(lines):
        if l.endswith('.txt'):
            lines[index] = get_text_from_file(l)
    return linesep.join(lines)
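A hypothetical usage (my own example), assuming file.txt, tele.txt and signature.txt sit in the working directory and hold the contents shown in the question:

master = """Welcome
How are you
file.txt
everything alright
signature.txt
Thanks"""

print(SAK_replace(master))  # nested .txt references are expanded recursively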
You can try:
s = """Welcome
How are you
here is file.txt
everything alright
here is signature.txt
Thanks"""
data = s.split("\n")
match = ['.txt']
all_matches = [s for s in data if any(xs in s for xs in match)]
for index, item in enumerate(data):
if item in all_matches:
data[index] ="XYZ"
data = "\n".join(data)
print data
Output:
Welcome
How are you
XYZ
everything alright
XYZ
Thanks
Added new requirement:
def file_obj(filename):
    fo = open(filename, "r")
    s = fo.read()  # read the whole file as a single string so it can be split on newlines
    data = s.split("\n")
    match = ['.txt']
    all_matches = [s for s in data if any(xs in s for xs in match)]
    for index, item in enumerate(data):
        if item in all_matches:
            file_obj(item)
            data[index] = "XYZ"
    data = "\n".join(data)
    print data

file_obj("first_filename")
We can create a temporary file object and keep the replaced lines in that temporary file object, and once every line is processed we can write the new content back to the original file. The temporary file will be deleted automatically once it exits the 'with' statement.
import tempfile
import re

file_pattern = re.compile(ur'(((\w+)\.txt))')
original_content_file_name = 'sample.txt'
"""
sample.txt should have this content.

Welcome
How are you
here is file.txt
everything alright
here is signature.txt
Thanks
"""
replaced_file_str = None

def replace_file_content():
    """
    replace the file content using temporary file object.
    """
    def read_content(file_name):
        # matched file name is read and returned back for replacing.
        content = ""
        with open(file_name) as fileObj:
            content = fileObj.read()
        return content

    # read the file and keep the replaced text in temporary file object (tempfile object will be deleted automatically).
    with open(original_content_file_name, 'r') as file_obj, tempfile.NamedTemporaryFile() as tmp_file:
        for line in file_obj.readlines():
            if line.strip().startswith("here is") and line.strip().endswith(".txt"):
                file_path = re.search(file_pattern, line).group()
                line = read_content(file_path) + '\n'
            tmp_file.write(line)
        tmp_file.seek(0)
        # assign the replaced value to this variable
        replaced_file_str = tmp_file.read()

    # replace with new content to the original file
    with open(original_content_file_name, 'w+') as file_obj:
        file_obj.write(replaced_file_str)

replace_file_content()
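One caveat (my note, not part of the original answer): this snippet targets Python 2; the ur'' prefix is invalid in Python 3, and NamedTemporaryFile opens in binary mode by default there, so writing str lines would fail. A rough Python 3 adjustment of the two affected lines might look like:

file_pattern = re.compile(r'(((\w+)\.txt))')       # plain raw string, no "ur" prefix
tmp_file = tempfile.NamedTemporaryFile(mode='w+')  # text mode so str lines can be written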
I have a list, abbreviations, filled with string objects. I am trying to call .index with a string that is in my list. When I call the .index method with that string, I get ValueError: 'LING' is not in list, when it clearly is in the list.
My code:
for item in abbreviations:
    print item
print abbreviations.index("LING")
Why does 'LING' not exist when it clearly does? I have added the rest of my code below, which searches 'abbreviations' for the index of a string. I am baffled: "LING" is clearly in my abbreviations list.
EDIT (Additional Code):
import csv

myfile = open("/Users/it/Desktop/Classbook/classAbrevs.csv", "rU")
lines = [tuple(row) for row in csv.reader(myfile)]

longSubjectNames = []
abbreviations = []
masterAbrevs = []

for item in lines:
    longSubjectNames.append(item[0])
    abbreviations.append(item[1])

with open("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile:
    masterSchedule = tuple(open("/Users/it/Desktop/Classbook/masterClassList.txt", 'r'))

for masterline in masterSchedule:
    masterline.strip()
    masterSplitLine = masterline.split("|")
    subjectAbrev = ""
    if masterSplitLine[0] != "STATUS":
        subjectAbrev = ''.join([i for i in masterSplitLine[2] if not i.isdigit()])
        masterAbrevs.append(subjectAbrev)

finalAbrevs = []
for subject in masterAbrevs:
    if (subject[-1] == 'W') and (subject[-2:] != 'UW'):
        subject = subject[:-1]
    finalAbrevs.append(subject)

x = 0
for item in abbreviations:
    print item
print abbreviations.index("LING")

for item in finalAbrevs:
    if masterSplitLine[0] != "STATUS":
        concat = abbreviations.index(str(finalAbrevs[x]).strip())
        print "The abbreviation for " + str(item) + " is: " + longSubjectNames[concat]
    x = x + 1
The output of:
masterAbrevs = []
for item in lines:
    longSubjectNames.append(item[0])
    abbreviations.append(item[1])

print '-'.join(abbreviations)
is:
ACA-ACCY-AFST-AMST-ANAT-ANTH-APSC-ARAB-AH-FA-ASTR-BIOC-BISC-BME-BMSC-BIOS-BADM-CHEM-CHIN-CE-CLAS-CCAS-COMM-CSCI-CFA-CNSL-CPED-DNSC-EALL-ECON-EDUC-ECE-EHS-ENGL-EAP-EMSE-ENRP-EPID-EXSC-FILM-FINA-FORS-FREN-GEOG-GEOL-GER-GREK-HCS-HSCI-HLWL-HSML-HEBR-HIST-HOMP-HONR-HDEV-HOL-HSSJ-ISTM-IDIS-IAD-INTD-IAFF-IBUS-ITAL-JAPN-JSTD-KOR-LATN-LAW-LSPA-LING -MGT-MKTG-MBAD-MATH-MAE-MED-MICR-MMED-MSTD-MUS-NSC-ORSC-PSTD-PERS-PHAR-PHIL-PT-PA-PHYS-PMGT-PPSY-PSC-PORT-PSMB-PSYD-PSYC-PUBH-PPPA-REL-SEAS-SMPA-SLAV-SOC-SPAN-SPED-SPHR-STAT-SMPP-SUST-TRDA-TSTD-TURK-UW-WLP-WSTU
Traceback (most recent call last):
File "/Users/it/Desktop/Classbook/sortClasses.py", line 25, in <module>
with open ("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as anything:
IOError: [Errno 2] No such file or directory: '/Users/it/Desktop/Classbook/masterClassList.txt'
myfile = open("/Users/it/Desktop/Classbook/classAbrevs.csv", "rU")
lines = [tuple(row) for row in csv.reader(myfile)]

longSubjectNames = []
abbreviations = []
masterAbrevs = []

for item in lines:
    longSubjectNames.append(item[0])
    abbreviations.append(item[1])

with open("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile:
The problem is here:
with open("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile:
You defined myfile earlier, here:
myfile = open("/Users/it/Desktop/Classbook/classAbrevs.csv", "rU")
So abbreviations = [] is actually not taking data from classAbrevs.csv, because it's taking data from masterClassList.txt once you redefine myfile with this line:
with open("/Users/it/Desktop/Classbook/masterClassList.txt", "r") as myfile:
That's why your string is not in that list. Also, in this part:
for item in lines:
    longSubjectNames.append(item[0])
    abbreviations.append(item[1])
are you sure item[1] has all of the strings that you want?
I also tried your code, copy-pasted as-is, and here is the result:
The problem is that, in the output you got, "LING\t" appears in your list, not "LING".
Running this, I get the desired index:
abbreviations.index("LING\t")
71
To correct this, there are many ways to strip the \t; I'm showing one of them:
abbreviations.append(item[1].strip())
With this correction, item[1] will have the \t stripped before it is appended to your abbreviations list.
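For illustration (my own example, not from the answer): list.index compares strings exactly, so trailing whitespace makes the lookup fail:

abbrevs = ["LAW", "LSPA", "LING\t", "MGT"]
# abbrevs.index("LING")                            # would raise ValueError: 'LING' is not in list
print(abbrevs.index("LING\t"))                     # 2, matches only with the exact trailing tab
print([a.strip() for a in abbrevs].index("LING"))  # 2, after stripping whitespace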
The code below is supposed to look up the first column (key) from a file, dict_file, and replace the first column of another file, fr, with the value of that key found in dict_file. It also keeps dict_file as an updated dictionary for future lookups.
Every time the code is run, it initializes a dictionary from that dict_file. If it finds a new email address in the other file, it adds it to the bottom of dict_file.
According to my understanding it should work fine, because if it doesn't find a # symbol it assigns looking_for the value "Dummy#dummy.com", and Dummy#dummy.com should be appended to the bottom of dict_file.
But for some reason, I keep getting newlines and blank lines appended along with the other new emails at the end of dict_file. I can't have blanks and newlines written to the end of dict_file.
Why is this happening? What's wrong in the code below? My brain is about to explode! Any help will be greatly appreciated!
#!/usr/bin/python
import sys

d = {}
line_list = []
alist = []

f = open(sys.argv[3], 'r')  # Map file
for line in f:
    alist = line.split()
    key = alist[0]
    value = alist[1]
    d[str(key)] = str(value)
    alist = []
f.close()

fr = open(sys.argv[1], 'r')  # source file
fw = open(sys.argv[2]+"/masked_"+sys.argv[1], 'w')  # target file

for line in fr:
    columns = line.split("|")
    looking_for = columns[0]  # this is what we need to search
    if looking_for in d:
        # by default, iterating over a dictionary will return keys
        if not looking_for.find("#"):
            looking_for == "Dummy#dummy.com"
            new_line = d[looking_for]+'|'+'|'.join(columns[1:])
            line_list.append(new_line)
        else:
            new_line = d[looking_for]+'|'+'|'.join(columns[1:])
            line_list.append(new_line)
    else:
        new_idx = str(len(d)+1)
        d[looking_for] = new_idx
        kv = open(sys.argv[3], 'a')
        kv.write("\n"+looking_for+" "+new_idx)
        kv.close()
        new_line = d[looking_for]+'|'+'|'.join(columns[1:])
        line_list.append(new_line)

fw.writelines(line_list)
Here is the dict_file:
WHATEmail#SIMPLE.COM 223
SamHugan#CR.COM 224
SAMASHER#CATSTATIN.COM 225
FAKEEMAIL#SLOW.com 226
SUPERMANN#MYMY.COM 227
Here is the fr file that gets the first column turned into the id from the dict_file lookup:
WHATEmail#SIMPLE.COM|12|1|GDSP
FAKEEMAIL#SLOW.com|13|7|GDFP
MICKY#FAT.COM|12|1|GDOP
SUPERMANN#MYMY.COM|132|1|GUIP
MONITOR|132|1|GUIP
|132|1|GUIP
00 |12|34|GUILIGAN
Firstly, you need to ignore blank lines in your initial dictionary read; otherwise you will get an index-out-of-range error when you run this script again. Do the same when you read via the fr object, to avoid entering nulls. Move your email check further out so it applies to every line, and do a simple check for the "#" using the find method. Then you're good to go.
Try the below. This should work:
#!/usr/bin/python
import sys

d = {}
line_list = []
alist = []

f = open(sys.argv[3], 'r')  # Persisted Dictionary File
for line in f:
    line = line.strip()
    if line == "":
        continue
    alist = line.split()
    key = alist[0]
    value = alist[1]
    d[str(key)] = str(value)
    alist = []
f.close()

fr = open(sys.argv[1], 'r')  # source file
fw = open(sys.argv[2]+"/masked_"+sys.argv[1], 'w')  # Target Directory Location

for line in fr:
    line = line.strip()
    if line == "":
        continue
    columns = line.strip().split('|')
    if columns[0].find("#") > 1:
        looking_for = columns[0]  # this is what we need to search
    else:
        looking_for = "Dummy#dummy.com"
    if looking_for in d:
        # by default, iterating over a dictionary will return keys
        new_line = d[looking_for]+'|'+'|'.join(columns[1:])
        line_list.append(new_line)
    else:
        new_idx = str(len(d)+1)
        d[looking_for] = new_idx
        kv = open(sys.argv[3], 'a')
        kv.write(looking_for+" "+new_idx+'\n')
        kv.close()
        new_line = d[looking_for]+'|'+'|'.join(columns[1:])
        line_list.append(new_line)

fw.writelines(line_list)
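For reference (my note, not part of the answer): the script expects three positional arguments, in this order: the source file (sys.argv[1]), the target directory (sys.argv[2]), and the persisted dictionary file (sys.argv[3]). A hypothetical run, with example file names, would be:

python mask_emails.py fr_file.txt output_dir dict_file.txt

which would write output_dir/masked_fr_file.txt and append any newly seen keys to dict_file.txt.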