Get rows from all .txt files in directory using python

I have some txt files in a directory and I need to get the last 15 lines from all of them. How could I do it using Python?
Here is the code I tried:
from os import listdir
from os.path import isfile, join

dir_path = './'
files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
out = []
for file in files:
    filedata = open(join(dir_path, file), "r").readlines()[-15:]
    out.append(filedata)
f = open(r'./fin.txt', 'w')
f.writelines(out)
f.close()
but I get the error "TypeError: writelines() argument must be a sequence of strings". I think it's because of Russian letters in the lines.

import os
from collections import deque

dir_path = '/some/path'
for filename in os.listdir(dir_path):
    filepath = os.path.join(dir_path, filename)
    # you might want to check it's actually a file here,
    # and sanity-check that it's text of a usable kind
    if not os.path.isfile(filepath):
        continue
    with open(filepath) as fin:
        last_15 = deque(fin, 15)
deque will automatically discard the oldest entry and cap the maximum size at 15, so it's an efficient way of keeping just the "last" n items.
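For example, a quick demonstration of the maxlen behaviour (plain collections.deque, nothing file-specific):
from collections import deque

d = deque([1, 2, 3], 3)  # maxlen=3: the deque is already full
d.append(4)              # appending silently drops the oldest item (1)
print(d)                 # deque([2, 3, 4], maxlen=3)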

Try this:
from os import listdir
from os.path import isfile, join

dir_path = "/path/to/folder"
for filename in listdir(dir_path):
    filepath = join(dir_path, filename)
    if isfile(filepath):  # if needed
        last_15_lines = open(filepath).readlines()[-15:]
# or, in one line:
x = [open(join(dir_path, f)).readlines()[-15:] for f in listdir(dir_path) if isfile(join(dir_path, f))]
Updated: the TypeError comes from out.append(filedata), which builds a list of lists; writelines() needs a flat sequence of strings, and += (i.e. list.extend) gives you exactly that:
lastlines = []
for file in files:
    lastlines += open(join(dir_path, file), "r").readlines()[-15:]
with open('./fin.txt', 'w') as f:
    f.writelines(lastlines)
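If the files really do contain non-ASCII text such as Russian, it is also worth being explicit about the encoding. A minimal Python 3 sketch, reusing files and dir_path from above and assuming the files are UTF-8 (swap in e.g. 'cp1251' if they are not):
lastlines = []
for file in files:
    # assumption: the input files are UTF-8; adjust the encoding if needed
    with open(join(dir_path, file), encoding='utf-8') as fh:
        lastlines += fh.readlines()[-15:]
with open('./fin.txt', 'w', encoding='utf-8') as f:
    f.writelines(lastlines)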

from os import listdir
from os.path import isfile, join

dir_path = '/usr/lib/something'
files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
for file in files:
    filedata = open(join(dir_path, file), "r").readlines()[-15:]
    # do something with the filedata

Hope this helps:
import os

current_dir = os.getcwd()
dir_objects = os.listdir(current_dir)
dict_of_last_15 = {}
for file in dir_objects:
    file_obj = open(file, 'rb')
    content = file_obj.readlines()
    file_obj.close()
    last_15_lines = content[-15:]
    dict_of_last_15[file] = last_15_lines
    print "#############: %s" % file
    print dict_of_last_15[file]
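Note that os.listdir() also returns sub-directories, and open() on a directory raises an error, so you may want to filter the listing first. A small guard (my addition, not part of the original answer):
import os

current_dir = os.getcwd()
# keep only real files; skip sub-directories and other non-file entries
dir_objects = [f for f in os.listdir(current_dir) if os.path.isfile(f)]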

Related

Python txt file operation

I have a folder with files and want to separate the files into two variables.
For example these are the files:
EXfileone.txt
fileone.txt
EXtest.txt
simple.txt
Now the thing I can't do is this (in pseudocode):
If "EX" in filename:
    add file to variable: EXFILES
If "EX" not in filename:
    add file to variable: NORMALFILES
So now:
EXFILES = [EXfileone.txt, EXtest.txt]
NORMALFILES = [fileone.txt, simple.txt]
Then I will use a for loop to do operations with the files:
for file in EXFILES:
    ...
I'm using Python 3.
You can simply use the standard library glob module to match pathnames:
import glob

EXFILES = []
NORMALFILES = []
filename_list = glob.glob("*.txt")
for filename in filename_list:
    if "EX" in filename:
        EXFILES.append(filename)
    else:
        NORMALFILES.append(filename)
Try this:
from pathlib import Path

folder = Path('/path/to/your/folder')
exfiles = list(folder.glob('EX*.txt'))
normalfiles = [f for f in folder.glob('*.txt') if not f.name.startswith('EX')]
That will give you the lists of files you wanted.
But it is better to do something like this instead:
from pathlib import Path

folder = Path('/path/to/your/folder')
for f in folder.glob('*.txt'):
    if f.name.startswith('EX'):
        ...  # do something with your EX*.txt file
    else:
        ...  # do something with your normal file
I hope it helps.
You can make each variable a list and store the names of the files in them:
from os import listdir
from os.path import isfile, join

folder_path = '/home/youruser/example'  # path to folder
# classify files
exfiles = []
normalfiles = []
for f in listdir(folder_path):
    if isfile(join(folder_path, f)):
        if f.startswith('EX'):
            exfiles.append(join(folder_path, f))
        else:
            normalfiles.append(join(folder_path, f))
for fname in exfiles:
    with open(fname) as f:
        ...  # do operations
Use the in operator; it checks whether a string contains a substring:
from os import listdir

EX_files = []
NORMAL_files = []
for file_name in listdir():
    if "EX" in file_name:
        EX_files.append(file_name)
    else:
        NORMAL_files.append(file_name)
You can also use Python's built-in filter function with a lambda:
import os

my_list = list(filter(lambda x: 'EX' in x, os.listdir(some_dir)))
my_other_list = list(filter(lambda x: 'EX' not in x, os.listdir(some_dir)))
This will give you two lists filtered with your search criterion.
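For example, with the four file names from the question (hard-coded here so the sketch is self-contained):
names = ["EXfileone.txt", "fileone.txt", "EXtest.txt", "simple.txt"]
ex = list(filter(lambda x: "EX" in x, names))
normal = list(filter(lambda x: "EX" not in x, names))
print(ex)      # ['EXfileone.txt', 'EXtest.txt']
print(normal)  # ['fileone.txt', 'simple.txt']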

Create multiple files with random names in Python and zip them

I'm a newbie in Python.
I need to create a large quantity of files with random names in Dest_Dir (my destination directory), and then zip them into one file.
Does anyone have an idea how to do that?
I managed to create files with a for loop into a specific folder, but that approach doesn't fit when I want to create a large number of files (let's say 100), and the names I created aren't random:
import os
import sys
import platform

SRC_Dir = os.path.dirname(__file__)
Dest_Dir = os.path.join(SRC_Dir, 'dest')
items = ["one", "two", "three"]
for item in items:
    with open(os.path.join(Dest_Dir, item), 'wb') as f:
        f.write("This is my first line of code")
        f.write("\nThis is my second line of code with {} the first item in my list".format(item))
        f.write("\nAnd this is my last line of code")
You could make use of the built-in tempfile module:
import os
import tempfile

for _ in range(100):
    file_descriptor, file_path = tempfile.mkstemp(".txt", "prefix-", Dest_Dir)
    file_handle = open(file_path, "wb")
    # do stuff
    os.close(file_descriptor)
    file_handle.close()
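mkstemp() already returns an open OS-level file descriptor, so instead of opening the path a second time you can wrap the descriptor directly. A small variant using os.fdopen (my sketch, same behaviour otherwise):
import os
import tempfile

fd, file_path = tempfile.mkstemp(".txt", "prefix-", "/tmp")
with os.fdopen(fd, "wb") as fh:  # wraps the existing descriptor and closes it on exit
    fh.write(b"some bytes")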
Since a comment was made about the zip part, I figured I would add that as well:
import os
import tempfile
import zipfile

new_files = []
for _ in range(10):
    file_descriptor, file_path = tempfile.mkstemp(".txt", "prefix-", "/tmp")
    file_handle = open(file_path, "wb")
    file_handle.write(b"HELLO")  # bytes, since the file is opened in binary mode
    os.close(file_descriptor)
    file_handle.close()
    new_files.append(file_path)
with zipfile.ZipFile("/tmp/zipped.zip", "w") as zipped:
    for file_path in new_files:
        zipped.write(file_path, os.path.basename(file_path))
The zipped.write arguments here assume only the filename (and not the full path) is desired for the archived name.
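If you would rather generate the random names yourself instead of letting mkstemp pick them, a minimal sketch using the standard uuid module (dest_dir is a hypothetical path and is assumed to exist already):
import os
import uuid
import zipfile

dest_dir = "/tmp/dest"  # hypothetical destination directory
paths = []
for _ in range(100):
    # uuid4().hex gives a random 32-character hex string for the file name
    path = os.path.join(dest_dir, uuid.uuid4().hex + ".txt")
    with open(path, "w") as f:
        f.write("some content\n")
    paths.append(path)
with zipfile.ZipFile(os.path.join(dest_dir, "all.zip"), "w") as zf:
    for path in paths:
        zf.write(path, os.path.basename(path))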

I have a ".txt "file which consists of various filenames and I want to search each filename in a folder where these files are actually kept

Suppose I have a text file aiq_hits.txt.
Each line in this file corresponds to a filename:
ant1.aiq
ant2.aiq
ant3.aiq
ant4.aiq
I want to match each line of my text file (ant1.aiq, ant2.aiq and so on) with the filenames present at some specific place (R:\Sample) and extract the matching files into some other place (R:\Sample\wsa).
I have an idea that I need to use functions like os.walk(), fnmatch.fnmatch() and shutil.copy(), but I am not able to implement them.
My code:
import os
import shutil
import fnmatch
with open("aiq_hits.txt","r") as in_file:
for line in in_file:
I am stuck here
import os
import shutil

sourceDir = "R:\\Sample"
targetDir = "R:\\Sample\\wsa"
existingFiles = set(f for f in os.listdir(sourceDir) if os.path.isfile(os.path.join(sourceDir, f)))
infilepath = "aiq_hits.txt"
with open(infilepath) as infile:
    for line in infile:
        fname = line.strip()
        if fname not in existingFiles:
            continue
        shutil.move(os.path.join(sourceDir, fname), os.path.join(targetDir, fname))
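One caveat (my addition): shutil.move() will fail if targetDir does not exist yet, so, continuing the snippet above, you may want to create it first:
# assumption: targetDir may not exist; create it (and any parents) before moving
if not os.path.isdir(targetDir):
    os.makedirs(targetDir)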
I hope this will suffice:
import os

def match_files(url, file_read, dest):
    f = open(file_read, 'r')
    file_list = os.listdir(url)
    print(file_list)
    saved_path = os.getcwd()
    print("Current working directory is " + saved_path)
    os.chdir(url)
    match = []
    for file_name in f:
        file_name = file_name.strip()
        if file_name in file_list:
            match.append(file_name)
            os.rename(os.path.join(url, file_name), os.path.join(dest, file_name))
    os.chdir(saved_path)
    print(match)
Here, url is the source directory from which you want to match files, file_read is the name (with path) of the file that contains the list of file names, and dest is the destination folder.
This code moves the matching files from url to dest, i.e. the files won't remain in url after running it.
Alternatively you could use the glob module, which lets you give an expression for the file name/extension and returns a list that you can loop over.
I'd use this module if the source directory can have files with the same extension that you want to exclude from being looped over.
Also, I'm assuming that the file name list is not large, so storing it in a list won't be an issue.
For example (I haven't tested the below):
from glob import glob
import os
import shutil

src = 'R:\\Sample'
dst = "R:\\Sample\\wsa"
in_file_list = "aiq_hits.txt"
list_Of_files = glob(os.path.join(src, 'ant*.aiq'))
data = []
with open(in_file_list) as reader:
    # strip the trailing newline from each line, otherwise the
    # 'file_name in data' check below would never match
    data += [line.strip() for line in reader]
for row in list_Of_files:
    file_path, file_name = os.path.split(row)
    if file_name in data:
        shutil.copy2(row, os.path.join(dst, file_name))
        # or if you want to move the file
        # shutil.move(row, os.path.join(dst, file_name))
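A side note on the "not large" assumption above: if the list ever does get big, a set gives constant-time membership checks, and only the read step changes:
with open(in_file_list) as reader:
    data = {line.strip() for line in reader}  # set: O(1) 'in' checks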

Python Recursive Hashlib

I'm having a problem computing the checksums of all the files under the /bin/* directory.
I'm implementing a HIDS in Python, so I need to compute the checksum of each file and save it, say, in a list... but my code here only returns the first checksum of the /bin/* directory.
import sys
import hashlib

path = sys.argv[1]  # PATH OF THE FILES, ex: /etc/shadow, /bin/*, etc.
with open(path, 'rb') as fh:
    md5 = hashlib.md5()
    while True:
        data = fh.read(8192)
        if not data:
            break
        md5.update(data)
print md5.hexdigest()
Any suggestions?
import sys
from os import listdir
from os.path import isfile, join
import hashlib

path = sys.argv[1]  # path to the directory to scan, e.g. /bin
files = [f for f in listdir(path) if isfile(join(path, f))]
my_files = {}
for fil in files:
    with open(join(path, fil), 'rb') as fh:  # join with path so it works from any cwd
        md5 = hashlib.md5()
        while True:
            data = fh.read(8192)
            if not data:
                break
            md5.update(data)
        my_files[fil] = md5.hexdigest()
for k, v in my_files.iteritems():
    print 'file_name is {} | hash is {}'.format(k, v)
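Two follow-up notes (my additions). First, the reason the original code printed only one checksum: the shell expands /bin/* into many arguments, and sys.argv[1] is just the first of them; passing the directory itself and listing it in Python, as above, avoids that. Second, since the title says "Recursive": listdir() does not descend into sub-directories, so if you need that, a minimal Python 3 sketch using os.walk():
import os
import hashlib

def md5_of_file(filepath, chunk_size=8192):
    # hash in chunks so large files don't need to fit in memory
    md5 = hashlib.md5()
    with open(filepath, 'rb') as fh:
        while True:
            data = fh.read(chunk_size)
            if not data:
                break
            md5.update(data)
    return md5.hexdigest()

checksums = {}
for dirpath, dirnames, filenames in os.walk('/bin'):
    for name in filenames:
        filepath = os.path.join(dirpath, name)
        checksums[filepath] = md5_of_file(filepath)
for k, v in checksums.items():
    print('file_name is {} | hash is {}'.format(k, v))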

Remove lines of files in different directories

I'd like to remove the first line and the second-to-last line of files which exist in different sub-directories under the same root directory. My code is below:
import fileinput
import sys
import os

path = "./rootDire"
for (dirpath, dirnames, files) in os.walk(path):
    f = open(file, 'r')
    lines = f.readlines()
    f.close()
    f = open(file, 'w')
    f.writelines(lines[1:-2])
    f.close()
But when it reaches a file, I get an error saying that the file, which os.walk() clearly just found, does not exist.
Correct me if it does not work. os.walk() yields bare file names in files, relative to dirpath, so you have to join each name with dirpath before opening it:
import fileinput
import sys
import os

path = "./rootDire"
for (dirpath, dirnames, files) in os.walk(path):
    for filename in files:
        filepath = os.path.join(dirpath, filename)
        f = open(filepath, 'r')
        lines = f.readlines()
        f.close()
        f = open(filepath, 'w')
        f.writelines(lines[1:-2])
        f.close()
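One caveat on the slice (my note, since the question says "the last second line"): lines[1:-2] drops the first line and the last two lines. If the final line should be kept and only the second-to-last removed, use:
# keep everything except the first line and the second-to-last line
f.writelines(lines[1:-2] + lines[-1:])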
