Load consecutively numbered files in python - python

I have YEARS of files that follow a fairly simple naming structure and these files need to be read into a program so that their data can be dumped into a SQLite DB. The files follow the format <YYYY>/<MM>/<D>.txt I have some code that gets me close but it was designed to read a file that has a constant name not one that changes.
The relevant code is:
for root, subFolders, files in os.walk(rootdir):
if fnmatch.fnmatch(file, '*.txt') in files:
with open(os.path.join(root, '<filename>'), 'r') as log:
for lines in log:

Something like this:
import os
def list_text_files(root):
"""List all text files below a root directory."""
for dirpath, dirname, filenames in os.walk(root):
for filename in filenames:
if filename.endswith('.txt'):
yield os.path.join(dirpath, filename)
def load_file(path):
"""Load a file into the database."""
with open(path) as fp:
for line in fp:
pass # code goes here
for file in list_text_files('/home/jhacker/logs'):
load_file(file)
This will load all text files below the root. If you want better filtering, you'll have to change it.

Related

Skip list of filenames from txt file with os.walk

I would like to upload files that users dump into a shared folder to an FTP site. Only certain files must be uploaded based on a pattern in the filename, and that works. I would like to avoid uploading files that have been uploaded in the past. A simple solution would be to move the files to a subdirectory once uploaded, but users whish for the files to remain where they are.
I was thinking of writing a filename to a text file when each iteration of the loops makes an update. Populating the text file works.
Excluding directories with os.walk is mentioned in many articles and I can get that to work fine, but excluding a list of filenames seems to be a bit more obscure
This is what I have so far:
import ftplib
import os
import os.path
import fnmatch
## set local path variables
dir = 'c:/Temp'
hist_path = 'C:/Temp/hist.txt'
pattern = '*SomePattern*'
## make the ftp connection and set appropriate working directory
ftp = ftplib.FTP('ftp.someserver.com')
ftp.login('someuser', 'somepassword')
ftp.cwd('somedirectory')
## make a list of previously uploaded files
hist_list = open(hist_path, 'r')
hist_content = hist_list.read()
# print(hist_content)
## loop through the files and upload them to the FTP as above
for root, dirs, files in os.walk(dir):
for fname in fnmatch.filter(files, pattern): # this filters for filenames that include the pattern
## upload each file to the ftp
os.chdir(dir)
full_fname = os.path.join(root, fname)
ftp.storbinary('STOR ' + fname, open(full_fname, 'rb'))
## add an entry for each file into the historical uploads log
f = open(hist_path, 'a')
f.write(fname + '\n')
f.close()
Any help would be appreciated

Modify all PHP files within directory and subdirectories with Python

I'm having issues scanning through a root directory and modifying all .php files that contain a certain reference. Essentially, we're looking to move our entire database. I have to find all records of certain tables and rename them appropriately. Here's the code I have so far:
import os
import re
directory = 'C:/Users/me/Desktop/wsphp'
for root, dirs, files in os.walk(directory):
for filename in files:
if filename.endswith('.php'):
print(filename)
open_file = open(filename, 'r')
read_file = open_file.read()
regex = re.compile('OLD DATABASE NAME')
read_file = regex.sub('NEW DATABASE NAME', read_file)
write_file = open(filename, 'w')
write_file.write(read_file)
My code breaks when it attempts to open the file. The problem seems to be that 'filename' refers to JUST the filename without the entire directory ('index.php' rather than 'C:/Users/me/Desktop/wsphp/Subfolder/Subfolder2/index.php'). The root directory contains a few .php files as well as a bunch of subdirectories. Is there an easier way to go about this?
As you suspected, filename is just the filename. The path to the file is stored in root, so you need to do
open_file = open(os.path.join(root, filename), 'r')

Save outputs to new directory in Python

I am trying to identify all .kml in a specific directory and save them into a new directory. Is this possible? I'm able to print the file path but I would like to use Python to copy those files to a new directory.
Here is my code so far:
import os
# traverse whole directory
for root, dirs, files in os.walk(r'C:\Users\file_path_here'):
# select file name
for file in files:
# check the extension of files
if file.endswith('.kml'):
# print whole path of files
print(os.path.join(root, file))
Try this:
import os
# traverse whole directory
for root, dirs, files in os.walk(r'C:\Users\file_path_here'):
# select file name
for each_file in files:
# check the extension of files
if each_file.endswith('.kml'):
# print whole path of files
print(os.path.join(root, file))
kml_file = open(each_file, "r")
content = kml_file.read()
file.close()
with open('newfile.kml', 'w') as f:
f.write(content)

searching and moving files using python

I have been trying to write some python code in order to get each line from a .txt file and search for a file with that name in a folder and its subfolders. After this I want to move that file in a preset destination folder.
I have tried the following code which was posted on stack overflow only but it doesn't seem to work and I am unable to figure out the problem.Any help would be highly appreciated:
import os
import shutil
def main():
destination = '/Users/jorjis/Desktop/new'
with open('/Users/jorjis/Desktop/articles.txt', 'r') as lines:
filenames_to_copy = set(line.rstrip() for line in lines)
for root, _, filenames in os.walk('/Users/jorjis/Desktop/folder/'):
for filename in filenames:
if filename in filenames_to_copy:
shutil.copy(os.path.join(root, filename), destination)
Without any debugging output (which you have now obtained) I can only guess a common pitfall of os.walk: the filenames returned in filenames are just that, filenames without any path. If your file contains filenames with paths they will never match. Use this instead:
if os.path.join(root, filename) in filenames_to_copy:
shutil.copy(os.path.join(root, filename), destination)

Python - Need to loop through directories looking for TXT files

I am a total Python Newb
I need to loop through a directory looking for .txt files, and then read and process them individually. I would like to set this up so that whatever directory the script is in is treated as the root of this action. For example if the script is in /bsepath/workDir, then it would loop over all of the files in workDir and its children.
What I have so far is:
#!/usr/bin/env python
import os
scrptPth = os.path.realpath(__file__)
for file in os.listdir(scrptPth)
with open(file) as f:
head,sub,auth = [f.readline().strip() for i in range(3)]
data=f.read()
#data.encode('utf-8')
pth = os.getcwd()
print head,sub,auth,data,pth
This code is giving me an invalid syntax error and I suspect that is because os.listdir does not like file paths in standard string format. Also I dont think that I am doing the looped action right. How do I reference a specific file in the looped action? Is it packaged as a variable?
Any help is appriciated
import os, fnmatch
def findFiles (path, filter):
for root, dirs, files in os.walk(path):
for file in fnmatch.filter(files, filter):
yield os.path.join(root, file)
Use it like this, and it will find all text files somewhere within the given path (recursively):
for textFile in findFiles(r'C:\Users\poke\Documents', '*.txt'):
print(textFile)
os.listdir expects a directory as input. So, to get the directory in which the script resides use:
scrptPth = os.path.dirname(os.path.realpath(__file__))
Also, os.listdir returns just the filenames, not the full path.
So open(file) will not work unless the current working directory happens to be the directory where the script resides. To fix this, use os.path.join:
import os
scrptPth = os.path.dirname(os.path.realpath(__file__))
for file in os.listdir(scrptPth):
with open(os.path.join(scrptPth, file)) as f:
Finally, if you want to recurse through subdirectories, use os.walk:
import os
scrptPth = os.path.dirname(os.path.realpath(__file__))
for root, dirs, files in os.walk(scrptPth):
for filename in files:
filename = os.path.join(root, filename)
with open(filename, 'r') as f:
head,sub,auth = [f.readline().strip() for i in range(3)]
data=f.read()
#data.encode('utf-8')

Categories

Resources