import os, sys
import os.path, time

path = os.getcwd()

def file_info(directory):
    """Return one [name, last_access, ctime] row per entry in *directory*.

    Times are human-readable strings from time.ctime(). Note st_ctime is
    creation time on Windows but metadata-change time on Unix.
    """
    file_list = []
    for name in os.listdir(directory):
        st = os.stat(os.path.join(directory, name))
        file_list.append([name, time.ctime(st.st_atime), time.ctime(st.st_ctime)])  # [file, most_recent_access, created]
    return file_list

# print() call: the original used the Python-2 print statement, which is a
# SyntaxError on Python 3.
print(file_info(path))
Problem
How can I show each list item on a new line, in a nice format?
How can I sort the file/directory list based on the last-modified time?
How can I sort the file/directory list based on the creation date?
Here is the program with some nice printing using the format function:
import os
import time

path = os.getcwd()

def file_info(directory):
    """Collect [name, last_access, ctime] rows for every entry in *directory*."""
    def _row(name):
        st = os.stat(os.path.join(directory, name))
        return [name, time.ctime(st.st_atime), time.ctime(st.st_ctime)]
    return [_row(name) for name in os.listdir(directory)]

# Print each entry on its own line using str.format column alignment.
file_list = file_info(path)
for item in file_list:
    line = "Name: {:<20} | Last Accessed: {:>20} | Date Created: {:>20}".format(item[0], item[1], item[2])
    print(line)
Here is some code with a sort function being used on the accessed time. The code is not optimized but it is very readable and you should be able to understand it.
import os
import time

path = os.getcwd()

def file_info(directory, sortLastModifiedOrNaw=False):
    """Return [name, last_access, ctime] rows, optionally sorted by access time.

    When sortLastModifiedOrNaw is True the rows are ordered oldest access
    first. The original insert-at-front/append-at-end trick only compared
    each entry against a running maximum, which does not produce a sorted
    list in general; a real key sort does.
    """
    keyed = []
    for name in os.listdir(directory):
        st = os.stat(os.path.join(directory, name))
        keyed.append((st.st_atime, [name, time.ctime(st.st_atime), time.ctime(st.st_ctime)]))  # [file, most_recent_access, created]
    if sortLastModifiedOrNaw:
        keyed.sort(key=lambda pair: pair[0])  # ascending: oldest access first
    return [row for _, row in keyed]

file_list = file_info(path)
print("Unsorted Example")
for item in file_list:
    line = "Name: {:<20} | Date Last Accessed: {:>20} | Date Created: {:>20}".format(item[0], item[1], item[2])
    print(line)

print("\nSorted example using last modified time")
file_list = file_info(path, sortLastModifiedOrNaw=True)
for item in file_list:
    line = "Name: {:<20} | Date Last Accessed: {:>20} | Date Created: {:>20}".format(item[0], item[1], item[2])
    print(line)
Sample output:
Unsorted Example
Name: .idea | Date Last Accessed: Sun Jan 3 21:13:45 2016 | Date Created: Sun Jan 3 21:13:14 2016
Name: blahblah.py | Date Last Accessed: Sun Jan 3 21:13:48 2016 | Date Created: Sun Jan 3 21:13:48 2016
Name: testhoe1.py | Date Last Accessed: Sun Jan 3 19:09:57 2016 | Date Created: Sun Jan 3 18:52:06 2016
Sorted example using last modified time
Name: testhoe1.py | Date Last Accessed: Sun Jan 3 19:09:57 2016 | Date Created: Sun Jan 3 18:52:06 2016
Name: .idea | Date Last Accessed: Sun Jan 3 21:13:45 2016 | Date Created: Sun Jan 3 21:13:14 2016
Name: blahblah.py | Date Last Accessed: Sun Jan 3 21:13:48 2016 | Date Created: Sun Jan 3 21:13:48 2016
Happy optimizing! #If you change line 12 atime to ctime it will sort based on create-time.
Related
let me start off by saying, its possible I am attempting to use a terrible data structure.
I'm trying to get information out of a large text dump and can't seem to get it sorted right. The data looks like the sample below, but is much longer.
r1 r01
2020 77.7
2020 76.0
2020 77.7
r2 r02
2020 74.7
2020 74.0
2020 76.7
r2 r03
2020 74.2
2020 74.1
2020 76.8
r1 r04
2020 74.6
2020 75.6
2020 75.8
I thought I could end up getting it into a data structure like..
r1_list = [
r01: [77.7,76.0,76.0,76.0],
r04: [69.5,4,4,5],
]
r2_list = [
r02: [1,2,3,4],
r04: [3,4,4,5],
]
Then I could loop through the lists, and check the mean etc of the values per device.
Here is what ive been trying
import re

r1_list = []
r2_list = []
current_device = False
device_type = False
current_reading = False

# Compile each line-shape pattern once instead of re-compiling (and
# re-matching) on every call. The reading pattern escapes the decimal
# point: the original "\d+.\d+" let "." match any character.
_R1_RE = re.compile(r'^(r1)\s(r\d+)$')
_R2_RE = re.compile(r'^(r2)\s(r\d+)$')
_READING_RE = re.compile(r'^(\d+)\s(\d+\.\d+)$')

def matchr1(line):
    """Return the device id from an 'r1 rNN' header line, or False."""
    m = _R1_RE.match(line)
    return m.group(2) if m else False

def matchr2(line):
    """Return the device id from an 'r2 rNN' header line, or False."""
    m = _R2_RE.match(line)
    return m.group(2) if m else False

def matchReading(line):
    """Return the reading value from a 'YYYY NN.N' data line, or False."""
    m = _READING_RE.match(line)
    return m.group(2) if m else False

def main():
    """Parse data.txt into r1_list / r2_list (one dict per reading).

    Each match* helper is now called once per line instead of twice.
    """
    global current_device, device_type, current_reading
    with open("data.txt") as f:
        for line in f:
            device = matchr1(line)
            if device:
                current_device = device
                device_type = "r1"
            device = matchr2(line)
            if device:
                current_device = device
                device_type = "r2"
            reading = matchReading(line)
            if reading:
                current_reading = reading
            if current_reading:
                if device_type == "r1":
                    r1_list.append({current_device: [current_reading]})
                if device_type == "r2":
                    r2_list.append({current_device: [current_reading]})
                current_reading = False
    print(r1_list)
    print(r2_list)

# Guarded so importing this module (e.g. for testing the match helpers)
# does not require data.txt to exist.
if __name__ == "__main__":
    main()
What I get
[{'r01': ['77.7']}, {'r01': ['76.0']}, {'r01': ['77.7']}, {'r04': ['74.6']}, {'r04': ['75.6']}, {'r04': ['75.8']}]
[{'r02': ['74.7']}, {'r02': ['74.0']}, {'r02': ['76.7']}, {'r03': ['74.2']}, {'r03': ['74.1']}, {'r03': ['76.8']}]
There are two separate steps here:
Looking at rows starting with "r" and finding there their data should be inserted.
Looking at other rows and inserting them into the data structure.
Here's what I came up with:
#!/usr/bin/env python

data = """r1 r01
2020 77.7
2020 76.0
2020 77.7
r2 r02
2020 74.7
2020 74.0
2020 76.7
r2 r03
2020 74.2
2020 74.1
2020 76.8
r1 r04
2020 74.6
2020 75.6
2020 75.8"""

result = {}
current = None  # list that the following reading lines append into

for row in data.splitlines():
    left, right = row.split()
    if row.startswith("r"):
        # Header row ("r1 r01"): descend into (or create) result[left][right]
        # and remember that list; setdefault returns the existing value when
        # the key is already present.
        group = result.setdefault(left, {})
        current = group.setdefault(right, [])
    else:
        # Data row ("2020 77.7"): the second column is the value; append it
        # to the list selected by the most recent header row.
        current.append(right)

assert result == {
    "r1": {
        "r01": ["77.7", "76.0", "77.7"],
        "r04": ["74.6", "75.6", "75.8"]
    },
    "r2": {
        "r02": ["74.7", "74.0", "76.7"],
        "r03": ["74.2", "74.1", "76.8"]
    },
}
I try to get the data from pyOWM package using city name but in some cases because of city typo error
not getting data & it breaks the process.
I want to get the weather data using lat-long but don't know how to set function for it.
Df1:
-----
User City State Zip Lat Long
-----------------------------------------------------------------------------
A Kuala Lumpur Wilayah Persekutuan 50100 5.3288907 103.1344397
B Dublin County Dublin NA 50.2030506 14.5509842
C Oconomowoc NA NA 53.3640384 -6.1953066
D Mumbai Maharashtra 400067 19.2177166 72.9708833
E Mratin Stredocesky kraj 250 63 40.7560585 -5.6924778
.
.
.
----------------------------------
Code:
--------
import time
from tqdm.notebook import tqdm
import pyowm
from pyowm.utils import config
from pyowm.utils import timestamps
import pandas as pd  # was missing: pd.DataFrame() is used in Step-3 below

cities = Df1["City"].unique().tolist()
cities1 = cities[:5]

owm = pyowm.OWM('bee8db7d50a4b777bfbb9f47d9beb7d0')
mgr = owm.weather_manager()

# Step-1: define the lists where the data is saved (one value per city).
list_wind_Speed = []
list_tempreture = []
list_max_temp = []
list_min_temp = []
list_humidity = []
list_pressure = []
list_city = []
list_cloud = []
list_status = []
list_rain = []

# Step-2: fetch data, sleeping after every 60 requests to respect the
# OpenWeatherMap rate limit.
j = 0
for city in tqdm(cities1):
    j += 1  # was "j=+1" (i.e. j = +1), which reset the counter every pass
    if j >= 60:
        # Pause, then still fetch this city: the original else-branch slept
        # but silently skipped the 60th city entirely.
        time.sleep(60)
        j = 0
    observation = mgr.weather_at_place(city)
    l = observation.weather
    list_city.append(city)
    list_wind_Speed.append(l.wind()['speed'])
    list_tempreture.append(l.temperature('celsius')['temp'])
    list_max_temp.append(l.temperature('celsius')['temp_max'])
    list_min_temp.append(l.temperature('celsius')['temp_min'])
    list_humidity.append(l.humidity)
    list_pressure.append(l.pressure['press'])
    list_cloud.append(l.clouds)
    # list_status was never appended to, so the Step-3 column assignment
    # would fail on a length mismatch. detailed_status matches the
    # "moderate rain" style values in the expected output -- TODO confirm.
    list_status.append(l.detailed_status)
    list_rain.append(l.rain)

# Step-3: build a blank data frame and store the data in it.
df2 = pd.DataFrame()
df2["City"] = list_city
df2["Temp"] = list_tempreture
df2["Max_Temp"] = list_max_temp
df2["Min_Temp"] = list_min_temp
df2["Cloud"] = list_cloud
df2["Humidity"] = list_humidity
df2["Pressure"] = list_pressure
df2["Status"] = list_status
df2["Rain"] = list_rain  # was list_status (copy/paste slip)
df2
From the above code, I get the result as below,
City | Temp |Max_Temp|Min_Temp|Cloud |Humidity|Pressure |Status | Rain
------------------------------------------------------------------------------------------
Kuala Lumpur|29.22 |30.00 |27.78 | 20 |70 |1007 | moderate rain | moderate rain
Dublin |23.12 |26.43 |22.34 | 15 |89 | 978 | cloudy | cloudy
...
Now because of some city typo error processes getting stop,
Looking for an alternate solution of it and try to get weather data from Lat-Long but don't know how to set function for pass lat & long column data.
Df1 = {'User':['A','B','C','D','E'],
'City':['Kuala Lumpur','Dublin','Oconomowoc','Mumbai','Mratin'],
'State':['Wilayah Persekutuan','County Dublin',NA,'Maharashtra','Stredocesky kraj'],
'Zip': [50100,NA,NA,400067,250 63],
'Lat':[5.3288907,50.2030506,53.3640384,19.2177166,40.7560585],
'Long':[103.1344397,14.5509842,-6.1953066,72.9708833,-5.6924778]}
# Try to use this code to get wather data
# one_call_obs = owm.weather_at_coords(52.5244, 13.4105).weather
# one_call_obs.current.humidity
Expected Result
--------------
User | City | Lat | Long | Temp | Cloud | Humidity | Pressure | Rain | Status
-----------------------------------------------------------------------------
Catch the error if a city is not found, parse the lat/lon from the dataframe. Use that lat/lon to create a bounding box and use weather_at_places_in_bbox to get a list of observations in that area.
import time
from tqdm.notebook import tqdm
import pyowm
from pyowm.utils import config
from pyowm.utils import timestamps
import pandas as pd
from pyowm.commons.exceptions import NotFoundError, ParseAPIResponseError

df1 = pd.DataFrame({'City': ('Kuala Lumpur', 'Dublin', 'Oconomowoc', 'Mumbai', 'C airo', 'Mratin'),
                    'Lat': ('5.3288907', '50.2030506', '53.3640384', '19.2177166', '30.22', '40.7560585'),
                    'Long': ('103.1344397', '14.5509842', '-6.1953066', '72.9708833', '31', '-5.6924778')})

cities = df1["City"].unique().tolist()

owm = pyowm.OWM('bee8db7d50a4b777bfbb9f47d9beb7d0')
mgr = owm.weather_manager()

for city in cities:
    try:
        # Normal path: look the city up by name.
        observation = mgr.weather_at_place(city)
    except NotFoundError:
        # Name lookup failed (typo etc.): fall back to a 0.3-degree bounding
        # box anchored at the row's lat/lon. .iloc[0] extracts the scalar;
        # calling float() directly on a one-element Series is deprecated in
        # modern pandas.
        lat_top = float(df1.loc[df1['City'] == city, 'Lat'].iloc[0])
        lon_left = float(df1.loc[df1['City'] == city, 'Long'].iloc[0])
        lat_bottom = lat_top - 0.3
        lon_right = lon_left + 0.3
        try:
            observations = mgr.weather_at_places_in_bbox(lon_left, lat_bottom, lon_right, lat_top, zoom=5)
            observation = observations[0]
        except ParseAPIResponseError:
            # Report the row's own coordinates (the original printed
            # lon_right, which is the shifted box edge, not the longitude
            # taken from the dataframe).
            raise RuntimeError(f"Couldn't find {city} at lat: {lat_top} / lon: {lon_left}, try tweaking the bounding box")
    weather = observation.weather
    temp = weather.temperature('celsius')['temp']
    print(f"The current temperature in {city} is {temp}")
I am trying to run a python cron script that will batch convert all video files inside a directory.
The python script is based on MoviePy and is working seamlessly when triggered manually.
But when triggered as a cron job, it's not running/working as expected.
I have set a Shell Script inside which i have kept the Python script for any error crash handling.
I am calling the shell script from the cron.
Here are my codes:
Crontab -e:
# Edit this file to introduce tasks to be run by cron.
#
# Each task to run has to be defined through a single line
# indicating with different fields when the task will be run
# and what command to run for the task
#
# To define the time you can provide concrete values for
# minute (m), hour (h), day of month (dom), month (mon),
# and day of week (dow) or use '*' in these fields (for 'any').
#
# Notice that tasks will be started based on the cron's system
# daemon's notion of time and timezones.
#
# Output of the crontab jobs (including errors) is sent through
# email to the user the crontab file belongs to (unless redirected).
#
# For example, you can run a backup of all your user accounts
# at 5 a.m every week with:
# 0 5 * * 1 tar -zcf /var/backups/home.tgz /home/
#
# For more information see the manual pages of crontab(5) and cron(8)
#
# m h dom mon dow command
# * * * * * /usr/bin/python3 /var/www/html/cgi-bin/in.py /var/www/html/cgi-bin/log.txt
# * * * * * /bin/bash -c "/var/www/html/cgi-bin/cron.sh"
* * * * * /bin/bash /var/www/html/cgi-bin/cron.sh > /var/www/html/cgi-bin/log.txt 2>&1
Cron.sh is my Shell File that Cron will run.
#!/bin/sh
# Wrapper for the cron job. Two fixes for the "runs manually but not from
# cron" symptom:
#  - cd into the script's directory first: cron starts jobs in $HOME, and
#    in.py refers to in/, aud/, out/ with relative paths.
#  - capture stderr (2>&1) as well, so Python tracebacks actually reach the
#    log instead of disappearing (the log showed only the date lines).
cd /var/www/html/cgi-bin || exit 1
echo "$(date)" >> /var/www/html/cgi-bin/log.txt
/usr/bin/python3 /var/www/html/cgi-bin/in.py >> /var/www/html/cgi-bin/log.txt 2>&1
Here is my Python File - In.py :
import moviepy.editor as mp
import sys, getopt
import requests
from datetime import datetime
from random import randint
import os, os.path, random
import shutil
rand_aud = str(randint(0, len(os.listdir('aud/'))))
inputfile = ''
keypass = ''
def main(argv):
inputfile = ''
keypass = ''
try:
opts, args = getopt.getopt(argv,"hi:k:",["ifile=","key="])
except getopt.GetoptError:
print ('in.py -i <inputfile> -k <user_key>')
sys.exit(2)
for opt, arg in opts:
if opt == '-h':
print ('in.py -i <inputfile> -k <user_key>')
sys.exit()
elif opt in ("-i", "--ifile"):
inputfile = arg
elif opt in ("-k", "--key"):
keypass = arg
# print(inputfile)
# print(keypass)
directory = r'in/'
for filename in os.listdir(directory):
inp = os.path.join(directory, filename)
#if inp == '':
# inp = 'in/f.mp4'
now = datetime.now()
value = randint(0, 10)
dt_stamp = now.strftime("%d%m%Y%H%M%S") + str(value)
out = 'out/' + keypass + '_' + dt_stamp + '.webm'
# aud = 'aud/' + rand_aud +'.WAV'
aud = 'aud/' + random.choice(os.listdir("aud/"))
print(out)
logu = 'logo.png'
video = mp.VideoFileClip(inp)
# if video.rotation == 90:
video = video.resize(video.size[::-1])
video.rotation = 0
logo = (mp.ImageClip(logu)
.set_duration(video.duration)
.resize(height=50) # if you need to resize...
.margin(right=8, top=8, opacity=0) # (optional) logo-border padding
.set_pos(("right","top")))
if aud != '':
audioclip = mp.AudioFileClip(aud).set_duration(video.duration)
new_audioclip = mp.CompositeAudioClip([audioclip])
video.audio = new_audioclip
final = mp.CompositeVideoClip([video, logo])
final.write_videofile(out)
if os.path.exists(inp):
os.remove(inp)
url = 'https://get-data.worlds.com.au?Auth=SSSOSXSQSSSXSQSXSOSSSOSS&Sender_key=' + keypass + '&handle=stream_response'
# print ('Posting Data To ' + url)
userdata = {"loc": out, "stamp": dt_stamp, "Auth": keypass, "handle": "stream"}
resp = requests.post(url)
# files = {'file': open(out, 'rb')}
# userdata = {"loc": out, "stamp": dt_stamp, "Auth": keypass, "handle": "stream"}
# resp = requests.post(url, files=files, params=userdata)
# r = requests.get(url, headers={"Auth":keypass, "handle":"stream"})
# print ('Call Response:')
# print (resp)
if __name__ == "__main__":
main(sys.argv[1:])
Here is the log.txt file. Please note that the "MoviePy Done" run was the one I performed manually; the rest are cron calls. The entries that contain only a timestamp show that the cron job is running but the Python script isn't:
Mon Apr 12 08:38:17 UTC 2021
out/_120420210838183.webm
Moviepy - Building video out/_120420210838183.webm.
MoviePy - Writing audio in _120420210838183TEMP_MPY_wvf_snd.ogg
MoviePy - Done.
Moviepy - Writing video out/_120420210838183.webm
Moviepy - Done !
Moviepy - video ready out/_120420210838183.webm
out/_120420210838374.webm
Moviepy - Building video out/_120420210838374.webm.
MoviePy - Writing audio in _120420210838374TEMP_MPY_wvf_snd.ogg
MoviePy - Done.
Moviepy - Writing video out/_120420210838374.webm
Moviepy - Done !
Moviepy - video ready out/_120420210838374.webm
Mon Apr 12 08:39:01 UTC 2021
Mon Apr 12 08:40:01 UTC 2021
Mon Apr 12 08:41:01 UTC 2021
Mon Apr 12 08:42:01 UTC 2021
Mon Apr 12 08:43:01 UTC 2021
Mon Apr 12 08:44:01 UTC 2021
Mon Apr 12 08:45:01 UTC 2021
Mon Apr 12 08:46:01 UTC 2021
Mon Apr 12 08:47:01 UTC 2021
Mon Apr 12 08:48:02 UTC 2021
Mon Apr 12 08:49:01 UTC 2021
Mon Apr 12 08:50:01 UTC 2021
Mon Apr 12 08:51:01 UTC 2021
Mon Apr 12 08:52:01 UTC 2021
Mon Apr 12 08:53:01 UTC 2021
Mon Apr 12 08:57:01 UTC 2021
Mon Apr 12 08:58:01 UTC 2021
Mon Apr 12 08:59:01 UTC 2021
Mon Apr 12 09:00:01 UTC 2021
Mon Apr 12 09:01:01 UTC 2021
Mon Apr 12 09:02:01 UTC 2021
Mon Apr 12 09:03:01 UTC 2021
Mon Apr 12 09:04:01 UTC 2021
Mon Apr 12 09:05:01 UTC 2021
Mon Apr 12 09:06:01 UTC 2021
Mon Apr 12 09:07:01 UTC 2021
Mon Apr 12 09:08:01 UTC 2021
Mon Apr 12 09:09:01 UTC 2021
Mon Apr 12 09:10:01 UTC 2021
Mon Apr 12 09:11:01 UTC 2021
Mon Apr 12 09:12:02 UTC 2021
Mon Apr 12 09:13:01 UTC 2021
Mon Apr 12 09:14:01 UTC 2021
Mon Apr 12 09:15:01 UTC 2021
Mon Apr 12 09:16:01 UTC 2021
Mon Apr 12 09:17:01 UTC 2021
I have problem with the for loop in Python. I want to sum these data based on time and location, without pandas. This data is in the MySQL database (mysql workbench):
Time No_of_people Location
----------------------------------------
07:00 20 Liberty City
07:15 25 Liberty City
07:30 20 Liberty City
07:45 30 Liberty City
08:00 21 Liberty City
...
07:00 10 San Andreas
07:15 15 San Andreas
07:30 20 San Andreas
07:45 25 San Andreas
08:00 30 San Andreas
Now I want it to be like:
Time No_of_people Location
----------------------------------------
07:00 116 Liberty City
08:00 120 Liberty City
...
07:00 100 San Andreas
This is currently what I have done:
views.py:
def getData(request):
    """Fetch hourly counts from the local API and return them as JSON.

    Per-location, per-hour totals (hours 7..23) are printed to stdout in
    the same order as before; the response itself still contains the raw,
    un-aggregated lists.
    """
    api = 'http://localhost:8000/api/myData/'
    response = requests.get(api)
    myData = response.json()

    time = []
    no_of_people = []
    location = []
    for hourly in myData:
        time.append(hourly['time'])
        no_of_people.append(hourly['no_of_people'])
        location.append(hourly['location'])

    hour = list(range(7, 24))

    # First-seen order of locations, preserved for the print loop below.
    uniqueLocation = []
    for loc in location:
        if loc not in uniqueLocation:
            uniqueLocation.append(loc)

    # Total people per (location, hour) in ONE pass. The original rescanned
    # the entire dataset for every location/hour pair (O(L*H*N)) and also
    # shadowed the builtin sum() with its accumulator variable.
    totals = {}
    for index, t in enumerate(time):
        key = (location[index], int(t.split(":")[0]))
        totals[key] = totals.get(key, 0) + no_of_people[index]

    for uniqueIndex in uniqueLocation:
        for x in hour:
            print(str(totals.get((uniqueIndex, x), 0)))

    json_obj = {
        "time": time,
        "no_of_people": no_of_people,
        "location": location
    }
    return JsonResponse(data=json_obj)
You want to group by the location, therefore I suggest you aim for this format, which is easier to visualize, and then try to build the table output from there (for each city, for each time, print hour and people/hr)
[
{'location' : 'Liberty City', 'times': [{'hour' : '7:00', 'people' : 116}, ...]},
...
]
When working with almost any database, try to create a class per object (row, table, bucket, relationship, (insert database term here), etc). You can then isolate logic here rather than clutter the main function
class Location:
    """Accumulates people-counts per hour for one named location.

    self.times is a list of single-entry {hour: people} dicts, kept in
    first-seen hour order.
    """

    def __init__(self, name):
        self.name = name
        self.times = list()

    def __str__(self):
        # One "hour<TAB>people<TAB>name" row per stored hour bucket.
        rows = []
        for entry in self.times:
            for hour in entry.keys():
                rows.append('{}\t{}\t{}'.format(hour, entry[hour], self.name))
        return '\n'.join(rows)

    def add_time(self, hour, people):
        """Add *people* to the bucket for *hour*, creating it if missing."""
        for entry in self.times:
            if hour in entry:
                entry[hour] += people
                return
        # Hour not seen before: start a new bucket at the end of the list.
        self.times.append({hour: people})
With that in place, use a dictionary to group on the location value and you should be able to print them in the end
locations = dict()
for record in myData:
    # Normalise "07:15" -> "07:00" so every reading buckets by whole hour.
    hour = record['time'][:2] + ':00'
    people = int(record['no_of_people'])
    place = record['location']
    # Reuse the existing Location for this place, or start a fresh one.
    entry = locations.get(place, Location(place))
    entry.add_time(hour, people)
    locations[place] = entry

for entry in locations.values():
    print(entry)
Output
07:00 95 Liberty City
08:00 21 Liberty City
07:00 70 San Andreas
08:00 30 San Andreas
I'm working on text file processing using Python.
I've got a text file (ctl_Files.txt) which has the following content/ or similar to this:
------------------------
Changeset: 143
User: Sarfaraz
Date: Tuesday, April 05, 2011 5:34:54 PM
Comment:
Initial add, all objects.
Items:
add $/Systems/DB/Expences/Loader
add $/Systems/DB/Expences/Loader/AAA.txt
add $/Systems/DB/Expences/Loader/BBB.txt
add $/Systems/DB/Expences/Loader/CCC.txt
Check-in Notes:
Code Reviewer:
Performance Reviewer:
Reviewer:
Security Reviewer:
------------------------
Changeset: 145
User: Sarfaraz
Date: Thursday, April 07, 2011 5:34:54 PM
Comment:
edited objects.
Items:
edit $/Systems/DB/Expences/Loader
edit $/Systems/DB/Expences/Loader/AAA.txt
edit $/Systems/DB/Expences/Loader/AAB.txt
Check-in Notes:
Code Reviewer:
Performance Reviewer:
Reviewer:
Security Reviewer:
------------------------
Changeset: 147
User: Sarfaraz
Date: Wednesday, April 06, 2011 5:34:54 PM
Comment:
Initial add, all objects.
Items:
delete, source rename $/Systems/DB/Expences/Loader/AAA.txt;X34892
rename $/Systems/DB/Expences/Loader/AAC.txt.
Check-in Notes:
Code Reviewer:
Performance Reviewer:
Reviewer:
Security Reviewer:
------------------------
To process this file I wrote the following code:
#Tags - used for spliting the information
tag1 = 'Changeset:'
tag2 = 'User:'
tag3 = 'Date:'
tag4 = 'Comment:'
tag5 = 'Items:'
tag6 = 'Check-in Notes:'

#opening and reading the input file
#In path to input file use '\' as escape character
with open("C:\\Users\\md_sarfaraz\\Desktop\\ctl_Files.txt", "r") as myfile:
    val = myfile.read().replace('\n', ' ')

# Split the whole text on each tag ONCE up front. The original called
# val.split(tagN) ten times inside the loop, re-scanning the entire file
# contents for every changeset (quadratic in file size).
by_tag1 = val.split(tag1)
by_tag2 = val.split(tag2)
by_tag3 = val.split(tag3)
by_tag4 = val.split(tag4)
by_tag5 = val.split(tag5)

#counting the occurence of any one of the above tag
#As count will be same for all the tags
occurence = len(by_tag1) - 1

#initializing row variable
row = ""

# Build one pipe-delimited line per changeset: the text between tagN and
# tagN+1 is the value for tagN.
for count in range(1, occurence + 1):
    row += (by_tag1[count].split(tag2)[0].strip() + '|'
            + by_tag2[count].split(tag3)[0].strip() + '|'
            + by_tag3[count].split(tag4)[0].strip() + '|'
            + by_tag4[count].split(tag5)[0].strip() + '|'
            + by_tag5[count].split(tag6)[0].strip() + '\n')

#opening and writing the output file (with-statement closes it even on
#error; the original also shadowed the builtin name "file")
#In path to output file use '\' as escape character
with open("C:\\Users\\md_sarfaraz\\Desktop\\processed_ctl_Files.txt", "w+") as out_file:
    out_file.write(row)
and got the following result/File (processed_ctl_Files.txt):
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader add $/Systems/DB/Expences/Loader/AAA.txt add $/Systems/DB/Expences/Loader/BBB.txt add $/Systems/DB/Expences/Loader/CCC.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader edit $/Systems/DB/Expences/Loader/AAA.txt edit $/Systems/DB/Expences/Loader/AAB.txt
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|delete, source rename $/Systems/DB/Rascal/Expences/AAA.txt;X34892 rename $/Systems/DB/Rascal/Expences/AAC.txt.
But, I want the result like this:
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader
add $/Systems/DB/Expences/Loader/AAA.txt
add $/Systems/DB/Expences/Loader/BBB.txt
add $/Systems/DB/Expences/Loader/CCC.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader
edit $/Systems/DB/Expences/Loader/AAA.txt
edit $/Systems/DB/Expences/Loader/AAB.txt
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|delete, source rename $/Systems/DB/Rascal/Expences/AAA.txt;X34892
rename $/Systems/DB/Rascal/Expences/AAC.txt.
or it would be great if we can get results like this :
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader/AAA.txt
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader/BBB.txt
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader/CCC.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader/AAA.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader/AAB.txt
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|delete, source rename $/Systems/DB/Rascal/Expences/AAA.txt;X34892
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|rename $/Systems/DB/Rascal/Expences/AAC.txt.
Let me know how I can do this. Also, I'm very new to Python so please ignore if I've written some lousy or redundant code. And help me to improve this.
This solution is not as short and probably not as effective as the answer utilizing regular expressions, but it should be quite easy to understand. The solution does make it easier to use the parsed data because each section data is stored into a dictionary.
ctl_file = "ctl_Files.txt" # path of source file
processed_ctl_file = "processed_ctl_Files.txt" # path of destination file

#Tags - used for spliting the information
changeset_tag = 'Changeset:'
user_tag = 'User:'
date_tag = 'Date:'
comment_tag = 'Comment:'
items_tag = 'Items:'
checkin_tag = 'Check-in Notes:'
section_separator = "------------------------"

changesets = []  # one dict per "------"-separated section

#open and read the input file
with open(ctl_file, 'r') as read_file:
    first_section = True
    changeset_dict = {}
    items = []
    comment_stage = False
    items_stage = False
    # Read one line at a time
    for line in read_file:
        # Check which tag matches the current line and store the data to
        # the matching key in the dictionary.
        if changeset_tag in line:
            # split(":", 1) keeps everything after the FIRST colon only.
            changeset_dict[changeset_tag] = line.split(":", 1)[1].strip()
        elif user_tag in line:
            changeset_dict[user_tag] = line.split(":", 1)[1].strip()
        elif date_tag in line:
            # The date value itself contains colons ("5:34:54 PM"), so the
            # original split(":")[1] silently dropped the time portion.
            changeset_dict[date_tag] = line.split(":", 1)[1].strip()
        elif comment_tag in line:
            comment_stage = True
        elif items_tag in line:
            # Entering the item list also ends the comment block; without
            # this, every item line overwrote the stored comment because
            # comment_stage was still True in the else-branch below.
            comment_stage = False
            items_stage = True
        elif checkin_tag in line:
            pass # not implemented due to example file not containing any data
        elif section_separator in line: # new section
            if first_section:
                first_section = False
                continue
            if items_stage:
                # Flush items even when no blank line precedes the separator.
                changeset_dict[items_tag] = items
                items_stage = False
            changesets.append(changeset_dict)
            changeset_dict = {}
            items = []
            # Reset stages just in case
            comment_stage = False
        elif not line.strip(): # empty line
            if items_stage:
                changeset_dict[items_tag] = items
                items_stage = False
            comment_stage = False
        else:
            if comment_stage:
                changeset_dict[comment_tag] = line.strip() # Only works for one line comment
            elif items_stage:
                items.append(line.strip())

#open and write to the output file: one row per changeset, with follow-up
#items indented to line up under the first item.
with open(processed_ctl_file, 'w') as write_file:
    for changeset in changesets:
        row = "{0}|{1}|{2}|{3}|".format(changeset[changeset_tag], changeset[user_tag], changeset[date_tag], changeset[comment_tag])
        distance = len(row)
        items = changeset[items_tag]
        join_string = "\n" + distance * " "
        row += join_string.join(items) + "\n"
        write_file.write(row)
Also, try to use variable names which describes its content. Names like tag1, tag2, etc. does not say much about the variable content. This makes code difficult to read, especially when scripts gets longer. Readability might seem unimportant in most cases, but when re-visiting old code it takes much longer to understand what the code does with non describing variables.
I would start by extracting the values into variables. Then create a prefix from the first few tags. You can count the number of characters in the prefix and use that for the padding. When you get to items, append the first one to the prefix and any other item can be appended to padding created from the number of spaces that you need.
# keywords used in the tag "Items: "
# (whitespace-split item text is re-assembled as: run of keywords + one
#  path/parameter token per output row)
keywords = ['add', 'delete', 'edit', 'source', 'rename']

# NOTE(review): this fragment relies on `val`, `tag1`..`tag6` and `row`
# being defined earlier (see the full script above) -- it is not standalone.
#passing the count - occurence to the loop
for cs in val.split(tag1)[1:]:
    # Each field is the text between its tag and the next tag.
    changeset = cs.split(tag2)[0].strip()
    user = cs.split(tag2)[1].split(tag3)[0].strip()
    date = cs.split(tag3)[1].split(tag4)[0].strip()
    comment = cs.split(tag4)[1].split(tag5)[0].strip()
    items = cs.split(tag5)[1].split(tag6)[0].strip().split()
    notes = cs.split(tag6)
    prefix = '{0}|{1}|{2}|{3}'.format(changeset, user, date, comment)
    # Width of the prefix; later rows are padded with this many spaces so
    # their item column lines up under the first row's item.
    space_count = len(prefix)
    i = 0
    while i < len(items):
        # if we are printing the first item, add it to the other text
        if i == 0:
            pref = prefix
        # otherwise create padding from spaces
        else:
            pref = ' '*space_count
        # add all keywords: collect the run of consecutive keyword tokens
        # starting at i; j stops on the first non-keyword token (the path).
        words = ''
        for j in range(i, len(items)):
            if items[j] in keywords:
                words += ' ' + items[j]
            else:
                break
        if i >= len(items): break
        row += '{0}|{1} {2}\n'.format(pref, words, items[j])
        # advance past the keyword run plus the path token it ended on
        i += j - i + 1 # increase by the number of keywords + the param
This seems to do what you want, but I am not sure if this is the best solution. Maybe it is better to process the file line by line and print the values straight to the stream?
You can use a regular expression to search for 'add', 'edit' etc.
import re

#Tags - used for spliting the information
tag1 = 'Changeset:'
tag2 = 'User:'
tag3 = 'Date:'
tag4 = 'Comment:'
tag5 = 'Items:'
tag6 = 'Check-in Notes:'

#opening and reading the input file
#In path to input file use '\' as escape character
with open("wibble.txt", "r") as myfile:
    val = myfile.read().replace('\n', ' ')

#counting the occurence of any one of the above tag
#As count will be same for all the tags
occurence = val.count(tag1)

# Break before each item keyword and pad so items align in a column.
# The original pattern used character classes -- "[edit]|[add]|..." --
# which match a SINGLE letter (e, d, i, t, ...), not the whole word;
# a plain alternation of the words is what was intended. The pattern is
# also a raw string now ("\s" in a normal string only works by accident).
_ITEM_BREAK = re.compile(r"\s\s+(edit|add|delete|rename)")

#initializing row variable
row = ""
prevlen = 0

#passing the count - occurence to the loop
for count in range(1, occurence + 1):
    row += ((val.split(tag1)[count].split(tag2)[0]).strip() + '|'
            + (val.split(tag2)[count].split(tag3)[0]).strip() + '|'
            + (val.split(tag3)[count].split(tag4)[0]).strip() + '|'
            + (val.split(tag4)[count].split(tag5)[0]).strip() + '|')
    # Pad continuation lines to the width of this changeset's prefix.
    distance = len(row) - prevlen
    # NOTE(review): the original terminated rows with '\r'; '\n' is more
    # likely intended, but kept as-is to preserve output byte-for-byte.
    row += _ITEM_BREAK.sub(r"\n" + r" " * distance + r"\1",
                           (val.split(tag5)[count].split(tag6)[0])) + '\r'
    prevlen = len(row)

#opening and writing the output file (with-statement closes the handle;
#the original shadowed the builtin "file" and never closed it explicitly)
#In path to output file use '\' as escape character
with open("wobble.txt", "w+") as out_file:
    out_file.write(row)