Data reformatting failing using Python

I have a cust_peformat_test.csv file with this content:
AV-IM-1-13991730,6,2014-06-01 00:10,0.96
AV-IM-1-13991730,6,2014-06-01 00:15,0.92
AV-IM-1-13991730,6,2014-06-01 00:20,0.97
AV-IM-1-13991731,6,2014-06-01 00:10,1.96
AV-IM-1-13991731,6,2014-06-01 00:15,1.92
AV-IM-1-13991731,6,2014-06-01 00:20,1.97
AV-IM-1-13991732,6,2014-06-01 00:10,2.96
AV-IM-1-13991732,6,2014-06-01 00:15,2.92
AV-IM-1-13991732,6,2014-06-01 00:20,2.97
I wrote a Python script to reformat this file and generate two more files whose content looks like this:
File-1: custpower.csv:
# file...... Recorder file
# date...... Thu Mar 12 14:35:32 2015
# user...... Sri
# host...... (null)
# group..... None
# property.. Avista Measurements
# limit.....
# interval..
# timestamp
2014-06-01 00:15,0.92,1.92,2.92
2014-06-01 00:20,0.97,1.97,2.97
2014-06-01 00:10,0.96,1.96,2.96,
File-2: powersensornames.csv:
AV-IM-1-13991730,AV-IM-1-13991731,AV-IM-1-13991732,
and that is exactly what I want. It works perfectly, unless the data in my cust_peformat_test.csv is not well organized, i.e. when it looks like this:
AV-IM-1-13991730,6,2014-06-01 00:10,0.96
AV-IM-1-13991730,6,2014-06-01 00:15,0.92
AV-IM-1-13991731,6,2014-06-01 00:15,1.92
AV-IM-1-13991731,6,2014-06-01 00:20,1.97
AV-IM-1-13991730,6,2014-06-01 00:20,0.97
AV-IM-1-13991731,6,2014-06-01 00:10,1.96
AV-IM-1-13991732,6,2014-06-01 00:10,2.96
AV-IM-1-13991732,6,2014-06-01 00:15,2.92
AV-IM-1-13991732,6,2014-06-01 00:20,2.97
That messes up the content of custpower.csv, which then looks like this:
# file...... Recorder file
# date...... Thu Mar 12 14:35:32 2015
# user...... Sri
# host...... (null)
# group..... None
# property.. Avista Measurements
# limit.....
# interval..
# timestamp
2014-06-01 00:15,1.96,2.96,,,2.92
2014-06-01 00:20,,,,,2.97
which is not correct. It's been a pain trying to figure out what is wrong with my code. This is my code:
'''
Created on Jun 18, 2015
@author: sg
'''
#import datetime
#import csv
'''
#cust_power_real_recorder = open("Custmeters_preformat.csv",'w')
cust_power_real_reader = csv.reader(open("SPU123_customer_meters_power_kw.csv",'r'),delimiter=',')
spu123_meters=[]
for i,line in enumerate(cust_power_real_reader):
    if i>0:
        #d = datetime.datetime.strptime(line[6],'%Y-%m-%d %H:%M:%S')
        if line[5][8:] not in spu123_meters:
            spu123_meters.append(line[5][8:])
        #cust_power_real_recorder.writelines([line[5],',6,',line[6],',',line[10],'\n'])
#cust_power_real_recorder.close()
'''
'''open('data.csv','w').write(
"""\
13986513,6,6/1/2014 12:00:00 AM,248.7
13986513,6,6/1/2014 12:00:05 AM,248.4
13986513,6,6/1/2014 12:00:10 AM,249
13986513,6,6/1/2014 12:00:15 AM,249.3
13986513,6,6/1/2014 12:00:20 AM,249.3
13986513,6,6/1/2014 12:00:25 AM,249.3
13986513,6,6/30/2014 11:55:00 PM,249.3
13986534,6,6/1/2014 12:00:00 AM,249
13986534,6,6/1/2014 12:00:05 AM,249
13986534,6,6/1/2014 12:00:10 AM,249.3
13986534,6,6/1/2014 12:00:15 AM,249.6
13986534,6,6/30/2014 11:55:00 PM,249.7\
""")
'''

header = '''# file...... Recorder file
# date...... Thu Mar 12 14:35:32 2015
# user...... Sri
# host...... (null)
# group..... None
# property.. Avista Measurements
# limit.....
# interval..'''

# DECLARE THE FILE YOU WANT TO SPIT OUT
#testvolt = open("testvolt.csv",'w')
#testvolt.writelines([header,'\n','# timestamp\n'])
testpower = open("custpower.csv",'w')
testpower.writelines([header,'\n','# timestamp\n'])


class ReadSensorLines(object):

    def __init__(self, filename):
        sensor_offsets = {}
        sensors = []
        readfp = open(filename, "rb")
        readfp.readline()  # skip header
        # find start of each sensor
        # use readline not iteration so that tell offset is right
        offset = readfp.tell()
        sensor = ''
        while True:
            line = readfp.readline()
            if not line:
                break
            next_sensor = line.split(',', 1)[0]
            if next_sensor != sensor:
                if sensor:
                    sensors.append(sensor)
                    next_offset = readfp.tell()
                    sensor_offsets[sensor] = [offset, next_offset - offset]
                    sensor = next_sensor
                    offset = next_offset
                else:
                    # setup for first sensor
                    sensor = next_sensor
        if next_sensor:
            sensors.append(next_sensor)
            sensor_offsets[next_sensor] = [offset, readfp.tell() - offset]
        self.readfp = readfp
        self.sensor_offsets = sensor_offsets
        self.sensors = sensors

    def read_sensor(self, sensorname):
        pos_data = self.sensor_offsets[sensorname]
        self.readfp.seek(pos_data[0])
        line = self.readfp.readline(pos_data[1])
        pos_data[0] += len(line)
        pos_data[1] -= len(line)
        return line

    @property
    def data_remains(self):
        return any(pos_data[1] for pos_data in self.sensor_offsets.itervalues())

    def close(self):
        self.readfp.close()


sensor_lines = ReadSensorLines("cust_peformat_test.csv")  # READ THE CSV FILE YOU WANT TO MODIFY
# In the above, delete.csv is for voltage recorder generation and Custmeters_preformat.csv is for power recorder generation
AllSensors = []
while sensor_lines.data_remains:
    row = []
    for sensor in sensor_lines.sensors:
        if sensor not in AllSensors:
            AllSensors.append(sensor)
        sensor_line = sensor_lines.read_sensor(sensor)
        if sensor_line:
            _, _, date, volts = sensor_line.strip().split(',')
            row.append(volts)
        else:
            row.append('')
    row.insert(0, date)
    #print ','.join(row)
    #print row
    '''
    #below if else is ONLY for voltage csv files
    if '2014-06-' not in row[0]:
        row[0] = str(datetime.datetime.strptime(row[0],'%m/%d/%Y %H:%M'))
    else:
        row[0] = str(datetime.datetime.strptime(row[0],'%Y-%m-%d %H:%M:%S'))
    #voltage csv file if else case ENDS
    '''
    #testvolt.writelines([','.join(row),'\n'])  # this is for voltage files
    testpower.writelines([','.join(row),'\n'])  # this is for power files

sensornames = open("powersensornames.csv",'w')  # THIS IS THE FILE WHERE THE HEADERS ARE WRITTEN
# In the above, sensornames.csv is for voltage recorder and powersensornames.csv is for power recorder generation
for asensor in AllSensors:
    sensornames.writelines([asensor,','])
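The underlying issue is that ReadSensorLines stores a single contiguous byte range per sensor, so it assumes each sensor's rows sit together in the file. For comparison, here is a minimal sketch (not the original script) that builds the same two output files from a dictionary keyed by timestamp, so the row order in cust_peformat_test.csv no longer matters; the file names follow the question, and the metadata header block is omitted for brevity:

import csv

sensors = []        # sensor names in first-appearance order
by_timestamp = {}   # timestamp -> {sensor: value}

with open("cust_peformat_test.csv") as f:
    for sensor, _, timestamp, value in csv.reader(f):
        if sensor not in sensors:
            sensors.append(sensor)
        by_timestamp.setdefault(timestamp, {})[sensor] = value

with open("custpower.csv", "w") as out:
    out.write("# timestamp\n")   # the "# file/date/user..." header block from above would be written first
    for timestamp in sorted(by_timestamp):
        values = [by_timestamp[timestamp].get(s, "") for s in sensors]
        out.write(",".join([timestamp] + values) + "\n")

with open("powersensornames.csv", "w") as out:
    out.write(",".join(sensors) + "\n")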

Related

Creating a List of Dicts where the value is a list

Let me start off by saying it's possible I am attempting to use a terrible data structure.
I'm trying to get information out of a large text dump and can't seem to get it sorted right. The data looks like the below, but is much longer.
r1 r01
2020 77.7
2020 76.0
2020 77.7
r2 r02
2020 74.7
2020 74.0
2020 76.7
r2 r03
2020 74.2
2020 74.1
2020 76.8
r1 r04
2020 74.6
2020 75.6
2020 75.8
I thought I could end up getting it into a data structure like:
r1_list = [
    r01: [77.7,76.0,76.0,76.0],
    r04: [69.5,4,4,5],
]
r2_list = [
    r02: [1,2,3,4],
    r04: [3,4,4,5],
]
Then I could loop through the lists and check the mean etc. of the values per device.
Here is what I've been trying:
import re

r1_list = []
r2_list = []
current_device = False
device_type = False
current_reading = False

def matchr1(line):
    matchThis = ""
    matched = re.match(r'^(r1)\s(r\d+)$',line)
    if matched:
        #Matches r1
        matchThis = matched.group(2)
    else:
        return False
    return matchThis

def matchr2(line):
    matchThis = ""
    matched = re.match(r'^(r2)\s(r\d+)$',line)
    if matched:
        #Matches r2
        matchThis = matched.group(2)
    else:
        return False
    return matchThis

def matchReading(line):
    matchThis = ""
    matched = re.match(r'^(\d+)\s(\d+.\d+)$',line)
    if matched:
        #Matches r2
        matchThis = matched.group(2)
    else:
        return False
    return matchThis

with open("data.txt") as f:
    for line in f:
        if matchr1(line):
            current_device = matchr1(line)
            device_type = "r1"
        if matchr2(line):
            current_device = matchr2(line)
            device_type = "r2"
        if matchReading(line):
            current_reading = matchReading(line)
        if current_reading:
            if device_type == "r1":
                temp_dict = {current_device: [current_reading]}
                r1_list.append(temp_dict)
            if device_type == "r2":
                temp_dict = {current_device: [current_reading]}
                r2_list.append(temp_dict)
        current_reading = False

print(r1_list)
print(r2_list)
What I get
[{'r01': ['77.7']}, {'r01': ['76.0']}, {'r01': ['77.7']}, {'r04': ['74.6']}, {'r04': ['75.6']}, {'r04': ['75.8']}]
[{'r02': ['74.7']}, {'r02': ['74.0']}, {'r02': ['76.7']}, {'r03': ['74.2']}, {'r03': ['74.1']}, {'r03': ['76.8']}]
There are two separate steps here:
Looking at rows starting with "r" and finding where their data should be inserted.
Looking at other rows and inserting them into the data structure.
Here's what I came up with:
#!/usr/bin/env python

data = """r1 r01
2020 77.7
2020 76.0
2020 77.7
r2 r02
2020 74.7
2020 74.0
2020 76.7
r2 r03
2020 74.2
2020 74.1
2020 76.8
r1 r04
2020 74.6
2020 75.6
2020 75.8"""

result = {}

for line in data.splitlines():
    if line.startswith("r"):
        # Find (or create) the place in the data structure where
        # we should insert the values.
        first, second = line.split()
        # dict.setdefault(key, value) sets `dict[key] = value` if
        # it's not already set, then returns `dict[key]`.
        dest = result.setdefault(first, {}).setdefault(second, [])
        # Move on to the next line.
        continue
    # Get the value of the line
    _, value = line.split()
    # Add it to the list we found in the `line.startswith('r')`
    # bit above.
    dest.append(value)

assert result == {
    "r1": {
        "r01": ["77.7", "76.0", "77.7"],
        "r04": ["74.6", "75.6", "75.8"]
    },
    "r2": {
        "r02": ["74.7", "74.0", "76.7"],
        "r03": ["74.2", "74.1", "76.8"]
    },
}
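Since the original goal was to "loop through the lists, and check the mean etc of the values per device", one more small step gets there from the nested result dict built above (the readings are still strings at this point):

# Per-device averages computed from the nested dict produced above.
for group, devices in result.items():
    for device, readings in devices.items():
        mean = sum(float(r) for r in readings) / len(readings)
        print("{} {}: mean {:.2f}".format(group, device, mean))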

How to create a function to pass lat/long in an API call to get weather data

I am trying to get data from the pyOWM package using the city name, but in some cases a typo in the city name means no data is returned and the process breaks.
I want to get the weather data using lat/long instead, but I don't know how to set up a function for it.
Df1:

User  City          State                Zip     Lat         Long
----  ------------  -------------------  ------  ----------  -----------
A     Kuala Lumpur  Wilayah Persekutuan  50100   5.3288907   103.1344397
B     Dublin        County Dublin        NA      50.2030506  14.5509842
C     Oconomowoc    NA                   NA      53.3640384  -6.1953066
D     Mumbai        Maharashtra          400067  19.2177166  72.9708833
E     Mratin        Stredocesky kraj     250 63  40.7560585  -5.6924778
...
Code:
--------
import time
from tqdm.notebook import tqdm
import pyowm
from pyowm.utils import config
from pyowm.utils import timestamps

cities = Df1["City"].unique().tolist()
cities1 = cities[:5]

owm = pyowm.OWM('bee8db7d50a4b777bfbb9f47d9beb7d0')
mgr = owm.weather_manager()

'''
Step-1 Define list where save the data
'''
list_wind_Speed = []
list_tempreture = []
list_max_temp = []
list_min_temp = []
list_humidity = []
list_pressure = []
list_city = []
list_cloud = []
list_status = []
list_rain = []

'''
Step-2 Fetch data
'''
j = 0
for city in tqdm(cities1):
    j=+1
    if j < 60:
        # one_call_obs = owm.weather_at_coords(52.5244, 13.4105).weather
        # one_call_obs.current.humidity
        observation = mgr.weather_at_place(city)
        l = observation.weather
        list_city.append(city)
        list_wind_Speed.append(l.wind()['speed'])
        list_tempreture.append(l.temperature('celsius')['temp'])
        list_max_temp.append(l.temperature('celsius')['temp_max'])
        list_min_temp.append(l.temperature('celsius')['temp_min'])
        list_humidity.append(l.humidity)
        list_pressure.append(l.pressure['press'])
        list_cloud.append(l.clouds)
        list_rain.append(l.rain)
    else:
        time.sleep(60)
        j = 0

'''
Step-3 Blank data frame and store data in that
'''
df2 = pd.DataFrame()
df2["City"] = list_city
df2["Temp"] = list_tempreture
df2["Max_Temp"] = list_max_temp
df2["Min_Temp"] = list_min_temp
df2["Cloud"] = list_cloud
df2["Humidity"] = list_humidity
df2["Pressure"] = list_pressure
df2["Status"] = list_status
df2["Rain"] = list_status
df2
From the above code, I get the result below:
City | Temp |Max_Temp|Min_Temp|Cloud |Humidity|Pressure |Status | Rain
------------------------------------------------------------------------------------------
Kuala Lumpur|29.22 |30.00 |27.78 | 20 |70 |1007 | moderate rain | moderate rain
Dublin |23.12 |26.43 |22.34 | 15 |89 | 978 | cloudy | cloudy
...
Now, because of some city name typos, the process stops.
I am looking for an alternative solution: get the weather data from lat/long instead, but I don't know how to set up a function that passes the Lat and Long column data.
Df1 = {'User': ['A','B','C','D','E'],
       'City': ['Kuala Lumpur','Dublin','Oconomowoc','Mumbai','Mratin'],
       'State': ['Wilayah Persekutuan','County Dublin',None,'Maharashtra','Stredocesky kraj'],
       'Zip': [50100,None,None,400067,'250 63'],
       'Lat': [5.3288907,50.2030506,53.3640384,19.2177166,40.7560585],
       'Long': [103.1344397,14.5509842,-6.1953066,72.9708833,-5.6924778]}
# Try to use this code to get weather data
# one_call_obs = owm.weather_at_coords(52.5244, 13.4105).weather
# one_call_obs.current.humidity
Expected Result
--------------
User | City | Lat | Long | Temp | Cloud | Humidity | Pressure | Rain | Status
-----------------------------------------------------------------------------
Catch the error if a city is not found, parse the lat/lon from the dataframe. Use that lat/lon to create a bounding box and use weather_at_places_in_bbox to get a list of observations in that area.
import time
from tqdm.notebook import tqdm
import pyowm
from pyowm.utils import config
from pyowm.utils import timestamps
import pandas as pd
from pyowm.commons.exceptions import NotFoundError, ParseAPIResponseError

df1 = pd.DataFrame({'City': ('Kuala Lumpur', 'Dublin', 'Oconomowoc', 'Mumbai', 'C airo', 'Mratin'),
                    'Lat': ('5.3288907', '50.2030506', '53.3640384', '19.2177166', '30.22', '40.7560585'),
                    'Long': ('103.1344397', '14.5509842', '-6.1953066', '72.9708833', '31', '-5.6924778')})

cities = df1["City"].unique().tolist()

owm = pyowm.OWM('bee8db7d50a4b777bfbb9f47d9beb7d0')
mgr = owm.weather_manager()

for city in cities:
    try:
        observation = mgr.weather_at_place(city)
        # print(city, observation)
    except NotFoundError:
        # get city by lat/lon
        lat_top = float(df1.loc[df1['City'] == city, 'Lat'])
        lon_left = float(df1.loc[df1['City'] == city, 'Long'])
        lat_bottom = lat_top - 0.3
        lon_right = lon_left + 0.3
        try:
            observations = mgr.weather_at_places_in_bbox(lon_left, lat_bottom, lon_right, lat_top, zoom=5)
            observation = observations[0]
        except ParseAPIResponseError:
            raise RuntimeError(f"Couldn't find {city} at lat: {lat_top} / lon: {lon_right}, try tweaking the bounding box")
    weather = observation.weather
    temp = weather.temperature('celsius')['temp']
    print(f"The current temperature in {city} is {temp}")
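If the tabular output from the question is also wanted (one row per city), the same loop can collect each result into a list of dicts and build the DataFrame at the end. This is only a sketch layered on the answer above; it reuses the pyowm Weather attributes the question already touches, plus status for the Status column:

rows = []
for city in cities:
    try:
        observation = mgr.weather_at_place(city)
    except NotFoundError:
        # same lat/lon bounding-box fallback as above
        lat = float(df1.loc[df1['City'] == city, 'Lat'])
        lon = float(df1.loc[df1['City'] == city, 'Long'])
        observation = mgr.weather_at_places_in_bbox(lon, lat - 0.3, lon + 0.3, lat, zoom=5)[0]
    w = observation.weather
    rows.append({'City': city,
                 'Temp': w.temperature('celsius')['temp'],
                 'Cloud': w.clouds,
                 'Humidity': w.humidity,
                 'Pressure': w.pressure['press'],
                 'Rain': w.rain,
                 'Status': w.status})

df2 = pd.DataFrame(rows)
print(df2)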

PySerial returning incorrect data from Nova PM SDS011 with raspi3

Here in California, I have purchased some Nova SDS011 PM sensors. When attempting to read from these sensors using Ivan Kalchev's git repo, I get mixed results. I can send commands to the sensor, e.g. sensor.sleep(sleep=<True/False>) will turn the fan on and off. However, attempting to query the sensor for PM2.5 and PM10 data returns a byte string that does not match the checksum. A couple of examples are in the code snippet below. As you can see, bytes 2 and 6 appear to be corrupt, and furthermore, the response is two bytes shorter than what is expected from the documentation.
Any idea what's going on here? I'm hoping this is simply a problem with pySerial. I have reproduced the same results with two sensors.
>>> sensor.sleep(sleep=False)
>>> cmd
'\xaa\xb4\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xff\xff\x02\xab'
>>> sensor.ser.write(cmd)
19
>>> sensor.ser.readline()
'\xaa\xc0]\x01\xba\x01\xc2*\x05\xab'
>>> sensor.ser.write(cmd)
19
>>> sensor.ser.readline()
'\xaa\xc0c\x01\xbc\x01\xc2*\r\xab'
>>> sensor.ser.write(cmd)
19
>>> sensor.ser.readline()
'\xaa\xc0d\x01\xbf\x01\xc2*\x11\xab'
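As a side note, those replies are actually full 10-byte frames and their checksums do validate once each byte is read as an integer; the printable characters (']', '*') in the Python 2 repr just make the strings look shorter than they are. A quick check on the first reply:

reply = b'\xaa\xc0]\x01\xba\x01\xc2*\x05\xab'
frame = bytearray(reply)                  # head, cmd, 6 data/ID bytes, checksum, tail
print(len(frame))                         # 10 -> the frame is complete
print(sum(frame[2:8]) % 256 == frame[8])  # True -> checksum is valid
pm25 = (frame[2] | frame[3] << 8) / 10.0  # PM2.5 low/high bytes -> 34.9
pm10 = (frame[4] | frame[5] << 8) / 10.0  # PM10 low/high bytes -> 44.2
print(pm25, pm10)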
So this occurred because the code was written for Python 3, and my Raspberry Pi had Python 2.7. Below is the code for 2.7. Thanks to Ivan for putting the original together.

"""This module provides an abstraction for the SDS011 air particulate density sensor.
"""
import struct
import serial

#TODO: Commands against the sensor should read the reply and return success status.


class SDS011(object):
    """Provides method to read from a SDS011 air particulate density sensor
    using UART.
    """

    HEAD = b'\xaa'
    TAIL = b'\xab'
    CMD_ID = b'\xb4'

    # The sent command is a read or a write
    READ = b"\x00"
    WRITE = b"\x01"

    REPORT_MODE_CMD = b"\x02"
    ACTIVE = b"\x00"
    PASSIVE = b"\x01"

    QUERY_CMD = b"\x04"

    # The sleep command ID
    SLEEP_CMD = b"\x06"
    # Sleep and work byte
    SLEEP = b"\x00"
    WORK = b"\x01"

    # The work period command ID
    WORK_PERIOD_CMD = b'\x08'

    def __init__(self, serial_port, baudrate=9600, timeout=2,
                 use_query_mode=True):
        """Initialise and open serial port.
        """
        self.ser = serial.Serial(port=serial_port,
                                 baudrate=baudrate,
                                 timeout=timeout)
        self.ser.flush()
        self.set_report_mode(active=not use_query_mode)

    def _execute(self, cmd_bytes):
        """Writes a byte sequence to the serial.
        """
        self.ser.write(cmd_bytes)

    def _get_reply(self):
        """Read reply from device."""
        raw = self.ser.read(size=10)
        data = raw[2:8]
        if len(data) == 0:
            return None
        if (sum(ord(d) for d in data) & 255) != ord(raw[8]):
            return None  #TODO: also check cmd id
        return raw

    def cmd_begin(self):
        """Get command header and command ID bytes.
        :rtype: list
        """
        return self.HEAD + self.CMD_ID

    def set_report_mode(self, read=False, active=False):
        """Get sleep command. Does not contain checksum and tail.
        :rtype: list
        """
        cmd = self.cmd_begin()
        cmd += (self.REPORT_MODE_CMD
                + (self.READ if read else self.WRITE)
                + (self.ACTIVE if active else self.PASSIVE)
                + b"\x00" * 10)
        cmd = self._finish_cmd(cmd)
        self._execute(cmd)
        self._get_reply()

    def query(self):
        """Query the device and read the data.
        :return: Air particulate density in micrograms per cubic meter.
        :rtype: tuple(float, float) -> (PM2.5, PM10)
        """
        cmd = self.cmd_begin()
        cmd += (self.QUERY_CMD
                + b"\x00" * 12)
        cmd = self._finish_cmd(cmd)
        self._execute(cmd)
        raw = self._get_reply()
        if raw is None:
            return None  #TODO:
        data = struct.unpack('<HH', raw[2:6])
        pm25 = data[0] / 10.0
        pm10 = data[1] / 10.0
        return (pm25, pm10)

    def sleep(self, read=False, sleep=True):
        """Sleep/Wake up the sensor.
        :param sleep: Whether the device should sleep or work.
        :type sleep: bool
        """
        cmd = self.cmd_begin()
        cmd += (self.SLEEP_CMD
                + (self.READ if read else self.WRITE)
                + (self.SLEEP if sleep else self.WORK)
                + b"\x00" * 10)
        cmd = self._finish_cmd(cmd)
        self._execute(cmd)
        self._get_reply()

    def set_work_period(self, read=False, work_time=0):
        """Get work period command. Does not contain checksum and tail.
        :rtype: list
        """
        assert work_time >= 0 and work_time <= 30
        cmd = self.cmd_begin()
        cmd += (self.WORK_PERIOD_CMD
                + (self.READ if read else self.WRITE)
                + bytes([work_time])
                + b"\x00" * 10)
        cmd = self._finish_cmd(cmd)
        self._execute(cmd)
        self._get_reply()

    def _finish_cmd(self, cmd, id1=b"\xff", id2=b"\xff"):
        """Add device ID, checksum and tail bytes.
        :rtype: list
        """
        cmd += id1 + id2
        checksum = sum(d for d in bytearray(cmd[2:])) % 256
        cmd += chr(checksum) + self.TAIL
        return cmd

    def _process_frame(self, data):
        """Process a SDS011 data frame.
        Byte positions:
            0 - Header
            1 - Command No.
            2,3 - PM2.5 low/high byte
            4,5 - PM10 low/high
            6,7 - ID bytes
            8 - Checksum - sum of bytes 2-7
            9 - Tail
        """
        raw = struct.unpack('<HHxxBBB', data[2:])
        checksum = sum(v for v in bytearray(data[2:8])) % 256
        if checksum != ord(data[8]):  # ord() needed on Python 2, where indexing a str gives a 1-char string
            return None
        pm25 = raw[0] / 10.0
        pm10 = raw[1] / 10.0
        return (pm25, pm10)

    def read(self):
        """Read sensor data.
        :return: PM2.5 and PM10 concentration in micrograms per cubic meter.
        :rtype: tuple(float, float) - first is PM2.5.
        """
        byte = 0
        while byte != self.HEAD:
            byte = self.ser.read(size=1)
            d = self.ser.read(size=10)
            if d[0:1] == b"\xc0":
                data = self._process_frame(byte + d)
                return data
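For reference, a minimal usage sketch of the class above in query mode; the serial device path is an assumption (with a USB adapter on a Pi 3 the sensor usually shows up as /dev/ttyUSB0):

import time

sensor = SDS011("/dev/ttyUSB0", use_query_mode=True)   # adjust the port for your setup
sensor.sleep(sleep=False)   # wake the sensor and spin the fan up
time.sleep(15)              # give the readings time to stabilise
result = sensor.query()
if result is not None:
    pm25, pm10 = result
    print("PM2.5: %.1f ug/m3, PM10: %.1f ug/m3" % (pm25, pm10))
sensor.sleep(sleep=True)    # put the sensor back to sleep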

Python 2.7 - manipulate some data from a CSV file

First of all, I want to emphasize that I'm a total beginner at Python. I made the code below to manipulate some data from a CSV file. I know it's not the prettiest code and I probably could have made it more elegant, but it works, up to a certain point, and that point is the reason I opened this question.
import csv
from numpy import interp
from operator import sub
import math
import pandas as pd
from Tkinter import *
import Tkinter as tk
import tkFileDialog as filedialog

root = Tk()
root.withdraw()
filename = filedialog.askopenfilename(initialdir="C:/", title="select file",
                                      filetypes=(("CSV files", "*.CSV"), ("all files", "*.*")))
id_uri = []
ore = []
minute = []
zile = []
activi = []
listx = []
listsa = []
list_ore = []
listspi = []
listspf = []
list_min = []
zile_luna = 0
test = []
nume = []

with open(filename) as p, open('activi.csv') as a:
    reader = csv.reader(p, delimiter=',')
    for row in reader:
        id_uri.append(row[0])
        ore.append(row[1])
        minute.append(row[2])
        zile.append(row[3])
    reader = csv.reader(a)
    for row in reader:
        activi.append(row[0])
        nume.append(row[1])

id_uri = map(int, id_uri)
ore = map(float, ore)
minute = map(float, minute)
minute = interp(minute, [0, 60], [0, 100])
ore = ore + minute / 100
zile = map(int, zile)
activi = map(int, activi)
zile_luna = len(set(zile)) + 1
mimin = 0
maxim = 0

def pontaj():
    global listx
    global listsa
    global listspi
    global listspf
    global list_ore
    global list_min
    global maxim
    global minim
    for x in range(3):
        for y in range(len(id_uri)):
            if zile[y] == z:
                if activi[x] == id_uri[y]:
                    listx.append(ore[y])
        minim = min(listx)
        maxim = max(listx)
        listsa.append(maxim - minim)
        listx = []
    listspi = [int(i) for i in listsa]
    listspf = [i % 1 for i in listsa]
    for i in range(len(listspf)):
        listspf[i] = round(listspf[i], 2)
        listspf[i] = listspf[i] * 100
        listspf[i] = interp(listspf[i], [0, 100], [0, 60])
        listspf[i] = int(listspf[i])
    list_ore.append(listspi)
    list_min.append(listspf)
    listsa = []

for z in range(1, zile_luna):
    pontaj()

for sublst in list_ore:
    for item in range(len(sublst)):
        sublst[item] = str(sublst[item])

for sublst in list_min:
    for item in range(len(sublst)):
        sublst[item] = str(sublst[item])

for i in range(len(list_ore)):
    for j in range(len(list_ore[i])):
        list_ore[i][j] = ' '.join(i + ':' + j for i, j in zip(list_ore[i][j], list_min[i][j]))

df = pd.DataFrame(list_ore)
df = df.T
nume = pd.Series(nume)
df['e'] = nume.values
df.to_csv('pontaj.csv', index=False, header=False)
print df
and the CSV file I read all the info from looks like this (employee code, hour, minute, day):
23,5,00,1
23,6,00,1
24,7,00,1
25,8,00,1
24,9,00,1
25,11,00,1
24,7,00,2
25,8,00,2
24,9,00,2
25,11,00,2
23,5,00,4
23,6,00,4
24,7,00,4
25,8,00,4
24,9,00,4
25,11,00,4
I have another CSV file that has employee code folowed by employee name like this:
23,aqwe
24,beww
25,cwww
Basically it's an attendance logger: it compares info from one CSV to the other, finds the min and max hours for a certain day, subtracts min from max, and writes this info to a list that is then written to another CSV.
The thing is, if every employee attends on a certain day, all goes well: it calculates the attendance hours and puts them in the CSV. But what happens if an employee skips a day? As I found out, that ruins the calculation, because the code requires all data to be consistent and in perfect order.
The data written to the CSV file must finally look like this:
day1 day2 day3
hours hours hours employee_a
hours hours hours employee_b
hours hours hours employee_c
But if one skips a day, the hours get scrambled.
I've tried some different approaches but none worked, and I realize the problem is due to my simple way of thinking, but as I said, I only started with Python a few days ago.
Do you have any suggestions on how I could improve the code to take an employee's missed day into consideration and generate the data like so:
day1 day2 day3
1:20 2:30 3:40 employee_a
1:20 2:30 3:40 employee_b
0:0 2:30 3:40 employee_c
Any advice would be appreciated, thanks!
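One possible direction, sketched rather than a drop-in fix for the script above: key the punch times by (employee, day) in a dictionary, then loop over every employee and every day, so a missing day simply falls back to 0:0. The column order (code, hour, minute, day) matches the sample CSV; attendance.csv here is a stand-in for the file chosen with askopenfilename:

import csv
from collections import defaultdict

times = defaultdict(list)   # (employee_code, day) -> punch times in hours
days = set()

with open('attendance.csv') as p:   # stand-in for the file picked with askopenfilename
    for code, hour, minute, day in csv.reader(p):
        times[(code, int(day))].append(int(hour) + int(minute) / 60.0)
        days.add(int(day))

names = {}
with open('activi.csv') as a:
    for code, name in csv.reader(a):
        names[code] = name

with open('pontaj.csv', 'w') as out:
    writer = csv.writer(out)
    for code in sorted(names):
        row = []
        for day in sorted(days):
            worked = times.get((code, day))
            if worked:
                delta = max(worked) - min(worked)
                row.append('%d:%02d' % (int(delta), round((delta % 1) * 60)))
            else:
                row.append('0:0')
        writer.writerow(row + [names[code]])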

python sort file based on created date

import os, sys
import os.path, time

path = os.getcwd()

def file_info(directory):
    file_list = []
    for i in os.listdir(directory):
        a = os.stat(os.path.join(directory, i))
        file_list.append([i, time.ctime(a.st_atime), time.ctime(a.st_ctime)])  # [file, most_recent_access, created]
    return file_list

print file_info(path)
Problem:
How can I show each list item on a new line, in a nice format?
How can I sort the file/directory list based on last modified time?
How can I sort the file/directory list based on creation date?
Here is the program with some nice printing using the format function:
import os
import time

path = os.getcwd()

def file_info(directory):
    file_list = []
    for i in os.listdir(directory):
        a = os.stat(os.path.join(directory, i))
        file_list.append([i, time.ctime(a.st_atime), time.ctime(a.st_ctime)])  # [file, most_recent_access, created]
    return file_list

file_list = file_info(path)
for item in file_list:
    line = "Name: {:<20} | Last Accessed: {:>20} | Date Created: {:>20}".format(item[0], item[1], item[2])
    print(line)
Here is some code with a sort function being used on the accessed time. The code is not optimized but it is very readable and you should be able to understand it.
import os
import time

path = os.getcwd()

def file_info(directory, sortLastModifiedOrNaw=False):
    file_list = []
    currentMin = 0  # This is the variable that will track the lowest digit
    for i in os.listdir(directory):
        a = os.stat(os.path.join(directory, i))
        if sortLastModifiedOrNaw == True:  # If you would like to sort.
            if a.st_atime > currentMin:  # Check if this is bigger than the current minimum.
                currentMin = a.st_atime  # If it is we update the current minimum
                # Below we append so that it ends up in the end of the list
                file_list.append([i, time.ctime(a.st_atime), time.ctime(a.st_ctime)])  # [file, most_recent_access, created]
            else:  # If it is smaller, it should be in the front of the list so we insert it into position 0.
                file_list.insert(0, [i, time.ctime(a.st_atime), time.ctime(a.st_ctime)])  # [file, most_recent_access, created]
        else:  # If you would not like to sort
            file_list.append([i, time.ctime(a.st_atime), time.ctime(a.st_ctime)])  # [file, most_recent_access, created]
    return file_list

file_list = file_info(path)
print("Unsorted Example")
for item in file_list:
    line = "Name: {:<20} | Date Last Accessed: {:>20} | Date Created: {:>20}".format(item[0], item[1], item[2])
    print(line)

print("\nSorted example using last modified time")
file_list = file_info(path, sortLastModifiedOrNaw=True)
for item in file_list:
    line = "Name: {:<20} | Date Last Accessed: {:>20} | Date Created: {:>20}".format(item[0], item[1], item[2])
    print(line)
Sample output:
Unsorted Example
Name: .idea | Date Last Accessed: Sun Jan 3 21:13:45 2016 | Date Created: Sun Jan 3 21:13:14 2016
Name: blahblah.py | Date Last Accessed: Sun Jan 3 21:13:48 2016 | Date Created: Sun Jan 3 21:13:48 2016
Name: testhoe1.py | Date Last Accessed: Sun Jan 3 19:09:57 2016 | Date Created: Sun Jan 3 18:52:06 2016
Sorted example using last modified time
Name: testhoe1.py | Date Last Accessed: Sun Jan 3 19:09:57 2016 | Date Created: Sun Jan 3 18:52:06 2016
Name: .idea | Date Last Accessed: Sun Jan 3 21:13:45 2016 | Date Created: Sun Jan 3 21:13:14 2016
Name: blahblah.py | Date Last Accessed: Sun Jan 3 21:13:48 2016 | Date Created: Sun Jan 3 21:13:48 2016
Happy optimizing! #If you change line 12 atime to ctime it will sort based on create-time.
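For comparison, the standard-library route is to let sorted()/list.sort() do the ordering with a key taken from os.stat: st_mtime for last-modified time, st_ctime for creation time on Windows (on Linux st_ctime is the metadata-change time). A short sketch:

import os
import time

def files_sorted_by(directory, attr="st_mtime"):
    """Return [name, accessed, created] rows sorted by the given os.stat attribute."""
    entries = []
    for name in os.listdir(directory):
        st = os.stat(os.path.join(directory, name))
        entries.append((getattr(st, attr), name, st))
    entries.sort()   # tuples compare by the stat attribute first, then by name
    return [[name, time.ctime(st.st_atime), time.ctime(st.st_ctime)]
            for _, name, st in entries]

for row in files_sorted_by(os.getcwd(), "st_mtime"):   # use "st_ctime" to sort by creation date
    print("Name: {:<20} | Last Accessed: {:>20} | Date Created: {:>20}".format(*row))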
