Storing into a list schedule from a .txt file - python

The first part below does what I want: it separates the file by week. But when I try to store each week in the schedule list so that I can index it by i, it doesn't work.
Here is my code:
schedule_file = open("sch2019.txt", "r")
schedule_list = schedule_file.readlines()
for x in schedule_list:
This is the output of the code:
week 1 : { ('LAR','CP'), ('KCC','JJ'), ('NYG','DC'), ('BB','NYJ'), ('CIN','SS'), ('DB','OR'), ('WR','PE'), ('GBP','CB'), ('AF','MV'), ('PS','NEP'), ('HT','NOS'), ('IC','LAC'), ('TT','CLV'), ('SF','TBB'), ('DL','AC'), ('BR','MD') }
week 2 : { ('SS','PS'), ('BB','NYG'), ('DC','WR'), ('SF','CIN'), ('IC','TT'), ('JJ','HT'), ('PE','AF'), ('KCC','OR'), ('LAC','DL'), ('CB','DB'), ('AC','BR'),('NOS','LAR'), ('TBB','CP'), ('NEP','MD'), ('MV','GBP'),('CLV','NYJ') }
week 3 : {('NYJ','NEP'), ('PS','SF'), ('HT','LAC'),('NYG','TBB'), ('NOS','SS'), ('DL','PE'), ('OR','MV'), ('DB','GBP'),('LAR','CLV'), ('CB','WR'), ('CP','AC'), ('MD','DC'), ('BR','KCC'), ('CIN','BB'), ('AF','IC'), ('TT','JJ') }
week 4 : { ('TT','AF'), ('SS','AC'), ('CIN','PS'), ('NEP','BB'), ('CLV','BR'), ('OR','IC'),('TBB','LAR'), ('MV','CB'), ('CP','HT'), ('LAC','MD'), ('KCC','DL'), ('JJ','DB'), ('DC','NOS'), ('WR','NYG'), ('PE','GBP') }
I want to store each week into a list schedule, but I don't know how to approach it.
What I tried doing is
schedule_file = open("sch2019.txt", "r")
schedule_list = schedule_file.readlines()
schedule = []
for x in schedule_list:
    # NOTE(review): `x` is one whole line of text, so iterating over it
    # yields single characters -- which is why the result is split up even
    # further instead of holding one entry per week.
    for i in x:
        schedule.append(i)
print (schedule)
But all it does is further separate it.
How would I separate the list into iterations for schedule?
what i want it to be like:
Schedule[1]← {(LAR,CP), (KCC,JJ), (NYG,DC), (BB,NYJ), (CIN,SS), (DB,OR), (WR,PE),(GBP,CB), (AF,MV), (PS,NEP), (HT,NOS), (IC,LAC), (TT,CLV), (SF,TBB), (DL,AC),(BR,MD)}
Schedule[2]←{(SS,PS), (BB,NYG), (DC,WR), (SF,CIN), (IC,TT), (JJ,HT), (PE,AF), (KCC,OR),(LAC,DL), (CB,DB), (AC,BR), (NOS,LAR), (TBB,CP), (NEP,MD), (MV,GBP), (CLV,NYJ)}
where each iteration of the schedule[i] contains the values from the corresponding week info from the text file

I think this is what you're after, working Repl.it here.
This will parse each line into a list of fixtures (assuming I've got your input text file formatted correctly). This is then stored in a Dict with the week number as a key.
import re
# Read the whole schedule up front: one list element per line of the file.
with open('schedule.txt') as f:
    schedule = f.readlines()
# One quoted fixture pair such as 'LAR','CP' (team codes of 2-3 capitals).
# Compiled once at import time instead of on every call.
_FIXTURE_RE = re.compile(r"'[A-Z]{2,3}','[A-Z]{2,3}'")


def line_parser(line):
    """Return every fixture found in one line of the schedule file.

    Args:
        line: One line of text, e.g.
            "week 1 : { ('LAR','CP'), ('KCC','JJ') }".

    Returns:
        A list of the matched substrings, quotes included, in the order
        they appear (e.g. ["'LAR','CP'", "'KCC','JJ'"]). The list is empty
        when the line contains no fixture.
    """
    return _FIXTURE_RE.findall(line)
# Key each line's fixtures by a 1-based week number so that
# schedule_list[1] is week 1 (this relies on the file listing the weeks in
# order, starting with week 1, one per line).
schedule_list = {
    week: line_parser(line) for week, line in enumerate(schedule, start=1)
}
# Print the 10th fixture in week 3:
print(schedule_list[3][9])
# Output: 'CB','WR'
I suppose you could break this down further by splitting each fixture into a home/away string which is stored as a dictionary, but I'm not sure what output format you're expecting?
import re
# Read the whole schedule up front: one list element per line of the file.
with open('schedule.txt') as f:
    schedule = f.readlines()
# Captures the two team codes of a fixture written as HOME','AWAY.
# Compiled once at import time instead of on every call.
_FIXTURE_SIDES_RE = re.compile(r"([A-Z]{2,3})','([A-Z]{2,3})")


def line_parser(line):
    """Return the fixtures of one schedule line as home/away dictionaries.

    Args:
        line: One line of text, e.g.
            "week 1 : { ('LAR','CP'), ('KCC','JJ') }".

    Returns:
        A list with one ``{"home": ..., "away": ...}`` dict per fixture, in
        the order the fixtures appear; empty when the line has none.
    """
    return [
        {"home": home, "away": away}
        for home, away in _FIXTURE_SIDES_RE.findall(line)
    ]
# Key each line's fixtures by a 1-based week number so that
# schedule_list[1] is week 1 (this relies on the file listing the weeks in
# order, starting with week 1, one per line).
schedule_list = {
    week: line_parser(line) for week, line in enumerate(schedule, start=1)
}
# Print the 10th fixture in week 3:
print(schedule_list[3][9])
# Output: {'home': 'CB', 'away': 'WR'}

Related

How to add from file to dictionary?

Suppose I have file that looks like this
Channel 1
12:30-14:00 Children’s program
17:00-19:00 Afternoon News
8:00-9:00 Morning News
————————————————————————— Channel 2
19:30-21:00 National Geographic
14:00-15:30 Comedy movies
And so on for a finite number of Channels and programs .
I would like to read the file and create a dictionary and sort it with respect to channel and program that is being shown on a given time . So something like this
Channels={
Channel 1:{Childrens program : 19:30-21:00,Afternooon news : 17:00-18:00},
Channel 2 :{ National Geographic: 19:30-21:00,Batman:14:00-15:30}
}
Try regular expressions at each line you parse, and fill your dictionary depending on what type of line this is, like so:
import re

# A channel header may share its line with other text (for example a
# separator rule), so it is searched for anywhere in the line. At least one
# digit is required so that a bare "Channel " inside a title is not a header.
_CHANNEL_RE = re.compile(r"Channel \d+")
# "<start>-<end> <title>", e.g. "12:30-14:00 Children's program".
_PROGRAM_RE = re.compile(r"(\d{1,2}:\d{1,2}-\d{1,2}:\d{1,2}) (.*$)")

d = {}
channel = None
with open("f.txt") as f:
    for l in f:
        # Program lines are tested first so that a title which happens to
        # contain "Channel <n>" does not start a new channel.
        match_program = _PROGRAM_RE.search(l)
        if match_program:
            if channel is None:
                # A program listed before any channel header has no owner.
                continue
            time_slot, title = match_program.groups()
            d[channel][title] = time_slot
            continue
        # If channel line format is found
        match_line = _CHANNEL_RE.search(l)
        if match_line:
            channel = match_line.group(0)
            # setdefault keeps earlier programs if a header is repeated.
            d.setdefault(channel, {})
d equals to:
{
"Channel 1": {
"Children’s program": "12:30-14:00",
"Afternoon News": "17:00-19:00",
"Morning News": "8:00-9:00"
},
"Channel 2": {
"National Geographic": "19:30-21:00",
"Comedy movies": "14:00-15:30"
}
}
My approach: First, break the text into blocks, each block is separated by the dash lines. Next, convert each block in a single key, value of a bigger dictionary. Finally, putting them all together to form the result.
import json
import pathlib
import re
def parse_channel(text):
    """
    Convert one block of text into a (channel name, programs) pair.

    The `text` is a block of text such as:
    Channel 1
    12:30-14:00 Children’s program
    17:00-19:00 Afternoon News
    8:00-9:00 Morning News
    This function will return ("Channel 1", {...}) which are
    the key and value of a bigger dictionary. The inner dictionary maps
    each program name to its time slot.

    Lines are stripped *before* blank ones are discarded, so lines holding
    only whitespace are ignored as well.

    Raises ValueError when the block holds no text at all, or when a
    program line has no whitespace between the time slot and the name.
    """
    lines = [
        stripped
        for stripped in (raw.strip() for raw in text.splitlines())
        if stripped
    ]
    if not lines:
        # Raising StopIteration here (as a bare next() would) silently ends
        # an enclosing map()/dict() and drops every later block.
        raise ValueError("channel block contains no text")
    key, *programs = lines
    value = {}
    for line in programs:
        # Split on the first run of whitespace so extra spaces between the
        # time slot and the name do not end up in the name.
        time, name = line.split(None, 1)
        value[name] = time
    return key, value
path = pathlib.Path(__file__).with_name("data.txt")
# No existence assert: read_text() raises FileNotFoundError by itself, and
# asserts are stripped when Python runs with -O.
text = path.read_text(encoding="utf-8")
# Break text into blocks, separated by dash lines. En and em dashes are
# accepted too, because the sample data draws its rule with them.
blocks = re.split(r"[-\u2013\u2014]{2,}", text)
# Convert each block into key/value, then build the final dictionary.
# Blocks with no text (e.g. around a leading or trailing rule) are skipped.
channels = dict(parse_channel(block) for block in blocks if block.strip())
print(json.dumps(channels, indent=4))

keep calling an API until it is updated with latest item (Python)

I'm looking to call an API, and compare the data to my saved data in a CSV. If it has a new data point then I want to update my CSV and return the DataFrame... The mystery I have is why these two variables appear to be the same, yet the If statement moves to the Else instead of recognizing they are the same, if they are the same it should keep looping until an updated data point appears,(see second_cell == lastItem1 )
import pandas_datareader as pdr # https://medium.com/swlh/pandas-datareader-federal-reserve-economic-data-fred-a360c5795013
import datetime
def datagetter():
    """Poll the FRED 'PAYEMS' series until it differs from the saved CSV.

    Returns the freshly downloaded DataFrame once its last value is not
    equal to the last value stored in PAYEMS.csv (the CSV is overwritten
    first). Falls through and returns None after 119 identical attempts.
    """
    i = 1
    while i < 120:
        start = datetime.datetime (2005, 1, 1) ### Step 1: get data, and print last item
        end = datetime.datetime (2040, 1, 1)
        df = pdr.DataReader('PAYEMS', 'fred', start, end) ## This is the API
        lastItem1 = df["PAYEMS"].iloc[-1] # find the last item in the data we have just downloaded
        print ("Latest item from Fred API: " , lastItem1) ### Print the last item
        with open('PAYEMS.csv', 'r') as logs: # So first we open the most recent CSV file
            data = logs.readlines()
        last_row = data[-1].split(',') # split is default on , as CSVs should be.
        second_cell = last_row[1] # "second_cell" is our variable name for the saved datapoint from last month/week/day
        print ("Last Item, in thousands" , second_cell)
        # NOTE(review): second_cell is a str (it comes from str.split, and may
        # still carry the line's trailing newline) while lastItem1 comes from
        # the DataFrame -- presumably a number -- so this comparison is never
        # true until one side is converted.
        if second_cell == lastItem1:
            print ("CSV " , second_cell, "API ", lastItem1, " downloaded and stored items are the same, will re-loop until a new datapoint")
            print("attempt no.", i)
            i += 1
        else:
            df.to_csv("PAYEMS.csv")
            print ("returning dataframe")
            # print(df.tail())
            return df
df = datagetter()
print(df.tail(3))
solved my own problem:
my CSV was returning a string, and the API an int... not quite sure why.
So
# The CSV cell is text: treat an empty cell as 0, then convert to an int.
if second_cell == "":
    second_cell = 0
second_cell1 = int(float(second_cell))

Pytest -Get one item from multiple returned values

I have an e2e_te_data.json file that contains 2 different test data points. That means I have data for 2 test cases: I pass it to pytest, and pytest should execute 2 different test cases.
e2e_te_data.json:
[{ "dataSource": "dataSource1",
"machineName": "MachineName_X"
},
{ "dataSource": "dataSource2",
"machineName": "MachineName_Y"
}]
This is my code:
def read_test_data_from_json():
    """Load the test data file and return it in two parallel lists.

    Returns:
        A ``(convertedJsonStr, h)`` tuple: ``convertedJsonStr`` holds each
        entry of the JSON array re-serialised as a JSON string, and ``h``
        holds the matching ``machineName`` values, in file order.
    """
    # `with` closes the file even if parsing fails.
    with open('..\\e2eTestData.json', 'r') as JsonFile:
        parsedJsonStr = json.load(JsonFile)  # Parse JSON text to Python objects
    convertedJsonStr = [json.dumps(entry) for entry in parsedJsonStr]
    h = [entry['machineName'] for entry in parsedJsonStr]
    return convertedJsonStr, h
# NOTE(review): parametrize treats each element of the outer tuple as ONE
# test case, so passing the two whole lists does not pair item i of the first
# list with item i of the second; zip(*read_test_data_from_json()) would
# presumably give one (json string, machine name) pair per case -- confirm.
#pytest.mark.parametrize("convertedJsonStr,h", (read_test_data_from_json()[0],read_test_data_from_json()[1]))
def test_GetFrequencyOfAllToolUsage(convertedJsonStr,h):
    objAPI=HTTPMethods()
    # NOTE(review): this subscripts the function object itself rather than
    # its result (or the `convertedJsonStr` argument).
    frequencyOfToolResultFromAPIRequest=objAPI.getFrequencyOfTools(read_test_data_from_json[0])
    print(h)
Value of convertedJsonstr variable
I want to get one item of convertedJsonStr and h returned from read_test_data_from_json method when it comes into test_GetFrequencyOfAllToolUsage method. But I see all items of convertedJsonStr and h as image above.
First Item
def read_test_data_from_json():
    """Return one entry of the test data file and its machine name."""
    # `with` closes the file once it has been parsed.
    with open('..\\e2eTestData.json', 'r') as handle:
        JsonFile = json.load(handle)
    # First item
    return JsonFile[0], JsonFile[0]["machineName"]
Last item
return JsonFile[-1], JsonFile[-1]["machineName"]
Random item
item = random.choice(JsonFile)
return item, item["machineName"]

Python multiprocessing: Reading a file and updating a dictionary

Lets assume that I have a text file with only 2 rows as follows:
File.txt:
100022441 #DavidBartonWB Guarding Constitution
100022441 RT #frankgaffney 2nd Amendment Guy.
First column is user id and second column is user tweet. I'd like to read the above text file and update the following dictionary:
d={'100022441':{'#frankgaffney': 0, '#DavidBartonWB': 0}}.
Here is my code:
def f(line):
    data = line.split('\t')
    uid = data[0]
    tweet = data[1]
    if uid in d.keys():
        for gn in d[uid].keys():
            # NOTE(review): both branches return, so the loop never gets
            # past the first key of d[uid]; the remaining keys are never
            # checked against this tweet.
            if gn in tweet:
                return uid, gn, 1
            else:
                return uid, gn, 0
p = Pool(4)
# NOTE(review): open() does not expand "~"; see os.path.expanduser.
with open('~/File.txt') as source_file:
    for uid, gn, r in p.map(f, source_file):
        d[uid][gn] += r
So basically I need to read each line of the file and determine whether the user is in my dictionary, and if it is, whether the tweet contain user's keys in the dictionary (e.g. '#frankgaffney' and '#DavidBartonWB'). So based on the two lines I wrote above, the code should result:
d = {{'100022441':{'#frankgaffney': 1, '#DavidBartonWB': 1 }}
But it gives:
d = {{'100022441':{'#frankgaffney': 1, '#DavidBartonWB': 0 }}
For some reason the code always loses one of the keys for all users. Any idea what is wrong in my code?
Your file is tab delimited, and you are always checking the third column for the mention; it works correctly for the first mention because you are passing in the entire file to the function, not each line. So effectively you are doing this:
>>> s = '100022441\t#DavidBartonWB Guarding Constitution\n100022441\tRT#frankgaffney 2nd Amendment Guy.'
>>> s.split('\t')
['100022441', '#DavidBartonWB Guarding Constitution\n100022441', 'RT#frankgaffney 2nd Amendment Guy.']
I recommend two approaches:
Map your function to each line in the file.
Use regular expressions for a more robust search.
Try this version:
import re
# Per-user counters: user id -> {hashtag: number of tweets mentioning it}.
d = {'100022441':{'#frankgaffney': 0, '#DavidBartonWB': 0}}
# A hashtag: '#' followed by word characters.
e = r'(#\w+)'


def parser(line):
    """Count the tracked hashtags that one "<uid>\\t<tweet>" line mentions.

    Updates the module-level dictionary `d` in place and returns None.
    Lines without a tab, and lines whose user id is not in `d`, are
    ignored. Only the first tab separates the columns, so a tweet may
    itself contain tabs.
    """
    key, separator, tweet = line.partition('\t')
    if not separator:
        # No tab at all: not a "<uid>\t<tweet>" record.
        return
    data = d.get(key)
    if not data:
        return
    for mention in re.findall(e, tweet):
        if mention in data:
            data[mention] += 1
import os

# open() does not expand "~" by itself, so resolve the home directory first.
with open(os.path.expanduser('~/File.txt')) as f:
    for line in f:
        parser(line)
print(d)
Once you've confirmed its working correctly, then you can multi-process it:
import itertools, re
from multiprocessing import Process, Manager
# A hashtag: '#' followed by word characters. Compiled once, not per line.
_MENTION_RE = re.compile(r'(#\w+)')


def parse(queue, d, m):
    """Consume "<uid>\\t<tweet>" lines from `queue` and count hashtags.

    Args:
        queue: Object with a blocking ``get()``; a ``None`` item tells this
            worker to stop.
        d: Mapping of user id to the collection of hashtags tracked for
            that user.
        m: Mapping updated in place: hashtag -> number of mentions seen.

    Lines without a tab, or from users missing in `d`, are skipped.
    """
    while True:
        line = queue.get()
        if line is None:
            return  # sentinel: no more work for this worker
        # Split on the first tab only, so a tweet may itself contain tabs.
        key, separator, tweet = line.partition('\t')
        if not separator:
            continue
        data = d.get(key)
        if not data:
            continue
        for mention in _MENTION_RE.findall(tweet):
            if mention in data:
                # NOTE: read-then-write is two separate operations on `m`;
                # with several workers sharing `m`, updates can interleave.
                m[mention] = m.get(mention, 0) + 1
if __name__ == '__main__':
    workers = 2
    manager = Manager()
    # Shared counter dictionary the workers write to: hashtag -> count.
    d2 = manager.dict()
    # Read-only lookup handed to every worker: user id -> tracked hashtags.
    d = {'100022441': ['#frankgaffney', '#DavidBartonWB']}
    queue = manager.Queue(workers)
    worker_pool = []
    for i in range(workers):
        p = Process(target=parse, args=(queue, d, d2))
        p.start()
        worker_pool.append(p)
    # Fill the queue with data for the workers, followed by one None
    # sentinel per worker so each of them terminates.
    with open(r'tweets2.txt') as f:
        iters = itertools.chain(f, (None,)*workers)
        for line in iters:
            queue.put(line)
    for p in worker_pool:
        p.join()
    for i, data in d.items():
        print('For ID: {}'.format(i))
        for key in data:
            # A hashtag that was never mentioned has no entry in d2.
            print(' {} - {}'.format(key, d2.get(key, 0)))
second column is data[1], not data[2]
the fact that data[2] works means that you are splitting into words, not columns
if you want to search for the user key as a separate word (as opposed to substring), you need tweet=data[1:]
if you want to search for a substring you need to split into exactly two pieces: uid,tweet=line.split(None,1)

Reading and comparing lines in a file using Python

I have a file of the following format.
15/07/2010 14:14:13 changed_status_from_Offline_to_Available
15/07/2010 15:01:09 changed_status_from_Available_to_Offline
15/07/2010 15:15:35 changed_status_from_Offline_to_Away became_idle
15/07/2010 15:16:29 changed_status_from_Away_to_Available became_unidle
15/07/2010 15:45:40 changed_status_from_Available_to_Away became_idle
15/07/2010 16:05:40 changed_status_from_Away_to_Available became_unidle
15/07/2010 16:51:39 changed_status_from_Available_to_Offline
20/07/2010 13:07:26 changed_status_from_Offline_to_Available
I need to create a function in Python that takes two arguments: date and time. It should read the file and return the second status if the date matches and the time is less than the time in the function call. That is:
Lets say i call the function returnstatus(15/07/2010, 15:10:01).
The function should go to the file and return the status of the user on that day at that time, which in this case is "Offline".
I am a Python newbie and any help would be really appreciated.
import datetime
import time
def lines( path_to_file ):
    '''Open path_to_file and read the lines one at a time, yielding tuples
    ( datetime of line, status before line ).

    Each line is expected to look like
    "15/07/2010 15:15:35 changed_status_from_Offline_to_Away became_idle";
    any columns after the third are ignored, and lines with fewer than
    three columns (blank lines, for instance) are skipped.'''
    with open( path_to_file ) as theFile:
        for line in theFile:
            # Split on whitespace instead of rsplit( " ", 1 ): some lines
            # carry a fourth column, which would otherwise end up inside
            # the timestamp text and make strptime fail.
            fields = line.split()
            if len( fields ) < 3:
                continue
            yield (
                datetime.datetime.strptime( fields[ 0 ] + " " + fields[ 1 ], "%d/%m/%Y %H:%M:%S" ),
                # "changed_status_from_<X>_to_<Y>": index 3 is <X>
                fields[ 2 ].split( "_" )[ 3 ]
            )
def return_status( statDate ):
    '''Return the "before" status of the first log line that is not earlier
    than statDate (a datetime.datetime), or None when every line is earlier.

    NOTE(review): `path_to_file` is read from the enclosing scope; it is not
    defined in this snippet.'''
    for lineDate, lineStatus in lines( path_to_file ):
        if statDate <= lineDate:
            return lineStatus
    # Every entry is older than statDate: nothing to report.
    return None
Does that make sense, or would you like me to explain any of it?
Edit
Did you mean what you said above?
date matches and time is less than the time in the function call
In other words, what should happen if you call return_status( 16/07/2010, <some.time> )? Should you get "Offline"?
Another Edit
I have edited it to do sensible datetime comparisons. I think you have read the inequality the wrong way around: we loop through lines in the file until the first line after the date we wish to fetch (keep reading while statDate > lineDate). Once this test fails, line is the first line after the desired date, so its from value is the status at the time we requested. You should call the function with a datetime.datetime.
I suggest you have a read in the python docs, specifically the time module and the function strptime which can parse textual representation of times into a programmatic representation.
Calling returnstatus the way you wrote in the question will surely fail, you might want to call it with a string representation of the time (i.e. "15/07/2010 15:10:01") or by passing one of the datatypes defined in the time module.
EDIT: obviously if you pass in a string time then finding it in the file is much easier:
if substring in line:
# do stuff
As Yoni said, you're probably better served by passing a string argument (if you have one). You may also find the types in datetime useful. You'll also want to look into the split function.
Basically, what you need to do is pull out the dates and times from your log into a easy-to-compare format. Enter datetime.
import datetime
def getStatus(log_list, dt, tm):
    """Return the status in force on day `dt` at time `tm`.

    Args:
        log_list: Entries of the form [date, time, event], where `event`
            looks like "changed_status_from_<old>_to_<new>".
        dt: Date to look up; compared for equality with entry[0].
        tm: Time to look up; entries with entry[1] <= tm are considered.

    Returns:
        The <new> status of the latest matching entry, or the string
        'No status available for this day and time.' when none matches.
    """
    #filter the list
    matching = [entry for entry in log_list if entry[0] == dt and entry[1] <= tm]
    # `is []` compares identity and is never true; test emptiness instead.
    if not matching:
        return 'No status available for this day and time.'
    #sort it (key= works on Python 2 and 3; cmp= is Python 2 only)
    matching.sort(key=lambda entry: entry[1])
    #pull out the status
    status_to_return = matching[-1][2].split('_')[-1].strip()
    return status_to_return
if __name__ == '__main__':
    a_list = []
    # Plain 'r' mode: the 'U' flag is gone from current Python versions, and
    # `with` closes the file when the block ends.
    with open('a.log', 'r') as in_file:
        for line in in_file:
            # Truthiness test instead of `is not ''`, which compares
            # identity rather than content.
            if line.strip(): #handle whitespace
                a_list.append(line.split(' '))
    #convert string dates and times to datetime objects
    a_list = [ [datetime.datetime.strptime(el[0], '%d/%m/%Y'),
                datetime.datetime.strptime(el[1], '%H:%M:%S'),
                el[2]] for el in a_list]
    a_date = datetime.datetime(2010, 7, 15)
    # strptime with only a time fills in 1900-01-01, hence this date part.
    a_time = datetime.datetime(1900, 1, 1, 16, 1, 0)
    print(getStatus(a_list, a_date, a_time))
Try this:
import datetime
filein = open("filein", "r")
class Status:
    """One log entry: the moment a status change happened and the new status."""

    def __init__(self, date, time, status):
        """Build the entry from text fields.

        Args:
            date: Day as "DD/MM/YYYY".
            time: Time of day as "HH:MM:SS".
            status: Status name stored unchanged in `self.status`.
        """
        day, month, year = map(int, date.split('/'))
        hour, minute, second = map(int, time.split(':'))
        self.date_and_time = datetime.datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second)
        self.status = status
# NOTE: this name shadows the builtin `list`; it is kept because query()
# reads the entries through it.
list = []
for line in filein:
    line = line.rstrip('\n')
    if not line:
        # Skip blank lines instead of stopping at the first one.
        continue
    print(line)
    date, time, status = line.split(' ')[:3]
    # "changed_status_from_<old>_to_<new>": keep only <new>.
    status = status.rsplit('_', 1)[-1]
    status_it = Status(date=date, time=time, status=status)
    list.append(status_it)
def query (date, time):
    """Print the status in force at the given moment.

    Args:
        date: Day as "DD/MM/YYYY".
        time: Time of day as "HH:MM:SS".

    Walks the module-level `list` of entries and prints the status of the
    entry whose timestamp is at or before the moment while the next entry
    (if any) is after it; prints "I don't know the status" otherwise.
    """
    day, month, year = map(int, date.split('/'))
    hour, minute, second = map(int, time.split(':'))
    date_and_time = datetime.datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second)
    for counter, it in enumerate(list):
        # Check for the last entry first: indexing list[counter + 1] on the
        # final element would raise IndexError.
        is_last = counter == len(list) - 1
        if date_and_time >= it.date_and_time and (is_last or date_and_time < list[counter + 1].date_and_time):
            print(it.status)
            return
    print("I don't know the status")
query("15/07/2010", "15:10:01")
From the question user392409 most probably wants to pass the parameters as string and wants a single function.
Lets say i call the function returnstatus(15/07/2010, 15:10:01). The function should go to the file and return the status of the user on that day at that time, which in this case is "Offline".
import datetime
import time
def returnstatus(d, t, path="log.txt"):
    """Return the user's status on day `d` at time `t`.

    Args:
        d: Day as "DD/MM/YYYY".
        t: Time of day as "HH:MM:SS".
        path: Log file to read; each line looks like
            "15/07/2010 15:01:09 changed_status_from_Available_to_Offline".

    Returns:
        The status the user changed *to* in the latest entry that is on the
        same day and not later than `t`, or None when that day has no such
        entry. Lines with fewer than three columns are ignored.
    """
    stamp_format = "%d/%m/%Y %H:%M:%S"
    wanted = datetime.datetime.strptime(d + " " + t, stamp_format)
    best_stamp = None
    status = None
    # `with` closes the file on every path, including "nothing matched".
    with open(path) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 3:
                continue
            stamp = datetime.datetime.strptime(fields[0] + " " + fields[1], stamp_format)
            if stamp.date() != wanted.date() or stamp > wanted:
                continue
            # Keep the latest qualifying entry, whatever the file order.
            if best_stamp is None or stamp >= best_stamp:
                best_stamp = stamp
                # "changed_status_from_<old>_to_<new>": last part is <new>.
                status = fields[2].split("_")[-1]
    return status

Categories

Resources