I was trying to create some unit tests for my Twitter editor and I ran into a problem when I ran the code. My unit test so far is this:
%%file test_twitter.py
import unittest
class TestTwitterEditor(unittest.TestCase):
"""Populate catalog"""
# NOTE(review): this listing has lost its indentation; the methods below are
# presumably meant to be indented inside the class -- confirm against the
# real file.
def setUp(self):
# Assigns five tweet(text, timestamp) objects to keys 1-5 of self.tweet.
# NOTE(review): neither self.tweet nor the name `tweet` is created or
# imported in the visible code -- verify both exist before this runs.
self.tweet[1] = tweet('Business Tax Accountant openings','Thu Sep 13 01:30:23 +0000 2012')
self.tweet[2] = tweet('Watching #XFactor USA beat show ever !!!', 'Thu Sep 13 0:30:23 +0000 2012')
self.tweet[3] = tweet('Random3', 'Thu Sep 18 5:30:23 +0000 2012')
self.tweet[4] = tweet('Random4', 'Thu Sep 17 5:30:23 +0000 2012')
self.tweet[5] = tweet('Random5', 'Thu Sep 18 7:30:23 +0000 2012')
def tearDown(self):
# Nothing to clean up.
pass
def test_delete_tweet(self):
# Calls delete_tweet as a method of the test case (self.delete_tweet); the
# visible class defines no such method -- TODO confirm where it comes from.
self.assertNotEqual(self.delete_tweet(0,0,0,5),'Random5')
def print_current_tweet(self):
# NOTE(review): the name lacks the "test" prefix that unittest's default
# loader looks for, so this method is not collected as a test.
# The expected value passed to assertEqual is the return value of print(),
# which is always None.
self.assertEqual(self.print_current_tweet(tweet[5] , 0, 5),print(f"The current tweet is the number {tweet[5]} : \"{tweet[5]}\" Made at {tweet[5]}") )
if __name__ == '__main__':
unittest.main()
My delete_tweet function is this:
def delete_tweet(temp , temps , del_tweets , TOTAL_TWEET):
    """Ask for a tweet id on stdin and record or shift entries accordingly.

    The id is read with input() and converted with int().

    * If the id is <= TOTAL_TWEET, it is appended to ``del_tweets``.
    * Otherwise, if the id is <= TOTAL_TWEET + temp, every entry of
      ``temps`` at index ``tweet_id + 1 .. temp - 1`` is overwritten with
      its right-hand neighbour.
    * Any larger id leaves all arguments untouched.

    Args:
        temp: integer added to TOTAL_TWEET for the second bound and used
            as the exclusive end of the shift loop.
        temps: indexable, mutable container whose entries are shifted
            (presumably the tweets created in this session -- confirm
            against the caller).
        del_tweets: list that collects the ids marked as deleted.
        TOTAL_TWEET: integer upper bound for ids that are only marked.

    Returns:
        The tuple ``(temp, temps, del_tweets, TOTAL_TWEET)``; ``temps`` and
        ``del_tweets`` are the same objects that were passed in, mutated
        in place.

    Raises:
        ValueError: if the text typed by the user is not an integer.
    """
    tweet_id = int(input("Enter the id of the tweet you want to delete: "))
    if tweet_id <= TOTAL_TWEET:
        del_tweets.append(tweet_id)
    elif tweet_id <= TOTAL_TWEET + temp:
        # Close the gap by moving each later entry one slot to the left.
        for i in range(tweet_id + 1, temp):
            temps[i] = temps[i + 1]
    return temp, temps, del_tweets, TOTAL_TWEET
And the "error" that I receive is this:
Ran 0 tests in 0.000s
OK
Where is the problem?
I don't understand why I receive this red message. If the assertNotEqual were wrong or invalid, I should receive a message like "Error" or something similar.
I'd like to use Python to read in a list of directories and store data in variables based on a template such as /home/user/Music/%artist%/[%year%] %album%.
An example would be:
artist, year, album = None, None, None
template = "/home/user/Music/%artist%/[%year%] %album%"
path = "/home/user/Music/3 Doors Down/[2002] Away From The Sun"
if text == "%artist%":
artist = key
if text == "%year%":
year = key
if text == "%album%":
album = key
print(artist)
# 3 Doors Down
print(year)
# 2002
print(album)
# Away From The Sun
I can do the reverse easily enough with str.replace("%artist%", artist), but how can I extract the data?
If your folder structure template is reliable the following should work without the need for regular expressions.
path = "/home/user/Music/3 Doors Down/[2002] Away From The Sun"
path_parts = path.split("/")  # divide the path into a list on the slashes
print(path_parts)

# The leading "/" produces an empty first element, so the artist folder
# sits at index 4 and the "[year] album" folder at index 5.
artist = path_parts[4]

# Cut the album folder at the first "] " instead of using fixed character
# offsets, so the year does not have to be exactly four characters long.
bracketed_year, _, album = path_parts[5].partition("] ")
year = bracketed_year[1:]  # drop the opening "["

print(artist)
# 3 Doors Down
print(year)
# 2002
print(album)
# Away From The Sun

# Put the path back together again using an f-string (no need for str.replace)
reconstructed_path = f"/home/user/Music/{artist}/[{year}] {album}"
print(reconstructed_path)
output:
['', 'home', 'user', 'Music', '3 Doors Down', '[2002] Away From The Sun']
3 Doors Down
2002
Away From The Sun
/home/user/Music/3 Doors Down/[2002] Away From The Sun
The following works for me:
from difflib import SequenceMatcher
def extract(template, text):
    """Return the parts of *text* that stand in for placeholders in *template*.

    *template* and *text* are aligned with difflib.SequenceMatcher; every
    stretch that the matcher reports as a 'replace' is taken to be the
    value of one placeholder.  A placeholder can be any single character
    that does not occur in the value it stands for.

    Args:
        template: the pattern, e.g. ``"home/user/Music/%/[%] %"``.
        text: the concrete string to pull the values out of.

    Returns:
        A list with the replaced slices of *text*, in order of appearance
        (empty when the two strings are identical).
    """
    # autojunk is switched off explicitly: when it is on, difflib treats
    # characters that are very frequent in a long second sequence (200+
    # items) as junk and stops matching on them, which would misalign
    # long paths.  Short inputs behave the same either way.
    seq = SequenceMatcher(None, template, text, autojunk=False)
    return [
        text[start:end]
        for tag, _a_start, _a_end, start, end in seq.get_opcodes()
        if tag == 'replace'
    ]
# Demonstration: pull artist, year and album out of a concrete path.
template = "home/user/Music/%/[%] %"
path = "home/user/Music/3 Doors Down/[2002] Away From The Sun"
artist, year, album = extract(template, path)
# One value per output line, in the order artist, year, album.
for value in (artist, year, album):
    print(value)
Output:
3 Doors Down
2002
Away From The Sun
Each template placeholder can be any single character as long as the character is not present in the value to be returned.
Currently, I have a code whereby "users" will key in their username and password to log in.
uname = input("Enter uname: ")
pword = input("Enter pword: ")
.
.
.
if row[1] == pword and row[0] == uname:
LOGIN()
However, I wish to add an "update info" and "generate report" function.
How can I code, using python, such that I can retrieve the "e unit price" of a specific row of the CSV file? (e.g. uname = donnavan12 and pword = Onwdsna)?
Another question that I have is: How can I code, using python, such that I can retrieve the sum of a particular column (e.g. "energy saved") with (e.g. uname = donnavan12 and pword = Onwdsna)?
Sorry that I don't have codes of what I have tried because I don't even know where to begin. I only learned basic python in the past and used dataframe which was much easier but in this project, Pandas is not allowed so I'm rather stumped. (I also added minimal code as I'm afraid of getting my groupmates striked for plagiarism. Please let me know if more code is necessary.)
Try using DictReader in the csv module
Example code:
import csv

# csv.DictReader iterates over *lines*, so it needs an open file object;
# given the filename string itself it would iterate over the characters of
# the name instead of the file's contents.  newline='' is what the csv
# module documents for files passed to its readers.
with open(filename, newline='') as csv_file:
    mcsv = csv.DictReader(csv_file)
    # Materialise the rows while the file is still open so they can be
    # scanned (and modified) repeatedly afterwards.
    rows = list(mcsv)
def get_value(myname, mypass, clm):
    """Return column *clm* of the first row matching the given credentials.

    Args:
        myname: value to match against the row's 'uname' column.
        mypass: value to match against the row's 'pass' column.
        clm: name of the column whose value is returned.

    Returns:
        The value stored under *clm* in the first matching row of the
        module-level ``rows`` list, or None when no row matches.

    Raises:
        KeyError: if a matching row has no column named *clm*.
    """
    for row in rows:
        if row['uname'] == myname and row['pass'] == mypass:
            # Honour the requested column instead of always returning
            # 'e unit price'.
            return row[clm]
    return None
def set_value(myname, mypass, clm, new_value):
    """Store *new_value* under column *clm* in every row matching the credentials.

    Rows are taken from the module-level ``rows`` list and are modified in
    place; nothing is returned.
    """
    matching = (
        record for record in rows
        if record['uname'] == myname and record['pass'] == mypass
    )
    for record in matching:
        record[clm] = new_value
def get_sum(myname, mypass, clm):
    """Return the integer total of column *clm* over all rows matching the credentials.

    Each matching value from the module-level ``rows`` list is converted
    with int() before it is added; the total is 0 when no row matches.
    """
    return sum(
        int(record[clm])
        for record in rows
        if record['uname'] == myname and record['pass'] == mypass
    )
print('energy saved: ', get_sum(myname, mypass, 'energy saved'))
print('e unit price before: ', get_value(myname, mypass, 'e unit price'))
set_value(myname, mypass, 'e unit price', 201)
print('e unit price after: ', get_value(myname, mypass, 'e unit price'))
Input
uname
pass
e unit price
energy saved
abc
123
100
10
cde
456
101
11
abc
123
100
13
fgh
789
102
12
Output
energy saved: 23
e unit price before: 100
e unit price after: 201
I use LibShortText for short-text classification.
I trained a model and use it to get class predictions on my test set by running:
python text-train.py -L 0 -f ./demo/train_file
python text-predict.py ./demo/train_file train_file.model output
The output file contains the score of each class for each test sample. Here is the beginning of the output file:
version: 1
analyzable: 1
text-src: ./demo/train_file
extra-files:
model-id: 22d9e6defd38ed92e45662d576262915d10c3374
Tickets Tickets 1.045974012515694 -0.1533289000025808 -0.142460215262256 -0.1530588765291932 -0.1249182478102407 -0.1190708362082807 -0.06841237067728836 0.04587568197139553 -0.2283616562229066 -0.102238591774343
Stamps Stamps -0.1187719176481736 1.118188003417143 -0.08034439513604429 -0.1973997029054026 -0.06355109135595602 -0.1786639939826796 -0.1169254102259164 -0.01967861752032143 -0.06964465109882922 -0.2732082235438185
Music Music -0.1315596826953709 -0.2641082947449856 1.008713836384851 -0.04068831625284784 -0.1545790157496564 -0.1010212095804389 -0.02069378431571431 -0.02404317930606417 0.008960552873498827 -0.2809809066132714
Jewelry & Watches Jewelry & Watches -0.0749032450936907 -0.1369122108940684 -0.2159355702219642 0.9582440549577076 -0.141187218792264 -0.1290355317490395 -0.04287756450848382 -0.0919782002284954 -0.04312539181047169 -0.0822891216592294
Tickets Tickets 0.9291396425612148 -0.1597595507175184 -0.07086077554348413 -0.07087036006347401 -0.1111802245732816 -0.2329161314957608 -0.07080154336497513 -0.07093153970747144 -0.07096098431125453 -0.07085853278399512
Books Books -0.03482279197164031 -0.02622229736755784 -0.08576360644172253 -0.1209545478269265 0.9735039690597804 -0.02640896142537765 -0.1511226188239169 -0.1785299152500055 -0.1569282110333412 -0.1927510189192921
Tickets Tickets 1.165624491239117 -0.1643444003616841 -0.279795018266336 -0.05911033737681937 -0.1496733471948844 -0.1774767469424229 -0.1806900189575362 -0.05711408596057094 0.06427848575613292 -0.1616990219349959
Art Art -0.07563152438778584 -0.1926345255861422 -0.1379519287608234 -0.1728869014895525 -0.2081235484009353 0.9764371359082827 -0.06097998223834129 -0.06082239643658216 -0.0434090642865785 -0.0239972643215402
Art Art -0.21374038053991 0.0146962630542977 -0.02279914632208601 -0.001108284295731699 -0.2621058759589903 1.016592310148241 0.01436347343617804 -0.04476369315079338 -0.1246095742882179 -0.3765250920829869
Books Books -0.08063364674726788 -0.08053738921453879 -0.08032365427931695 -0.1496633152184083 0.9195583554164264 -0.08011940998873018 -0.08053175336913043 -0.16302082274963 -0.1105339242133948 -0.09419443963601073
How can I know which class each score corresponds to?
I know I could infer it by looking at the predicted class and the maximum score for several test samples, but I'm hoping there exists some more direct way.
The labels member of the PredictionResult returned from predict_text() contains the ordering. So a small addition to classifier_impl.py will expose the labels as column headers in the output file:
*** libshorttext-1.1/libshorttext/classifier/classifier_impl.py.orig
--- libshorttext-1.1/libshorttext/classifier/classifier_impl.py
***************
*** 113,118 ****
--- 113,125 ----
fmt = '\t{{0:{0}}}'.format(fmt)
for i in range(len(self.predicted_y)):
+ if i == 0:
+ label_text = 'Predicted' * 18
+ label_text += 'True class' * 18
+ for l in self.labels:
+ label_text += " {0: <18}".format(l)
+ fout.write(label_text + "\n")
+
fout.write("{py}\t{y}".format(py = self.predicted_y[i], y = self.true_y[i]))
for v in self.decvals[i]:
fout.write(fmt.format(v))
I'm working on text file processing using Python.
I've got a text file (ctl_Files.txt) which has the following content/ or similar to this:
------------------------
Changeset: 143
User: Sarfaraz
Date: Tuesday, April 05, 2011 5:34:54 PM
Comment:
Initial add, all objects.
Items:
add $/Systems/DB/Expences/Loader
add $/Systems/DB/Expences/Loader/AAA.txt
add $/Systems/DB/Expences/Loader/BBB.txt
add $/Systems/DB/Expences/Loader/CCC.txt
Check-in Notes:
Code Reviewer:
Performance Reviewer:
Reviewer:
Security Reviewer:
------------------------
Changeset: 145
User: Sarfaraz
Date: Thursday, April 07, 2011 5:34:54 PM
Comment:
edited objects.
Items:
edit $/Systems/DB/Expences/Loader
edit $/Systems/DB/Expences/Loader/AAA.txt
edit $/Systems/DB/Expences/Loader/AAB.txt
Check-in Notes:
Code Reviewer:
Performance Reviewer:
Reviewer:
Security Reviewer:
------------------------
Changeset: 147
User: Sarfaraz
Date: Wednesday, April 06, 2011 5:34:54 PM
Comment:
Initial add, all objects.
Items:
delete, source rename $/Systems/DB/Expences/Loader/AAA.txt;X34892
rename $/Systems/DB/Expences/Loader/AAC.txt.
Check-in Notes:
Code Reviewer:
Performance Reviewer:
Reviewer:
Security Reviewer:
------------------------
To process this file I wrote the following code:
# Tags - used for splitting the information
tag1 = 'Changeset:'
tag2 = 'User:'
tag3 = 'Date:'
tag4 = 'Comment:'
tag5 = 'Items:'
tag6 = 'Check-in Notes:'

# Open and read the input file, flattening it onto a single line.
# Backslashes in the path are doubled because '\' is the escape character.
with open("C:\\Users\\md_sarfaraz\\Desktop\\ctl_Files.txt", "r") as myfile:
    val = myfile.read().replace('\n', ' ')

# Count the occurrences of one tag; the count is the same for all the tags.
occurence = val.count(tag1)

# Split the text once per tag up front instead of re-splitting the whole
# file for every field of every record.
tags = (tag1, tag2, tag3, tag4, tag5, tag6)
pieces_by_tag = [val.split(tag) for tag in tags[:-1]]

lines = []
for count in range(1, occurence + 1):
    # Field k is the text between the count-th occurrence of tag k and the
    # next occurrence of tag k+1.
    fields = [
        pieces[count].split(next_tag)[0].strip()
        for pieces, next_tag in zip(pieces_by_tag, tags[1:])
    ]
    lines.append('|'.join(fields) + '\n')
row = ''.join(lines)

# Open and write the output file; the with-block closes it even if the
# write fails.
with open("C:\\Users\\md_sarfaraz\\Desktop\\processed_ctl_Files.txt", "w+") as file:
    file.write(row)
and got the following result/File (processed_ctl_Files.txt):
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader add $/Systems/DB/Expences/Loader/AAA.txt add $/Systems/DB/Expences/Loader/BBB.txt add $/Systems/DB/Expences/Loader/CCC.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader edit $/Systems/DB/Expences/Loader/AAA.txt edit $/Systems/DB/Expences/Loader/AAB.txt
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|delete, source rename $/Systems/DB/Rascal/Expences/AAA.txt;X34892 rename $/Systems/DB/Rascal/Expences/AAC.txt.
But, I want the result like this:
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader
add $/Systems/DB/Expences/Loader/AAA.txt
add $/Systems/DB/Expences/Loader/BBB.txt
add $/Systems/DB/Expences/Loader/CCC.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader
edit $/Systems/DB/Expences/Loader/AAA.txt
edit $/Systems/DB/Expences/Loader/AAB.txt
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|delete, source rename $/Systems/DB/Rascal/Expences/AAA.txt;X34892
rename $/Systems/DB/Rascal/Expences/AAC.txt.
or it would be great if we can get results like this :
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader/AAA.txt
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader/BBB.txt
143|Sarfaraz|Tuesday, April 05, 2011 5:34:54 PM|Initial add, all objects.|add $/Systems/DB/Expences/Loader/CCC.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader/AAA.txt
145|Sarfaraz|Thursday, April 07, 2011 5:34:54 PM|edited objects.|edit $/Systems/DB/Expences/Loader/AAB.txt
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|delete, source rename $/Systems/DB/Rascal/Expences/AAA.txt;X34892
147|Sarfaraz|Wednesday, April 06, 2011 5:34:54 PM|Initial add, all objects.|rename $/Systems/DB/Rascal/Expences/AAC.txt.
Let me know how I can do this. Also, I'm very new to Python, so please excuse any lousy or redundant code I've written, and help me improve it.
This solution is not as short and probably not as effective as the answer utilizing regular expressions, but it should be quite easy to understand. The solution does make it easier to use the parsed data because each section data is stored into a dictionary.
ctl_file = "ctl_Files.txt"  # path of source file
processed_ctl_file = "processed_ctl_Files.txt"  # path of destination file

# Tags - used for splitting the information
changeset_tag = 'Changeset:'
user_tag = 'User:'
date_tag = 'Date:'
comment_tag = 'Comment:'
items_tag = 'Items:'
checkin_tag = 'Check-in Notes:'
section_separator = "------------------------"


def _finish_changeset(fields, comment_lines, items):
    """Combine the pieces gathered for one section into a changeset dict."""
    changeset = dict(fields)
    changeset[comment_tag] = " ".join(comment_lines)
    changeset[items_tag] = list(items)
    return changeset


def parse_changesets(lines):
    """Parse changeset sections from an iterable of text lines.

    Args:
        lines: any iterable of strings (an open text file works), one
            line of the changeset listing per element.

    Returns:
        A list with one dict per section.  Each dict is keyed by the tag
        strings: the 'Changeset:', 'User:' and 'Date:' values as stripped
        strings (present when the section had such a line), the comment
        as a single string (several comment lines are joined with a
        space) and the items as a list of stripped lines.
    """
    header_tags = (changeset_tag, user_tag, date_tag)
    changesets = []
    fields, comment_lines, items = {}, [], []
    stage = None  # tag of the multi-line part currently being read, if any

    for line in lines:
        text = line.strip()

        if text.startswith(section_separator):
            # A separator closes the section before it.  Nothing has been
            # collected at the very first separator, so nothing is stored.
            if fields or comment_lines or items:
                changesets.append(_finish_changeset(fields, comment_lines, items))
            fields, comment_lines, items = {}, [], []
            stage = None
            continue

        if not text:
            # A blank line ends the comment / items part being read.
            stage = None
            continue

        header = next((tag for tag in header_tags if text.startswith(tag)), None)
        if header is not None:
            # Take everything after the tag: splitting on every ":" would
            # cut the time of day off the date ("5:34:54 PM").
            fields[header] = text[len(header):].strip()
            stage = None
        elif text.startswith(comment_tag):
            stage = comment_tag
            same_line = text[len(comment_tag):].strip()
            if same_line:
                comment_lines.append(same_line)
        elif text.startswith(items_tag):
            # Switching stage here keeps item lines from overwriting the
            # comment when no blank line separates the two parts.
            stage = items_tag
        elif text.startswith(checkin_tag):
            # Check-in notes are not parsed; entering this stage only stops
            # the reviewer lines from being collected as items.
            stage = checkin_tag
        elif stage == comment_tag:
            comment_lines.append(text)
        elif stage == items_tag:
            items.append(text)

    # Keep the last section even when the input lacks a closing separator.
    if fields or comment_lines or items:
        changesets.append(_finish_changeset(fields, comment_lines, items))
    return changesets


def format_changeset(changeset):
    """Render one changeset dict as output text.

    The first line is 'changeset|user|date|comment|first item'; every
    further item goes on its own line, indented so that it lines up under
    the first item.  Missing fields are rendered as empty strings.  The
    returned string ends with a newline.
    """
    prefix = "{0}|{1}|{2}|{3}|".format(
        changeset.get(changeset_tag, ""),
        changeset.get(user_tag, ""),
        changeset.get(date_tag, ""),
        changeset.get(comment_tag, ""),
    )
    continuation = "\n" + " " * len(prefix)
    return prefix + continuation.join(changeset.get(items_tag, [])) + "\n"


if __name__ == "__main__":
    # Open and read the input file
    with open(ctl_file, 'r') as read_file:
        changesets = parse_changesets(read_file)

    # Open and write to the output file
    with open(processed_ctl_file, 'w') as write_file:
        for changeset in changesets:
            write_file.write(format_changeset(changeset))
Also, try to use variable names that describe their content. Names like tag1, tag2, etc. do not say much about what the variable holds. This makes code difficult to read, especially when scripts get longer. Readability might seem unimportant in most cases, but when revisiting old code it takes much longer to understand what the code does if the variable names are not descriptive.
I would start by extracting the values into variables. Then create a prefix from the first few tags. You can count the number of characters in the prefix and use that for the padding. When you get to items, append the first one to the prefix and any other item can be appended to padding created from the number of spaces that you need.
# Replacement for the main loop of the question's script.
# NOTE(review): this fragment uses val, tag1..tag6 and row without defining
# them; it presumably runs after the question's setup code -- confirm.
# NOTE(review): indentation was lost in this listing.
# keywords used in the tag "Items: "
keywords = ['add', 'delete', 'edit', 'source', 'rename']
#passing the count - occurence to the loop
for cs in val.split(tag1)[1:]:
# cs is the text of one changeset: everything between two tag1 occurrences.
changeset = cs.split(tag2)[0].strip()
user = cs.split(tag2)[1].split(tag3)[0].strip()
date = cs.split(tag3)[1].split(tag4)[0].strip()
comment = cs.split(tag4)[1].split(tag5)[0].strip()
# The items text is split on whitespace, so items is a flat list of tokens
# (keywords and paths alike); a path containing a space becomes several tokens.
items = cs.split(tag5)[1].split(tag6)[0].strip().split()
# notes is assigned here but not used in the visible code.
notes = cs.split(tag6)
prefix = '{0}|{1}|{2}|{3}'.format(changeset, user, date, comment)
space_count = len(prefix)
i = 0
while i < len(items):
# if we are printing the first item, add it to the other text
if i == 0:
pref = prefix
# otherwise create padding from spaces
else:
pref = ' '*space_count
# add all keywords
words = ''
# Collect the run of keyword tokens starting at i; j stops on the first
# token that is not a keyword.
for j in range(i, len(items)):
if items[j] in keywords:
words += ' ' + items[j]
else:
break
# This check cannot trigger: the while condition already guarantees
# i < len(items) and i has not changed since.
if i >= len(items): break
# NOTE(review): if every remaining token is a keyword the for loop ends
# without break, so items[j] is the last keyword and appears in words too.
row += '{0}|{1} {2}\n'.format(pref, words, items[j])
i += j - i + 1 # increase by the number of keywords + the param
This seems to do what you want, but I am not sure if this is the best solution. Maybe it is better to process the file line by line and print the values straight to the stream?
You can use a regular expression to search for 'add', 'edit' etc.
import re

# Tags - used for splitting the information
tag1 = 'Changeset:'
tag2 = 'User:'
tag3 = 'Date:'
tag4 = 'Comment:'
tag5 = 'Items:'
tag6 = 'Check-in Notes:'

# Open and read the input file, flattening it onto a single line
with open("wibble.txt", "r") as myfile:
    val = myfile.read().replace('\n', ' ')

# Count the occurrences of one tag; the count is the same for all the tags
occurence = val.count(tag1)

# Split the text once per tag here rather than on every loop iteration.
changeset_chunks = val.split(tag1)
user_chunks = val.split(tag2)
date_chunks = val.split(tag3)
comment_chunks = val.split(tag4)
item_chunks = val.split(tag5)

# A new item starts after a run of two or more whitespace characters that
# is followed by an action keyword.  The keywords must be a group of whole
# words: "[edit]" is a character class that matches ONE of e, d, i or t.
item_start = re.compile(r"\s\s+(edit|add|delete|rename)\b")

row = ""
for count in range(1, occurence + 1):
    prefix = '|'.join((
        changeset_chunks[count].split(tag2)[0].strip(),
        user_chunks[count].split(tag3)[0].strip(),
        date_chunks[count].split(tag4)[0].strip(),
        comment_chunks[count].split(tag5)[0].strip(),
    )) + '|'
    # Continuation lines are padded to the prefix width so that every item
    # lines up under the first one.
    continuation = "\n" + " " * len(prefix)
    # strip() first: otherwise the whitespace between "Items:" and the
    # first item matches the pattern and pushes that item onto its own line.
    items_text = item_chunks[count].split(tag6)[0].strip()
    # A function replacement inserts the padding literally, with no
    # backslash-escape processing of the replacement text.
    row += prefix + item_start.sub(
        lambda match: continuation + match.group(1), items_text
    ) + '\n'  # '\n' (not '\r') so each record ends with a normal line break

# Open and write the output file; the with-block closes it even if the
# write fails.
with open("wobble.txt", "w+") as file:
    file.write(row)