Hello, I have been trying to generate random data with random dates and write it into a CSV file, but I am getting the following error: expected str instance, numpy.datetime64 found.
Code for the data generator:
import pandas as pd
import numpy as np
import string
import random
def gen_random_email():
domains = [ "hotmail.com", "gmail.com", "aol.com", "mail.com" , "mail.kz", "yahoo.com"]
letters = string.ascii_letters +'.'*5
email = ''.join(np.random.choice(list(letters),10))+'#'+ np.random.choice(domains)
email = email.replace('.#', '#')
return email, "Email"
def gen_random_float():
num = np.random.random()*np.random.randint(2000)
decimal_points = np.random.randint(8)
num = int(num*10**(decimal_points))/10**decimal_points
return str(num), 'Float'
def gen_random_sentence():
nouns = ["puppy", "car", "rabbit", "girl", "monkey"]
verbs = ["runs", "hits", "jumps", "drives", "barfs"]
adv = ["crazily", "dutifully", "foolishly", "merrily", "occasionally"]
adj = ["adorable.", "clueless.", "dirty.", "odd.", "stupid."]
random_entry = lambda x: x[random.randrange(len(x))]
random_entry = " ".join([random_entry(nouns), random_entry(verbs),
random_entry(adv), random_entry(adj)])
return random_entry, 'String'
def gen_random_int():
num = np.random.randint(1000000)
return str(num), 'Int'
def gen_random_date():
monthly_days = np.arange(0, 30)
base_date = np.datetime64('2020-01-01')
random_date = base_date + np.random.choice(monthly_days)
return random_date, 'Date'
def gen_dataset(filename, size=5000):
randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence,gen_random_date]
with open(filename, 'w') as file:
file.write("Text, Type\n")
for _ in range(size):
file.write(",".join(random.choice(randomizers)())+"\n")
gen_dataset('dataaaa.csv')
TypeError: sequence item 0: expected str instance, numpy.datetime64 found
First, catch the error and see what is causing it.
def gen_dataset(filename, size=5000):
randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence,gen_random_date]
with open(filename, 'w') as file:
file.write("Text, Type\n")
for _ in range(size):
f = random.choice(randomizers)
result = f()
try:
file.write(",".join(result)+"\n")
except TypeError:
print(result)
raise
>>>
(numpy.datetime64('2020-01-09'), 'Date')
Traceback (most recent call last):
File "C:\pyProjects\tmp.py", line 80, in <module>
gen_dataset('dataaaa.csv')
File "C:\pyProjects\tmp.py", line 75, in gen_dataset
file.write(",".join(result)+"\n")
TypeError: sequence item 0: expected str instance, numpy.datetime64 found
Hmmm, I wonder if join only accepts strings as arguments?
Yep, from the docs:
A TypeError will be raised if there are any non-string values in iterable, including bytes objects.
I wonder how I can turn a numpy datetime64 into a string. Searching for "numpy datetime64 to string" is productive: Convert numpy.datetime64 to string object in python
These work:
>>> q = gen_random_date()[0]
>>> q
numpy.datetime64('2020-01-27')
>>> np.datetime_as_string(q)
'2020-01-27'
>>> q.astype(str)
'2020-01-27'
>>>
Then just modify the try/except.
def gen_dataset(filename, size=5000):
randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence,gen_random_date]
with open(filename, 'w') as file:
file.write("Text, Type\n")
for _ in range(size):
f = random.choice(randomizers)
a,b = f()
try:
q = ",".join([a,b,"\n"])
except TypeError:
a = np.datetime_as_string(a)
q = ",".join([a,b,"\n"])
file.write(q)
Or simply preemptively make the first item a string.
def gen_dataset(filename, size=5000):
randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence,gen_random_date]
with open(filename, 'w') as file:
file.write("Text, Type\n")
for _ in range(size):
f = random.choice(randomizers)
a,b = f()
q = ",".join([str(a),b,"\n"])
file.write(q)
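As a side note, pandas is imported at the top but never used. A minimal sketch of the same generator built on a DataFrame (reusing the gen_random_* helpers above; everything else here is an assumption) sidesteps the str/datetime64 issue entirely, because to_csv stringifies values on write:
import random
import pandas as pd

def gen_dataset_df(filename, size=5000):
    randomizers = [gen_random_email, gen_random_float, gen_random_int, gen_random_sentence, gen_random_date]
    # each row is a (value, type) tuple; to_csv converts the datetime64 values to text
    rows = [random.choice(randomizers)() for _ in range(size)]
    pd.DataFrame(rows, columns=["Text", "Type"]).to_csv(filename, index=False)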
Related
I am trying to figure out how to average the temperatures from a JSON file and am having trouble with the conversion. It keeps throwing the error TypeError: 'float' object is not subscriptable when calculating temperature averages from the JSON.
>>> import urllib.request
>>> import json
>>>
>>> zipcode_list = ["72714","71640","72454","71834","72223","72110"]
>>> for one_zipcode in zipcode_list:
... link = "https://api.openweathermap.org/data/2.5/weather?zip="+one_zipcode+"&appid=e7c3fb6e681c8f7bb59af33f1dc8bbca"
... f = urllib.request.urlopen(link)
... data = f.read()
... weather = json.loads(data)
... n = (weather["name"])
... x = (weather["main"]["temp"])
... fh = (x-273.1)*9/5+32
... total = round(fh, 2)
print (n, ":", total, "Fahrenheit")
... avg = (x["main"]["temp"].mean())
... print(avg)
...
Bella Vista : 11.93 Fahrenheit
Traceback (most recent call last):
File "<stdin>", line 11, in <module>
TypeError: 'float' object is not subscriptable
Did you mean to collect up all the values of total and find the mean?
import statistics
temps=[]
for one_zipcode in zipcode_list:
link = "https://api.openweathermap.org/data/2.5/weather?zip="+one_zipcode+"&appid=e7c3fb6e681c8f7bb59af33f1dc8bbca"
f = urllib.request.urlopen(link)
data = f.read()
weather = json.loads(data)
n = weather["name"]
x = weather["main"]["temp"]
fh = (x - 273.15) * 9 / 5 + 32  # 273.15 is the exact Kelvin-to-Celsius offset
total = round(fh, 2)
print(n, ":", total, "Fahrenheit")
temps.append(total)
avg = statistics.mean(temps)
print(avg)
You assign a float to x, and then later you try to subscript x, which is why you are seeing the error message. See below.
x = (weather["main"]["temp"])
...
avg = (x["main"]["temp"].mean())
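A minimal reproduction of the error, with an arbitrary value, for anyone who wants to see it in isolation:
>>> x = 284.7
>>> x["main"]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: 'float' object is not subscriptable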
I am not very good at programming and need to change this script. It needs to read the contents of the file, split it into separate keys, process them, and then save the results to a file. Thank you in advance for your help!
File 'text.txt':
0200e7c810f4553fe1722522f8dcfc8e810757ef427efefef79bdf08ddf3700fd5
0216b3e68fed004b2fea2119cdbb8ab2393dfe8fc99398da18e40b6e949e9e1278
022bbf0fcde9bcba6e1038b78bd6906ed00be95d1a6f912a7352f5aca2d7bb6bbc
021060631ef4a610aebc3c9e24f5b0e33dcd0eb422b8223dbd75c1e6edfd21dd72
0218cbb66d6a417890aea6bf5f8a83a4d181a89c5aba8121e20def5b42c311514e
025d8ea956802ed00ebec42b480c0eb77c6ada6ed3fceb40e5fff9aed0fa31c6b4
02264a8c56551abeb68d6112863249857a4360c38528d02b9313988ba062e6efed
import binascii
with open('text.txt') as f:
text = f.read()
compressed_key_hex = text.split('\n')
computed_uncompressed_key = []
p_hex = 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F'
p = int(p_hex, 16)
x_hex = compressed_key_hex[2:66]
x = int(x_hex, 16)
prefix = compressed_key_hex[0:2]
y_square = (pow(x, 3, p) + 7) % p
y_square_square_root = pow(y_square, (p+1)//4, p)
if prefix == "02":
y = (-y_square_square_root) % p
else:
y = y_square_square_root
computed_y_hex = hex(y)[2:66]
computed_uncompressed_key = "04" + x_hex + computed_y_hex
with open('result.txt', 'w') as f:
f.write('\n'.join(computed_uncompressed_key))
I get the error:
===================== RESTART: D:\detailALL\03\Bit06.py =====================
Traceback (most recent call last):
File "D:\detailALL\03\Bit06.py", line 12, in <module>
x = int(x_hex, 16)
TypeError: int() can't convert non-string with explicit base
>>>
You are passing a list rather than a str. In the following code x_hex is a list.
x_hex = compressed_key_hex[2:66]
So you need to convert the list to a str; you can do that as follows:
x_hex = ''.join(compressed_key_hex[2:66])
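A quick interpreter session showing the difference (the values are just for illustration):
>>> int(['a', 'b'], 16)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: int() can't convert non-string with explicit base
>>> int(''.join(['a', 'b']), 16)
171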
I guess the following might be your required solution:
import binascii
with open('text.txt') as f:
text = f.read()
compressed_key_hex = text.split()  # split() with no argument also drops blank entries, e.g. a trailing newline
print(compressed_key_hex)
computed_uncompressed_key_list = []
p_hex = 'FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F'
p = int(p_hex, 16)
for val in compressed_key_hex:
x_hex = val[2:66]
x = int(x_hex, 16)
prefix = val[0:2]
y_square = (pow(x, 3, p) + 7) % p
y_square_square_root = pow(y_square, (p+1)//4, p)
if prefix == "02":
y = (-y_square_square_root) % p
else:
y = y_square_square_root
computed_y_hex = hex(y)[2:66]
computed_y_hex = computed_y_hex.zfill(64)
computed_uncompressed_key = "04" + x_hex + computed_y_hex
computed_uncompressed_key_list.append(computed_uncompressed_key)
with open('result.txt', 'w') as f:
f.write('\n'.join(computed_uncompressed_key_list))
text.txt file:
0200e7c810f4553fe1722522f8dcfc8e810757ef427efefef79bdf08ddf3700fd5
0216b3e68fed004b2fea2119cdbb8ab2393dfe8fc99398da18e40b6e949e9e1278
022bbf0fcde9bcba6e1038b78bd6906ed00be95d1a6f912a7352f5aca2d7bb6bbc
021060631ef4a610aebc3c9e24f5b0e33dcd0eb422b8223dbd75c1e6edfd21dd72
0218cbb66d6a417890aea6bf5f8a83a4d181a89c5aba8121e20def5b42c311514e
025d8ea956802ed00ebec42b480c0eb77c6ada6ed3fceb40e5fff9aed0fa31c6b4
02264a8c56551abeb68d6112863249857a4360c38528d02b9313988ba062e6efed
result.txt file:
0400e7c810f4553fe1722522f8dcfc8e810757ef427efefef79bdf08ddf3700fd5c9b034d2aa9ee1ef7b2346e8fc9c0245a8746a92bfdbb472fc98397477551ced
0416b3e68fed004b2fea2119cdbb8ab2393dfe8fc99398da18e40b6e949e9e12780126dfa95d2d9ab8fc055ce158f1d2ef51c2a012413b3f88a6365f375cf903f8
042bbf0fcde9bcba6e1038b78bd6906ed00be95d1a6f912a7352f5aca2d7bb6bbcf4a39790075ce43dc08fbf0ecc9cc732415e6b066c3b8b8d960b8548e8a612b7
041060631ef4a610aebc3c9e24f5b0e33dcd0eb422b8223dbd75c1e6edfd21dd723f873c976d071939edf8450124da64c3d9a1b35fb070761b01a5bace7d741588
0418cbb66d6a417890aea6bf5f8a83a4d181a89c5aba8121e20def5b42c311514efb4f8645c503e7a39954e977f7af8e802a5ec44ce3084cb6fb4e133a79733e77
045d8ea956802ed00ebec42b480c0eb77c6ada6ed3fceb40e5fff9aed0fa31c6b4e7c279c9d2c3e731803a4dde91a0d9409e49b1cbec3c7ac536a3783d9518d737
04264a8c56551abeb68d6112863249857a4360c38528d02b9313988ba062e6efeddbd8a97a8762f6a1add1ea6f549b61316fe675fc703d49f597a91ad620f7627a
Hello, I get the Python error "too many values to unpack" when I run my script.
Traceback:
File "C:\Python27.1\perpetuum.py", line 193, in __init__
header_offset, header_length = struct.unpack('8sii', f.read(16))
ValueError: too many values to unpack.
Code:
class DataFile(object):
"""GBF file reader"""
def __init__(self, filename):
self.filename = filename
with open(filename, 'rb') as f:
header_offset, header_length = struct.unpack('8sii', f.read(16))
if magic != 'GXYDATA\x1a':
raise Exception('Invalid data file (wrong magic)', magic)
header = self._get_record(header_offset, header_length)
self._records = self._parse_header(header)
def _decode(self, data):
try:
import numpy as np
i = np.arange(len(data), dtype=np.byte)
buf = np.frombuffer(data, np.byte) ^ ((i + 1) * (i ^ -54) - 84)
return buf.tostring()
except ImportError:
buf = array.array('B', data)
for i in xrange(len(data)):
buf[i] = 0xff & (buf[i] ^ ((i + 1) * (i ^ 0xca) - 84))
return buf.tostring()
def _get_record(self, offset, length):
with open(self.filename, 'rb') as f:
f.seek(offset)
data = f.read(length)
return self._decode(data)
def _parse_header(self, header):
"""
header record format:
int numRecords
for each record:
char[nameLen] nameSZ, 0
int offset
int length
int unkLength
"""
records = {}
num_records = struct.unpack_from('i', header)[0]
pos = 4
for i in xrange(num_records):
name_end = header.find('\0', pos)
name = header[pos:name_end]
pos = name_end + 1
offset, length, unkLength = struct.unpack_from('iii', header, pos)
pos += 12
# f1Length = min(13, unkLength)
# f1 = header[pos:pos+f1Length]
pos += unkLength
records[name] = (offset, length)
return records
PREFIX_MAP = {'\x89PNG': '.png',
'DDS ': '.dds',
'A3MF': '.a3m',
'#': '.txt',
'=': '.txt',
'Extended Module': '.xm',
'RIFF': '.wav',
'OggS': '.ogg'}
def _guess_ext(self, name, data):
for prefix, ext in self.PREFIX_MAP.iteritems():
if data.startswith(prefix):
return ext
return '.bin'
CATEGORY_MAP = OrderedDict([
('def*.png', 'icons'),
('icon*.png', 'icons'),
('entityIcon*.png', 'icons'),
('noIcon*.png', 'icons'),
('gfx_*.png', 'gfx'),
('*.a3m', 'models'),
('snd_*', 'sound'),
('altitude*', 'terrain'),
('terrain*', 'terrain'),
('altitude0*', 'terrain'),
('blocks0*', 'terrain'),
('control0*', 'terrain'),
('plants0*', 'terrain'),
('surface0*', 'terrain'),
('tactical*.png', 'tactical_icons'),
('font*', 'font'),
('textures_*.dds', 'textures'),
('corp*.png', 'corp_icons'),
('credits.txt', 'misc'),
('eula*.txt', 'misc'),
('*.txt', 'text_data')])
def dump_record(self, name, dest_dir, sort=False):
offset, length = self._records[name]
print '%08x: %s (%.2f KB)' % (offset, name, length / 1024.)
data = self._get_record(offset, length)
name += self._guess_ext(name, data)
if sort:
for pattern, category in self.CATEGORY_MAP.iteritems():
if fnmatch.fnmatch(name, pattern):
dest_dir = os.path.join(dest_dir, category)
try:
os.makedirs(dest_dir)
except OSError:
pass
break
rec_filename = os.path.join(dest_dir, name)
with open(rec_filename, 'wb') as f:
f.write(data)
def dump_records(self, patterns, dest_dir, sort=False):
for name in self._records:
if any(fnmatch.fnmatch(name, pattern) for pattern in patterns):
self.dump_record(name, dest_dir, sort)
Any ideas?
The code
struct.unpack('8sii', f.read(16))
unpacks into three values: an 8-byte string and two 4-byte integers, while your left-hand side has only two variables.
See the struct documentation for its format string.
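A minimal, runnable sketch of the point (the header values here are fabricated; the name magic matches the check later in __init__):
import struct

# '8sii' describes three fields: an 8-byte string followed by two 4-byte ints,
# so unpacking it needs three names on the left-hand side
raw = struct.pack('8sii', b'GXYDATA\x1a', 16, 128)  # fabricated 16-byte header
magic, header_offset, header_length = struct.unpack('8sii', raw)
# magic holds the 8-byte tag, header_offset == 16, header_length == 128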
This line is making an incorrect assumption:
header_offset, header_length = struct.unpack('8sii', f.read(16))
You are probably better off checking the size of the tuple returned from struct.unpack and then dealing with the results in a conditional manner.
Note that according to the docs, this method always returns a tuple, even if that tuple is of length one.
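Inside __init__ that could look something like this (a sketch, not tested against the GBF format):
values = struct.unpack('8sii', f.read(16))
if len(values) == 3:
    magic, header_offset, header_length = values
else:
    raise ValueError('unexpected header layout: %r' % (values,))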
Stupid me, I had removed magic from before header_offset in the assignment.
drhagen: human stupidity takes a -1 and no answer at all; given my skill at Photoshop I could laugh at you any time.
I want to create a very basic Q&A chatbot. Given a list of questions and answers that I use as my dataset, I want to train it to return relevant answers, depending on a hard-coded question (different every time). First I tokenize and clean up, then use cosine similarity, but it gives me an error, which is (I guess) a pickle issue.
UPDATED
import csv
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from collections import Counter
import pickle
import os.path
import re, math
tokenizer = RegexpTokenizer(r'\w+')
stopwords = stopwords.words('english')
extra_stopwords = stopwords + ['I', 'can']
WORD = re.compile(r'\w+')
def get_clean_data():
clean_data_set = {
'questions' : {},
'answers' : {}
}
reader = csv.reader(open('data.csv', 'r', encoding="utf-8"))
tags = []
counter = 0
for r in reader:
question = str(r[0].encode('utf-8'))
answer = str(r[1].encode('utf-8'))
_, tags_question = get_tags(question)
_, tags_answer = get_tags(answer)
clean_data_set['answers'][answer] = tags_answer + tags_question
clean_data_set['questions'][question] = text_to_vector(question)
counter += 1
# hardcode the number :)
print (counter, ' out of 746')
# pickle.dump(clean_data_set, open('dump.dict', 'wb'))
with open('dump.dict', 'wb') as my_dump_file:
pickle.dump(clean_data_set, my_dump_file)
def get_tags(text, use_set = True):
tokens = tokenizer.tokenize(text)
# remove stop words from tokens
# make it lower case
filtered_words = [word.lower() for word in tokens if word not in extra_stopwords]
# return non duplicate values by default
if use_set == True:
filtered_words = list(set(filtered_words))
return Counter(filtered_words), filtered_words
# simple cosine similarity measure
def get_cosine(vec1, vec2):
intersection = set(vec1.keys()) & set(vec2.keys())
numerator = sum([vec1[x] * vec2[x] for x in intersection])
sum1 = sum([vec1[x]**2 for x in vec1.keys()])
sum2 = sum([vec2[x]**2 for x in vec2.keys()])
denominator = math.sqrt(sum1) * math.sqrt(sum2)
if not denominator:
return 0.0
else:
return float(numerator) / denominator
def text_to_vector(text):
words = WORD.findall(text)
return Counter(words)
# question_set is the data we had
def get_cosine_value(question, question_set):
question_vector = text_to_vector(question)
cosine = get_cosine(question_vector, question_set)
return cosine
def answer_question(question, top = 5):
with open('dump.dict', 'rb') as my_dump_file:
data_set = pickle.load(my_dump_file)
# data_set = pickle.load(open('dump.dict', 'rb'))
_, question_tags = get_tags(question)
ranking_dict = {}
similar_questions_rank = {}
for entry in data_set['answers']:
tags = data_set['answers'][entry]
# rank is the intersection between the list of tags from the question
# and the list of tags associated to answers
rank = len(set(question_tags).intersection(tags))
ranking_dict[entry] = rank
for entry in data_set['questions']:
cosine_similarity = get_cosine_value(question, data_set['questions'][entry])
similar_questions_rank[entry] = cosine_similarity
sorted_similarity_dict = sorted(similar_questions_rank.items(), key=lambda x: x[1], reverse=True)
sorted_ranking_dict = sorted(ranking_dict.items(), key=lambda x: x[1], reverse=True)
# sort them by rank
for item in sorted_ranking_dict[0:top-1]:
print ('Rank: ', item[1])
print ('Answer: ', item[0])
print ('\n\n')
# sort them by rank
for item in sorted_similarity_dict[0:top-1]:
print ('Rank: ', item[1])
print ('Question: ', item[0])
#get_clean_data()
question = 'why all these errors?'
answer_question(question)
This is the updated error message:
Traceback (most recent call last):
File "C:\Users\joasa\Desktop\si\main.py", line 133, in <module>
answer_question(question)
File "C:\Users\joasa\Desktop\si\main.py", line 94, in answer_question
data_set = pickle.load(my_dump_file)
EOFError: Ran out of input
[Finished in 1.4s]
Can someone help please? I have no idea what to do. Thanks in advance
I think it comes from this line in your get_clean_data function:
pickle.dump(clean_data_set, open('dump.dict', 'w'))
See, here you open the file for writing (and in text mode rather than binary), but you never close it, so the pickled data may never actually be flushed to disk; when you later try to read the file back, pickle runs out of input. To avoid stuff like this happening, use a context manager block:
with open('dump.dict', 'wb') as my_dump_file:
pickle.dump(clean_data_set, my_dump_file)
That way, whichever way you exit the with block, you are guaranteed to close your file.
You should also do the same when loading your pickle dump in answer_question:
with open('dump.dict', 'rb') as my_dump_file:
data_set = pickle.load(my_dump_file)
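Note also that in the posted script the call to get_clean_data() is commented out, so dump.dict may never be (re)created, which would also leave pickle.load with an empty or missing file. Make sure it runs once before answer_question():
# build the pickle once, then query it
get_clean_data()
question = 'why all these errors?'
answer_question(question)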
I am new to Python and I am trying to build a program that knows how to encode and decode dual-tone multi-frequency (DTMF) signals used to dial a telephone.
For now the encoding part is working well, but for some reason the decoding is not working, and I get the following exception:
Traceback (most recent call last):
File "C:\Users\matant\workspace\dialer2\dialer.py", line 239, in <module>
x = d.decoder()
File "C:\Users\matant\workspace\dialer2\dialer.py", line 218, in decoder
data = self.read_wav()
File "C:\Users\matant\workspace\dialer2\dialer.py", line 201, in read_wav
n = fin.getnframes()
AttributeError: 'file' object has no attribute 'getnframes'
As you can see, I am writing frames into the file, so I don't understand why this happens.
This is my code:
'''
Created on Jan 10, 2016
@author: matant
'''
import json
from math import pi, sin
import wave
import logging
import struct
import os
ROW_FREQ = (697, 770, 852, 941)
COL_FREQ = (1209, 1336, 1477, 1633)
SAMPLE_RATE = 44100
SAMPLE_WIDTH = 2
NUMBER_OF_CHANNELS = 1
COMPRESSION_TYPE = "NONE"
COMPRESSION_NAME = "Uncompressed"
PI2 = 6.283185306
scale = 32767 #16-bit unsigned short
keys= '1','2','3','A',\
'4','5','6','B',\
'7','8','9','C',\
'*','0','#','D'
FREQUENCY_MAP = dict()
FREQUENCY_MAP['1'] = (697, 1209)
FREQUENCY_MAP['2'] = (697, 1336)
FREQUENCY_MAP['3'] = (697, 1477)
FREQUENCY_MAP['A'] = (697, 1633)
FREQUENCY_MAP['4'] = (770, 1209)
FREQUENCY_MAP['5'] = (770, 1336)
FREQUENCY_MAP['6'] = (770, 1477)
FREQUENCY_MAP['B'] = (770, 1633)
FREQUENCY_MAP['7'] = (852, 1209)
FREQUENCY_MAP['8'] = (852, 1336)
FREQUENCY_MAP['9'] = (852, 1477)
FREQUENCY_MAP['C'] = (852, 1633)
FREQUENCY_MAP['*'] = (941, 1209)
FREQUENCY_MAP['0'] = (941, 1336)
FREQUENCY_MAP['#'] = (941, 1477)
FREQUENCY_MAP['D'] = (941, 1633)
FREQUENCY_MAP['S'] = (0, 0)
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s <%(levelname)s> %(module)s.%(funcName)s() %(message)s',
datefmt='%Y-%m-%d %H:%M:%S')
log = logging.getLogger(__name__)
class DTMF:
VALID_SEQUENCE_TYPES = [list, tuple, set]
def __init__(self, input_string=None, input_list=None):
"""
Initializes a DTMF instance with an option DTMF sequence. This can be a list of lists or a json string.
If both are supplied, it tries to parse the json_string. If it does, it uses that. If there are errors, it
validates the list and tries to use that. Basically input_string takes precedence.
General workflow would be setting dtmf_sequence and calling generate_raw_data. This data can then be saved to a
.wav file or compressed and saved as other, smaller, file formats.
:param input_list: list of lists or tuples of the form [['A', 100], ['S', 50], ['2', 100], ['S', 50]]
:param input_string: json_string of the form '[["A", 100], ["S", 50], ["2", 100], ["S", 50]]'
"""
log.debug("Creating instance of DTMF")
log.debug("input_string = {}".format(input_string))
log.debug("input_list = {}".format(input_list))
self._dtmf_sequence = None
self._raw_data = None
if input_string is not None:
converted_json_sequence = self.parse_json_string(input_string)
self._dtmf_sequence = converted_json_sequence
elif input_list is not None:
self._dtmf_sequence = input_list
@property
def dtmf_sequence(self):
return self._dtmf_sequence
@dtmf_sequence.setter
def dtmf_sequence(self, input_sequence):
if type(input_sequence) == str:
input_sequence = self.parse_json_string(input_sequence)
if type(input_sequence) == list:
if self._dtmf_sequence_is_valid(input_sequence):
self._dtmf_sequence = input_sequence
log.debug("Set _dtmf_sequence to {}".format(self._dtmf_sequence))
def parse_json_string(self, input_string):
return json.loads(input_string)
def generate_raw_data(self):
"""
Generates raw data that can be saved into a .wav file. This can take some time to generate.
:raise AttributeError: If no dtmf sequence has been set
"""
_data = list()
if self._dtmf_sequence is None:
raise AttributeError("No dtmf sequence set")
for tone_tuple in self._dtmf_sequence:
key = tone_tuple[0]
tone_duration = tone_tuple[1]
f1 = FREQUENCY_MAP[key][0]
f2 = FREQUENCY_MAP[key][1]
_data += (self.generate_tone(f1, f2, tone_duration))
self._raw_data = _data
def save_wave_file(self, file_path):
if self._raw_data is None or len(self._raw_data) < 1:
self.generate_raw_data()
f = wave.open(file_path, 'w')
f.setnchannels(NUMBER_OF_CHANNELS)
f.setsampwidth(SAMPLE_WIDTH)
f.setframerate(SAMPLE_RATE)
f.setnframes(len(self._raw_data))
f.setcomptype(COMPRESSION_TYPE, COMPRESSION_NAME)
log.info("Saving wav file {} THIS MAY TAKE A WHILE".format(file_path))
for i in self._raw_data:
f.writeframes(struct.pack('i', i))
log.info("Saved file to {0}".format(file_path))
f.close()
@staticmethod
def dtmf_sequence_is_valid(input_list):
"""
Validates an input sequence for proper structure and contents.
:param input_list:
:return:
"""
if type(input_list) is not list:
log.warning('input_list must be a list instance')
return False
if [(type(item) in DTMF.VALID_SEQUENCE_TYPES) for item in input_list].count(False) != 0:
log.warning('input_list contains invalid sequence type')
return False
for item in input_list:
if type(item[0]) != str or type(item[1]) != int:
log.debug("Type list[0]: {}".format(type(item[0])))
log.debug("Type list[1]: {}".format(type(item[1])))
log.warning('input_list must contain a list of sequences of [str, int]')
return False
return True
@staticmethod
def generate_tone(f1, f2, _duration_in_ms):
"""
Generates a single value representing a sample of two combined frequencies.
:param f1:
:param f2:
:param _duration_in_ms:
:return:
"""
assert f1 in ROW_FREQ or f1 == 0
assert f2 in COL_FREQ or f2 == 0
number_of_samples = int(SAMPLE_RATE * _duration_in_ms / 1000)
scale = 32767 # signed int / 2
result = list()
for i in range(number_of_samples):
p = i * 1.0 / SAMPLE_RATE
result.append(int((sin(p * f1 * pi * 2) + sin(p * f2 * pi * 2)) / 2 * scale))
log.info(
"Generated {0}ms tone of {1} samples with F1: {2} F2: {3}".format(_duration_in_ms, number_of_samples, f1,
f2))
return result
def create_dtmf_wave_file(self, input_sequence, file_path, dump_to_csv=False):
"""
A convenience method. Validates and assigns a dtmf_sequence, then generates data and saves to a .wav
:param input_sequence: list of lists or tuples of the form [['A', 100], ['S', 50], ['2', 100], ['S', 50]] or json_string of the form '[["A", 100], ["S", 50], ["2", 100], ["S", 50]]'
:param file_path: the full path of the wav file that will be saved
"""
self._dtmf_sequence = input_sequence
self.generate_raw_data()
try:
os.remove('dtmf_dump.csv')
except:
pass # file doesn't exist
if dump_to_csv:
with open('dtmf_dump.csv', 'w') as f:
for d in self._raw_data:
f.write(str(d))
f.write(",")
self.save_wave_file(file_path)
def read_wav(self):
fin = open('testNum.wav','r')
n = fin.getnframes()
d = fin.readframes(n)
fin.close()
data = []
for i in range(n):
#LS8bit = inv_endian(ord(d[2*i]))
#MS8bit = inv_endian(ord(d[2*i+1]))
LS8bit, MS8bit = ord(d[2*i]),ord(d[2*i+1])
data.append((MS8bit<<8)+LS8bit)
return data
# Decoder takes a DTMF signal file (.wav), sampled at 44,000
# 16-bit samples per second, and decode the corresponding symbol X.
def decoder(self):
data = self.read_wav()
temp = []
for f1 in ROW_FREQ:
for f2 in COL_FREQ:
diff = 0
for i in range(SAMPLE_RATE): #assume phase has not shifted dramatically
p = i*1.0/SAMPLE_RATE
S=int(scale+scale*(sin(p*f1*PI2)+sin(p*f2*PI2))/2)
diff += abs(S-data[i])
temp.append((diff,f1,f2))
f1,f2 = min(temp)[1:] #retrieve the frequency of minimum signal distortion
i, j = ROW_FREQ.index(f1), COL_FREQ.index(f2)
X = keys[4*i+j]
print 'Decoded key is: ', X
return X
if __name__ == '__main__':
d = 100
sample_input = [('0', d), ('5', d), ('0', d), ('8', d), ('6', d), ('9', d), ('0',d), ('1',d) , ('8',d),('6',d)]
d = DTMF()
d.create_dtmf_wave_file(sample_input, file_path='testNum.wav', dump_to_csv=True)
x = d.decoder()
fin = open('testNum.wav','r')
Looks like you're using the built-in open function instead of the one from the wave module. Try:
fin = wave.open('testNum.wav','r')
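A sketch of read_wav with just that one change (wave is already imported at the top of your script; 'rb' makes the binary mode explicit):
def read_wav(self):
    fin = wave.open('testNum.wav', 'rb')  # Wave_read object, not a plain file object
    n = fin.getnframes()
    d = fin.readframes(n)
    fin.close()
    data = []
    for i in range(n):
        # 16-bit little-endian samples: low byte first, then high byte
        LS8bit, MS8bit = ord(d[2*i]), ord(d[2*i+1])
        data.append((MS8bit << 8) + LS8bit)
    return data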
You have to operate on the Wave_read object returned by wave.open; that object has the getnframes attribute your code is trying to access.
You could also add a from wave import open statement, which would shadow the built-in open, but it is better to access the wave module's open through dot notation as wave.open.
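That is, the shadowing variant would look like this (it works, but it is easy to misread later):
from wave import open  # shadows the built-in open in this module
fin = open('testNum.wav', 'rb')  # this is now wave.open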