Extracting data from string with specific format using Python

Extracting data from string with specific format using Python - python

I am a novice with Python and am currently trying to use it to parse a string in a custom output format. The format contains named lists of floats and lists of tuples of floats. I wrote a function, but it looks excessive. How can this be done in a more Pythonic way?
import re
# Signed decimal with optional fraction and exponent, e.g. "3", "-0.5", "1.2e-3".
_NUMBER = r'[-+]?\d+(?:\.\d+)?(?:[eE][-+]?\d+)?'
# Two comma-separated numbers: the inside of one "( x , y )" group.
_POINT_RE = re.compile(r'\s*(' + _NUMBER + r')\s*,\s*(' + _NUMBER + r')\s*')


def extract_line(line):
    """Parse one line of named measurements into a dict of lists.

    The line looks like ``#10 name = { ... } ; other = { ... } ;``.  The entry
    called ``points`` holds ``( x , y )`` groups and becomes a list of float
    tuples; every other entry holds whitespace-separated numbers and becomes
    a list of floats.  Entries with an empty body are omitted, and a name
    that occurs more than once accumulates its values.

    Args:
        line: The text to parse.

    Returns:
        A dict mapping each entry name to its list of values.

    Raises:
        ValueError: If an entry has no ``=`` part, a point is not two
            comma-separated numbers, or a value is not a valid float.
    """
    # Drop the leading "#<number>" tag and any padding in front of the first name.
    line = line.lstrip('0123456789# ')
    measurement = {}
    # filter(None, ...) discards the empty piece left after the final ';'.
    for elem in filter(None, re.split(r'\s*;\s*', line)):
        elem_list = list(filter(None, re.split(r'\s*=\s*', elem)))
        if len(elem_list) < 2:
            raise ValueError('expected "name = { ... }", got %r' % elem)
        name = elem_list[0]
        body = elem_list[1].strip(' {}')
        if name == 'points':
            # Splitting on either parenthesis leaves one "x , y" string per point.
            for point in filter(None, re.split(r'\s*\(\s*|\s*\)\s*', body)):
                match = _POINT_RE.match(point)
                if match is None:
                    raise ValueError('malformed point %r' % point)
                measurement.setdefault('points', []).append(
                    tuple(map(float, match.groups())))
        else:
            values = [float(value) for value in body.split()]
            if values:
                measurement.setdefault(name, []).extend(values)
    return measurement


to_parse = '#10 points = { ( 2.96296 , 0.822213 ) ( 3.7037 , 0.902167 ) } ; L = { 5.20086 } ; P = { 3.14815 3.51852 } ;'
print(extract_line(to_parse))

You can do it using re.findall:
import re
to_parse = '#10 points = { ( 2.96296 , 0.822213 ) ( 3.7037 , 0.902167 ) } ; L = { 5.20086 } ; P = { 3.14815 3.51852 } ;'
# Each "name = { ... }" group yields one (name, body) pair.
m_list = re.findall(r'(\w+)\s*=\s*{([^}]*)}', to_parse)
# One numeric field; the optional sign and exponent keep values such as
# "-1.5" or "2e-3" intact instead of silently losing the sign.
number = r'[-+]?[0-9.]+(?:[eE][-+]?[0-9]+)?'
measurements = {}
for k, v in m_list:
    if k == 'points':
        # Every "x , y" pair inside the braces becomes one tuple of floats.
        elts = re.findall(r'(' + number + r')\s*,\s*(' + number + r')', v)
        values = [tuple(map(float, elt)) for elt in elts]
    else:
        values = [float(x) for x in v.split()]
    # Extend rather than assign so a repeated name keeps its earlier values.
    measurements.setdefault(k, []).extend(values)
print(measurements)
Feel free to put it in a function and to check whether a key already exists.

This:
import re
a=re.findall(r' ([\d\.eE-]*) ',to_parse)
map(float, a)
>> [2.96296, 0.822213, 3.7037, 0.902167, 5.20086, 3.14815]
Will give you your list of numbers, is that what you look for?

Related

What's the best method to create a dictionary from outputs of multiple for loops

This is my code:
def get_coin_tickers(url, timeout=10):
    """Fetch *url* and return its JSON body decoded into Python objects.

    Args:
        url: Address of the JSON endpoint to query.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests.get`` can block indefinitely.

    Returns:
        The decoded JSON document.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    req = requests.get(url, timeout=timeout)
    # Fail here on an HTTP error rather than later, when the caller indexes
    # into a payload that does not have the expected keys.
    req.raise_for_status()
    return json.loads(req.text)
pair_a_list = ["BTC_USDT", "EOS_USDT", "ETH_USDT"]
pair_b_list = ["SOL_USDT", "MATIC_USDT", "SUSHI_USDT"]

# First pass: fetch the order book of each symbol in pair_a_list.  The
# ask/bid names are rebound on every iteration, so only the values of the
# last symbol are still available once the loop has finished.
for pair_a in pair_a_list:
    orderbook_url = f'https://api.pionex.com/api/v1/market/depth?symbol={pair_a}&limit=5'
    pair_a_prices_json = get_coin_tickers(orderbook_url)
    pair_a_ask = pair_a_prices_json['data']['asks'][0][0]
    pair_a_bid = pair_a_prices_json['data']['bids'][0][0]

# Second pass: same for pair_b_list (the response is stored under the
# pair_a_prices_json name here as well).
for pair_b in pair_b_list:
    orderbook_url = f'https://api.pionex.com/api/v1/market/depth?symbol={pair_b}&limit=5'
    pair_a_prices_json = get_coin_tickers(orderbook_url)
    pair_b_ask = pair_a_prices_json['data']['asks'][0][0]
    pair_b_bid = pair_a_prices_json['data']['bids'][0][0]

# Built once, after both loops, from whatever the names last referred to.
keys = ['pair_a', 'pair_a_ask', 'pair_a_bid', 'pair_b', 'pair_b_ask', 'pair_b_bid']
values = [pair_a, pair_a_ask, pair_a_bid, pair_b, pair_b_ask, pair_b_bid]
mydict = dict(zip(keys, values))
print(mydict)
I'm able to create a Dictionary but with only one Symbol-Pairs from each list. Which seems to be outputs from only the last symbol pairs of both the lists:
{'pair_a': 'ETH_USDT', 'pair_a_ask': '1254.18', 'pair_a_bid': '1253.51', 'pair_b': 'SUSHI_USDT', 'pair_b_ask': '0.9815', 'pair_b_bid': '0.9795'}
I'm expecting to see a combined dictionary with values of both lists as keys (with their API-Values) in the final list (after) iterating through both lists using for Loops

I found a solution to my question based on a suggestion from @JonSG and adapted it to my case. I've included the code below for others to use if the need arises.
def _best_prices(symbol, label):
    """Return ``{'pair_<label>_ask': ..., 'pair_<label>_bid': ...}`` for *symbol*.

    The values are the first field of the first ``asks`` and ``bids`` entry
    in the depth response for *symbol*.
    """
    orderbook_url = f'https://api.pionex.com/api/v1/market/depth?symbol={symbol}&limit=1'
    prices_json = get_coin_tickers(orderbook_url)
    return {
        f'pair_{label}_ask': prices_json['data']['asks'][0][0],
        f'pair_{label}_bid': prices_json['data']['bids'][0][0],
    }


# One merged dict per (a, b, c) triple.  The loop variables get their own
# names: reusing pair_a_list etc. as loop targets would rebind the lists to
# single symbols and break any later use of them.
my_dicts = []
for pair_a, pair_b, pair_c in zip(pair_a_list, pair_b_list, pair_c_list):
    my_dict_a = _best_prices(pair_a, 'a')
    my_dict_b = _best_prices(pair_b, 'b')
    my_dict_c = _best_prices(pair_c, 'c')
    # On Python 3.9+ this can also be written my_dict_a | my_dict_b | my_dict_c.
    my_dict = {**my_dict_a, **my_dict_b, **my_dict_c}
    my_dicts.append(my_dict)

How to replace and insert a new node into a ast tree using esprima for python?

I am having trouble replacing a node in an AST with a new one using esprima for Python. There is an example on GitHub, but when I adapt it, every matching node is replaced with the same node that I created. I want each node replaced individually while the rest of the tree stays intact.
from __future__ import print_function
import json
import esprima
from jscodegen_py import jscodegen
# Build a CallExpression expression statement manually:
# (disabled; nothing below refers to `other_alert`)
# callee = esprima.nodes.Identifier("alert")
# args = [esprima.nodes.Literal("other alert", "'other alert'")]
# call = esprima.nodes.CallExpression(callee, args)
# other_alert = esprima.nodes.ExpressionStatement(call)
# Code generator shared by ast2js() and MyVisitor below.
# NOTE(review): indent=2 presumably selects two-space output indentation - confirm against jscodegen.
generator = jscodegen.CodeGenerator(indent = 2)
def js2ast(js: str):
    """Parse the JavaScript source text *js* with esprima and return the tree."""
    tree = esprima.parseScript(js)
    return tree
def ast2js(ast: dict):
    """Turn the tree *ast* back into JavaScript text with the module-level generator."""
    source = generator.generate(ast)
    return source
# Accessory functions to inline, keyed by name: the original source, the
# expression the function returns, and the operator it wraps.
af = {'Lg': {'RawString': 'var Lg = function(WN5, AN5) {\n return WN5 > AN5;\n };', 'RawValue': 'WN5 > AN5', 'operator': '>'}}
# Parse each 'RawValue' once and keep the first top-level statement of the
# result; the visitor uses these parsed statements as templates.
accessory_function_expression_statements = {}
for name, spec in af.items():
    accessory_function_expression_statements[name] = esprima.parse(spec['RawValue']).body[0]
# Visitor passed to esprima.parse() as `delegate`; it rewrites calls to `Lg`.
# NOTE(review): the indentation of this snippet was lost. The statements below
# are presumably the body of transform_CallExpression, with the `return`
# inside the `if` - confirm the nesting before running.
class MyVisitor(esprima.NodeVisitor):
# Presumably invoked by esprima.NodeVisitor for each CallExpression node - confirm.
def transform_CallExpression(self, node, metadata):
# If the callee is named `Lg`, replace the call with the parsed
# 'WN5 > AN5' template, using the call's two arguments as its operands:
if node.callee.name == 'Lg':
# Round-trip each argument through the code generator and parser so it
# becomes a freshly parsed statement.
new_node_arguments = []
for item in node.arguments:
new_node_arguments.append(esprima.parse(generator.generate_expression(item.toDict(), 0)).body[0])
# NOTE(review): this looks up the same template object on every call and
# nothing copies it, so assigning left/right below also changes the node
# returned for earlier `Lg` calls - likely why every replaced call ends up
# showing the operands of the last one.
new_node = accessory_function_expression_statements['Lg'].expression
new_node.left = new_node_arguments[0].expression
new_node.right = new_node_arguments[1].expression
print(f'new_node: {new_node}')
return self.transform_Object(new_node, metadata) # every time this is called it will walk down the tree from the beginning
# Parse the sample script with a MyVisitor instance as the delegate.
visitor = MyVisitor()
# The sample contains two `Lg(...)` calls: one in the `if` test and one
# inside `Boolean(...)`.
tree = esprima.parse("""
if (Lg(GP5["length"], 5)) {
var kP5 = window["parseInt"](GP5[5], 10);
lP5 = window["isNaN"](kP5) || dK(hA(1), kP5) ? window["Number"]["MAX_VALUE"] : kP5,
lP5 = kP5;
var abc = Boolean(Lg(Jj, 21))
}
""", delegate=visitor)
# Regenerate JavaScript from the resulting tree and print it.
print(ast2js(tree.toDict()))
But using this code it gives this result.
if (Jj > 21) {
var kP5 = window["parseInt"](GP5[5], 10);
lP5 = window["isNaN"](kP5) || dK(hA(1), kP5) ? window["Number"]["MAX_VALUE"] : kP5, lP5 = kP5;
var abc = Boolean(Jj > 21);
}
As you can see, the script replaces every `Lg(...)` call with `Jj > 21`, but I want the output to look like this:
if (GP5.length > 5) {
var kP5 = window["parseInt"](GP5[5], 10);
lP5 = window["isNaN"](kP5) || dK(hA(1), kP5) ? window["Number"]["MAX_VALUE"] : kP5, lP5 = kP5;
var abc = Boolean(Jj > 21);
}
How can I do this using esprima in python?

How to get Specific values from printed value on Python and sort from high to small

I am trying to use the Binance API for my project. I would like to list the top gainers and sort them from highest to lowest. I tried a couple of things, but they did not work.
I would like to print only "symbol" and "priceChangePercent".
Is there any way to get these two values?
This is my output:
[
{
"symbol":"FIDABUSD",
"priceChange":"0.41800000",
"priceChangePercent":"6.375",
"weightedAvgPrice":"6.95111809",
"prevClosePrice":"6.54400000",
"lastPrice":"6.97500000",
"lastQty":"28.30000000",
"bidPrice":"6.97400000",
"bidQty":"74.80000000",
"askPrice":"6.97900000",
"askQty":"3.30000000",
"openPrice":"6.55700000",
"highPrice":"7.20000000",
"lowPrice":"6.47700000",
"volume":"354812.40000000",
"quoteVolume":"2466342.89060000",
"openTime":1633166019175,
"closeTime":1633252419175,
"firstId":78716,
"lastId":88805,
"count":10090
},
{
"symbol":"FIDABNB",
"priceChange":"0.00093000",
"priceChangePercent":"6.008",
"weightedAvgPrice":"0.01614960",
"prevClosePrice":"0.01546000",
"lastPrice":"0.01641000",
"lastQty":"109.10000000",
"bidPrice":"0.01643000",
"bidQty":"97.50000000",
"askPrice":"0.01649000",
"askQty":"140.60000000",
"openPrice":"0.01548000",
"highPrice":"0.01663000",
"lowPrice":"0.01533000",
"volume":"75225.50000000",
"quoteVolume":"1214.86161500",
"openTime":1633166016671,
"closeTime":1633252416671,
"firstId":8400,
"lastId":9840,
"count":1441
},
]
Here's what I tried:
class BinanceConnection:
    """Wraps a Binance ``Client`` built from a credentials file."""

    def __init__(self, file):
        """Connect immediately using the credentials stored in *file*."""
        self.connect(file)

    def connect(self, file):
        """Creates Binance client.

        Args:
            file: Path to a text file whose first line is the API key and
                whose second line is the API secret.

        Raises:
            ValueError: If the file has fewer than two lines.
            OSError: If the file cannot be opened.
        """
        # The context manager closes the file even if reading fails.
        with open(file) as handle:
            lines = [line.rstrip('\n') for line in handle]
        if len(lines) < 2:
            raise ValueError(
                'credentials file must contain the key and the secret on separate lines')
        key = lines[0]
        secret = lines[1]
        self.client = Client(key, secret)


if __name__ == '__main__':
    # NOTE(review): `filename` is not defined in this snippet; it must be set
    # to the path of the credentials file before this runs.
    connection = BinanceConnection(filename)
    prices = connection.client.get_ticker()
    print(prices)

see below - a 1 liner
data = [
{
"symbol":"FIDABUSD",
"priceChange":"0.41800000",
"priceChangePercent":"6.375",
"weightedAvgPrice":"6.95111809",
"prevClosePrice":"6.54400000",
"lastPrice":"6.97500000",
"lastQty":"28.30000000",
"bidPrice":"6.97400000",
"bidQty":"74.80000000",
"askPrice":"6.97900000",
"askQty":"3.30000000",
"openPrice":"6.55700000",
"highPrice":"7.20000000",
"lowPrice":"6.47700000",
"volume":"354812.40000000",
"quoteVolume":"2466342.89060000",
"openTime":1633166019175,
"closeTime":1633252419175,
"firstId":78716,
"lastId":88805,
"count":10090
},
{
"symbol":"FIDABNB",
"priceChange":"0.00093000",
"priceChangePercent":"6.008",
"weightedAvgPrice":"0.01614960",
"prevClosePrice":"0.01546000",
"lastPrice":"0.01641000",
"lastQty":"109.10000000",
"bidPrice":"0.01643000",
"bidQty":"97.50000000",
"askPrice":"0.01649000",
"askQty":"140.60000000",
"openPrice":"0.01548000",
"highPrice":"0.01663000",
"lowPrice":"0.01533000",
"volume":"75225.50000000",
"quoteVolume":"1214.86161500",
"openTime":1633166016671,
"closeTime":1633252416671,
"firstId":8400,
"lastId":9840,
"count":1441
}
]
# Reduce every ticker to the two fields of interest ...
data = [
    {'symbol': x['symbol'], 'priceChangePercent': x['priceChangePercent']}
    for x in data
]
# ... then order them by percentage change, largest first.  The percentage is
# stored as a string, so it is converted for the comparison.
data.sort(key=lambda k: float(k['priceChangePercent']), reverse=True)
print(data)
output
[{'symbol': 'FIDABUSD', 'priceChangePercent': '6.375'}, {'symbol': 'FIDABNB', 'priceChangePercent': '6.008'}]

try this:
data = *your data*
# Keep only the two requested fields of every ticker.
newlist = [
    {key: item[key] for key in ('symbol', 'priceChangePercent')}
    for item in data
]
# The percentage is a string in the API output, so convert it to float for
# the sort; comparing the raw strings would put "10.0" below "6.375".
print(sorted(newlist, key=lambda k: float(k['priceChangePercent']), reverse=True))

How to get the minimum and maximum values in a string?

files = ['foo.0001.jpg', 'test2.0003.jpg', 'foo.0004.jpg', 'tmp.txt',
'foo.0003.jpg', 'test2.0002.jpg', 'test2.0004.jpg', 'test.0002.jpg',
'foo.0002.jpg', 'foo.0005.jpg', 'test.0001.jpg']
and I want foo.####.jpg and min, max print
test.####.jpg and min, max print
test2.####.jpg and min, max print
# Asker's attempt at printing "<min>-<max>" of the frame numbers.
# NOTE(review): kept as posted, with its indentation lost. `foo` and
# `num_list` are not defined in this snippet, `files.get(c)` treats `files`
# as a dict although the list above is a plain list, and the final line is a
# Python 2 print statement.
def get_frame_number(files):
for c in foo:
value = files.get(c)
for i in value:
# The frame number is the second dot-separated field of the name.
num = i.split(".")[1]
num_list.append(int(num))
print str(min(num_list)) + "-" + str(max(num_list))
I have a function. but couldn't figure it out.

You can use re to try to pull the number out of your file name. Then use this function as the key argument to max and min respectively.
import re
def get_frame_number(file):
    """Return the frame number embedded in a ``<name>.<digits>.jpg`` file name.

    Args:
        file: The file name to inspect.

    Returns:
        The frame number as an int, or ``float('nan')`` when *file* does not
        have exactly the form ``<name>.<digits>.jpg``.

    Note:
        NaN compares false against every number, so when this function is
        used as the ``key`` of ``max()``/``min()`` the result depends on
        where the non-matching names sit in the list.  Filter those names
        out before taking the maximum or minimum.
    """
    # fullmatch rejects trailing text such as "foo.0001.jpg.bak", which a
    # plain match would accept.
    match = re.fullmatch(r'\w+\.(\d+)\.jpg', file)
    if match:
        return int(match.group(1))
    return float('nan')
>>> max(files, key=get_frame_number)
'foo.0005.jpg'
>>> min(files, key=get_frame_number)
'foo.0001.jpg'

An option would be using key arg (with lambda function) of max() and min() built-in functions like this:
def _frame(name):
    """Return the numeric second dot-separated field of *name*."""
    return int(name.split('.')[1])


for fn in ('foo', 'test', 'test2'):
    prefix = '{}.'.format(fn)
    # Filter once and reuse the list for both max() and min().
    candidates = [name for name in files if name.startswith(prefix)]
    if not candidates:
        # max()/min() raise ValueError on an empty sequence.
        continue
    fn_max = max(candidates, key=_frame)
    fn_min = min(candidates, key=_frame)
    print(fn, fn_max, fn_min)
Output:
('foo', 'foo.0005.jpg', 'foo.0001.jpg')
('test', 'test.0002.jpg', 'test.0001.jpg')
('test2', 'test2.0004.jpg', 'test2.0002.jpg')

import re
# Sorting first puts the zero-padded frame numbers in ascending order, so the
# first and last match are the minimum and maximum.  The dot before "jpg" is
# escaped so it only matches a literal dot.
foo = re.findall(r'(foo\.\d+\.jpg)', '|'.join(sorted(files)))
foo[0], foo[-1]
Output :
('foo.0001.jpg', 'foo.0005.jpg')
Similarly you can check for min, max of other files:
# Same approach per prefix; the dot before "jpg" is escaped so it only
# matches a literal dot.
test = re.findall(r'(test\.\d+\.jpg)', '|'.join(sorted(files)))
test[0], test[-1]
test2 = re.findall(r'(test2\.\d+\.jpg)', '|'.join(sorted(files)))
test2[0], test2[-1]
Putting all together in one liner:
[(i[0], i[-1]) for i in [re.findall(r'(' + j + r'\.\d+\.jpg)', '|'.join(sorted(files))) for j in ['foo', 'test', 'test2']]]
Output:
[('foo.0001.jpg', 'foo.0005.jpg'),
('test.0001.jpg', 'test.0002.jpg'),
('test2.0002.jpg', 'test2.0004.jpg')]

def get_frame_number(files, name):
    """Return the smallest and largest frame number of the files called *name*.

    A file belongs to *name* when the text before its first dot equals
    *name*; its frame number is the second dot-separated field.  Every other
    file is reported on standard output and skipped.

    Args:
        files: Iterable of file names such as ``'foo.0001.jpg'``.
        name: The prefix to select, e.g. ``'foo'``.

    Returns:
        A ``(minimum, maximum)`` tuple of ints.

    Raises:
        ValueError: If no file matches *name*, or a matching file has a
            non-numeric frame field.
    """
    nums = []
    for each in files:
        parts = each.strip().split('.')
        if parts[0] == name:
            nums.append(int(parts[1]))
        else:
            print("Ignoring", each)
    if not nums:
        raise ValueError("no files found for %r" % (name,))
    # min()/max() avoid sorting the list twice.
    return (min(nums), max(nums))
Try this with :
# All three calls use get_frame_number, the name the function is defined under.
print(get_frame_number(files, "test"))
print(get_frame_number(files, "test2"))
print(get_frame_number(files, "foo"))

How to replace text in curly brackets with another text based on comparisons using Python Regex

I am quite new to regular expressions. I have a string that looks like this:
str = "abc/def/([default], [testing])"
and a dictionary
dict = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
and using Python RE, I want str in this form, after comparisons of each element in dict to str:
str = "abc/def/(2.7, 2.1)"
Any help how to do it using Python RE?
P.S. It's not part of any assignment; it is part of my project at work, and I have spent many hours trying to figure out a solution, but in vain.

import re
st = "abc/def/([default], [testing], [something])"
dic = {'abc/def/[default]' : '2.7',
       'abc/def/[testing]' : '2.1',
       'bcd/xed/[something]' : '3.1'}
# Raw strings keep the backslashes for the regex engine instead of relying on
# Python passing unknown escape sequences through.
prefix_regex = r"^[\w*/]*"   # leading run of word characters, '*' and '/'
tag_regex = r"\[\w*\]"       # one "[name]" tag
prefix = re.findall(prefix_regex, st)[0]
tags = re.findall(tag_regex, st)
for key in dic:
    key_prefix = re.findall(prefix_regex, key)[0]
    key_tags = re.findall(tag_regex, key)
    if not key_tags:
        # A key without a "[tag]" part has nothing to replace.
        continue
    key_tag = key_tags[0]
    # Replace a tag only when the key's prefix matches the string's prefix.
    if prefix == key_prefix and key_tag in tags:
        st = st.replace(key_tag, dic[key])
print(st)
OUTPUT:
abc/def/(2.7, 2.1, [something])

Here is a solution using re module.
Hypotheses :
there is a dictionary whose keys are composed of a prefix and a variable part, the variable part is enclosed in brackets ([])
the values are strings by which the variable parts are to be replaced in the string
the string is composed by a prefix, a (, a list of variable parts and a )
the variable parts in the string are enclosed in []
the variable parts in the string are separated by a comma followed by optional spaces
Python code :
import re
class splitter:
    """Replace the bracketed parts of ``prefix([a], [b], ...)`` via a lookup dict.

    The dict passed to the constructor maps ``prefix + '[part]'`` to the
    replacement text for that part.
    """

    # Everything before the first '(' is the prefix.
    pref = re.compile(r"[^(]+")
    # One bracketed part, brackets included.
    iden = re.compile(r"\[[^]]*\]")

    def __init__(self, d):
        """Store the lookup dict *d*."""
        self.d = d

    def split(self, s):
        """Split *s* into its prefix and its list of bracketed parts.

        Returns:
            A ``(prefix, parts)`` tuple, or ``None`` when *s* has no prefix
            (it is empty or starts with ``'('``).
        """
        m = self.pref.match(s)
        if m is None:
            return None
        # Search for parts only after the prefix.
        return m.group(0), self.iden.findall(s, m.end())

    def convert(self, s):
        """Return *s* with every bracketed part replaced by its dict value.

        Raises:
            ValueError: If *s* has no prefix to split on.
            KeyError: If ``prefix + part`` is not a key of the lookup dict.
        """
        parts = self.split(s)
        if parts is None:
            raise ValueError("cannot split %r into a prefix and parts" % (s,))
        p, elts = parts
        return p + "(" + ", ".join(self.d[p + elt] for elt in elts) + ")"
Usage :
# Lookup table: prefix + bracketed part -> replacement text.
d = {'abc/def/[default]' : '2.7', 'abc/def/[testing]' : '2.1'}
# The string whose bracketed parts are to be replaced.
s = "abc/def/([default], [testing])"
sp = splitter(d)
print(sp.convert(s))
output :
abc/def/(2.7, 2.1)

Regex is probably not required here. Hope this helps
# `str` and `dict` are the variables from the question (they shadow the
# built-ins of the same name).
lhs, rhs = str.split("/(")
lhs += "/"
# Look up every comma-separated item, however many there are.
values = [dict[lhs + item] for item in rhs.strip(")").split(", ")]
print("{0}({1})".format(lhs, ",".join(values)))
output
abc/def/(2.7,2.1)

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Extracting data from string with specific format using Python - python

This: import re a=re.findall(r' ([\d\.eE-]*) ',to_parse) map(float, a) >> [2.96296, 0.822213, 3.7037, 0.902167, 5.20086, 3.14815] Will give you your list of numbers, is that what you look for?

Related

What's the best method to create a dictionary from outputs of multiple for loops

How to replace and insert a new node into a ast tree using esprima for python?

How to get Specific values from printed value on Python and sort from high to small

How to get the minimum and maximum values in a string?

How to replace text in curly brackets with another text based on comparisons using Python Regex

Categories

Resources