Django split String from database - python

I try to split a String from a database (SQLite). The String has a linebreak \n and I want split it there in 2 parts. It works with a normal String for example text = "Hello \n World". But if I use the string from my database it doesn't work (the text is saved correctly with \n in the database!!)
My Code for getting the first part of the string:
from django import template
from products.models import News
register = template.Library()
#register.simple_tag
def get_first_title(id):
search_value = "\n"
news = News.objects.values('title')
title = news.filter(pk=id)
number = str(title).find(search_value)
first_title = str(title)[0:number]
return first_title

Try this:
#register.simple_tag
def get_first_title(id):
news = News.objects.get(pk=id)
return news.title.split("\n")[0]
Although you should probably catch posible exceptions with:
#register.simple_tag
def get_first_title(id):
try:
news = News.objects.get(pk=id)
splitted_title = news.title.split("\n")
if splitted_title:
return splitted_title[0]
except News.DoesNotExist:
# raise exception or return None or something

Use the following code instead of your code in the last 4 lines
title = news.filter(pk=id)
number = str(title).split(" ")
# number is a list of strings
print(number , len(number))

Related

How to escape all HTML entities in show_popup() method and fix Parse Error in Sublime Text 3 plugin?

I am making a plugin for Sublime Text 3. It contacts my server in Java and receives a response in the form of a list of strings, that contains C code.
To display this code in a popup window you need to pass a string in HTML format to the method show_popup. Accordingly, all C-code characters that can be recognized by the parser as HTML entities should be replaced with their names (&name;) or numbers (&#number;). At first, I just replaced the most common characters with replace(), but it didn't always work out - Parse Error was displayed in the console:
Parse Error: <br> printf ("Decimals: %d %ld\n", 1977, 650000L);
<br> printf ("Preceding with blanks:&nbs
...
y</a></li><p><b>____________________________________________________</b></p>
</ul>
</body>
code: Unexpected character
I've tried to escape html entities with Python's html library:
import html
...
html.escape(string)
But Sublime doesn't see import and print in console that I was using a function without defining it - I guess he didn't see that I connected this library(Whyyy?). cgi.escape - is depricated, so I can't use this. I decided to write the function myself.
Then I saw a very interesting way to replace all the characters whose code is >127 and some other characters (&, <,>) with their numbers:
def escape_html (s):
out = ""
i = 0
while i < len(s):
c = s[i]
number = ord(c)
if number > 127 or c == '"' or c == '\'' or c == '<' or c == '>' or c == '&':
out += "&#"
out += str(number)
out += ";"
else:
out += c
i += 1
out = out.replace(" ", " ")
out = out.replace("\n", "<br>")
return out
This code works perfectly for displaying characters in a browser, but unfortunately it is not supported by Sublime Text 3.
As a result, I came to the conclusion that these characters should be replaced with their equivalent names:
def dumb_escape_html(s):
entities = [["&", "&"], ["<", "<"], [">", ">"], ["\n", "<br>"],
[" ", " "]]
for entity in entities:
s = s.replace(entity[0], entity[1])
return s
But again I faced an obstacle: not all names are supported in Sublime. And again an error Parse Error.
I also attach a link to JSON file, which contains answer from my server, content of which should be displayed in pop-up window: Example of data from sever (codeshare.io)
I absolutely do not understand, in what I make a mistake - I hope, that great programmers know how to solve my problem.
Edit. Minimal, Reproducible Example:
import sublime
import sublime_plugin
import string
import sys
import json
def get_func_name(line, column):
return "printf"
def get_const_data(func_name):
input_file = open ("PATH_TO_JSON/data_printf.json")
results = json.load(input_file)
return results
def dumb_escape_html(s):
entities = [["&", "&"], ["<", "<"], [">", ">"], ["\n", "<br>"],
[" ", " "]]
for entity in entities:
s = s.replace(entity[0], entity[1])
return s
def dumb_unescape_html(s):
entities = [["<", "<"], [">", ">"], ["<br>", "\n"],
[" ", " "], ["&", "&"]]
for entity in entities:
s = s.replace(entity[0], entity[1])
return s
class CoderecsysCommand(sublime_plugin.TextCommand):
def run(self, edit):
v = self.view
cur_line = v.substr(v.line(v.sel()[0]))
for sel in v.sel():
line_begin = v.rowcol(sel.begin())[0]
line_end = v.rowcol(sel.end())[0]
pos = v.rowcol(v.sel()[0].begin()) # (row, column)
try:
func_name = get_func_name(cur_line, pos[1]-1)
li_tree = ""
final_data = get_const_data(func_name)
for i in range(len(final_data)):
source = "source: " + final_data[i]["source"]
escaped = dumb_escape_html(final_data[i]["code"])
divider = "<b>____________________________________________________</b>"
li_tree += "<li><p>%s</p>%s <a href='%s'>Copy</a></li><p>%s</p>" %(source, escaped, escaped, divider)
# The html to be shown.
html = """
<body id=copy-multiline>
Examples of using <b>%s</b> function.
<ul>
%s
</ul>
</body>
""" %(func_name, li_tree)
self.view.show_popup(html, max_width=700, on_navigate=lambda example: self.copy_example(example, func_name, source))
except Exception as ex:
self.view.show_popup("<b style=\"color:#1c87c9\">CodeRec Error:</b> " + str(ex), max_width=700)
def copy_example(self, example, func_name, source):
# Copies the code to the clipboard.
unescaped = dumb_unescape_html(example)
unescaped = "// " + source + unescaped
sublime.set_clipboard(unescaped)
self.view.hide_popup()
sublime.status_message('Example of using ' + func_name + ' copied to clipboard !')

Parsing Json with multiple "levels" with Python

I'm trying to parse a json file from an api call.
I have found this code that fits my need and trying to adapt it to what I want:
import math, urllib2, json, re
def download():
graph = {}
page = urllib2.urlopen("http://fx.priceonomics.com/v1/rates/?q=1")
jsrates = json.loads(page.read())
pattern = re.compile("([A-Z]{3})_([A-Z]{3})")
for key in jsrates:
matches = pattern.match(key)
conversion_rate = -math.log(float(jsrates[key]))
from_rate = matches.group(1).encode('ascii','ignore')
to_rate = matches.group(2).encode('ascii','ignore')
if from_rate != to_rate:
if from_rate not in graph:
graph[from_rate] = {}
graph[from_rate][to_rate] = float(conversion_rate)
return graph
And I've turned it into:
import math, urllib2, json, re
def download():
graph = {}
page = urllib2.urlopen("https://bittrex.com/api/v1.1/public/getmarketsummaries")
jsrates = json.loads(page.read())
for pattern in jsrates['result'][0]['MarketName']:
for key in jsrates['result'][0]['Ask']:
matches = pattern.match(key)
conversion_rate = -math.log(float(jsrates[key]))
from_rate = matches.group(1).encode('ascii','ignore')
to_rate = matches.group(2).encode('ascii','ignore')
if from_rate != to_rate:
if from_rate not in graph:
graph[from_rate] = {}
graph[from_rate][to_rate] = float(conversion_rate)
return graph
Now the problem is that there is multiple level in the json "Result > 0, 1,2 etc"
json screenshot
for key in jsrates['result'][0]['Ask']:
I want the zero to be able to be any number, I don't know if thats clear.
So I could get all the ask price to match their marketname.
I have shortened the code so it doesnt make too long of a post.
Thanks
PS: sorry for the english, its not my native language.
You could loop through all of the result values that are returned, ignoring the meaningless numeric index:
for result in jsrates['result'].values():
ask = result.get('Ask')
if ask is not None:
# Do things with your ask...

Python; How to replace escaped non-unicode characters with their respective 'real' utf-8

I am relatively new to programming, and I have a small problem writing a python equivalent of Snip for spotify for ubuntu(linux)
Somehow i can encode the title correctly, but am unable to encode the artist the same way
when i try to encode the artist in the same fashion i get this:
File "./songfinder.py", line 11, in currentplaying
artiststr = str((metadata['xesam:artist']).encode('utf-8'))
AttributeError: 'dbus.Array' object has no attribute 'encode'
however the title is done exactly the same and that is working.
Code so far IS working but has for example \xd8 instead of Ø, and similar:
import dbus
session_bus = dbus.SessionBus()
spotify_bus = session_bus.get_object("org.mpris.MediaPlayer2.spotify", "/org/mpris/MediaPlayer2")
spotify_properties = dbus.Interface(spotify_bus, "org.freedesktop.DBus.Properties")
def currentplaying():
metadata = spotify_properties.Get("org.mpris.MediaPlayer2.Player", "Metadata")
title = str((metadata['xesam:title']).encode('utf-8'))
artiststr = str((metadata['xesam:artist']))
if ("dbus.string" in artiststr.lower()):
artists = artiststr.split("(u")
artist = artists[1]
artists = artist.split(")],")
artist = artists[0]
artist = artist.replace("(u", "")
else:
artist = "'unknown'"
artist = (artist.replace("'",""))
playing = (artist + " - " + title + " ")
return playing
#save playing to file.txt
relevant qna's:
Replace non-ascii chars from a unicode string in Python
Why it does not resolve my problem: I would like to print/save the actual character, not replace it with similar ones
Looking at your question metadata contains at least something like this with Unicode strings. The artist field seems to be some sort of iterable the begins with the artist. Something like this (feel free to post actual metadata content):
metadata = {'xesam:title':u'title','xesam:artist':[u'artist']}
In the title assignment line, str is unnecessary since encoding a Unicode string returns a str anyway, but no need to encode it either. Unicode strings represent text, so leave it that way:
title = metadata['xesam:title']
Similar for artist assignment, but get the first element of the iterable:
artist = metadata['xesam:artist'][0]
Next, in your song-updating logic, use io.open to open the files with a UTF-8 encoding. This lets Unicode strings (text) be written directly and the file will handle the encoding. Also use a with statement to automatically close the file when the with ends.
Program with recommended changes:
import time
import dbus
import io
session_bus = dbus.SessionBus()
spotify_bus = session_bus.get_object("org.mpris.MediaPlayer2.spotify", "/org/mpris/MediaPlayer2")
spotify_properties = dbus.Interface(spotify_bus, "org.freedesktop.DBus.Properties")
def currentplaying():
metadata = spotify_properties.Get("org.mpris.MediaPlayer2.Player", "Metadata")
title = metadata['xesam:title']
artist = metadata['xesam:artist'][0]
playing = artist + " - " + title + " "
return playing
while True:
with io.open('currentsongspotify.txt', encoding='utf8') as filetxt:
oldtitle = filetxt.read()
newtitle = currentplaying()
if newtitle == oldtitle:
time.sleep(1)
else:
with io.open('currentsongspotify.txt','w',encoding='utf8') as filetxt: # save newtitle to file, overwriting existing data
filetxt.write(newtitle)
print 'new file saved:',newtitle
The error you getting is not about unicode, it is about wrong type. Python complains that you trying to call string method encode from the array object. Which does not have this method.
The first this I would try is to remove redundant brackets here it getting artiststr like this: artiststr = str(metadata['xesam:artist']).
But I'm not sure this would work. If it doesn't work, you need to find out what type has metadata['xesam:artist']. Looks like it is not string, but array. So you need to fix the code which fills metadata['xesam:artist'] with data. You can try to use debugger or just print() function to find out the content of metadata['xesam:artist']. Or provide the relevant code in you question too.
Final program, feel free to use if you like:
import time
import dbus
session_bus = dbus.SessionBus()
spotify_bus = session_bus.get_object("org.mpris.MediaPlayer2.spotify", "/org/mpris/MediaPlayer2")
spotify_properties = dbus.Interface(spotify_bus, "org.freedesktop.DBus.Properties")
def currentplaying():
metadata = spotify_properties.Get("org.mpris.MediaPlayer2.Player", "Metadata")
title = str((metadata['xesam:title']).encode('utf-8'))
artiststr = str((metadata['xesam:artist'])[0].encode('utf-8'))
artist = artiststr
playing = (artist + " - " + title + " ")
return playing
while True:
filetxt = open("/home/USER/Desktop/currentsongspotify.txt", "r")
oldtitle = filetxt.read()
filetxt.close()
newtitle = str(currentplaying())
if(newtitle == oldtitle):
time.sleep(1)
else:
filetxt = open("/home/USER/Desktop/currentsongspotify.txt", "w") #save newtitle to file, overwriting existing data
filetxt.write(str(newtitle))
print("new file saved: " + newtitle)

parsing a string in python for #hashtag

I am wondering, how could I make an algorithm that parses a string for the hashtag symbol ' # ' and returns the full string, but where ever a word starts with a '#' symbol, it becomes a link. I am using python with Google app engine: webapp2 and Jinja2 and I am building a blog.
Thanks
A more efficient and complete way to find the "hashwords":
import functools
def hash_position(string):
return string.find('#')
def delimiter_position(string, delimiters):
positions = filter(lambda x: x >= 0, map(lambda delimiter: string.find(delimiter), delimiters))
try:
return functools.reduce(min, positions)
except TypeError:
return -1
def get_hashed_words(string, delimiters):
maximum_length = len(string)
current_hash_position = hash_position(string)
string = string[current_hash_position:]
results = []
counter = 0
while current_hash_position != -1:
current_delimiter_position = delimiter_position(string, delimiters)
if current_delimiter_position == -1:
results.append(string)
else:
results.append(string[0:current_delimiter_position])
# Update offsets and the haystack
string = string[current_delimiter_position:]
current_hash_position = hash_position(string)
string = string[current_hash_position:]
return results
if __name__ == "__main__":
string = "Please #clarify: What do you #mean with returning somthing as a #link. #herp"
delimiters = [' ', '.', ',', ':']
print(get_hashed_words(string, delimiters))
Imperative code with updates of the haystack looks a little bit ugly but hey, that's what we get for (ab-)using mutable variables.
And I still have no idea what do you mean with "returning something as a link".
Hope that helps.
not sure where do you get the data for the link, but maybe something like:
[('%s' % word) for word in input.split() if word[0]=='#']
Are you talking about twitter? Maybe this?
def get_hashtag_link(hashtag):
if hashtag.startswith("#"):
return '%s' % (hashtag[1:], hashtag)
>>> get_hashtag_link("#stackoverflow")
'#stackoverflow'
It will return None if hashtag is not a hashtag.

Django, custom template filters - regex problems

I'm trying to implement a WikiLink template filter in Django that queries the database model to give different responses depending on Page existence, identical to Wikipedia's red links. The filter does not raise an Error but instead doesn't do anything to the input.
WikiLink is defined as: [[ThisIsAWikiLink | This is the alt text]]
Here's a working example that does not query the database:
from django import template
from django.template.defaultfilters import stringfilter
from sites.wiki.models import Page
import re
register = template.Library()
#register.filter
#stringfilter
def wikilink(value):
return re.sub(r'\[\[ ?(.*?) ?\| ?(.*?) ?\]\]', r'\2', value)
wikilink.is_safe = True
The input (value) is a multi-line string, containing HTML and many WikiLinks.
The expected output is substituting [[ThisIsAWikiLink | This is the alt text]] with
This is the alt text
or if "ThisIsAWikiLink" doesn't exist in the database:
This is the alt text
and returning value.
Here's the non-working code (edited in response to comments/answers):
from django import template
from django.template.defaultfilters import stringfilter
from sites.wiki.models import Page
import re
register = template.Library()
#register.filter
#stringfilter
def wikilink(value):
m = re.match(r'\[\[ ?(.*?) ?\| ?(.*?) ?\]\]', value)
if(m):
page_alias = m.group(2)
page_title = m.group(3)
try:
page = Page.objects.get(alias=page_alias)
return re.sub(r'(\[\[)(.*)\|(.*)(\]\])', r'\3', value)
except Page.DoesNotExist:
return re.sub(r'(\[\[)(.*)\|(.*)(\]\])', r'\3', value)
else:
return value
wikilink.is_safe = True
What the code needs to do is:
extract all the WikiLinks in value
query the Page model to see if the page exists
substitute all the WikiLinks with normal links, styled dependent on each wikipage existence.
return the altered value
The updated question is:
What regular expression (method) can return a python List of WikiLinks, which can be altered and used to substitute the original matches (after being altered).
Edit:
I'd like to do something like this:
def wikilink(value):
regex = re.magic_method(r'\[\[ ?(.*?) ?\| ?(.*?) ?\]\]', value)
foreach wikilink in regex:
alias = wikilink.group(0)
text = wikilink.group(1)
if(alias exists in Page):
regex.sub(""+ text +"")
else:
regex.sub("<a href="+alias+" class='redlink'>"+ text +"</a>")
return value
If your string contains other text in addition to the wiki-link, your filter won't work because you are using re.match instead of re.search. re.match matches at the beginning of the string. re.search matches anywhere in the string. See matching vs. searching.
Also, your regex uses the greedy *, so it won't work if one line contains multiple wiki-links. Use *? instead to make it non-greedy:
re.search(r'\[\[(.*?)\|(.*?)\]\]', value)
Edit:
As for tips on how to fix your code, I suggest that you use re.sub with a callback. The advantages are:
It works correctly if you have multiple wiki-links in the same line.
One pass over the string is enough. You don't need a pass to find wiki-links, and another one to do the replacement.
Here is a sketch of the implmentation:
import re
WIKILINK_RE = re.compile(r'\[\[(.*?)\|(.*?)\]\]')
def wikilink(value):
def wikilink_sub_callback(match_obj):
alias = match_obj.group(1).strip()
text = match_obj.group(2).strip()
if(alias exists in Page):
class_attr = ''
else:
class_attr = ' class="redlink"'
return '<a href="%s"%s>%s</a>' % (alias, class_attr, text)
return WIKILINK_RE.sub(wikilink_sub_callback, value)
This is the type of problem that falls quickly to a small set of unit tests.
Pieces of the filter that can be tested in isolation (with a bit of code restructuring):
Determining whether or not value contains the pattern you're looking for
What string gets generated if there is a matching Page
What string gets generated is there isn't a matching Page
That would help you isolate where things are going wrong. You'll probably find that you'll need to rewire the regexps to account for optional spaces around the |.
Also, on first glance it looks like your filter is exploitable. You're claiming the result is safe, but you haven't filtered the alt text for nasties like script tags.
Code:
import re
def page_exists(alias):
if alias == 'ThisIsAWikiLink':
return True
return False
def wikilink(value):
if value == None:
return None
for alias, text in re.findall('\[\[\s*(.*?)\s*\|\s*(.*?)\s*\]\]',value):
if page_exists(alias):
value = re.sub('\[\[\s*%s\s*\|\s*%s\s*\]\]' % (alias,text), '%s' % (alias, text),value)
else:
value = re.sub('\[\[\s*%s\s*\|\s*%s\s*\]\]' % (alias,text), '%s' % (alias, text), value)
return value
Sample results:
>>> import wikilink
>>> wikilink.wikilink(None)
>>> wikilink.wikilink('')
''
>>> wikilink.wikilink('Test')
'Test'
>>> wikilink.wikilink('[[ThisIsAWikiLink | This is the alt text]]')
'This is the alt text'
>>> wikilink.wikilink('[[ThisIsABadWikiLink | This is the alt text]]')
'This is the alt text'
>>> wikilink.wikilink('[[ThisIsAWikiLink | This is the alt text]]\n[[ThisIsAWikiLink | This is another instance]]')
'This is the alt text\nThis is another instance'
>>> wikilink.wikilink('[[ThisIsAWikiLink | This is the alt text]]\n[[ThisIsAWikiLink | This is another instance]]')
General comments:
findall is the magic re function you're looking for
Change page_exists to run whatever query you want
Vulnerable to HTML injection (as mentioned by Dave W. Smith above)
Having to recompile the regex on each iteration is inefficient
Querying the database each time is inefficient
I think you'd run into performance issues pretty quickly with this approach.
This is the working code in case someone needs it:
from django import template
from django.template.defaultfilters import stringfilter
from sites.wiki.models import Page
import re
register = template.Library()
#register.filter
#stringfilter
def wikilink(value):
WIKILINK_RE = re.compile(r'\[\[ ?(.*?) ?\| ?(.*?) ?\]\]')
def wikilink_sub_callback(match_obj):
alias = match_obj.group(1).strip()
text = match_obj.group(2).strip()
class_attr = ''
try:
Page.objects.get(alias=alias)
except Page.DoesNotExist:
class_attr = ' class="redlink"'
return '<a href="%s"%s>%s</a>' % (alias, class_attr, text)
return WIKILINK_RE.sub(wikilink_sub_callback, value)
wikilink.is_safe = True
Many thanks for all the answers!

Categories

Resources