How can I print characters from various alphabets in Python 3?

How can I print characters from various alphabets in Python 3? - python

I am trying to work with cyrillic alphabet and latin alphabet with central european characters, however I am not able to print cyrillic characters. Take a look at the sample code below.
# -*- coding: utf-8 -*-
print("ň")
print("ф")
I've been able to output "ň", once I have set "encoding": "cp1250" in Python.sublime-settings, but unfortunately I have not found any means of displaying cyrillic character.
Thanks for any help.
------------------------Edit--------------------------
Meanwhile, I've put together code that works in an Ubuntu 13.04 environment but throws an exception on Windows 7.
Exception:
Traceback (most recent call last):
File "C:\Users\branislavs\docs\personal\scripts\playground.py", line 6, in <module>
for line in data:
File "C:\Python34\lib\encodings\cp1250.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 51: character maps to <undefined>
Environment:
Win 7
Python 3.4,
Sublime Text 2,
I am trying to output it on windows console.
What should I do on Windows to make it work?
I am pasting the code as well:
import sys
# Single-character transliteration table. Position i of the first (Cyrillic)
# string corresponds to position i of the second (Latin) string; each string
# holds the lower-case run first, then the upper-case run in the same order.
# NOTE: the apostrophe stands for both the hard sign and the soft sign, so the
# Latin -> Cyrillic direction is ambiguous for that one character.
# Fix: the upper-case Latin run previously had a lower-case "c" opposite "Ц",
# which mapped "Ц" to "c" and made "c" translate back to "Ц" instead of "ц".
char_mapping = ("абвгґдезіийклмнопрстуфъыьцчжшАБВГҐДЕЗIИЙКЛМНОПРСТУФЪЫЬЦЧЖШ",
                "abvhgdeziyjklmnoprstuf'ŷ'cčžšABVHGDEZIYJKLMNOPRSTUF'Ŷ'CČŽŠ")
# Multi-character transliteration rules, keyed by rule group. Inside a group
# each key is the Latin spelling and each value is the Cyrillic spelling.
# Fix: the "special" group listed the key "c'a" twice, so the lower-case rule
# was silently overwritten by the upper-case value; the second entry is now
# keyed "C'a", matching the lower/upper pairing of every other rule.
syllable_mapping = {
    "special": {  # di ti ni li, da ta na la, cja, cji, sja, sji, rja, rji
        "ďi": "дї",
        "Ďi": "Дї",
        "ťi": "тї",
        "Ťi": "Тї",
        "ňi": "нї",
        "Ňi": "Нї",
        "ľi": "лї",
        "Ľi": "Лї",
        "ďa": "дя",
        "Ďa": "Дя",
        "ťa": "тя",
        "Ťa": "Тя",
        "ňa": "ня",
        "Ňa": "Hя",
        "ľa": "ля",
        "Ľa": "Ля",
        "c'a": "ця",
        "C'a": "Ця",
        "c'i": "цї",
        "C'i": "Цї",
        "c'o": "цё",
        "C'o": "Цё",
        "s'a": "ся",
        "S'a": "Ся",
        "s'i": "сї",
        "S'i": "Сї",
        "s'o": "сё",
        "S'o": "Сё",
        "r'a": "ря",
        "R'a": "Ря",
        "r'i": "рї",
        "R'i": "Рї",
        "r'o": "рё",
        "R'o": "Рё",
        "z'a": "зя",
        "Z'a": "Зя",
        "z'i": "зї",
        "Z'i": "Зї",
        "z'o": "зё",
        "Z'o": "Зё",
    },
    "carons": {
        "ď": "дь",
        "Ď": "Дь",
        "ť": "ть",
        "Ť": "Ть",
        "ň": "нь",
        "Ň": "Нь",
        "ľ": "ль",
        "Ľ": "Ль",
    },
    "basic": {
        "ja": "я",
        "Ja": "Я",
        "ju": "ю",
        "Ju": "Ю",
        "je": "є",
        "Je": "Є",
        "ch": "х",
        "Ch": "X",
        "'o": "ё",
        "'O": "Ë",
        "x": "кc",
        "X": "Кc",
        "šč": "щ",
        "Šč": "Щ",
        "ji": "ї",
        "c'": "ць",
        "C'": "Ць",
        "s'": "сь",
        "S'": "Сь",
        "r'": "рь",
        "R'": "Рь",
        "z'": "зь",
        "Z'": "Зь",
    }
}
# Code-point lookup tables for str.translate, built by pairing the two strings
# of char_mapping position by position: first string -> second string, and the
# reverse direction. When a source character repeats, the last pairing wins.
tr_azb_lat = dict(zip(map(ord, char_mapping[0]), map(ord, char_mapping[1])))
tr_lat_azb = dict(zip(map(ord, char_mapping[1]), map(ord, char_mapping[0])))
def map_syllables_azb_lat(string, mapping_option):
    """Replace each value of one rule group with its key.

    Walks ``syllable_mapping[mapping_option]`` in dictionary order and
    substitutes every occurrence of a rule's value (the Cyrillic spelling)
    with the rule's key (the Latin spelling). Each rule is applied to the
    result of the previous one.
    """
    for latin, cyrillic in syllable_mapping[mapping_option].items():
        string = string.replace(cyrillic, latin)
    return string
def translit_azb_lat(string):
    """Transliterate ``string`` with the Cyrillic -> Latin rules.

    The rule groups are applied in the order 'special', 'carons', 'basic',
    then the remaining single characters are mapped through ``tr_azb_lat``.

    Returns a ``str``. The previous
    ``.encode('utf-8').decode(sys.stdout.encoding)`` round trip was removed:
    it was a no-op on UTF-8 terminals and produced mojibake or a
    UnicodeDecodeError everywhere else (and a TypeError when
    ``sys.stdout.encoding`` is None). Encoding belongs at the output
    boundary, which ``print`` already handles.
    """
    for group in ('special', 'carons', 'basic'):
        string = map_syllables_azb_lat(string, group)
    return string.translate(tr_azb_lat)
def map_syllables_lat_azb(string, mapping_option):
    """Replace each key of one rule group with its value.

    Walks ``syllable_mapping[mapping_option]`` in dictionary order and
    substitutes every occurrence of a rule's key (the Latin spelling) with
    the rule's value (the Cyrillic spelling). Each rule is applied to the
    result of the previous one.
    """
    for latin, cyrillic in syllable_mapping[mapping_option].items():
        string = string.replace(latin, cyrillic)
    return string
def translit_lat_azb(string):
    """Transliterate ``string`` with the Latin -> Cyrillic rules.

    The rule groups are applied in the order 'special', 'carons', 'basic',
    then the remaining single characters are mapped through ``tr_lat_azb``.

    Returns a ``str``. The previous
    ``.encode('utf-8').decode(sys.stdout.encoding)`` round trip was removed:
    it was a no-op on UTF-8 terminals and produced mojibake or a
    UnicodeDecodeError everywhere else (and a TypeError when
    ``sys.stdout.encoding`` is None). Encoding belongs at the output
    boundary, which ``print`` already handles.
    """
    for group in ('special', 'carons', 'basic'):
        string = map_syllables_lat_azb(string, group)
    return string.translate(tr_lat_azb)

Related

Replacing elements of a Python dictionary using regex

I have been trying to replace integer components of a dictionary with string values given in another dictionary. However, I am getting the following error:
Traceback (most recent call last):
File "<string>", line 11, in <module>
File "/usr/lib/python3.11/json/__init__.py", line 346, in loads
return _default_decoder.decode(s)
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.11/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.11/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 14 (char 13)
The code has been given below. Not sure why I am getting an error.
import re
from json import loads, dumps
movable = {"movable": [0, 3, 6, 9], "fixed": [1, 4, 7, 10], "mixed": [2, 5, 8, 11]}
int_mapping = {0: "Ar", 1: "Ta", 2: "Ge", 3: "Ca", 4: "Le", 5: "Vi", 6: "Li", 7: "Sc", 8: "Sa", 9: "Ca", 10: "Aq", 11: "Pi"}
movable = dumps(movable)
for key in int_mapping.keys():
movable = re.sub('(?<![0-9])' + str(key) + '(?![0-9])', int_mapping[key], movable)
movable = loads(movable)
I understand that this code can easily be written in a different way to get the desired output. However, I am interested to understand what I am doing wrong.

If you print what movable looks like right before calling json.loads, you'll see what the problem is:
for key in int_mapping.keys():
movable = re.sub('(?<![0-9])' + str(key) + '(?![0-9])', int_mapping[key], movable)
print(movable)
outputs:
{"movable": [Ar, Ca, Li, Ca], "fixed": [Ta, Le, Sc, Aq], "mixed": [Ge, Vi, Sa, Pi]}
Those strings (Ar, Ca...) are not quoted, therefore it is not valid JSON.
If you choose to continue the way you're going, you must wrap each replacement in double quotes ("):
movable = re.sub(
'(?<![0-9])' + str(key) + '(?![0-9])',
'"' + int_mapping[key] + '"',
movable)
(notice the '"' + int_mapping[key] + '"')
Which produces:
{"movable": ["Ar", "Ca", "Li", "Ca"], "fixed": ["Ta", "Le", "Sc", "Aq"], "mixed": ["Ge", "Vi", "Sa", "Pi"]}
This said... you are probably much better off by just walking the movable values and substituting them by the values in int_mapping. Something like:
mapped_movable = {}
for key, val in movable.items():
mapped_movable[key] = [int_mapping[v] for v in val]
print(mapped_movable)

You could use a dict comprehension and make the mapping replacements directly in Python:
...
movable = {
k: [int_mapping[v] for v in values]
for k, values in movable.items()
}
print(type(movable))
print(movable)
Out:
<type 'dict'>
{'mixed': ['Ge', 'Vi', 'Sa', 'Pi'], 'fixed': ['Ta', 'Le', 'Sc', 'Aq'], 'movable': ['Ar', 'Ca', 'Li', 'Ca']}

Extracting JSON to PHP

I have this JSON data:
{
'total': 4744.134525437842,
'produksiHarian': [14.800870530853988, 15.639301040842536, 16.358413710544085, 16.952318230836113, 17.45055097248538, 17.8754763326927, 18.242760426469818, 18.563566329550866, 18.84608147199738, 19.096480365524762, 19.319535085761938, 19.519012642165947, 19.697941287301326, 19.85879401510806, 20.003618226362324, 20.134129444022808, 20.251780420564497, 20.357813020693253, 20.453297802680865, 20.539164652786695, 20.616226805275556, 20.685199899184028, 20.746717259937547, 20.801342274156227, 20.84957850042934, 20.891877998489687, 20.928648243306515, 20.960257904532423, 20.98704170951842, 21.009304559328097, 21.02732503324213, 21.04135838755367, 21.051639134485804, 21.058383270889877, 21.061790211998733, 21.06204447637534, 21.059317160271767, 21.053767231480556, 21.045542669893685, 21.034781475111366, 21.021612560179374, 21.00615654632769, 20.988526471783015, 20.968828425574475, 20.947162115751837, 20.923621380118846, 20.898294646278586, 20.871265347053306, 20.842612296170422, 20.81241002931908, 20.780729113365243, 20.747636428352394, 20.713195424315565, 20.677466356070354, 20.640506497917034, 20.602370340344187, 20.563109770866646, 20.522774239980293, 20.481410913972358, 20.43906481622342, 20.395778956975676, 20.351594453761813, 20.306550642902003, 20.260685182530857, 20.2140341483766, 20.166632122825597, 20.118512277471417, 20.069706450240275, 20.020245217044497, 19.970157959020252, 19.91947292463175, 19.868217288461143, 19.8164172056869, 19.764097863533138, 19.711283528975187, 19.657997593949958, 19.604262617587615, 19.550100366045164, 19.495531849637345, 19.44057735828815, 19.385256494483215, 19.329588204494947, 19.27359080791663, 19.217282025359857, 19.160679004832186, 19.103798346432747, 19.046656125958414, 18.989267917047492, 18.931648812181788, 18.873813442747196, 18.81577599788325, 18.757550242343445, 18.699149533671857, 18.6405868382421, 18.581874746778492, 18.523025488812625, 18.464050946690364, 18.40496266872333, 18.345771881921006, 18.286489503922823, 
18.22712615435876, 18.167692166040553, 18.10819759492825, 18.048652230580856, 17.989065605305793, 17.929447003393125, 17.869805469665625, 17.810149817913633, 17.750488638676536, 17.690830306996318, 17.631182989544868, 17.571554651719957, 17.51195306422644, 17.452385809386776, 17.392860287453175, 17.333383722465378, 17.27396316777987, 17.214605511453826, 17.155317481724854, 17.096105651663066, 17.036976444166896, 16.977936136611227, 16.918990865183517, 16.860146629088725, 16.801409294861866, 16.742784600211646, 16.68427815766247, 16.625895458611, 16.567641876376022, 16.509522670079573, 16.45154298747203, 16.393707868422275, 16.336022247883598, 16.27849095868826, 16.221118734581758, 16.163910212830586, 16.106869936959832, 16.050002359157844, 15.993311842932966, 15.936802665326628, 15.880479019354743, 15.824345016126932, 15.768404686995313, 15.712661985734963, 15.657120790467255, 15.601784905648337, 15.546658063920907, 15.491743927918497, 15.437046092169679, 15.382568084608975, 15.328313368307523, 15.274285343221132, 15.220487347578999, 15.16692265923469, 15.113594497620934, 15.0605060246799, 15.007660346458055, 14.955060514464767, 14.90270952682481, 14.850610329633396, 14.798765818107297, 14.747178837940567, 14.69585218616739, 14.644788612493286, 14.593990820280863, 14.543461467685608, 14.493203168535093, 14.443218493447299, 14.39350997078121, 14.344080087478691, 14.294931290060344, 14.246065985484877, 14.197486542047413, 14.149195290142538, 14.101194523154854, 14.053486498128224, 14.00607343682552, 13.958957526073434, 13.912140918789852, 13.865625734588422, 13.819414060513688, 13.7735079516744, 13.727909431921148, 13.682620494481162, 13.637643102609184, 13.592979190079985, 13.548630662075965, 13.504599395247906, 13.460887238869981, 13.417496014964822, 13.374427519057994, 13.331683520572508, 13.289265763416816, 13.247175966417203, 13.205415823915152, 13.163987006093256, 13.122891159524887, 13.082129907627525, 13.041704851049357, 13.00161756813468, 12.961869615439428, 
12.92246252783121, 12.883397819280237, 12.844676983002383, 12.806301491915125, 12.768272798969974, 12.730592337536208, 12.693261521807058, 12.656281747111938, 12.619654390164264, 12.583380809564924, 12.547462346008738, 12.511900322601504, 12.476696045229488, 12.441850802865273, 12.407365867693482, 12.373242495726032, 12.339481926742957, 12.30608538481023, 12.273054078443513, 12.240389200883609, 12.208091930367303, 12.176163430335453, 12.144604849869609, 12.113417323629534, 12.082601972368685, 12.052159902950859, 12.022092208828521, 11.992399969993468, 11.963084253360218, 11.934146112942067, 11.905586590110847, 11.87740671362035, 11.849607500150267, 11.822189954119143, 11.795155068047137, 11.768503822944655, 11.742237188086747, 11.716356121512085, 11.690861570169744, 11.665754469830294, 11.641035745721641, 11.61670631223086, 11.592767073301818, 11.56921892259734, 11.54606274368794, 11.523299410005265, 11.500929785229562, 11.478954723325614, 11.457375068721841, 11.436191656370681, 11.415405312010517, 11.395016852308974, 11.375027084858633, 11.355436808374336, 11.3362468128885, 11.31745787988622, 11.299070782263815, 11.281086284692925, 11.26350514351713, 11.246328107054104, 11.229555915546886, 11.213189301433452, 11.19722898932358, 11.181675696265113, 11.166530131659304, 11.151792997582067, 11.137464988637976, 11.12354679236573, 11.110039089001837, 11.096942551941297, 11.084257847527535, 11.071985635306518, 11.06012656807583, 11.04868129196148, 11.037650446580678, 11.027034665116119, 11.01683457427242, 11.00705079452253, 10.997683940178195, 10.988734619313746, 10.980203434096214, 10.972090980619463, 10.964397849202978, 10.957124624281988, 10.950271884558536, 10.943840203180603, 10.93783014765815, 10.932242279952963, 10.927077156683769, 10.922335329126735, 10.91801734312764, 10.914123739478882, 10.910655053684259, 10.907611816305053, 10.904994552693946, 10.90280378342666, 10.901040024063288, 10.899703785374516, 10.898795573363255, 10.898315889314617, 
10.898265229903306, 10.898644087067368, 10.899452948405138, 10.900692296872883],
'r_squared': 0.9822381604039494,
'variabel': [152821.1630401214, 55.892346845823056, 23.76556145798208, 26712.22875205879, -0.6822113529338901]
}
This is my PHP code :
$result = shell_exec('C:\xampp\htdocs\prosold\python\venv\Scripts\python.exe C:/xampp/htdocs/prosold/python/main.py ' . escapeshellarg(json_encode($testday)));
//echo $output;
//$dataParse = [];
$dataParse = json_decode(json_encode($result), true);
echo $dataParse;
This is my Python code:
ParseData = {}
ParseData['total'] = totalProduksi
ParseData['produksiHarian'] = fittedData
ParseData['r_squared'] = r_square
ParseData['variabel'] = variabel
print(ParseData)
and when I try to extract total, using this code:
$dataParse = json_decode(json_encode($result), true);
echo $dataParse['total'];
I get an error:
illegal string offset
But when I try to do this:
$dataParse = json_decode(json_encode($result), true);
echo $dataParse[0];
echo $dataParse[1];
echo $dataParse[2];
echo $dataParse[3];
This is the result:
{'to
What do I miss?

That data is JSON-like, but not valid JSON. The keys are wrapped in single quotes, but the JSON standard requires double quotes.
To produce valid JSON from a Python object use json.dumps()
For example:
import json
dictionary = {'a':34, 'b':61, 'c':82}
jsonString = json.dumps(dictionary, indent=4)
print(jsonString)
Which gives:
{
"a": 34,
"b": 61,
"c": 82
}
In that form PHP should be able to read your data and decode it with json_decode()

Encoding foreign alphabet characters

I am getting data from an XML provided by an API that for some reason lists Czechoslovak characters in a different encoding (e.g. instead of correct Czechoslovak "ý" it uses "Ã½"). Therefore, instead of providing the
correct output to the user -> "Zelený"
the output is -> "ZelenÃ½"
I went through multiple StackOverflow posts, other fora and tutorials, but I still cannot figure out how to make it turn "ZelenÃ½" into "Zelený" (this is just one of the weird characters used by the XML so I cannot use str.replace).
I figured out, that the correct encoding for the Czech/Slovak language is "windows-1250"
My code:
def change_encoding(what):
what = what.encode("windows-1250")
return what
clean_xml_input = change_encoding(xml_input)
This produces error:
'charmap' codec can't encode characters in position 5-6: character
maps to <undefined>

"ZelenÃ½".encode("Windows-1252").decode("utf-8") #'Zelený'
"ZelenÃ½".encode("windows-1254").decode("utf-8") #'Zelený'
"ZelenÃ½".encode("iso-8859-1").decode("utf-8") #'Zelený'
"ZelenÃ½".encode("iso-8859-9").decode("utf-8") #'Zelený'
If it is helpful
from itertools import permutations
all_encoding = ['ASMO-708',
'big5',
'cp1025',
'cp866',
'cp875',
'csISO2022JP',
'DOS-720',
'DOS-862',
'EUC-CN',
'EUC-JP',
'euc-jp',
'euc-kr',
'GB18030',
'gb2312',
'hz-gb-2312',
'IBM00858',
'IBM00924',
'IBM01047',
'IBM01140',
'IBM01141',
'IBM01142',
'IBM01143',
'IBM01144',
'IBM01145',
'IBM01146',
'IBM01147',
'IBM01148',
'IBM01149',
'IBM037',
'IBM1026',
'IBM273',
'IBM277',
'IBM278',
'IBM280',
'IBM284',
'IBM285',
'IBM290',
'IBM297',
'IBM420',
'IBM423',
'IBM424',
'IBM437',
'IBM500',
'ibm737',
'ibm775',
'ibm850',
'ibm852',
'IBM855',
'ibm857',
'IBM860',
'ibm861',
'IBM863',
'IBM864',
'IBM865',
'ibm869',
'IBM870',
'IBM871',
'IBM880',
'IBM905',
'IBM-Thai',
'iso-2022-jp',
'iso-2022-jp',
'iso-2022-kr',
'iso-8859-1',
'iso-8859-13',
'iso-8859-15',
'iso-8859-2',
'iso-8859-3',
'iso-8859-4',
'iso-8859-5',
'iso-8859-6',
'iso-8859-7',
'iso-8859-8',
'iso-8859-8-i',
'iso-8859-9',
'Johab',
'koi8-r',
'koi8-u',
'ks_c_5601-1987',
'macintosh',
'shift_jis',
'us-ascii',
'utf-16',
'utf-16BE',
'utf-32',
'utf-32BE',
'utf-7',
'utf-8',
'windows-1250',
'windows-1251',
'Windows-1252',
'windows-1253',
'windows-1254',
'windows-1255',
'windows-1256',
'windows-1257',
'windows-1258',
'windows-874',
'x-Chinese-CNS',
'x-Chinese-Eten',
'x-cp20001',
'x-cp20003',
'x-cp20004',
'x-cp20005',
'x-cp20261',
'x-cp20269',
'x-cp20936',
'x-cp20949',
'x-cp50227',
'x-EBCDIC-KoreanExtended',
'x-Europa',
'x-IA5',
'x-IA5-German',
'x-IA5-Norwegian',
'x-IA5-Swedish',
'x-iscii-as',
'x-iscii-be',
'x-iscii-de',
'x-iscii-gu',
'x-iscii-ka',
'x-iscii-ma',
'x-iscii-or',
'x-iscii-pa',
'x-iscii-ta',
'x-iscii-te',
'x-mac-arabic',
'x-mac-ce',
'x-mac-chinesesimp',
'x-mac-chinesetrad',
'x-mac-croatian',
'x-mac-cyrillic',
'x-mac-greek',
'x-mac-hebrew',
'x-mac-icelandic',
'x-mac-japanese',
'x-mac-korean',
'x-mac-romanian',
'x-mac-thai',
'x-mac-turkish',
'x-mac-ukrainian']
# Brute-force search: encode the garbled text with one codec and decode the
# bytes with another, reporting every ordered pair that yields the intended
# word.
for i, j in permutations(all_encoding, 2):
    try:
        repaired = "ZelenÃ½".encode(i).decode(j)
    except (LookupError, UnicodeError):
        # LookupError: Python has no codec by that name.
        # UnicodeError: the text cannot be encoded, or the bytes cannot be
        # decoded, with this pair. Anything else (e.g. KeyboardInterrupt)
        # is no longer swallowed.
        continue
    if repaired == 'Zelený':
        print(f'encode with `{i}` and decode with `{j}`')

Python.net is not receiving correct encoded string from .net

I am using .net 4.7.1 console program talking to python.net that VS2017 is reporting as version 2.5.1.0 (runtime version v4.0.30319) Python code is in 3.6
python:
def ping(input):
    """Answer 'pong' to the exact string 'ping'; anything else gets 'invalid'."""
    return 'pong' if input == 'ping' else 'invalid'
def headervalid(header):
    """Tell whether ``header`` is exactly the expected header string.

    The expected value is '#', newline, U+001E, carriage return, then
    'ANSI ' (with the trailing space).
    """
    expected = '#\n\u001e\rANSI '
    return header == expected
if __name__ == '__main__':
    # Stand-alone smoke test of both helpers, each fed the one input it accepts.
    input = '#\n\u001e\rANSI '
    header_ok = headervalid(input)
    print(header_ok)
    input = 'ping'
    reply = ping(input)
    print(reply)
dot net :
// Hold the Python GIL for the whole interop session.
using (Py.GIL())
{
    // numpy sanity checks
    dynamic np = Py.Import("numpy");
    Console.WriteLine(np.cos(np.pi * 2));
    dynamic sin = np.sin;
    Console.WriteLine(sin(5));
    double c = np.cos(5) + sin(5);
    Console.WriteLine(c);
    dynamic a = np.array(new List<float> { 1, 2, 3 });
    Console.WriteLine(a.dtype);
    dynamic b = np.array(new List<float> { 6, 5, 4 }, dtype: np.int32);
    Console.WriteLine(b.dtype);
    Console.WriteLine(a * b);

    // Call into the project's Python module.
    dynamic parsers = Py.Import("newworld_parsers.bridgetest");
    string input = "ping";
    var result = parsers.ping(input);
    Console.WriteLine(result);
    // '@' makes this a verbatim string literal: \n, \u001e and \r are passed
    // to Python as literal backslash sequences, not as control characters.
    // (The listing showed '#' here, which is not valid C#.)
    input = @"#\n\u001e\rANSI ";
    result = parsers.headervalid(input);
    Console.WriteLine(result);
    Console.WriteLine("=======");
    Console.ReadLine();
}
The python stand alone run reports:
True
pong
Press any key to continue . . .
Dot net run reports:
1.0
-0.9589242746631385
-0.675262089199912
float64
int32
[ 6. 10. 12.]
pong
False
=== Press any key to continue ====
Notice the True in python vs the False when calling from C#
The special characters in headervalid() from dot net don't seem to be going over correctly. What should I do to fix this? Any ideas greatly appreciated!

Putting the '@' character in front of a C# string turns it into a verbatim string, meaning no escape sequences inside will work.
You can see that by adding Console.WriteLine(input); to your code.

Python how convert single quotes to double quotes to format as json string

I have a file where on each line I have text like this (representing cast of a film):
[{'cast_id': 23, 'character': "Roger 'Verbal' Kint", 'credit_id': '52fe4260c3a36847f8019af7', 'gender': 2, 'id': 1979, 'name': 'Kevin Spacey', 'order': 5, 'profile_path': '/x7wF050iuCASefLLG75s2uDPFUu.jpg'}, {'cast_id': 27, 'character': 'Edie's Finneran', 'credit_id': '52fe4260c3a36847f8019b07', 'gender': 1, 'id': 2179, 'name': 'Suzy Amis', 'order': 6, 'profile_path': '/b1pjkncyLuBtMUmqD1MztD2SG80.jpg'}]
I need to convert it in a valid json string, thus converting only the necessary single quotes to double quotes (e.g. the single quotes around word Verbal must not be converted, eventual apostrophes in the text also should not be converted).
I am using python 3.x. I need to find a regular expression which will convert only the right single quotes to double quotes, thus the whole text resulting in a valid json string. Any idea?

First of all, the line you gave as an example is not parsable! … 'Edie's Finneran' … contains a syntax error, no matter what.
Assuming that you have control over the input, you could simply use eval() to read in the file. (Although, in that case one would wonder why you can't produce valid JSON in the first place…)
>>> f = open('list.txt', 'r')
>>> s = f.read().strip()
>>> l = eval(s)
>>> import pprint
>>> pprint.pprint(l)
[{'cast_id': 23,
'character': "Roger 'Verbal' Kint",
...
'profile_path': '/b1pjkncyLuBtMUmqD1MztD2SG80.jpg'}]
>>> import json
>>> json.dumps(l)
'[{"cast_id": 23, "character": "Roger \'Verbal\' Kint", "credit_id": "52fe4260ca36847f8019af7", "gender": 2, "id": 1979, "name": "Kevin Spacey", "order": 5, "rofile_path": "/x7wF050iuCASefLLG75s2uDPFUu.jpg"}, {"cast_id": 27, "character":"Edie\'s Finneran", "credit_id": "52fe4260c3a36847f8019b07", "gender": 1, "id":2179, "name": "Suzy Amis", "order": 6, "profile_path": "/b1pjkncyLuBtMUmqD1MztDSG80.jpg"}]'
If you don't have control over the input, this is very dangerous, as it opens you up to code injection attacks.
I cannot emphasize enough that the best solution would be to produce valid JSON in the first place.

If you do not have control over the JSON data, do not eval() it!
I created a simple JSON correction mechanism, as that is more secure:
def correctSingleQuoteJSON(s):
    """Convert single-quoted JSON-like text into double-quoted JSON text.

    Rules, applied character by character:
      * an unescaped ``'`` becomes ``"`` (it delimits a string);
      * ``\\'`` becomes a plain ``'`` (JSON has no ``\\'`` escape);
      * an unescaped ``"`` becomes ``\\"`` (it is literal text inside a
        single-quoted string);
      * any other backslash escape, including ``\\"`` and ``\\\\``, is
        copied through unchanged.

    Compared with the earlier version, an already-escaped double quote is no
    longer escaped a second time, and an escaped backslash directly before a
    quote is no longer mistaken for an escape of that quote.

    Args:
        s: text in which every string is delimited by single quotes.

    Returns:
        The converted text. Only quoting is touched; Python-only literals
        such as ``True`` or ``None`` are left as they are.
    """
    out = []
    escaped = False  # True when the previous char was an unconsumed backslash
    for c in s:
        if escaped:
            if c == "'":
                out[-1] = "'"  # drop the backslash that was already emitted
            else:
                out.append(c)  # keep \" \\ \n ... exactly as written
            escaped = False
        elif c == "\\":
            out.append(c)
            escaped = True
        elif c == "'":
            out.append('"')
        elif c == '"':
            out.append('\\"')
        else:
            out.append(c)
    # Joining once avoids the quadratic cost of repeated string concatenation.
    return "".join(out)
You can use the function in the following way:
import json
singleQuoteJson = "[{'cast_id': 23, 'character': 'Roger \\'Verbal\\' Kint', 'credit_id': '52fe4260c3a36847f8019af7', 'gender': 2, 'id': 1979, 'name': 'Kevin Spacey', 'order': 5, 'profile_path': '/x7wF050iuCASefLLG75s2uDPFUu.jpg'}, {'cast_id': 27, 'character': 'Edie\\'s Finneran', 'credit_id': '52fe4260c3a36847f8019b07', 'gender': 1, 'id': 2179, 'name': 'Suzy Amis', 'order': 6, 'profile_path': '/b1pjkncyLuBtMUmqD1MztD2SG80.jpg'}]"
correctJson = correctSingleQuoteJSON(singleQuoteJson)
print(json.loads(correctJson))

Here is the code to get desired output
import ast
# Read `filepath` line by line, rewrite the quoting of every comma/colon
# separated fragment, and parse each rewritten line with ast.literal_eval.
# Returns the list of parsed lines.
# NOTE(review): indentation was lost in this listing; the nesting has to be
# restored before the function can run, and the comments below describe the
# statements in their written order only.
def getJson(filepath):
# NOTE(review): the file handle is never closed in this function.
fr = open(filepath, 'r')
lines = []
for line in fr.readlines():
# Split on every comma, including commas that sit inside quoted text.
line_split = line.split(",")
set_line_split = []
for i in line_split:
# Split on every colon, including colons that sit inside quoted text.
i_split = i.split(":")
i_set_split = []
for split_i in i_split:
set_split_i = ""
rev = ""
# NOTE(review): `i` is rebound here as a character counter although it is
# also the loop variable of `for i in line_split` above.
i = 0
# Copy characters up to and including the first quote character.
# NOTE(review): presumably the `else` pairs with the `if` - confirm once the
# indentation is restored.
for ch in split_i:
if ch in ['\"','\'']:
set_split_i += ch
i += 1
break
else:
set_split_i += ch
i += 1
# Walk the remainder back to front: the first quote met (the last one in
# reading order) is kept as is; every further quote gets a backslash appended
# after it, which ends up in front of the quote once the text is reversed back.
i_rev = (split_i[i:])[::-1]
state = False
for ch in i_rev:
if ch in ['\"','\''] and state == False:
rev += ch
state = True
elif ch in ['\"','\''] and state == True:
rev += ch+"\\"
else:
rev += ch
i_rev = rev[::-1]
set_split_i += i_rev
i_set_split.append(set_split_i)
# Reassemble the fragments with the same separators they were split on.
set_line_split.append(":".join(i_set_split))
line_modified = ",".join(set_line_split)
# literal_eval parses Python literals only; it does not execute code.
lines.append(ast.literal_eval(str(line_modified)))
return lines
lines = getJson('test.txt')
for i in lines:
print(i)

Apart from eval() (mentioned in user3850's answer), you can use ast.literal_eval
This has been discussed in the thread: Using python's eval() vs. ast.literal_eval()?
You can also look at the following discussion threads from Kaggle competition which has data similar to the one mentioned by OP:
https://www.kaggle.com/c/tmdb-box-office-prediction/discussion/89313#latest-517927
https://www.kaggle.com/c/tmdb-box-office-prediction/discussion/80045#latest-518338

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

How can I print characters from various alphabets in Python 3? - python

Related

Replacing elements of a Python dictionary using regex

Extracting JSON to PHP

Encoding foreign alphabet characters

Python.net is not receiving correct encoded string from .net

Python how convert single quotes to double quotes to format as json string

Categories

Resources