Related
I have a dictionary of students and their grades, and I need to return the students
whose TP01, TP02, and TP03 marks sum to more than 30.
Here is a mock dict to work with:
{'Name': ['Colin', 'Doyon', 'Tremblay', 'Audry', 'Gamelin', 'Dagenais', 'Legault',
'Larochelle', 'Gagnon', 'Simard'],
'FirstName': ['Jonas', 'Elisabeth', 'Françcois', 'Francine', 'Geneviève',
'Robert', 'Maxime', 'Pierre', 'Emilie', 'Martine'],
'TP01': ['7', '9.5', '8', '5', '6', '8', '10', '3', '7', '8'],
'TP02': ['12', '14', '12', '13', '10', '12', '14', '11', '10', '15'],
'TP03': ['11', '12', '11', '10', '10', '13', '12', '6', '7', '14'],
'EXAMEN_INTRA': ['22', '26', '20', '22', '20', '23', '27', '15', '13', '28'],
'EXAMEN_FINAL': ['28', '20', '22', '15', '14', '22', '25', '14', '14', '27']}
I can unfortunately not use any libraries.
What I have tried:
sums = list()
for tp1, tp2, tp3 in zip(dict_etudiants['TP01'], dict_etudiants['TP02'], dict_etudiants['TP03']):
sums.append(sum((float(tp1), float(tp2), float(tp3)))) # but then it leads nowhere
The biggest problem is your data structure: you keep the values in separate lists, but all the information about a single student should be in a single dictionary or list, which would make this much easier.
Something like this:
studends = [
{'Name': 'Colin', 'FirstName': 'Jonas', 'TP01': '7', 'TP02': '12', 'TP03': '11', 'EXAMEN_INTRA': '22', 'EXAMEN_FINAL': '28'},
{'Name': 'Doyon', 'FirstName': 'Elisabeth', ....},
...
]
This code gets all students with sum > 30, but it returns every student as a separate list - so if you need your original structure, you will have to convert the result.
# Column-oriented grade book: position i in every list belongs to the same student.
dict_etudiants = {
    'Name': ['Colin', 'Doyon', 'Tremblay', 'Audry', 'Gamelin', 'Dagenais', 'Legault',
             'Larochelle', 'Gagnon', 'Simard'],
    'FirstName': ['Jonas', 'Elisabeth', 'Françcois', 'Francine', 'Geneviève',
                  'Robert', 'Maxime', 'Pierre', 'Emilie', 'Martine'],
    'TP01': ['7', '9.5', '8', '5', '6', '8', '10', '3', '7', '8'],
    'TP02': ['12', '14', '12', '13', '10', '12', '14', '11', '10', '15'],
    'TP03': ['11', '12', '11', '10', '10', '13', '12', '6', '7', '14'],
    'EXAMEN_INTRA': ['22', '26', '20', '22', '20', '23', '27', '15', '13', '28'],
    'EXAMEN_FINAL': ['28', '20', '22', '15', '14', '22', '25', '14', '14', '27'],
}

# One row per selected student:
# [name, firstname, tp1, tp2, tp3, examen_intra, examen_final].
students = []

# zip() walks the parallel columns in lockstep, yielding one student at a time.
for tp1, tp2, tp3, name, firstname, examen_intra, examen_final in zip(
        dict_etudiants['TP01'], dict_etudiants['TP02'], dict_etudiants['TP03'],
        dict_etudiants['Name'], dict_etudiants['FirstName'],
        dict_etudiants['EXAMEN_INTRA'], dict_etudiants['EXAMEN_FINAL']):
    # Marks are stored as strings, so convert them before adding.
    if sum((float(tp1), float(tp2), float(tp3))) > 30:
        # Keep the original (string) values in the output row.
        students.append([name, firstname, tp1, tp2, tp3, examen_intra, examen_final])

print(students)
Result
[
['Doyon', 'Elisabeth', '9.5', '14', '12', '26', '20'],
['Tremblay', 'Françcois', '8', '12', '11', '20', '22'],
['Dagenais', 'Robert', '8', '12', '13', '23', '22'],
['Legault', 'Maxime', '10', '14', '12', '27', '25'],
['Simard', 'Martine', '8', '15', '14', '28', '27']
]
EDIT: I realized that you could convert it to a pandas.DataFrame, and then it is easy to search for students.
result = df[ df['TP01'] + df['TP02'] + df['TP03'] > 30 ]
Full code
import pprint

import pandas as pd

# Column-oriented grade book; every mark is stored as a string.
dict_etudiants = {
    'Name': ['Colin', 'Doyon', 'Tremblay', 'Audry', 'Gamelin', 'Dagenais', 'Legault',
             'Larochelle', 'Gagnon', 'Simard'],
    'FirstName': ['Jonas', 'Elisabeth', 'Françcois', 'Francine', 'Geneviève',
                  'Robert', 'Maxime', 'Pierre', 'Emilie', 'Martine'],
    'TP01': ['7', '9.5', '8', '5', '6', '8', '10', '3', '7', '8'],
    'TP02': ['12', '14', '12', '13', '10', '12', '14', '11', '10', '15'],
    'TP03': ['11', '12', '11', '10', '10', '13', '12', '6', '7', '14'],
    'EXAMEN_INTRA': ['22', '26', '20', '22', '20', '23', '27', '15', '13', '28'],
    'EXAMEN_FINAL': ['28', '20', '22', '15', '14', '22', '25', '14', '14', '27']
}

# Only the three TP columns take part in the sum, so only they are converted.
tp_columns = ['TP01', 'TP02', 'TP03']

# Build the DataFrame and turn the TP strings into floats in one step.
df = pd.DataFrame(dict_etudiants).astype({column: float for column in tp_columns})

# Boolean mask selecting the students whose three TP marks total more than 30.
tp_total = df['TP01'] + df['TP02'] + df['TP03']
result = df[tp_total > 30]

# Convert the selection back to the original dict-of-lists layout.
dict_etudiants_selected = result.to_dict(orient='list')

print('\n--- DataFrame ---\n')
print(result)

print('\n--- dict ---\n')
pprint.pprint(dict_etudiants_selected)
Result
--- DataFrame ---
Name FirstName TP01 TP02 TP03 EXAMEN_INTRA EXAMEN_FINAL
1 Doyon Elisabeth 9.5 14.0 12.0 26 20
2 Tremblay Françcois 8.0 12.0 11.0 20 22
5 Dagenais Robert 8.0 12.0 13.0 23 22
6 Legault Maxime 10.0 14.0 12.0 27 25
9 Simard Martine 8.0 15.0 14.0 28 27
--- dict ---
{'EXAMEN_FINAL': ['20', '22', '22', '25', '27'],
'EXAMEN_INTRA': ['26', '20', '23', '27', '28'],
'FirstName': ['Elisabeth', 'Françcois', 'Robert', 'Maxime', 'Martine'],
'Name': ['Doyon', 'Tremblay', 'Dagenais', 'Legault', 'Simard'],
'TP01': [9.5, 8.0, 8.0, 10.0, 8.0],
'TP02': [14.0, 12.0, 12.0, 14.0, 15.0],
'TP03': [12.0, 11.0, 13.0, 12.0, 14.0]}
After reading from a file I have a list of lists containing not only digits but also other characters, which I would like to get rid of.
I've tried using re.sub function but this doesn't seem to work
import re
Poly_id= [['0', '[4', '8', '18', '20', '5', '0', '4]'], ['1', '[13', '16',
'6', '11', '13]'], ['2', '[3', '1', '10', '9', '2', '15', '3]'], ['3',
'[13', '12', '16', '13]'], ['4', '[13', '11', '17', '14', '7', '13]']]
for x in Poly_id:
[re.sub(r'\W', '', ch) for ch in x]
This doesn't seem to change a thing in this list.
I would like to have a list with only numbers as elements so that I could convert them into integers
I guess technically [4 is non numeric so you can do something like this:
Poly_id = [[char for char in _list if str.isnumeric(char)] for _list in Poly_id]
Output:
['0', '8', '18', '20', '5', '0']
['1', '16', '6', '11']
['2', '1', '10', '9', '2', '15']
['3', '12', '16']
['4', '11', '17', '14', '7']
If you just want to remove the non numeric values and not the complete entry then you can do this:
Poly_id = [[''.join(char for char in substring if str.isnumeric(char)) for substring in _list] for _list in Poly_id]
Output:
['0', '4', '8', '18', '20', '5', '0', '4']
['1', '13', '16', '6', '11', '13']
['2', '3', '1', '10', '9', '2', '15', '3']
['3', '13', '12', '16', '13']
['4', '13', '11', '17', '14', '7', '13']
Here a solution if you want to get rid of the '[' in '[4' but keep the '4':
res = [[re.sub(r'\W', '', st) for st in inlist] for inlist in Poly_id]
res is:
[
['0', '4', '8', '18', '20', '5', '0', '4'],
['1', '13', '16', '6', '11', '13'],
['2', '3', '1', '10', '9', '2', '15', '3'],
['3', '13', '12', '16', '13'],
['4', '13', '11', '17', '14', '7', '13']
]
You can use a module, "itertools"
import itertools

# Flatten one level of nesting by chaining the inner lists end to end.
list_of_lists = [[1, 2], [3, 4]]
flat = list(itertools.chain.from_iterable(list_of_lists))
print(flat)
>>>[1, 2, 3, 4]
I'm trying to do some webscraping to download all the results of euromillions, stuck with errors now. I'm using jupyter and python 3 with the modules specified. With just one link the code worked just fine but now I added a loop and some modifications and rip xD
import bs4
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
years = list(range(2004,2018))
for year in years:
my_urls = ('https://www.euro-millions.com/pt/arquivo-de-resultados-' + str(year),)
my_url = my_urls[0]
for my_url in my_urls:
Client = uReq(my_url)
html = Client.read
Client.close()
euro = soup(html, "html")
containers = euro.findAll("div",{"class":"archives"})
print(containers)
container = containers[0]
for container in containers:
data = container.a["href"].replace('/pt/resultados/','') #Usamos os [] como num dicionario, .strip tbm retira o lixo sometimes
bolasN = container.ul.findAll("li",{"class":"ball"})
bolasS = container.ul.findAll("li",{"class":"lucky-star"})
bola1 = bolasN[0].text
bola2 = bolasN[1].text
bola3 = bolasN[2].text
bola4 = bolasN[3].text
bola5 = bolasN[4].text
star1 = bolasS[0].text
star2 = bolasS[1].text
TUDO = [data, bola1, bola2, bola3, bola4, bola5, star1, star2]
print(TUDO)
TRACEBACK:
TypeError Traceback (most recent call last)
<ipython-input-31-b11e2044b5ea> in <module>
12 html = Client.read
13 Client.close()
---> 14 euro = soup(html, "html")
15 containers = euro.findAll("div",{"class":"archives"})
16 print(containers)
/usr/local/lib/python3.5/dist-packages/bs4/__init__.py in __init__(self, markup, features, builder, parse_only, from_encoding, exclude_encodings, **kwargs)
244 if hasattr(markup, 'read'): # It's a file-type object.
245 markup = markup.read()
--> 246 elif len(markup) <= 256 and (
247 (isinstance(markup, bytes) and not b'<' in markup)
248 or (isinstance(markup, str) and not '<' in markup)
TypeError: object of type 'method' has no len()
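The parser name was wrong: it should be 'html.parser' or 'lxml', not "html". Note also that the TypeError in the traceback comes from `html = Client.read`, which stores the method itself instead of calling it (`Client.read()`); the version below avoids both problems by fetching the page with requests.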
# Download the EuroMillions results archive pages and print one row per draw.
# NOTE(review): the indentation of this listing was lost, so the nesting of the
# loops below has to be restored before it can run — TODO confirm the intended nesting.
import requests
from bs4 import BeautifulSoup as soup
# Archive years 2004 through 2017 (the end of range() is exclusive).
years = list(range(2004,2018))
for year in years:
# One-element tuple holding the results-archive URL for this year.
my_urls = ('https://www.euro-millions.com/pt/arquivo-de-resultados-' + str(year),)
# Redundant assignment: the for statement below rebinds my_url anyway.
my_url = my_urls[0]
for my_url in my_urls:
# Fetch the page and parse the response body with the built-in "html.parser".
Client = requests.get(my_url)
euro = soup(Client.content, "html.parser")
# Every <div class="archives"> element found on the page.
containers = euro.findAll("div",{"class":"archives"})
#print(containers)
# Raises IndexError when nothing matched; the value itself is overwritten by the loop below.
container = containers[0]
for container in containers:
# href of the container's first <a>, with the '/pt/resultados/' prefix removed.
data = container.a["href"].replace('/pt/resultados/','') # [] is used here as on a dictionary; .strip also removes the junk sometimes
# <li class="ball"> and <li class="lucky-star"> items inside the container's <ul>.
bolasN = container.ul.findAll("li",{"class":"ball"})
bolasS = container.ul.findAll("li",{"class":"lucky-star"})
# The fixed indexes below need at least five balls and two stars, else IndexError.
bola1 = bolasN[0].text
bola2 = bolasN[1].text
bola3 = bolasN[2].text
bola4 = bolasN[3].text
bola5 = bolasN[4].text
star1 = bolasS[0].text
star2 = bolasS[1].text
# Row for this draw: link identifier, five ball texts, two lucky-star texts.
TUDO = [data, bola1, bola2, bola3, bola4, bola5, star1, star2]
print(TUDO)
Output:
['29-12-2017', '4', '8', '22', '23', '48', '1', '12']
['26-12-2017', '4', '17', '30', '43', '44', '2', '10']
['22-12-2017', '5', '24', '30', '31', '43', '3', '6']
['19-12-2017', '8', '15', '30', '38', '46', '4', '7']
['15-12-2017', '25', '30', '31', '42', '50', '2', '11']
['12-12-2017', '20', '37', '39', '44', '50', '4', '8']
['08-12-2017', '4', '22', '30', '32', '34', '3', '4']
['05-12-2017', '11', '36', '43', '44', '48', '2', '7']
['01-12-2017', '5', '24', '29', '35', '46', '11', '12']
['28-11-2017', '1', '6', '12', '18', '42', '2', '7']
['24-11-2017', '19', '24', '28', '30', '50', '3', '10']
['21-11-2017', '2', '10', '14', '28', '31', '5', '7']
['17-11-2017', '20', '26', '35', '36', '42', '5', '12']
['14-11-2017', '14', '16', '39', '40', '41', '8', '10']
['10-11-2017', '13', '22', '29', '36', '37', '1', '9']
['07-11-2017', '7', '19', '20', '37', '41', '2', '12']
['03-11-2017', '5', '12', '17', '33', '41', '4', '9']
['31-10-2017', '1', '12', '36', '43', '46', '3', '5']
['27-10-2017', '3', '16', '23', '32', '39', '1', '4']
['24-10-2017', '9', '11', '13', '27', '33', '7', '10']
['20-10-2017', '4', '17', '23', '27', '30', '3', '8']
['17-10-2017', '13', '17', '19', '26', '36', '2', '3']
['13-10-2017', '23', '29', '37', '45', '50', '5', '11']
['10-10-2017', '4', '21', '34', '36', '37', '3', '6']
['06-10-2017', '1', '9', '15', '19', '25', '1', '7']
['03-10-2017', '6', '24', '32', '48', '50', '1', '5']
['29-09-2017', '7', '18', '19', '32', '48', '3', '7']
['26-09-2017', '1', '29', '40', '41', '48', '6', '12']
['22-09-2017', '6', '11', '31', '39', '42', '1', '3']
['19-09-2017', '1', '8', '21', '30', '45', '2', '3']
['15-09-2017', '13', '18', '37', '44', '49', '9', '12']
['12-09-2017', '10', '17', '27', '29', '35', '4', '11']
['08-09-2017', '9', '24', '42', '47', '49', '1', '5']
['05-09-2017', '6', '9', '18', '28', '29', '1', '9']
['01-09-2017', '3', '7', '8', '14', '49', '5', '8']
['29-08-2017', '4', '12', '15', '32', '38', '1', '5']
['25-08-2017', '1', '5', '7', '15', '47', '9', '12']
['22-08-2017', '3', '10', '12', '17', '27', '3', '5']
['18-08-2017', '2', '24', '39', '42', '45', '2', '8']
['15-08-2017', '10', '14', '30', '35', '46', '4', '10']
['11-08-2017', '18', '28', '39', '46', '48', '5', '12']
['08-08-2017', '15', '25', '26', '40', '41', '4', '5']
['04-08-2017', '29', '30', '36', '40', '41', '2', '9']
['01-08-2017', '14', '21', '24', '29', '30', '8', '10']
['28-07-2017', '5', '9', '29', '31', '41', '2', '4']
['25-07-2017', '12', '14', '43', '44', '48', '2', '11']
['21-07-2017', '1', '8', '9', '26', '49', '5', '9']
['18-07-2017', '1', '25', '27', '41', '45', '5', '7']
['14-07-2017', '11', '14', '20', '21', '47', '7', '10']
['11-07-2017', '14', '22', '26', '42', '50', '8', '10']
['07-07-2017', '11', '20', '35', '37', '45', '3', '6']
['04-07-2017', '10', '22', '25', '37', '49', '5', '8']
['30-06-2017', '17', '35', '39', '47', '50', '6', '8']
['27-06-2017', '9', '17', '21', '28', '45', '1', '3']
['23-06-2017', '3', '4', '21', '31', '38', '3', '7']
['20-06-2017', '11', '18', '26', '43', '44', '8', '10']
['16-06-2017', '15', '17', '38', '41', '42', '9', '12']
['13-06-2017', '3', '12', '22', '27', '49', '4', '11']
['09-06-2017', '9', '20', '27', '39', '43', '10', '11']
['06-06-2017', '20', '22', '25', '37', '40', '3', '7']
['02-06-2017', '8', '10', '24', '33', '42', '3', '9']
['30-05-2017', '7', '12', '27', '38', '48', '6', '9']
['26-05-2017', '5', '7', '26', '36', '39', '2', '10']
['23-05-2017', '8', '15', '25', '27', '42', '1', '4']
['19-05-2017', '9', '11', '12', '19', '30', '4', '9']
['16-05-2017', '8', '11', '15', '20', '30', '3', '8']
['12-05-2017', '2', '20', '28', '29', '44', '3', '9']
['09-05-2017', '8', '12', '16', '22', '26', '6', '7']
['05-05-2017', '3', '7', '30', '35', '43', '1', '3']
['02-05-2017', '6', '19', '23', '25', '27', '11', '12']
['28-04-2017', '14', '20', '25', '30', '39', '2', '8']
['25-04-2017', '9', '11', '19', '32', '43', '3', '9']
['21-04-2017', '2', '13', '16', '22', '49', '4', '5']
['18-04-2017', '17', '22', '31', '38', '45', '5', '12']
['14-04-2017', '4', '14', '20', '23', '33', '6', '10']
['11-04-2017', '5', '21', '22', '31', '49', '2', '8']
['07-04-2017', '2', '10', '19', '35', '50', '6', '7']
['04-04-2017', '1', '9', '24', '33', '34', '2', '6']
['31-03-2017', '17', '24', '26', '28', '45', '4', '12']
['28-03-2017', '9', '13', '31', '33', '46', '6', '10']
['24-03-2017', '2', '17', '21', '27', '34', '5', '9']
['21-03-2017', '1', '20', '23', '44', '47', '4', '11']
['17-03-2017', '6', '10', '19', '29', '36', '3', '9']
['14-03-2017', '3', '5', '21', '36', '44', '3', '6']
['10-03-2017', '31', '36', '38', '47', '49', '8', '11']
['07-03-2017', '6', '37', '41', '48', '50', '4', '5']
['03-03-2017', '2', '11', '29', '30', '47', '1', '12']
['28-02-2017', '10', '20', '31', '35', '42', '2', '12']
['24-02-2017', '2', '4', '13', '22', '43', '8', '9']
['21-02-2017', '13', '19', '41', '45', '49', '3', '4']
['17-02-2017', '19', '25', '33', '36', '48', '2', '9']
['14-02-2017', '2', '10', '24', '40', '44', '3', '10']
['10-02-2017', '7', '21', '26', '35', '43', '2', '9']
['07-02-2017', '4', '10', '31', '38', '44', '8', '10']
['03-02-2017', '3', '4', '15', '46', '50', '5', '9']
['31-01-2017', '3', '4', '17', '23', '44', '6', '9']
['27-01-2017', '17', '20', '28', '45', '48', '5', '9']
['24-01-2017', '1', '5', '7', '17', '23', '3', '8']
['20-01-2017', '10', '17', '27', '31', '49', '3', '5']
['17-01-2017', '4', '16', '25', '43', '47', '2', '10']
['13-01-2017', '3', '7', '16', '26', '50', '4', '7']
['10-01-2017', '2', '11', '29', '35', '44', '4', '9']
['06-01-2017', '10', '14', '18', '21', '49', '9', '11']
['03-01-2017', '19', '23', '27', '34', '49', '1', '11']
try passing HTML text directly
soup = BeautifulSoup(html.text)
I am trying to create an array using two lists, one of which has a list for each element. The problem is that in the first case I manage to do what I want, using np.column_stack, but in the second case, although my initial lists look similar (in structure), my list of lists enters the array flattened (which is not what I need).
I am attaching two examples to replicate, on the first case, I manage to get an array, where each line has a string as first element, and a list as a second, while in the second case, I get 4 columns (the list is flattened) with no obvious reason.
Example 1
temp_list_column1=['St. Raphael',
'Goppingen',
'HSG Wetzlar',
'Huttenberg',
'Kiel',
'Stuttgart',
'Izvidac',
'Viborg W',
'Silkeborg-Voel W',
'Bjerringbro W',
'Lyngby W',
'Most W',
'Ostrava W',
'Presov W',
'Slavia Prague W',
'Dicken',
'Elbflorenz',
'Lubeck-Schwartau',
'HK Ogre/Miandum',
'Stal Mielec',
'MKS Perla Lublin W',
'Koscierzyna W',
'CS Madeira W',
'CSM Focsani',
'CSM Bucuresti',
'Constanta',
'Iasi',
'Suceava',
'Timisoara',
'Saratov',
'Alisa Ufa W',
'Pozarevac',
'Nove Zamky',
'Aranas',
'Ricoh',
'H 65 Hoor W',
'Lugi W',
'Strands W']
temp_list_column2=[['32', '16', '16'],
['32', '16', '16'],
['27', '13', '14'],
['23', '9', '14'],
['29', '14', '15'],
['24', '17', '7'],
['30', '15', '15'],
['26', '12', '14'],
['27', '13', '14'],
['26'],
['18', '9', '9'],
['34', '15', '19'],
['30', '13', '17'],
['31', '13', '18'],
['27', '10', '17'],
['28', '14', '14'],
['24', '14', '10'],
['28', '12', '16'],
['28', '9', '19'],
['22', '13', '9'],
['30', '14', '16'],
['22', '14', '8'],
['17', '8', '9'],
['26'],
['41', '21', '20'],
['36', '18', '18'],
['10'],
['25', '12', '13'],
['27', '16', '11'],
['31', '15', '16'],
['25', '15', '10'],
['24', '8', '16'],
['28', '14', '14'],
['24', '13', '11'],
['26', '14', '12'],
['33', '17', '16'],
['26', '12', '14'],
['17', '12', '5']]
import numpy as np
temp_array = np.column_stack((temp_list_column1,temp_list_column2))
output
array([['St. Raphael', ['32', '16', '16']],
['Goppingen', ['32', '16', '16']],
['HSG Wetzlar', ['27', '13', '14']],
['Huttenberg', ['23', '9', '14']],
['Kiel', ['29', '14', '15']],
['Stuttgart', ['24', '17', '7']],
['Izvidac', ['30', '15', '15']],
['Viborg W', ['26', '12', '14']],
['Silkeborg-Voel W', ['27', '13', '14']],
['Bjerringbro W', ['26']],
['Lyngby W', ['18', '9', '9']],
['Most W', ['34', '15', '19']],
['Ostrava W', ['30', '13', '17']],
['Presov W', ['31', '13', '18']],
['Slavia Prague W', ['27', '10', '17']],
['Dicken', ['28', '14', '14']],
['Elbflorenz', ['24', '14', '10']],
['Lubeck-Schwartau', ['28', '12', '16']],
['HK Ogre/Miandum', ['28', '9', '19']],
['Stal Mielec', ['22', '13', '9']],
['MKS Perla Lublin W', ['30', '14', '16']],
['Koscierzyna W', ['22', '14', '8']],
['CS Madeira W', ['17', '8', '9']],
['CSM Focsani', ['26']],
['CSM Bucuresti', ['41', '21', '20']],
['Constanta', ['36', '18', '18']],
['Iasi', ['10']],
['Suceava', ['25', '12', '13']],
['Timisoara', ['27', '16', '11']],
['Saratov', ['31', '15', '16']],
['Alisa Ufa W', ['25', '15', '10']],
['Pozarevac', ['24', '8', '16']],
['Nove Zamky', ['28', '14', '14']],
['Aranas', ['24', '13', '11']],
['Ricoh', ['26', '14', '12']],
['H 65 Hoor W', ['33', '17', '16']],
['Lugi W', ['26', '12', '14']],
['Strands W', ['17', '12', '5']]], dtype=object)
Example 2
temp_list_column1b=['Benidorm',
'Alpla Hard',
'Dubrava',
'Frydek-Mistek',
'Karvina',
'Koprivnice',
'Nove Veseli',
'Vardar',
'Meble Elblag Wojcik',
'Zaglebie',
'Benfica',
'Barros W',
'Juvelis W',
'Assomada W',
'UOR No.2 Moscow',
'Izhevsk W',
'Stavropol W',
'Din. Volgograd W',
'Zvenigorod W',
'Adyif W',
'Crvena zvezda',
'Ribnica',
'Slovan',
'Jeruzalem Ormoz',
'Karlskrona',
'Torslanda W']
temp_list_column2b=[['28', '14', '14'],
['27', '12', '15'],
['24', '13', '11'],
['24', '14', '10'],
['28', '17', '11'],
['30', '16', '14'],
['26', '15', '11'],
['38', '18', '20'],
['24', '13', '11'],
['33', '15', '18'],
['24', '10', '14'],
['18', '11', '7'],
['22', '9', '13'],
['25', '12', '13'],
['19', '11', '8'],
['24', '10', '14'],
['21', '9', '12'],
['18', '10', '8'],
['31', '17', '14'],
['29', '15', '14'],
['26', '14', '12'],
['29', '12', '17'],
['25', '11', '14'],
['33', '19', '14'],
['32', '14', '18'],
['19', '12', '7']]
import numpy as np
temp_arrayb = np.column_stack((temp_list_column1b,temp_list_column2b))
output
array([['Benidorm', '28', '14', '14'],
['Alpla Hard', '27', '12', '15'],
['Dubrava', '24', '13', '11'],
['Frydek-Mistek', '24', '14', '10'],
['Karvina', '28', '17', '11'],
['Koprivnice', '30', '16', '14'],
['Nove Veseli', '26', '15', '11'],
['Vardar', '38', '18', '20'],
['Meble Elblag Wojcik', '24', '13', '11'],
['Zaglebie', '33', '15', '18'],
['Benfica', '24', '10', '14'],
['Barros W', '18', '11', '7'],
['Juvelis W', '22', '9', '13'],
['Assomada W', '25', '12', '13'],
['UOR No.2 Moscow', '19', '11', '8'],
['Izhevsk W', '24', '10', '14'],
['Stavropol W', '21', '9', '12'],
['Din. Volgograd W', '18', '10', '8'],
['Zvenigorod W', '31', '17', '14'],
['Adyif W', '29', '15', '14'],
['Crvena zvezda', '26', '14', '12'],
['Ribnica', '29', '12', '17'],
['Slovan', '25', '11', '14'],
['Jeruzalem Ormoz', '33', '19', '14'],
['Karlskrona', '32', '14', '18'],
['Torslanda W', '19', '12', '7']],
dtype='<U19')
In the first case, the shape is (38, 2), while in the second it is (26, 4) (I am interested in the number of columns only). Am I missing something obvious?
Your problem here seems to be that the first B list is jagged, while your second is rectangular.
Look at the difference in how NumPy converts the following two lists into arrays (which, as @hpaulj points out, is exactly what happens when you pass them to column_stack):
In [1]: b1 = [
...: [1,2,3],
...: [2,3,4],
...: [3,4,5],
...: [4,5,6]]
In [2]: np.array(b1)
Out[2]:
array([[1, 2, 3],
[2, 3, 4],
[3, 4, 5],
[4, 5, 6]])
In [3]: b2 = [
...: [1,2,3],
...: [2,3],
...: [3]]
In [4]: np.array(b2)
Out[4]: array([list([1, 2, 3]), list([2, 3]), list([3])], dtype=object)
Thus, when column stacking your example lists, in the first case you have a 1D array of lists that gets converted into a single column, whereas in the second case you have a 2D matrix of numbers that has 3 columns.
You should probably just not even be using Numpy's column_stack in this case, just zip the two lists together. If you want a numpy array as your final result, just np.array(list(zip(list_a, list_b)))
EDIT: In retrospect, your data structure sounds more like what's typically referred to as a DataFrame, rather than a matrix which is what Numpy is trying to give you.
import pandas as pd

# Option 1: start from an empty frame and add one column per list.
# Each cell of 'numbers' holds a whole Python list.
data = pd.DataFrame()
data['name'] = temp_list_column1
data['numbers'] = temp_list_column2
# Or
# Option 2: zip the two lists into (name, numbers) rows and name the columns.
data = pd.DataFrame(list(zip(temp_list_column1, temp_list_column2)), columns=['name', 'numbers'])
Which gives you a data structure that looks like:
name numbers
0 John [1, 2, 3]
1 James [2, 3, 4]
2 Peter [3, 4, 5]
3 Paul [4, 5, 6]
Diagnosis
It seems the issue is that in the 2nd example all the sublists have 3 elements, while in the first example there are sublists of length 1, e.g. ['Bjerringbro W', ['26']]; the list ['26'] has only one element.
In the second case, np.column_stack apparently does NOT keep the lists as cell elements. We could have another discussion about why you want lists as cell elements, which I will not go into here. Here is the solution:
Special Case Solution
I assume you don't mind using pandas
import pandas as pd
series_1 = pd.Series(temp_list_column1b).to_frame(name='col1') # name it whatever you want
series_2 = pd.Series(temp_list_column2b).to_frame(name='col2') # name it whatever you want
df = pd.concat([series_1, series_2], axis=1)
# print(df) # view in pandas form
# print(df.values) # to see how it looks like as a numpy array
# print(df.values.shape) # to see how what the shape is in terms of numpy
Generalized Solution
Assuming you have a list of such columns which is called "list_of_cols". Then:
import pandas as pd
'''
list_of_cols: all the lists you want to combine
'''
df = pd.concat([pd.Series(temp_col).to_frame() for temp_col in list_of_cols], axis=1)
I hope this helps!
Illinois: ['13', '12', '18', '23', '26', '25', '24', '19', '13', '10', '15', '14', '14', '4', '3']
Indiana: ['7', '6', '7', '8', '11', '11', '13', '12', '7', '7', '7', '7', '9', '2', '2']
Those are in my dictionary as d.
How would I get the largest and smallest value for each key in the dictionary, along with the index where each value is?
For example:
In Illinois, 26 is the largest value which is index 5 and 3 is the smallest value which is index 15.
in Indiana: 13 is largest value which is index 7 and 2 is the smallest value which is index 14
The output:
Illinois: 26 in index 5 and 3 in index 15
Indiana: 13 in index 7 and 2 in index 14
How would I do this?
d = {}
for row in csv_f:
d[row[0]]=row[1:]
You can get the max and mins printed out as your string is like this:
(assuming you only want the first occurrence)
MY_D = {'Illinois': ['13', '12', '18', '23', '26', '25', '24', '19', '13', '10', '15', '14', '14', '4', '3'],
        'Indiana': ['7', '6', '7', '8', '11', '11', '13', '12', '7', '7', '7', '7', '9', '2', '2']}

# Print the smallest and largest value of every key together with its 0-based index.
for k, v in MY_D.items():
    # This assumes that everything in v is an int, or rather can be converted to one.
    my_l = [int(n) for n in v]
    # If not, the filtered variant below skips non-digit entries. Beware that
    # dropping entries shifts the indexes relative to the original list v.
    # my_l = [int(n) for n in v if n.isdigit()]
    _max, _min = max(my_l), min(my_l)
    # list.index() returns the first match, so ties report the earliest position.
    print("%s: Min - %d in index %d, Max - %d in index %d" % (k, _min, my_l.index(_min), _max, my_l.index(_max)))
Here is a solution returning a dict {state: ((index, maxval), (index, minval))}:
d = {
    'Illinois': ['13', '12', '18', '23', '26', '25', '24', '19', '13', '10', '15', '14', '14', '4', '3'],
    'Indiana': ['7', '6', '7', '8', '11', '11', '13', '12', '7', '7', '7', '7', '9', '2', '2']
}


def numeric_value(indexed):
    """Return the integer value of an ``(index, text)`` pair from enumerate()."""
    return int(indexed[1])


# Map each state to ((index, max_text), (index, min_text)).
# enumerate() supplies the 0-based index; the values stay as the original strings
# and are only converted to int for the comparison. On ties max()/min() keep the
# first pair they meet, i.e. the smallest index.
maxmin = {
    state: (
        max(enumerate(numbers), key=numeric_value),
        min(enumerate(numbers), key=numeric_value),
    )
    for state, numbers in d.items()
}
print(maxmin)
Bit thrown together, but seems to do the job.
# NOTE: this is Python 2 code (print statement, xrange); it does not run unchanged on Python 3.
d = {"Illinois": ['13', '12', '18', '23', '26', '25', '24', '19', '13', '10', '15', '14', '14', '4', '3'],
"Indiana": ['7', '6', '7', '8', '11', '11', '13', '12', '7', '7', '7', '7', '9', '2', '2']}
if __name__ == "__main__":
# Show the raw input first.
print d
for state in d:
# Pair every number with its position as (value, index).
pairs = [(int(d[state][x]), x) for x in xrange(len(d[state]))]
# Tuples compare by value first and by index second, so on ties min() reports
# the earliest index and max() the latest one.
minpair = min(pairs)
maxpair = max(pairs)
# Printed as: state, max value, max index, min value, min index.
print "%s: %d in index %d and %d in index %d"%(state,maxpair[0],maxpair[1],
minpair[0],minpair[1])
Output:
{'Indiana': ['7', '6', '7', '8', '11', '11', '13', '12', '7', '7', '7', '7', '9', '2', '2'], 'Illinois': ['13', '12', '18', '23', '26', '25', '24', '19', '13', '10', '15', '14', '14', '4', '3']}
Indiana: 13 in index 6 and 2 in index 13
Illinois: 26 in index 4 and 3 in index 14
to get around the blank string, you could break up the list comprehension into
pairs = []
for x in xrange(len(d[state])):
try:
pairs.append( (int(d[state][x]), x) )
except ValueError:
pass # not a valid number