Multiplying a specific column in txt file with constant number - python

I have txt file with 7 column...I want to mutiply a 3rd column with a constant number keeping all other column same and then output the file containing all the columns. Anyone can help?
1 2 1
2 2 1
3 2 1
mutiplying column 3 with "14" the output should be like
1 2 14
2 2 14
3 2 14

While you have a text file with 7 columns, your example only shows 3.
So I have based my answer on your example:
The important part of code related to multiplication is this:
matrix[:,(target_col-1)] *= c_val
Here is the full PYTHON code:
import numpy as np
# Constant value (used for multiplication)
c_val = 14
# Number of columns in the matrix
n_col = 3
# Column to be multiplied (ie. Third column)
target_col = 3
# Import the text file containing the matrix
filename = 'data.txt'
matrix = np.loadtxt(filename, usecols=range(n_col))
# Multiply the target column (ie. 3rd column) by c_val (ie.14)
matrix[:,(target_col-1)] *= c_val
# Save the matrix to a new text file
with open('new_text_file.txt','wb') as f:
np.savetxt(f, matrix, delimiter=' ', fmt='%d')
OUTPUT:
new_text_file.txt
1 2 14
2 2 14
3 2 14

This is a possible solution for C++17.
If you are sure about the format of the input file, you could reduce the code to the one below:
Just walk the input stream, multiply every 3rd number by a constant, and add a new line for every 10th number (you mentioned 7 numbers per line but then your example contained 9 numbers).
Notice you would need to use file streams instead of string streams.
#include <fmt/core.h>
#include <sstream> // istringstream, ostringstream
void parse_iss(std::istringstream& iss, std::ostringstream& oss, int k) {
for (int number_counter{ 0 }, number; iss >> number; ++number_counter) {
oss << ((number_counter % 3 == 2) ? number*k : number);
oss << ((number_counter % 9 == 8) ? "\n" : " ");
}
}
int main() {
std::istringstream iss{
"1 2 1 2 2 1 3 2 1\n"
"2 4 2 4 4 5 5 5 6\n"
};
std::ostringstream oss{};
parse_iss(iss, oss, 14);
fmt::print("{}", oss.str());
}
// Outputs:
//
// 1 2 14 2 2 14 3 2 14
// 2 4 28 4 4 70 5 5 84
[Demo]

Can be done as below:
MULTIPLIER = 14
input_file_name = "numbers_in.txt"
output_file_name = "numbers_out.txt"
with open(input_file_name, 'r') as f:
lines = f.readlines()
with open(output_file_name, 'w+') as f:
for line in lines:
new_line = ""
for i, x in enumerate(line.strip().split(" ")):
if (i+1)%3 == 0:
new_line += str(int(x)*MULTIPLIER) + " "
else:
new_line += x + " "
f.writelines(new_line + "\n")
# numbers_in.txt:
# 1 2 1 2 2 1 3 2 1
# 1 3 1 3 3 1 4 3 1
# 1 4 1 4 4 1 5 4 1
# numbers_out.txt:
# 1 2 14 2 2 14 3 2 14
# 1 3 14 3 3 14 4 3 14
# 1 4 14 4 4 14 5 4 14

Related

Python Welsh-Powell graph coloring

I have implemented algorithm for Welsh-Powell graph coloring.
Task is that you have txt document with lines of 2 numbers, in which the first one represent vertex and the second one represent its neighbor.
Looks like this:
1 2
1 4
1 5
1 8
1 9
1 11
2 3
2 4
2 5
2 6
2 7
2 8
2 10
2 11
2 12
3 4
3 5
3 6
3 7
3 8
3 9
3 11
4 6
4 9
4 10
4 11
4 12
5 6
5 7
5 10
5 11
5 12
6 7
6 10
6 11
6 12
7 8
7 10
7 11
7 12
8 10
8 11
8 12
9 10
9 12
10 12
Output should be also txt document with two numbers in line, where the first one is vertex and the second one is number of color like this:
1 2
2 1
3 2
4 3
5 3
6 4
7 5
8 3
9 1
10 2
11 6
12 6
The number of used colors is not important, as it shouldnt be more than all vertexes.
This is graphColoring.py
def file_to_graph(filename):
file1 = open(filename, 'r')
lines = file1.readlines()
graph = [[], []]
for line in lines:
if line == '':
continue
row = line.split(' ')
from_vertex = int(row[0])
to_vertex = int(row[1])
found_from_vertex = -1
found_to_vertex = -1
for i in range(len(graph[0])):
if graph[0][i][0] == from_vertex:
graph[0][i][1] += 1
found_from_vertex = i
if graph[0][i][0] == to_vertex:
graph[0][i][1] += 1
found_to_vertex = i
if found_from_vertex == -1:
graph[0].append([from_vertex, 1])
if found_to_vertex == -1:
graph[0].append([to_vertex, 1])
graph[1].append((from_vertex, to_vertex))
return graph
def get_color(colored_vertexes, vertex):
for colored_vertex in colored_vertexes:
if colored_vertex[0] == vertex:
return colored_vertex[1]
return -1
def color_graph(graph, max_colors=-1):
sorted_vertexes = sorted(graph[0], key=lambda x: x[1])
colored_vertexes = []
for vertex_with_edges in reversed(sorted_vertexes):
colored_vertexes.append([vertex_with_edges[0], 0])
actual_color = 1
while True:
for vertex_with_edge_count in reversed(sorted_vertexes):
color = 0
colored_vertex_index = -1
vertex_name = vertex_with_edge_count[0]
for i in range(len(colored_vertexes)):
if colored_vertexes[i][0] == vertex_name:
color = colored_vertexes[i][1]
colored_vertex_index = i
break
if color != 0:
continue
color_of_neighbours = []
for (from_vertex, to_vertex) in graph[1]:
if from_vertex == vertex_name:
color_of_neighbours.append(get_color(colored_vertexes, to_vertex))
if to_vertex == vertex_name:
color_of_neighbours.append(get_color(colored_vertexes, from_vertex))
if actual_color not in color_of_neighbours:
colored_vertexes[colored_vertex_index][1] = actual_color
actual_color += 1
is_colored = True
for colored_vertex in colored_vertexes:
if colored_vertex[1] == 0:
is_colored = False
break
if is_colored:
break
return colored_vertexes
And this is main.py
from graphColoring import file_to_graph, color_graph
import sys
if __name__ == '__main__':
graph = file_to_graph(sys.argv[1])
colored_vertexes = color_graph(graph)
for colored_vertex in sorted(colored_vertexes, key=lambda x: x[0]):
print(str(colored_vertex[0]) + " " + str(colored_vertex[1]))
EDIT//: I received a solution for problem with input, but I dont have expected output. Instead of expected output above, i received this:
1 2
2 1
3 2
4 4
5 6
6 5
7 4
8 5
9 1
10 3
11 3
12 2
//
I should use "python main.py ./input.txt" to input txt document, but after running the program it says:
line 7, in <module>
graph = file_to_graph(sys.argv[1])
~~~~~~~~^^^
IndexError: list index out of range
I˙m not so good at programming and all of this is really hard for me to understand so I really appreciate some help to make this functional.
For me it is working. And it also gives your desired output
1 1
2 2
3 1
4 2
I think it is about how you run the script. Because when I run
python .\main.py I also get
Traceback (most recent call last):
File "C:\Users\fhdwnig\Desktop\main.py", line 5, in <module>
graph = file_to_graph(sys.argv[1])
~~~~~~~~^^^
you have to run it like this:
python .\main.py .\input.txt
(on windows)
The script is missing the input file.
change this line
sorted_vertexes = sorted(graph[0], key=lambda x: x[1])
to this one
sorted_vertexes = sorted(sorted(graph[0][::-1],key=lambda x:x[0], reverse=True), key=lambda x: x[1])

Python Get values to variable only between 1 to 255

data = """
abcd1 1
abcd2 2
abcd3 3
abcd4 4
abcd5 5
abcd6 6
abcd7 7
abcd8 8
abcd9 9
.
.
.
abcd256 1
abcd257 2
abcd258 3
abcd259 4
abcd260 5
abcd261 6
abcd262 7
abcd263 8
abcd264 9
"""
if abcd1, then Get value 1,
if abcd2, then Get value 2,...so on
if abcd256, then Get value 1,
if abcd257, then Get value 2,
Condition value must be in 1 to 255
Check string already exist in data variable. I have used below code:
check = set()
for line in data.split("\n"):
if len(line.split()) > 1:
line = line.strip()
check.add(line.split()[0])
if not "abcd264" in check:
print "Not exist":
value = 9#Help required to get value here
else:
print "Its already exist. Program exit"
sys.exit()
Suggested using Pandas in other post, But I need to implement without using Pandas
If you wish to do it in pure Python, you can try doing it this way:
data = """
abcd1 1
abcd2 2
abcd3 3
abcd4 4
abcd5 5
abcd6 6
abcd7 7
abcd8 8
abcd9 9
abcd256 1
abcd257 2
abcd258 3
abcd259 4
abcd260 5
abcd261 6
abcd262 7
abcd263 8
abcd264 9
"""
data = data.replace(" ","").replace(" "," ").split("\n")[1:-1]
for d in data:
number = int(d.split()[0][4:])
print("For number %d the result is: %d" % (number,number % 255))
Output:
For number 1 the result is: 1
For number 2 the result is: 2
For number 3 the result is: 3
For number 4 the result is: 4
For number 5 the result is: 5
For number 6 the result is: 6
For number 7 the result is: 7
For number 8 the result is: 8
For number 9 the result is: 9
For number 256 the result is: 1
For number 257 the result is: 2
For number 258 the result is: 3
For number 259 the result is: 4
For number 260 the result is: 5
For number 261 the result is: 6
For number 262 the result is: 7
For number 263 the result is: 8
For number 264 the result is: 9
You are better off vectorising your manipulations via a library such as pandas. Here is an example:
from io import StringIO
import pandas as pd
mystr = """abcd1 1
abcd2 2
abcd3 3
abcd4 4
abcd5 5
abcd6 6
abcd7 7
abcd8 8
abcd9 9
"""
df = pd.read_csv(StringIO(mystr), delim_whitespace=True, header=None)
df['idx'] = df[0].str[4:].astype(int)
res = set(df.loc[df['idx'] <= 5, 1])
# {1, 2, 3, 4, 5}

Listing distributions in Python

I have a sheet of numbers, separated by spaces into columns. Each column represents a different category, and within each column, each number represents a different value. For example, column number four represents age, and within the column, the number 5 represents an age of 44-55. Obviously, each row is a different person's record. I'd like to use a Python script to search through the the sheet, and find all columns where the sixth column is number "1." After that, I want to know how many times each number in column one appears where the number in column six is equal to "1." The script should output to the user that "While column six equals '1', the value '1' appears 12 times in column one. The value '2' appears 18 times..." etc. I hope I'm being clear here. I just want it to list the numbers, basically. Anyway, I'm new to Python. I've attached my code below. I think I should be using dictionaries, but I'm just not totally sure how. So far, I haven't really come close to figuring this out. I would really appreciate if someone could walk me through the logic that would be behind such code. Thank you so much!
ldata = open("list.data", "r")
income_dist = {}
for line in ldata:
linelist = line.strip().split(" ")
key_income_dist = linelist[6]
if key_income_dist in income_dist:
income_dist[key_income_dist] = 1 + income_dist[key_income_dist]
else:
income_dist[key_income_dist] = 1
ldata.close()
print value_no_occupations
First, indentation is majorly important in Python and the above is bad: the 5 lines following linelist = line.strip().split(" ") need to be indented to be in the loop like they should be.
Next they should be indented further and this line added before them:
if len(linelist)>6 and linelist[6]=="1":
This line skips over short lines (there are some), and tests for what you said you wanted: "where column six equals "1."" This is column [6] where the first number on the line is referenced as [0] (these are "offsets", not "cardinal", or counting, numbers).
You'll probably want to change key_income_dist = linelist[6] to key_income_dist = linelist[0] or [1] to get what you want. Play around if necessary.
Finally, you should say print income_dist at the end to get a look at your results. If you want fancier output, study up on formatting.
This is actually easier than it seems! The key is collections.Counter
from collections import Counter
ldata = open("list.data")
rows = [tuple(row.split()) for row in ldata if row.split()[5]==1]
# warning this will break if some rows are shorter than 6 columns
first_col = Counter(item[0] for item in rows)
If you want the distribution of every column (not just the first) do:
distribution = {column: Counter(item[column] for item in rows) for column in range(len(rows[0]))}
# warning this will break if all rows are not the same size!
Considering that the data file has ~9000 rows of data, if you don't want to keep the original data, you can combine step 1 & 2 to make the program use less memory and a little faster.
ldata = open("list.data", "r")
# read in all the rows, note that the list values are strings instead of integers
# keep only the rows with 6th column = '1'
only1 = []
for line in ldata:
if line.strip() == '': # ignor blank lines
continue
row = tuple(line.strip().split(" "))
if row[5] == '1':
only1.append(row)
ldata.close()
# tally the statistics
income_dist = {}
for row in only1:
if row[0] in income_dist:
income_dist[row[0]] += 1
else:
income_dist[row[0]] = 1
# print result
print "While column six equals '1',"
for num in sorted(income_dist):
print "the value %s appears %d times in column one." % (num, income_dist[num])
Sample Test Data in list.data:
9 2 1 5 4 5 5 3 3 0 1 1 7 NA
9 1 1 5 5 5 5 3 5 2 1 1 7 1
9 2 1 3 5 1 5 2 3 1 2 3 7 1
1 2 5 1 2 6 5 1 4 2 3 1 7 1
1 2 5 1 2 6 3 1 4 2 3 1 7 1
8 1 1 6 4 8 5 3 2 0 1 1 7 1
1 1 5 2 3 9 4 1 3 1 2 3 7 1
6 1 3 3 4 1 5 1 1 0 2 3 7 1
2 1 1 6 3 8 5 3 3 0 2 3 7 1
4 1 1 7 4 8 4 3 2 0 2 3 7 1
1 1 5 2 4 1 5 1 1 0 2 3 7 1
4 2 2 2 3 2 5 1 2 0 1 1 5 1
8 2 1 3 6 6 2 2 4 2 1 1 7 1
7 2 1 5 3 5 5 3 4 0 2 1 7 1
1 1 5 2 3 9 4 1 3 1 2 3 7 1
6 1 3 3 4 1 5 1 1 0 2 3 7 1
2 1 1 6 3 8 5 3 3 0 2 3 7 1
4 1 1 7 4 8 4 3 2 0 2 3 7 1
1 1 5 2 4 9 5 1 1 0 2 3 7 1
4 2 2 2 3 2 5 1 2 0 1 1 5 1
Following your original program logic, I come up with this version:
ldata = open("list.data", "r")
# read in all the rows, note that the list values are strings instead of integers
linelist = []
for line in ldata:
linelist.append(tuple(line.strip().split(" ")))
ldata.close()
# keep only the rows with 6th column = '1'
only1 = []
for row in linelist:
if row[5] == '1':
only1.append(row)
# tally the statistics
income_dist = {}
for row in only1:
if row[0] in income_dist:
income_dist[row[0]] += 1
else:
income_dist[row[0]] = 1
# print result
print "While column six equals '1',"
for num in sorted(income_dist):
print "the value %s appears %d times in column one." % (num, income_dist[num])

Number Pyramid Nested for Loop

I'm wondering if you could help me out. I'm trying to write a nested for loop in Python 3 that displays a number pyramid that looks like;
1
1 2 1
1 2 4 2 1
1 2 4 8 4 2 1
1 2 4 8 16 8 4 2 1
1 2 4 8 16 32 16 8 4 2 1
1 2 4 8 16 32 64 32 16 8 4 2 1
1 2 4 8 16 32 64 128 64 32 16 8 4 2 1
Can anybody help me out? It would be much appreciated!
This is what I have so far:
col = 1
for i in range(-1, 18, col*2):
for j in range(1, 0, 1):
print(" ", end = "")
for j in range(i, 0, -2):
print(j, end = " ")
print()
So, I can only get half of the pyramid to display.
I guess the main problems I'm having is:
How do i get the output to display an increasing and then decreasing value (ie. 1, 2, 4, 2, 1)?
An alternate way using list comprehensions.
Always break the problem down into digestable chunks. Each line is a mirror of itself, so lets just deal with first making out set of numbers we need.
This generates a list of strings that hold all powers of two which is what this is generating
lines = []
for i in range(1,9):
lines.append([str(2**j) for j in range(i)])
But if we just print this list, a) its going to only have half, and b) its going to mush the numbers together. We need to buffer the numbers with spaces. Fortunately, the last row will have the largest digits for any column, so:
Firstly, how long does each line need to end up being (we need this later) and also, what is the longest number in each column. We can use len as we cast the numbers to strings above.
b = len(lines[-1])
buffers = [len(x) for x in lines[-1]]
Now I have everything I need to print the strings (we stopped using numbers above):
So, for each line, find out how long it is, and expand the array it to the length of the longest line by filling the left of the array with empty strings (for this we're still pretending we're only printing the left half of the triangle):
for line in lines:
l = len(line)
line = [" "]*(b-len(line)) + line
With each line now buffered, we'll make a new array that we will print from. By zip()ing together the line and the buffer, we can easily right justify (String.rjust()) numberic strings, expanded out to the length required.
out = []
for x,y in zip(line,buffers):
out.append(x.rjust(y))
Remmeber until now, we've still just been working with the left half of the pyramid. So we take the output array, reverse it (array[::-1]) and then take every element but the first (array[1:]) and join it all together with a string and print it out.
print(" ".join(out+out[::-1][1:]))
Voila! The completed code:
lines = []
for i in range(1,9):
lines.append([str(2**j) for j in range(i)])
b = len(lines[-1])
buffers = [len(x) for x in lines[-1]]
for line in lines:
l = len(line)
line = [" "]*(b-len(line)) + line
out = []
for x,y in zip(line,buffers):
out.append(x.rjust(y))
print(" ".join(out+out[::-1][1:]))
Output:
1
1 2 1
1 2 4 2 1
1 2 4 8 4 2 1
1 2 4 8 16 8 4 2 1
1 2 4 8 16 32 16 8 4 2 1
1 2 4 8 16 32 64 32 16 8 4 2 1
1 2 4 8 16 32 64 128 64 32 16 8 4 2 1
height = 8
maxHeight = height - 1
for i in range(height):
k, Max = 1, i * 2 + 1
print(maxHeight * " ", end="")
maxHeight -= 1
for j in range(Max):
print("%5d" % k, end="")
if (j < (Max // 2)):
k *= 2
else:
k //= 2
print()
Output:
1
1 2 1
1 2 4 2 1
1 2 4 8 4 2 1
1 2 4 8 16 8 4 2 1
1 2 4 8 16 32 16 8 4 2 1
1 2 4 8 16 32 64 32 16 8 4 2 1
1 2 4 8 16 32 64 128 64 32 16 8 4 2 1
This could be the other 9 line solution.
Generate power of two's numbers as series
Find the offset need to add in each rows
Print the empty space for the each row before printing the palindromic list.
Ie. (offset * (n - i)) times " "(empty space)
Build palindromic series by slice operation ie. temp + temp[::-1][1:]
Print the palindromic series and offset spaces relative to the length of the number you are printing.
Code:
n = 8
numbers = [2**x for x in range(n)] # Generate interseted series.
offset = len(str(numbers[-1:])) -1 # Find the max offset for the tree.
for i in range(1, n+1): # Iterate n times. 1 to n+1 helps eazy slicing.
temp = numbers[:i] # Slice series to get first row numbers.
print(' ' * (offset * (n - i)), end=" ") # Prefix spaces, multiples of offset.
for num in temp + temp[::-1][1:]: # Generate palindromic series for the row.
print(num, end=" " * (offset - len(str(num)))) # Adjust offset for the number.
print('')
output:
1
1 2 1
1 2 4 2 1
1 2 4 8 4 2 1
1 2 4 8 16 8 4 2 1
1 2 4 8 16 32 16 8 4 2 1
1 2 4 8 16 32 64 32 16 8 4 2 1
1 2 4 8 16 32 64 128 64 32 16 8 4 2 1

Grouping list of integers in a range into chunks

Given a set or a list (assume its ordered)
myset = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
I want to find out how many numbers appear in a range.
say my range is 10. Then given the list above, I have two sets of 10.
I want the function to return [10,10]
if my range was 15. Then I should get [15,5]
The range will change. Here is what I came up with
myRange = 10
start = 1
current = start
next = current + myRange
count = 0
setTotal = []
for i in myset:
if i >= current and i < next :
count = count + 1
print str(i)+" in "+str(len(setTotal)+1)
else:
current = current + myRange
next = myRange + current
if next >= myset[-1]:
next = myset[-1]
setTotal.append(count)
count = 0
print setTotal
Output
1 in 1
2 in 1
3 in 1
4 in 1
5 in 1
6 in 1
7 in 1
8 in 1
9 in 1
10 in 1
12 in 2
13 in 2
14 in 2
15 in 2
16 in 2
17 in 2
18 in 2
19 in 2
[10, 8]
notice 11 and 20 where skipped. I also played around with the condition and got wired results.
EDIT: Range defines a range that every value in the range should be counted into one chuck.
think of a range as from current value to currentvalue+range as one chunk.
EDIT:
Wanted output:
1 in 1
2 in 1
3 in 1
4 in 1
5 in 1
6 in 1
7 in 1
8 in 1
9 in 1
10 in 1
11 in 2
12 in 2
13 in 2
14 in 2
15 in 2
16 in 2
17 in 2
18 in 2
19 in 2
[10, 10]
With the right key function, thegroupbymethod in the itertoolsmodule makes doing this fairly simple:
from itertools import groupby
def ranger(values, range_size):
def keyfunc(n):
key = n/(range_size+1) + 1
print '{} in {}'.format(n, key)
return key
return [len(list(g)) for k, g in groupby(values, key=keyfunc)]
myset = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
print ranger(myset, 10)
print ranger(myset, 15)
You want to use simple division and the remainder; the divmod() function gives you both:
def chunks(lst, size):
count, remainder = divmod(len(lst), size)
return [size] * count + ([remainder] if remainder else [])
To create your desired output, then use the output of chunks():
lst = range(1, 21)
size = 10
start = 0
for count, chunk in enumerate(chunks(lst, size), 1):
for i in lst[start:start + chunk]:
print '{} in {}'.format(i, count)
start += chunk
count is the number of the current chunk (starting at 1; python uses 0-based indexing normally).
This prints:
1 in 1
2 in 1
3 in 1
4 in 1
5 in 1
6 in 1
7 in 1
8 in 1
9 in 1
10 in 1
11 in 2
12 in 2
13 in 2
14 in 2
15 in 2
16 in 2
17 in 2
18 in 2
19 in 2
20 in 2
If you don't care about what numbers are in a given chunk, you can calculate the size easily:
def chunk_sizes(lst, size):
complete = len(lst) // size # Number of `size`-sized chunks
partial = len(lst) % size # Last chunk
if partial: # Sometimes the last chunk is empty
return [size] * complete + [partial]
else:
return [size] * complete

Categories

Resources