Using text files for word search python

Using text files for word search python - python

I want to create a word search generator but I'm finding it a little hard. Ive looked online and found this word search generator which works well but i would like to use my own grid from a text file. Is there way I can code this and place my text file instead of random choice letters.
##while True:
## intext=input('Enter file name: ')
##
## if not intext=='grid.txt':
## print("No such file was found! Enter file name.")
## continue
## else:
##
## break
##grid= open(intext,'r')
import string
import random
width = 12
height = 12
def put_word(word,grid):
word = random.choice([word,word[::-1]])
d = random.choice([[1,0],[0,1],[1,1]])
xsize = width if d[0] == 0 else width - len(word)
ysize = height if d[1] == 0 else height - len(word)
x = random.randrange(0,xsize)
y = random.randrange(0,ysize)
print([x,y])
for i in range(0,len(word)):
grid[y + d[1]*i][x + d[0]*i] = word[i]
return grid
grid = [[random.choice(string.ascii_uppercase) for i in range(0,width)] for j in range(0,height)]
for word in ["HELLO", "THERE", "AGAIN"]:
grid = put_word(word, grid)
print("\n".join(map(lambda row: " ".join(row), grid))
in the grid.txt files are:
xmfycxvtljlqbbybkoumjqwbt
caubmeknbeydqmcnzyjpvrank
aqactivexnyvwdvcoshoyaohg
paghzkctudptjdphsztprhttl
sbsnakjwqbouftmgnjqbtlinu
tsewohvobdsduqjiffkyylodo
oukwwefroyamapmlrrpvdolop
cqkfxtlksjvtmtrsbycmqrrri
kfervlqidqaxaoanfqjlmcpjh
yoyywrbpfcjlfbcbbcoecspwl
twbxetyuyufvvmauawjmbwlqh
txokdexmdbtgvhpsvsqtmljdx
dcatenrehteoxqdgeueljtrrn
jarteqvtxejfsqddkbuhcysfq
hpdrowssapxtrxhpdxcdhicon

Related

How to check if a string contains a specific character or not in python

I am new to python, but fairly experienced in programming. While learning python I was trying to create a simple function that would read words in from a text file (each line in the text file is a new word) and then check if the each word has the letter 'e' or not. The program should then count the amount of words that don't have the letter 'e' and use that amount to calculate the percentage of words that don't have an 'e' in the text file.
I am running into a problem where I'm very certain that my code is right, but after testing the output it is wrong. Please help!
Here is the code:
def has_n_e(w):
hasE = False
for c in w:
if c == 'e':
hasE = True
return hasE
f = open("crossword.txt","r")
count = 0
for x in f:
word = f.readline()
res = has_n_e(word)
if res == False:
count = count + 1
iAns = (count/113809)*100 //113809 is the amount of words in the text file
print (count)
rAns = round(iAns,2)
sAns = str(rAns)
fAns = sAns + "%"
print(fAns)

Here is the code after doing some changes that may help:
def has_n_e(w):
hasE = False
for c in w:
if c == 'e':
hasE = True
return hasE
f = open("crossword.txt","r").readlines()
count = 0
for x in f:
word = x[:-1]
res = has_n_e(word)# you can use ('e' in word) instead of the function
if res == False:
count = count + 1
iAns = (count/len(f))*100 //len(f) #is the amount of words in the text file
print (count)
rAns = round(iAns,2)
sAns = str(rAns)
fAns = sAns + "%"
print(fAns)
Hope this will help

Check if string is exactly the same as line in file

I've been writing a Countdown program in Python, and in it. I've written this:
#Letters Game
global vowels, consonants
from random import choice, uniform
from time import sleep
from itertools import permutations
startLetter = ""
words = []
def check(word, startLetter):
fileName = startLetter + ".txt"
datafile = open(fileName)
for line in datafile:
print("Checking if", word, "is", line.lower())
if word == line.lower():
return True
return False
def generateLetters():
lettersLeft = 9
output = []
while lettersLeft >= 1:
lType = input("Vowel or consonant? (v/c)")
sleep(uniform(0.5, 1.5))
if lType not in ("v", "c"):
print("Please input v or c")
continue
elif lType == "v":
letter = choice(vowels)
print("Rachel has picked an", letter)
vowels.remove(letter)
output.append(letter)
elif lType == "c":
letter = choice(consonants)
print("Rachel has picked a", letter)
consonants.remove(letter)
output.append(letter)
print("Letters so far:", output)
lettersLeft -= 1
return output
def possibleWords(letters, words):
for i in range(1,9):
print(letters)
print(i)
for item in permutations(letters, i):
item = "".join(list(item))
startLetter = list(item)[0]
if check(item, startLetter):
print("\n\n***Got one***\n", item)
words.append(item)
return words
vowels = ["a"]*15 + ["e"]*21 + ["i"]*13 + ["o"]*13+ ["u"]*5
consonants = ["b"]*2 + ["c"]*3 + ["d"]*6 + ["f"]*2 + ["g"]*3 +["h"]*2 +["j"]*1 +["k"]*1 +["l"]*5 +["m"]*4 +["n"]*8 +["p"]*4 +["q"]*1 +["r"]*9 +["s"]*9 +["t"]*9 + ["v"]*1 +["w"]*1 +["x"]*1 +["y"]*1 +["z"]*1
print("***Let's play a letters game!***")
sleep(3)
letters = generateLetters()
sleep(uniform(1, 1.5))
print("\n\n***Let's play countdown***\n\n\n\n\n")
print(letters)
for count in reversed(range(1, 31)):
print(count)
sleep(1)
print("\n\nStop!")
print("All possible words:")
print(possibleWords(letters, words))
'''
#Code for sorting the dictionary into files
alphabet = "abcdefghijklmnopqrstuvwxyz"
alphabet = list(alphabet)
for letter in alphabet:
allFile = open("Dictionary.txt", "r+")
filename = letter + ".txt"
letterFile = open(filename, "w")
for line in allFile:
if len(list(line.lower())) <= 9:
if list(line.lower())[0] == letter:
print("Writing:", line.lower())
letterFile.write(line.lower())
allFile.close()
letterFile.close()
I have 26 text files called a.txt, b.txt, c.txt... to make the search quicker
(Sorry it's not very neat - I haven't finished it yet)
However, instead of returning what I expect (pan), it returns all words with pan in it (pan, pancake, pans, pandemic...)
Is there any way in Python you can only return the line if it's EXACTLY the same as the string? Do I have to .read() the file first?
Thanks

Your post is strangely written so excuse me if I missmatch
Is there any way in Python you can only return the line if it's EXACTLY the same as the string? Do I have to .read() the file first?
Yes, there is!!!
file = open("file.txt")
content = file.read() # which is a str
lines = content.split('\n') # which is a list (containing every lines)
test_string = " pan "
positive_match = [l for l in lines if test_string in l]
This is a bit hacky since we avoid getting pancake for pan (for instance) but using spaces (and then, what about cases like ".....,pan "?). You should have a look at tokenization function. As pythonists, we hve one of the best library for this: nltk
(because, basically, you are reinventing the wheel)

How to wrap text in pygame using pygame.font.Font()?

I am making a would you rather game, and I would like to not have character restrictions for the W.Y.R. questions. I have seen many examples here on Stack Overflow and other websites, but they use other modules and methods I don't understand how to use or want to use. So I would rather use
button_text_font = pygame.font.Font(font_location, 20)
red_button_text = button_text_font.render(red_text, True, bg_color)
blue_button_text = button_text_font.render(blue_text, True, bg_color)
I would like to know how to use this method and, for example, somehow input how far the text can go until it wraps to the next line.
Thanks
P.S. If you could, please also include centering text, etc.

This is adapted from some very old code I wrote:
def renderTextCenteredAt(text, font, colour, x, y, screen, allowed_width):
# first, split the text into words
words = text.split()
# now, construct lines out of these words
lines = []
while len(words) > 0:
# get as many words as will fit within allowed_width
line_words = []
while len(words) > 0:
line_words.append(words.pop(0))
fw, fh = font.size(' '.join(line_words + words[:1]))
if fw > allowed_width:
break
# add a line consisting of those words
line = ' '.join(line_words)
lines.append(line)
# now we've split our text into lines that fit into the width, actually
# render them
# we'll render each line below the last, so we need to keep track of
# the culmative height of the lines we've rendered so far
y_offset = 0
for line in lines:
fw, fh = font.size(line)
# (tx, ty) is the top-left of the font surface
tx = x - fw / 2
ty = y + y_offset
font_surface = font.render(line, True, colour)
screen.blit(font_surface, (tx, ty))
y_offset += fh
The basic algorithm is to split the text into words and iteratively build up lines word by word checking the resulting width each time and splitting to a new line when you would exceed the width.
As you can query how wide the rendered text will be, you can figure out where to render it to centre it.

This is messy and there is far more you can do but if you want a specific length of text for say a paragraph...
font = pygame.font.SysFont("Times New Roman, Arial", 20, bold=True)
your_text = "blah blah blah"
txtX, txtY = 125, 500
wraplen = 50
count = 0
my_wrap = textwrap.TextWrapper(width=wraplen)
wrap_list = my_wrap.wrap(text=your_text)
# Draw one line at a time further down the screen
for i in wrap_list:
txtY = txtY + 35
Mtxt = font.render(f"{i}", True, (255, 255, 255))
WIN.blit(Mtxt, (txtX, txtY))
count += 1
# Update All Window and contents
pygame.display.update()

Using the implementation in Pygame Zero, text can be wrapped with the following function.
# Adapted from https://github.com/lordmauve/pgzero/blob/master/pgzero/ptext.py#L81-L143
def wrap_text(text, font, max_width):
texts = text.replace("\t", " ").split("\n")
lines = []
for text in texts:
text = text.rstrip(" ")
if not text:
lines.append("")
continue
# Preserve leading spaces in all cases.
a = len(text) - len(text.lstrip(" "))
# At any time, a is the rightmost known index you can legally split a line. I.e. it's legal
# to add text[:a] to lines, and line is what will be added to lines if
# text is split at a.
a = text.index(" ", a) if " " in text else len(text)
line = text[:a]
while a + 1 < len(text):
# b is the next legal place to break the line, with `bline`` the
# corresponding line to add.
if " " not in text[a + 1:]:
b = len(text)
bline = text
else:
# Lines may be split at any space character that immediately follows a non-space
# character.
b = text.index(" ", a + 1)
while text[b - 1] == " ":
if " " in text[b + 1:]:
b = text.index(" ", b + 1)
else:
b = len(text)
break
bline = text[:b]
bline = text[:b]
if font.size(bline)[0] <= max_width:
a, line = b, bline
else:
lines.append(line)
text = text[a:].lstrip(" ")
a = text.index(" ", 1) if " " in text[1:] else len(text)
line = text[:a]
if text:
lines.append(line)
return lines
Bear in mind that wrapping text requires multiple lines that must be rendered separately. Here's an example of how you could render each line.
def create_text(text, color, pos, size, max_width=None, line_spacing=1):
font = pygame.font.SysFont("monospace", size)
if max_width is not None:
lines = wrap_text(text, font, max_width)
else:
lines = text.replace("\t", " ").split("\n")
line_ys = (
np.arange(len(lines)) - len(lines) / 2 + 0.5
) * 1.25 * font.get_linesize() + pos[1]
# Create the surface and rect that make up each line
text_objects = []
for line, y_pos in zip(lines, line_ys):
text_surface = font.render(line, True, color)
text_rect = text_surface.get_rect(center=(pos[0], y_pos))
text_objects.append((text_surface, text_rect))
return text_objects
# Example case
lines = create_text(
text="Some long text that needs to be wrapped",
color=(255, 255, 255), # White
pos=(SCREEN_WIDTH // 2, SCREEN_HEIGHT // 2), # Center of the screen
size=16,
max_width=SCREEN_WIDTH,
)
# Render each line
for text_object in lines:
screen.blit(*text_object)

Python find function selects one match per line

I am trying to make a simple text editor using python. I am now trying to make a find function. This is what I've got:
def Find():
text = textArea.get('1.0', END+'-1c').lower()
input = simpledialog.askstring("Find", "Enter text to find...").lower()
startindex = []
endindex = []
lines = 0
if input in text:
text = textArea.get('1.0', END+'-1c').lower().splitlines()
for var in text:
character = text[lines].index(input)
start = str(lines + 1) + '.' + str(character)
startindex.append(start)
end = str(lines + 1) + '.' + str(character + int(len(input)))
endindex.append(end)
textArea.tag_add('select', startindex[lines], endindex[lines])
lines += 1
textArea.tag_config('select', background = 'green')
This will succesfully highlight words that match the users input with a green background. But the problem is, that it only highlights the first match every line, as you can see here.
I want it to highlight all matches.
Full code here: https://pastebin.com/BkuXN5pk

Recommend using the text widget's built-in search capability. Shown using python3.
from tkinter import *
root = Tk()
textArea = Text(root)
textArea.grid()
textArea.tag_config('select', background = 'green')
f = open('mouse.py', 'r')
content = f.read()
f.close()
textArea.insert(END, content)
def Find(input):
start = 1.0
length = len(input)
while 1:
pos = textArea.search(input, start, END)
if not pos:
break
end_tag = pos + '+' + str(length) + 'c'
textArea.tag_add('select', pos, end_tag)
start = pos + '+1c'
Find('display')
root.mainloop()

python is inexplicably shortening the step size with each iteration of a sliding window analysis

I am working on a program that estimates the statistic Tajima's D in a series of sliding windows across a chromosome. The chromosome itself is also divided into a number of different regions with (hopefully) functional significance. The sliding window analysis is performed by my script on each region.
At the start of the program, I define the size of the sliding windows and the size of the steps that move from one window to the next. I import a file which contains the coordinates for each different chromosomal region, and import another file which contains all the SNP data I am working with (this is read line-by-line, as it is a large file). The program loops through the list of chromosomal locations. For each location, it generates an index of steps and windows for the analysis, partitions the SNP data into output files (corresponding with the steps), calculates key statistics for each step file, and combines these statistics to estimate Tajima's D for each window.
The program works well for small files of SNP data. It also works well for the first iteration over the first chromosomal break point. However, for large files of SNP data, the step size in the analysis is inexplicably decreased as the program iterates over each chromosomal regions. For the first chromosomal regions, the step size is 2500 nucleotides (this is what it is suppose to be). For the second chromosome segment, however, the step size is 1966, and for the third it is 732.
If anyone has any suggestions at to why this might be the case, please let me know. I am especially stumped as this program seems to work size for small files but not for larger ones.
My code is below:
import sys
import math
import fileinput
import shlex
import string
windowSize = int(500)
stepSize = int(250)
n = int(50) #number of individuals in the anaysis
SNP_file = open("SNPs-1.txt",'r')
SNP_file.readline()
breakpoints = open("C:/Users/gwilymh/Desktop/Python/Breakpoint coordinates.txt", 'r')
breakpoints = list(breakpoints)
numSegments = len(breakpoints)
# Open a file to store the Tajima's D results:
outputFile = open("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/Tajima's D estimates.txt", 'a')
outputFile.write(str("segmentNumber\tchrSegmentName\tsegmentStart\tsegmentStop\twindowNumber\twindowStart\twindowStop\tWindowSize\tnSNPs\tS\tD\n"))
#Calculating parameters a1, a2, b1, b2, c1 and c2
numPairwiseComparisons=n*((n-1)/2)
b1=(n+1)/(3*(n-1))
b2=(2*(n**2+n+3))/(9*n*(n-1))
num=list(range(1,n)) # n-1 values as a list
i=0
a1=0
for i in num:
a1=a1+(1/i)
i=i+1
j=0
a2=0
for j in num:
a2=a2+(1/j**2)
j=j+1
c1=(b1/a1)-(1/a1**2)
c2=(1/(a1**2+a2))*(b2 - ((n+2)/(a1*n))+ (a2/a1**2) )
counter6=0
#For each segment, assign a number and identify the start and stop coodrinates and the segment name
for counter6 in range(counter6,numSegments):
segment = shlex.shlex(breakpoints[counter6],posix = True)
segment.whitespace += '\t'
segment.whitespace_split = True
segment = list(segment)
segmentName = segment[0]
segmentNumber = int(counter6+1)
segmentStartPos = int(segment[1])
segmentStopPos = int(segment[2])
outputFile1 = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_Count of SNPs and mismatches per step.txt")%(str(segmentNumber),str(segmentName))), 'a')
#Make output files to index the lcoations of each window within each segment
windowFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_windowFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'a')
k = segmentStartPos - 1
windowNumber = 0
while (k+1) <=segmentStopPos:
windowStart = k+1
windowNumber = windowNumber+1
windowStop = k + windowSize
if windowStop > segmentStopPos:
windowStop = segmentStopPos
windowFileIndex.write(("%s\t%s\t%s\n")%(str(windowNumber),str(windowStart),str(windowStop)))
k=k+stepSize
windowFileIndex.close()
# Make output files for each step to export the corresponding SNP data into + an index of these output files
stepFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_stepFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'a')
i = segmentStartPos-1
stepNumber = 0
while (i+1) <= segmentStopPos:
stepStart = i+1
stepNumber = stepNumber+1
stepStop = i+stepSize
if stepStop > segmentStopPos:
stepStop = segmentStopPos
stepFile = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_step_%s.txt")%(str(segmentNumber),str(segmentName),str(stepNumber))), 'a')
stepFileIndex.write(("%s\t%s\t%s\n")%(str(stepNumber),str(stepStart),str(stepStop)))
i=i+stepSize
stepFile.close()
stepFileIndex.close()
# Open the index file for each step in current chromosomal segment
stepFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_stepFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'r')
stepFileIndex = list(stepFileIndex)
numSteps = len(stepFileIndex)
while 1:
currentSNP = SNP_file.readline()
if not currentSNP: break
currentSNP = shlex.shlex(currentSNP,posix=True)
currentSNP.whitespace += '\t'
currentSNP.whitespace_split = True
currentSNP = list(currentSNP)
SNPlocation = int(currentSNP[0])
if SNPlocation > segmentStopPos:break
stepIndexBin = int(((SNPlocation-segmentStartPos-1)/stepSize)+1)
#print(SNPlocation, stepIndexBin)
writeFile = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_step_%s.txt")%(str(segmentNumber),str(segmentName),str(stepIndexBin))), 'a')
writeFile.write((("%s\n")%(str(currentSNP[:]))))
writeFile.close()
counter3=0
for counter3 in range(counter3,numSteps):
# open up each step in the list of steps across the chromosomal segment:
L=shlex.shlex(stepFileIndex[counter3],posix=True)
L.whitespace += '\t'
L.whitespace_split = True
L=list(L)
#print(L)
stepNumber = int(L[0])
stepStart = int(L[1])
stepStop = int(L[2])
stepSize = int(stepStop-(stepStart-1))
#Now open the file of SNPs corresponding with the window in question and convert it into a list:
currentStepFile = open(("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_step_%s.txt")%(str(segmentNumber),str(segmentName),str(counter3+1)),'r')
currentStepFile = list(currentStepFile)
nSNPsInCurrentStepFile = len(currentStepFile)
print("number of SNPs in this step is:", nSNPsInCurrentStepFile)
#print(currentStepFile)
if nSNPsInCurrentStepFile == 0:
mismatchesPerSiteList = [0]
else:
# For each line of the file, estimate the per site parameters relevent to Tajima's D
mismatchesPerSiteList = list()
counter4=0
for counter4 in range(counter4,nSNPsInCurrentStepFile):
CountA=0
CountG=0
CountC=0
CountT=0
x = counter4
lineOfData = currentStepFile[x]
counter5=0
for counter5 in range(0,len(lineOfData)):
if lineOfData[counter5]==("A" or "a"): CountA=CountA+1
elif lineOfData[counter5]==("G" or "g"): CountG=CountG+1
elif lineOfData[counter5]==("C" or "c"): CountC=CountC+1
elif lineOfData[counter5]==("T" or "t"): CountT=CountT+1
else: continue
AxG=CountA*CountG
AxC=CountA*CountC
AxT=CountA*CountT
GxC=CountG*CountC
GxT=CountG*CountT
CxT=CountC*CountT
NumberMismatches = AxG+AxC+AxT+GxC+GxT+CxT
mismatchesPerSiteList=mismatchesPerSiteList+[NumberMismatches]
outputFile1.write(str(("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n")%(segmentNumber, segmentName,stepNumber,stepStart,stepStop,stepSize,nSNPsInCurrentStepFile,sum(mismatchesPerSiteList))))
outputFile1.close()
windowFileIndex = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_windowFileIndex.txt")%(str(segmentNumber),str(segmentName))), 'r')
windowFileIndex = list(windowFileIndex)
numberOfWindows = len(windowFileIndex)
stepData = open((("C:/Users/gwilymh/Desktop/Python/Sliding Window Analyses-2/%s_%s_Count of SNPs and mismatches per step.txt")%(str(segmentNumber),str(segmentName))), 'r')
stepData = list(stepData)
numberOfSteps = len(stepData)
counter = 0
for counter in range(counter, numberOfWindows):
window = shlex.shlex(windowFileIndex[counter], posix = True)
window.whitespace += "\t"
window.whitespace_split = True
window = list(window)
windowNumber = int(window[0])
firstCoordinateInCurrentWindow = int(window[1])
lastCoordinateInCurrentWindow = int(window[2])
currentWindowSize = lastCoordinateInCurrentWindow - firstCoordinateInCurrentWindow +1
nSNPsInThisWindow = 0
nMismatchesInThisWindow = 0
counter2 = 0
for counter2 in range(counter2,numberOfSteps):
step = shlex.shlex(stepData[counter2], posix=True)
step.whitespace += "\t"
step.whitespace_split = True
step = list(step)
lastCoordinateInCurrentStep = int(step[4])
if lastCoordinateInCurrentStep < firstCoordinateInCurrentWindow: continue
elif lastCoordinateInCurrentStep <= lastCoordinateInCurrentWindow:
nSNPsInThisStep = int(step[6])
nMismatchesInThisStep = int(step[7])
nSNPsInThisWindow = nSNPsInThisWindow + nSNPsInThisStep
nMismatchesInThisWindow = nMismatchesInThisWindow + nMismatchesInThisStep
elif lastCoordinateInCurrentStep > lastCoordinateInCurrentWindow: break
if nSNPsInThisWindow ==0 :
S = 0
D = 0
else:
S = nSNPsInThisWindow/currentWindowSize
pi = nMismatchesInThisWindow/(currentWindowSize*numPairwiseComparisons)
print(nSNPsInThisWindow,nMismatchesInThisWindow,currentWindowSize,S,pi)
D = (pi-(S/a1))/math.sqrt(c1*S + c2*S*(S-1/currentWindowSize))
outputFile.write(str(("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n")%(segmentNumber,segmentName,segmentStartPos,segmentStopPos,windowNumber,firstCoordinateInCurrentWindow,lastCoordinateInCurrentWindow,currentWindowSize,nSNPsInThisWindow,S,D)))

A quick search shows that you do change your stepSize on line 110:
stepStart = int(L[1])
stepStop = int(L[2])
stepSize = int(stepStop-(stepStart-1))
stepStop and stepStart appear to depend on your files' contents, so we can't debug it further.

Develop Reference

Python is a programming language that lets you work quickly and integrate systems more effectively.

Using text files for word search python - python

Related

How to check if a string contains a specific character or not in python

Check if string is exactly the same as line in file

How to wrap text in pygame using pygame.font.Font()?

Python find function selects one match per line

python is inexplicably shortening the step size with each iteration of a sliding window analysis

Categories

Resources