That's a single threaded code.
In particular: ahocorasick Python extension module (easy_install ahocorasick).
I isolated the problem to a trivial example:
import ahocorasick
t = ahocorasick.KeywordTree()
t.add("a")
When I run it in gdb, all is fine, same happens when I enter these instructions into Python CLI. However, when I try to run the script regularily, I get a segfault.
To make it even weirder, the line that causes segfault (identified by core dump analysis) is a regular int incrementation (see the bottom of the function body).
I'm completely stuck by this moment, what can I do?
int
aho_corasick_addstring(aho_corasick_t *in, unsigned char *string, size_t n)
{
aho_corasick_t* g = in;
aho_corasick_state_t *state,*s = NULL;
int j = 0;
state = g->zerostate;
// As long as we have transitions follow them
while( j != n &&
(s = aho_corasick_goto_get(state,*(string+j))) != FAIL )
{
state = s;
++j;
}
if ( j == n ) {
/* dyoo: added so that if a keyword ends up in a prefix
of another, we still mark that as a match.*/
aho_corasick_output(s) = j;
return 0;
}
while( j != n )
{
// Create new state
if ( (s = xalloc(sizeof(aho_corasick_state_t))) == NULL )
return -1;
s->id = g->newstate++;
debug(printf("allocating state %d\n", s->id)); /* debug */
s->depth = state->depth + 1;
/* FIXME: check the error return value of
aho_corasick_goto_initialize. */
aho_corasick_goto_initialize(s);
// Create transition
aho_corasick_goto_set(state,*(string+j), s);
debug(printf("%u -> %c -> %u\n",state->id,*(string+j),s->id));
state = s;
aho_corasick_output(s) = 0;
aho_corasick_fail(s) = NULL;
++j; // <--- HERE!
}
aho_corasick_output(s) = n;
return 0;
}
There are other tools you can use that will find faults that does not necessarily crash the program.
valgrind, electric fence, purify, coverity, and lint-like tools may be able to help you.
You might need to build your own python in some cases for this to be usable. Also, for memory corruption things, there is (or was, haven't built exetensions in a while) a possibility to let python use direct memory allocation instead of pythons own.
Have you tried translating that while loop to a for loop? Maybe there's some subtle misunderstanding with the ++j that will disappear if you use something more intuitive.
Related
I have met a Python Segmentation fault when developoing a python c module.
After debugging, it turns out that one of the pools current using has freeblock set to be 0xffffffff.
Core Dump gdb frames:
(gdb) frame 0
#0 Py0bject_Malloc (nbytes=53) at../Objects/obmalloc.c:837
837 in ../Objects/obmalloc.c
(gdb) p bp
$6 = (block *) Oxffffffffffffffff <error: Cannot access memory at address Oxffffffffffffffff>
for Better colored text, still provide gdb screen shots here.
Relative code:
void *PyObject_Malloc(size_t nbytes) {
...
/*
* This implicitly redirects malloc(0).
*/
if ((nbytes - 1) < SMALL_REQUEST_THRESHOLD) {
LOCK();
/*
* Most frequent paths first
*/
size = (uint)(nbytes - 1) >> ALIGNMENT_SHIFT;
pool = usedpools[size + size];
if (pool != pool->nextpool) {
/*
* There is a used pool for this size class.
* Pick up the head block of its free list.
*/
++pool->ref.count;
bp = pool->freeblock;
assert(bp != NULL);
if ((pool->freeblock = *(block **)bp) != NULL) {
UNLOCK();
return (void *)bp;
}
...
}
A above have shown, it pick value that pool's freeblock(bp) point to while bp is 0xffffffff which violates our cognization to python memory management.
So the question is, when and why, would the freeblock pointer have been assigned with 0xffffffff?
It turns out that i have not use gil properly, and somewhere multithreading error has been triggered. Then the freeblock's value has been wrong.
How can I pass a stdin stream a custom string using for example python with printf? I can't do it before starting the executable because it's something i need to do after some other inputs.
The code is :
void play(){
int guess=0;
char buf[CHOICEBUFSIZE];
long long int attempt=0;
time_t timer;
int value;
time(&timer);
timer /= 60;
timer *= 60;
//printf("<<<<<< Time: %t\n", timer);
srand(timer);
do {
attempt += 1;
value = rand();
puts(" > Gimme the magic number or 0 to gtfo!");
printf(" < ");
//RIGHT DOWN HERE
fgets(buf, CHOICEBUFSIZE, stdin);
guess = atoi(buf);
printf("%d\n", value);
if (guess == value){
win(attempt);
return;
}
else {
puts(" > Not even close!\n > Try again!");
}
} while (guess);
}
Here's a screen showing where I need to input it.
Thanks in advance. I'm sure the answer is simple but I couldn't find anything similiar to my case.
stdin is an input stream, so you cannot write to it. Since it sounds like you're trying to perform two tasks in parallel and have one communicate with the other, you may want to look into using pthreads or fork().
I am using ctypes to try and speed up my code.
My problem is similar to the one in this tutorial : https://cvstuff.wordpress.com/2014/11/27/wraping-c-code-with-python-ctypes-memory-and-pointers/
As pointed out in the tutorial I should free the memory after using the C function. Here is my C code
//C functions
double* getStuff(double *R_list, int items){
double results[items];
double* results_p;
for(int i = 0; i < items; i++){
res = calculation ; \\do some calculation
results[i] = res; }
results_p = results;
printf("C allocated address %p \n", results_p);
return results_p; }
void free_mem(double *a){
printf("freeing address: %p\n", a);
free(a); }
Which I compile with gcc -shared -Wl,-lgsl,-soname, simps -o libsimps.so -fPIC simps.c
And python:
//Python
from ctypes import *
import numpy as np
mydll = CDLL("libsimps.so")
mydll.getStuff.restype = POINTER(c_double)
mydll.getStuff.argtypes = [POINTER(c_double),c_int]
mydll.free_mem.restype = None
mydll.free_mem.argtypes = [POINTER(c_double)]
R = np.logspace(np.log10(0.011),1, 100, dtype = float) #input
tracers = c_int(len(R))
R_c = R.ctypes.data_as(POINTER(c_double))
for_list = mydll.getStuff(R_c,tracers)
print 'Python allocated', hex(for_list)
for_list_py = np.array(np.fromiter(for_list, dtype=np.float64, count=len(R)))
mydll.free_mem(for_list)
Up to the last line the code does what I want it to and the for_list_py values are correct. However, when I try to free the memory, I get a Segmentation fault and on closer inspection the address associated with for_list --> hex(for_list) is different to the one allocated to results_p within C part of the code.
As pointed out in this question, Python ctypes: how to free memory? Getting invalid pointer error , for_list will return the same address if mydll.getStuff.restype is set to c_void_p. But then I struggle to put the actual values I want into for_list_py. This is what I've tried:
cast(for_list, POINTER(c_double) )
for_list_py = np.array(np.fromiter(for_list, dtype=np.float64, count=len(R)))
mydll.free_mem(for_list)
where the cast operation seems to change for_list into an integer. I'm fairly new to C and very confused. Do I need to free that chunk of memory? If so, how do I do that whilst also keeping the output in a numpy array? Thanks!
Edit: It appears that the address allocated in C and the one I'm trying to free are the same, though I still recieve a Segmentation fault.
C allocated address 0x7ffe559a3960
freeing address: 0x7ffe559a3960
Segmentation fault
If I do print for_list I get <__main__.LP_c_double object at 0x7fe2fc93ab00>
Conclusion
Just to let everyone know, I've struggled with c_types for a bit.
I've ended up opting for SWIG instead of c_types. I've found that the code runs faster on the whole (compared to the version presented here). I found this documentation on dealing with memory deallocation in SWIG very useful https://scipy-cookbook.readthedocs.io/items/SWIG_Memory_Deallocation.html as well as the fact that SWIG gives you a very easy way of dealing with numpy n-dimensional arrays.
After getStuff function exits, the memory allocated to results array is not available any more, so when you try to free it, it crashes the program.
Try this instead:
double* getStuff(double *R_list, int items)
{
double* results_p = malloc(sizeof((*results_p) * (items + 1));
if (results_p == NULL)
{
// handle error
}
for(int i = 0; i < items; i++)
{
res = calculation ; \\do some calculation
results_p[i] = res;
}
printf("C allocated address %p \n", results_p);
return results_p;
}
I'm a Python veteran, but haven't dabbled much in C. After half a day of not finding anything on the internet that works for me, I thought I would ask here and get the help I need.
What I want to do is write a simple C function that accepts a string and returns a different string. I plan to bind this function in several languages (Java, Obj-C, Python, etc.) so I think it has to be pure C?
Here's what I have so far. Notice I get a segfault when trying to retrieve the value in Python.
hello.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
const char* hello(char* name) {
static char greeting[100] = "Hello, ";
strcat(greeting, name);
strcat(greeting, "!\n");
printf("%s\n", greeting);
return greeting;
}
main.py
import ctypes
hello = ctypes.cdll.LoadLibrary('./hello.so')
name = "Frank"
c_name = ctypes.c_char_p(name)
foo = hello.hello(c_name)
print c_name.value # this comes back fine
print ctypes.c_char_p(foo).value # segfault
I've read that the segfault is caused by C releasing the memory that was initially allocated for the returned string. Maybe I'm just barking up the wrong tree?
What's the proper way to accomplish what I want?
Your problem is that greeting was allocated on the stack, but the stack is destroyed when the function returns. You could allocate the memory dynamically:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
const char* hello(char* name) {
char* greeting = malloc(100);
snprintf("Hello, %s!\n", 100, name)
printf("%s\n", greeting);
return greeting;
}
But that's only part of the battle because now you have a memory leak. You could plug that with another ctypes call to free().
...or a much better approach is to read up on the official C binding to python (python 2.x at http://docs.python.org/2/c-api/ and python 3.x at http://docs.python.org/3/c-api/). Have your C function create a python string object and hand that back. It will be garbage collected by python automatically. Since you are writing the C side, you don't have to play the ctypes game.
...edit..
I didn't compile and test, but I think this .py would work:
import ctypes
# define the interface
hello = ctypes.cdll.LoadLibrary('./hello.so')
# find lib on linux or windows
libc = ctypes.CDLL(ctypes.util.find_library('c'))
# declare the functions we use
hello.hello.argtypes = (ctypes.c_char_p,)
hello.hello.restype = ctypes.c_char_p
libc.free.argtypes = (ctypes.c_void_p,)
# wrap hello to make sure the free is done
def hello(name):
_result = hello.hello(name)
result = _result.value
libc.free(_result)
return result
# do the deed
print hello("Frank")
In hello.c you return a local array. You have to return a pointer to an array, which has to be dynamically allocated using malloc.
char* hello(char* name)
{
char hello[] = "Hello ";
char excla[] = "!\n";
char *greeting = malloc ( sizeof(char) * ( strlen(name) + strlen(hello) + strlen(excla) + 1 ) );
if( greeting == NULL) exit(1);
strcpy( greeting , hello);
strcat(greeting, name);
strcat(greeting, excla);
return greeting;
}
I ran into this same problem today and found you must override the default return type (int) by setting restype on the method. See Return types in the ctype doc here.
import ctypes
hello = ctypes.cdll.LoadLibrary('./hello.so')
name = "Frank"
c_name = ctypes.c_char_p(name)
hello.hello.restype = ctypes.c_char_p # override the default return type (int)
foo = hello.hello(c_name)
print c_name.value
print ctypes.c_char_p(foo).value
I also ran into the same problem but used a different approach. I was suppose to find a string in a list of strings matchin a certain value.
Basically I initalized a char array with the size of longest string in my list. Then passed that as an argument to my function to hold the corresponding value.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void find_gline(char **ganal_lines, /*line array*/
size_t size, /*array size*/
char *idnb, /* id number for check */
char *resline) {
/*Iterates over lines and finds the one that contains idnb
then affects the result to the resline*/
for (size_t i = 0; i < size; i++) {
char *line = ganal_lines[i];
if (strstr(line, idnb) != NULL) {
size_t llen = strlen(line);
for (size_t k = 0; k < llen; k++) {
resline[k] = line[k];
}
return;
}
}
return;
}
This function was wrapped by the corresponding python function:
def find_gline_wrap(lines: list, arg: str, cdll):
""
# set arg types
mlen = maxlen(lines) # gives the length of the longest string in string list
linelen = len(lines)
line_array = ctypes.c_char_p * linelen
cdll.find_gline.argtypes = [
line_array,
ctypes.c_size_t,
ctypes.c_char_p,
ctypes.c_char_p,
]
#
argbyte = bytes(arg, "utf-8")
resbyte = bytes("", "utf-8")
ganal_lines = line_array(*lines)
size = ctypes.c_size_t(linelen)
idnb = ctypes.c_char_p(argbyte)
resline = ctypes.c_char_p(resbyte * mlen)
pdb.set_trace()
result = cdll.find_gline(ganal_lines, size, idnb, resline)
# getting rid of null char at the end
result = resline.value[:-1].decode("utf-8")
return result
Here's what happens. And why it's breaking. When hello() is called, the C stack pointer is moved up, making room for any memory needed by your function. Along with some function call overhead, all of your function locals are managed there. So that static char greeting[100], means that 100 bytes of the increased stack are for that string. You than use some functions that manipulate that memory. At the you place a pointer on the stack to the greeting memory. And then you return from the call, at which point, the stack pointer is retracted back to it's original before call position. So those 100 bytes that were on the stack for the duration of your call, are essentially up for grabs again as the stack is further manipulated. Including the address field which pointed to that value and that you returned. At that point, who knows what happens to it, but it's likely set to zero or some other value. And when you try to access it as if it were still viable memory, you get a segfault.
To get around, you need to manage that memory differently somehow. You can have your function allocate the memory on the heap, but you'll need to make sure it gets free()'ed at a later date, by your binding. OR, you can write your function so that the binding language passes it a glump of memory to be used.
I started on a little toy project in C lately and have been scratching my head over the best way to mimic the strip() functionality that is part of the python string objects.
Reading around for fscanf or sscanf says that the string is processed upto the first whitespace that is encountered.
fgets doesn't help either as I still have newlines sticking around.
I did try a strchr() to search for a whitespace and setting the returned pointer to '\0' explicitly but that doesn't seem to work.
Python strings' strip method removes both trailing and leading whitespace. The two halves of the problem are very different when working on a C "string" (array of char, \0 terminated).
For trailing whitespace: set a pointer (or equivalently index) to the existing trailing \0. Keep decrementing the pointer until it hits against the start-of-string, or any non-white character; set the \0 to right after this terminate-backwards-scan point.
For leading whitespace: set a pointer (or equivalently index) to the start of string; keep incrementing the pointer until it hits a non-white character (possibly the trailing \0); memmove the rest-of-string so that the first non-white goes to the start of string (and similarly for everything following).
There is no standard C implementation for a strip() or trim() function. That said, here's the one included in the Linux kernel:
char *strstrip(char *s)
{
size_t size;
char *end;
size = strlen(s);
if (!size)
return s;
end = s + size - 1;
while (end >= s && isspace(*end))
end--;
*(end + 1) = '\0';
while (*s && isspace(*s))
s++;
return s;
}
If you want to remove, in place, the final newline on a line, you can use this snippet:
size_t s = strlen(buf);
if (s && (buf[s-1] == '\n')) buf[--s] = 0;
To faithfully mimic Python's str.strip([chars]) method (the way I interpreted its workings), you need to allocate space for a new string, fill the new string and return it. After that, when you no longer need the stripped string you need to free the memory it used to have no memory leaks.
Or you can use C pointers and modify the initial string and achieve a similar result.
Suppose your initial string is "____forty two____\n" and you want to strip all underscores and the '\n'
____forty two___\n
^ ptr
If you change ptr to the 'f' and replace the first '_' after two with a '\0' the result is the same as Python's "____forty two____\n".strip("_\n");
____forty two\0___\n
^ptr
Again, this is not the same as Python. The string is modified in place, there's no 2nd string and you cannot revert the changes (the original string is lost).
I wrote C code to implement this function. I also wrote a few trivial tests to make sure my function does sensible things.
This function writes to a buffer you provide, and should never write past the end of the buffer, so it should not be prone to buffer overflow security issues.
Note: only Test() uses stdio.h, so if you just need the function, you only need to include ctype.h (for isspace()) and string.h (for strlen()).
// strstrip.c -- implement white space stripping for a string in C
//
// This code is released into the public domain.
//
// You may use it for any purpose whatsoever, and you don't need to advertise
// where you got it, but you aren't allowed to sue me for giving you free
// code; all the risk of using this is yours.
#include <ctype.h>
#include <stdio.h>
#include <string.h>
// strstrip() -- strip leading and trailing white space from a string
//
// Copies from sIn to sOut, writing at most lenOut characters.
//
// Returns number of characters in returned string, or -1 on an error.
// If you get -1 back, then nothing was written to sOut at all.
int
strstrip(char *sOut, unsigned int lenOut, char const *sIn)
{
char const *pStart, *pEnd;
unsigned int len;
char *pOut;
// if there is no room for any output, or a null pointer, return error!
if (0 == lenOut || !sIn || !sOut)
return -1;
pStart = sIn;
pEnd = sIn + strlen(sIn) - 1;
// skip any leading whitespace
while (*pStart && isspace(*pStart))
++pStart;
// skip any trailing whitespace
while (pEnd >= sIn && isspace(*pEnd))
--pEnd;
pOut = sOut;
len = 0;
// copy into output buffer
while (pStart <= pEnd && len < lenOut - 1)
{
*pOut++ = *pStart++;
++len;
}
// ensure output buffer is properly terminated
*pOut = '\0';
return len;
}
void
Test(const char *s)
{
int len;
char buf[1024];
len = strstrip(buf, sizeof(buf), s);
if (!s)
s = "**null**"; // don't ask printf to print a null string
if (-1 == len)
*buf = '\0'; // don't ask printf to print garbage from buf
printf("Input: \"%s\" Result: \"%s\" (%d chars)\n", s, buf, len);
}
main()
{
Test(NULL);
Test("");
Test(" ");
Test(" ");
Test("x");
Test(" x");
Test(" x ");
Test(" x y z ");
Test("x y z");
}
This potential ‘solution' is by no means as complete or thorough as others have presented. This is for my own toy project in C - a text-based adventure game that I’m working on with my 14-year old son. If you’re using fgets() then strcspn() may just work for you as well. The sample code below is the beginning of an interactive console-based loop.
#include <stdio.h>
#include <string.h> // for strcspn()
int main(void)
{
char input[64];
puts("Press <q> to exit..");
do {
printf("> ");
fgets(input,64,stdin); // fgets() captures '\n'
input[strcspn(input, "\n")] = 0; // replaces '\n' with 0
if (input[0] == '\0') continue;
printf("You entered '%s'\n", input);
} while (strcmp(input,"q")!= 0); // returns 0 (false) when input = "q"
puts("Goodbye!");
return 0;
}