Pointer argument passing in python ctypes - python

I have the following c function.
/* returns (uint8_t *)outbuf */
uint8_t *func(uint8_t *inbuf, uint32_t inbuf_len, uint32_t *outbuf_len);
This function returns outbuf, the output length is unknown before calling the function so the function receives a pointer to the length as an argument outbuf_len, also the caller is responsible to free outbuf.
I want to get the result of this function from python, so I started writing the following code:
import ctypes as ct
libb = ct.cdll.LoadLibrary('./a.so')
libb.func.restype = ct.c_char_p
inbuf = bytearray(inbuf_len)
inbuf = python_data
arr = ct.c_ubyte * inbuf_len
outbuf_len = ct.c_uint # there is no ct.c_uint_p...
outbuf = libb.func(arr.from_buffer_copy(inbuf), inbuf_len, outbuf_len)
print hexlify(outbuf) #prints only the first 4 bytes of outbuf
The problems i have is:
I didn't find pointer to uint in ctypes types, so how can I pass the outbuf_len pointer to the C function?
when printing the outbuf, only the first 4 bytes that are pointed by the pointer are printed.
How do I free() the outbuf buffer from python?
I have the source of the C function so it is possible to change how arguments are passed the the C function.
Thanks.

If you'll be passing Python byte strings as the input buffer, here's a way to do it. I made a minimal example of the C call:
test.c
#include <stdint.h>
#include <stdlib.h>
#define API __declspec(dllexport)
// double every character in the input buffer as an example
API uint8_t *func(uint8_t *inbuf, uint32_t inbuf_len, uint32_t *outbuf_len) {
*outbuf_len = inbuf_len * 2;
uint8_t* outbuf = malloc(*outbuf_len);
for(uint32_t i = 0; i < inbuf_len; ++i) {
outbuf[i*2] = inbuf[i];
outbuf[i*2+1] = inbuf[i];
}
return outbuf;
}
API void freebuf(uint8_t* buf) {
free(buf);
}
test.py
import ctypes as ct
dll = ct.CDLL('./test')
# c_char_p accepts Python byte strings and is compatible with C uint8_t*
# but don't use it for the output buffer because ctypes converts the pointer
# back to a Python byte string and you would lose access to the pointer for
# later freeing it. Use POINTER(ct.c_char) to get the actual pointer back.
dll.func.argtypes = ct.c_char_p, ct.c_uint32, ct.POINTER(ct.c_uint32)
dll.func.restype = ct.POINTER(ct.c_char)
dll.freebuf.argtypes = ct.POINTER(ct.c_char),
dll.freebuf.restype = None
def func(inbuf):
outlen = ct.c_uint32() # create storage for the output length and pass by reference
outbuf = dll.func(inbuf, len(inbuf), ct.byref(outlen))
# Slicing the returned POINTER(c_char) returns a Python byte string.
# If you used POINTER(c_uint8) for the return value instead,
# you'd get a list of integer byte values.
data = outbuf[:outlen.value]
# Can free the pointer now if you want, or return it for freeing later
dll.freebuf(outbuf)
return data
print(func(b'ABC'))
Output:
b'AABBCC'

Related

Python Bytes to CTypes Void Pointer

I have a c function that takes as arguments a void * pointer and an integer length for the size of the buffer pointed to.
e.g.
char* myfunc(void *mybuffer, int buflen)
On the python side I have a bytes object of binary data read from a file.
What I am trying to figure out is the right conversions to be able to call the c function from python, and am struggling a bit.
I understand the conversions for dealing with simple string data (e.g. encoding to utf-8 and using a char_p type) but dealing with a bytes object has been a bit of a struggle....
Thanks in advance!
Given your commented description, you can just use the obvious types if you don't need to free the returned char* memory. You can pass a bytes object to a void*. Here's a quick demo:
test.c
#include <stdio.h>
#include <stdint.h>
#ifdef _WIN32
# define API __declspec(dllexport)
#else
# define API
#endif
API char* myfunc(void *mybuffer, int buflen) {
const uint8_t* tmp = (const uint8_t*)mybuffer;
for(int i = 0; i < buflen; ++i) // show the passed bytes
printf("%02X\n", tmp[i]);
return "output"; // static string no deallocation required
}
test.py
import ctypes as ct
import os
dll = ct.CDLL('./test')
dll.myfunc.argtypes = ct.c_void_p, ct.c_int
dll.myfunc.restype = ct.c_char_p
buf = bytes([1,2,0,0xaa,0x55]) # including embedded null
ret = dll.myfunc(buf, len(buf))
print(ret)
Output:
01
02
00
AA
55
b'output'

Python Ctypes - Writing to memory created by malloc

I have something like this:
import ctypes
dll = ctypes.CDLL('libc.so.6')
dll.malloc.argtypes = ctypes.c_size_t
dll.malloc.restype = ctypes.c_void_p
ptr: int = dll.malloc(100) # how do i write to this memory?
print(ptr) # gives a memory address
How do I set a value to that memory? Preferably a ctypes.py_object.
I'm aware that doing this in Python is a bad idea, but I'm just experimenting with breaking the rules.
You can write to the pointer similar to C. Cast the void pointer to something that can be dereferenced and write to it with Python or use ctypes to call a C function that can use the pointer. A couple of examples:
import ctypes as ct
dll = ct.CDLL('msvcrt') # Windows C runtime
# void* malloc(size_t size);
dll.malloc.argtypes = ct.c_size_t,
dll.malloc.restype = ct.c_void_p
# void free(void* ptr);
dll.free.argtypes = ct.c_void_p,
dll.free.restype = None
# char* strcpy(char* dest, const char* src)
dll.strcpy.argtypes = ct.c_char_p, ct.c_char_p
dll.strcpy.restype = ct.c_char_p
# casting to pointer to specific-sized array enables
# use of ".raw" and checks that array bounds aren't exceeded.
ptr = ct.cast(dll.malloc(10), ct.POINTER(ct.c_char * 10))
print(ptr.contents.raw)
# Note use of [:] to replace the entire array contents with a same-sized bytes object
ptr.contents[:] = b'abcdefghij'
print(ptr.contents.raw)
# char (*ptr)(10) isn't correct for strcpy, so cast to char*
dll.strcpy(ct.cast(ptr, ct.POINTER(ct.c_char)), b'ABCDEFGHIJ')
print(ptr.contents.raw)
dll.free(ptr)
Output:
b'\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00'
b'abcdefghij'
b'ABCDEFGHIJ'

cTypes - passing a string as a pointer from python to c

I'm trying to pass a string (as a pointer) from python to a C function using cTypes.
The c function needs to get a pointer to a string (array of chars) but I haven't successfully got this to work with multiple chars, but my only success(kind of successful, look at the output) is with 1 char and would love for some help!
I need to send a pointer of a string to char - (unsigned char * input)
My Python Code:
def printmeP(CHAR):
print("In Print function")
print(CHAR)
c_sends = pointer(c_char_p(CHAR.encode('utf-8')))
print("c_sends: ")
print(c_sends[0])
python_p_printme(c_sends)
print("DONE function - Python")
print(c_sends[0])
return
from ctypes import c_double, c_int,c_char, c_wchar,c_char_p, c_wchar_p, pointer, POINTER,create_string_buffer, byref, CDLL
import sys
lib_path = '/root/mbedtls/programs/test/mylib_linux.so' .format(sys.platform)
CHAR = "d"
try:
mylib_lib = CDLL(lib_path)
except:
print('STARTING...' )
python_p_printme = mylib_lib.printme
python_p_printme.restype = None
printmeP(CHAR)
My C Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void printme(char * param) {
printf("\nStart c Function!\n");
printf("%s\n Printing param\n ", param);
char add = '!';
strncat(param, &add, 1);
printf("%s\n Printing param\n ", param);
printf("Function Done - c\n");
}
My Output:
In Print function
d <--- this is what i am sending
c_sends:
b'd' <--- this is after encoding
��[� <-------------=|
Printing param |
��[� | This is the c code print
Printing param | **Question:** Why does it print '�' and no what's supposed to be printed
Function Done - c <--=|
DONE function - Python
b'd!' <--------------------- this is the last print that prints after the change.
Would love for some help, thanks to everyone that participated :)
sincerely,
Roe
There are a number of problems:
Define .argtypes for your functions. It will catch errors passing incorrect parameters. Add the line below and note it is plural and is a tuple of argument types. The comma makes a 1-tuple:
python_p_printme.argtypes = c_char_p,
Once you make that change, you'll get an error because this code:
c_sends = pointer(c_char_p(CHAR.encode('utf-8')
is actually sending a C char** (a pointer to a c_char_p). Once you've set the argtypes properly, you can just call the function with a byte string and it will work. Your function becomes:
def printmeP(CHAR):
print("In Print function")
print(CHAR)
python_p_printme(CHAR.encode())
print("DONE function - Python")
There is one more subtle problem. While the program may appear to work at this point Python strings are immutable so if the function being called requires a mutable string, you must allocate a mutable buffer using either create_unicode_buffer (for w_char_p) or create_string_buffer (for c_char_p); otherwise, the strcat in your C code is going to corrupt the Python string.
Here's a full example:
test.cpp
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// For Windows compatibility
#ifdef _WIN32
# define API __declspec(dllexport)
#else
# define API
#endif
// For C++ compiler compatibility
#ifdef __cplusplus
extern "C" {
#endif
API void alter_me(char* param, size_t length) {
// truncates if additional info exceeds length
strncat_s(param, length, " adding some info", length - 1);
}
#ifdef __cplusplus
}
#endif
test.py
from ctypes import *
lib = CDLL('./test')
alter_me = lib.alter_me
alter_me.argtypes = c_char_p,c_size_t
alter_me.restype = None
data = create_string_buffer(b'test',size=10)
alter_me(data,sizeof(data))
print(data.value)
data = create_string_buffer(b'test',size=50)
alter_me(data,sizeof(data))
print(data.value)
Output:
b'test addi'
b'test adding some info'
Note you do not need to use create_string_buffer if the C function does not alter the buffer, such as if the C parameter is const char*. Then you could just call printme(b'test string').
You could use create_string_buffer.
The documentation can be found here:
https://docs.python.org/3/library/ctypes.html#ctypes.create_string_buffer
ctypes.create_string_buffer(init_or_size, size=None)
This function creates a mutable character buffer. The returned object is a ctypes array of c_char.
init_or_size must be an integer which specifies the size of the array, or a bytes object which will be used to initialize the array items.
With buf.value.decode("utf-8") you can convert the buffer back to a UTF-8 python string.
A small example with your C code library might look like this:
from ctypes import *
mylib_lib = cdll.LoadLibrary("<your-lib>")
mylib_lib.printme.argtypes = c_char_p,
buf = create_string_buffer(128)
buf.value = b'Hello World'
mylib_lib.printme(buf)
print("print on python side:", buf.value.decode("utf-8"))
It would output:
Start c Function!
Hello World
...
print on python side: Hello World!
on the console.

Python 3 ctypes call to a function that needs an indirect reference to a buffer through another structure

I have a C shared library with a function that takes one argument.
This argument is a pointer to a structure with two fields.
typedef struct
{
uint8_t *p_data; // Pointer to a fixed lenth buffer (100 bytes)
uint16_t len; // number of valid bytes in the buffer (range 1-100)
} data_t;
I need to setup a buffer of 100 bytes in my Python 3 script (I am using 3.7.2 / 3.7.3),
load the library and call this function.
int
fn_convert_buffer(data_t *data_p)
{
...
}
My Python 3 ctypes call attempt hits incompatible types.
import ctypes as ct
# load the library, etc...
# lib_cdll = ct.CDLL(mySharedLib)
def c_py_fn_convert_buffer(b_p):
global lib_cdll
val = lib_cdll.fn_convert_buffer(ct.byref(b_p))
return int(val)
data_a = bytearray(100)
# Initialize the buffer with data.
uint8_p = ct.c_uint8 * len(data_a)
class BufferStruct_t (ct.Structure):
_pack_ = 1
_fields_ = [
("p_data", ct.POINTER(ct.c_uint8 * len(data_a))),
("len", ct.c_uint16)
]
data_buf = BufferStruct_t(uint8_p.from_buffer(data_a), ct.c_uint16(8))
# TypeError: incompatible types, c_ubyte_Array_100 instance
# instead of LP_c_ubyte_Array_100 instance
# Call C function in shared-library: int fn_convert_buffer(data_t *data_p);
z = c_py_fn_convert_buffer(data_buf)
I need help in understanding what I've missed in the BufferStruct_t definition above. The from_buffer is supposed to get a pointer to the buffer but it seems to get c_ubyte_ARRAY_100.
A byref() on that does not work either
data_buf = BufferStruct_t(ct.byref(uint8_p.from_buffer(data_a)), ct.c_uint16(8))
# TypeError: expected LP_c_ubyte_Array_100 instance, got CArgObject
To test the basics of my flow, I made a sample case that will send the buffer and length parameters individually.
def c_py_fn_convert_data(d_p,l):
global lib_cdll
val = lib_cdll.fn_convert_data(ct.byref(d_p),ct.c_uint32(l))
return int(val)
test_a = ct.c_uint8 * len(data_a)
# Call C function in shared-library:
# int fn_convert_data(uint8_t *data_p, uint32_t length);
z = c_py_fn_convert_data(test_a.from_buffer(data_a), 8)
This simplified case works.
How do I get about building a Python 3 object that carries a reference to a buffer that the shared-library function expects?
Update with two variations that worked.
Update 1 Tried a cast based on something I read later (I don't cast lightly :-))
Changed,
data_buf = BufferStruct_t(uint8_p.from_buffer(data_a), ct.c_uint16(8))
to a pointer that is cast to refer an Array of specific length,
data_buf = BufferStruct_t(cast(uint8_p.from_buffer(data_a),
ct.POINTER(ct.c_uint8 * len(data_a))),
ct.c_uint16(8))
Update 2 based on Mark's answer.
Changed _field_ from,
("p_data", ct.POINTER(ct.c_uint8 * len(data_a))),
to a simple-pointer form,
("p_data", ct.POINTER(ct.c_uint8)),
Both variations worked.
I am however curious to know which of these two ways is more safe/correct ctypes handling.
Is it better to cast to the Array form? or,
Is it better to use simple pointers and rely on the length sent independently?
Your structure definition declared a pointer to an array, not a simple pointer as in the C structure. Here's a working example with a simple implementation of the DLL where the function sums the data:
test.c
#include <stdint.h>
#ifdef _WIN32
# define API __declspec(dllexport)
#else
# define API
#endif
typedef struct {
uint8_t *p_data;
uint16_t len;
} data_t;
API int fn_convert_buffer(data_t *data_p)
{
int i;
int sum = 0;
for(i = 0; i < data_p->len; ++i)
sum += data_p->p_data[i];
return sum;
}
test.py
import ctypes as ct
class BufferStruct_t(ct.Structure):
_pack_ = 1
_fields_ = [("p_data", ct.POINTER(ct.c_uint8)), # just a pointer
("len", ct.c_uint16)]
# Helper to initialize the data
def __init__(self,data):
self.p_data = (ct.c_uint8 * len(data))(*data)
self.len = len(data)
dll = ct.CDLL('test')
dll.fn_convert_buffer.argtypes = ct.POINTER(BufferStruct_t),
dll.fn_convert_buffer.restype = ct.c_int
data_buf = BufferStruct_t([1,2,3,4,5])
print(dll.fn_convert_buffer(data_buf))
Output:
15

ctypes return a string from c function

I'm a Python veteran, but haven't dabbled much in C. After half a day of not finding anything on the internet that works for me, I thought I would ask here and get the help I need.
What I want to do is write a simple C function that accepts a string and returns a different string. I plan to bind this function in several languages (Java, Obj-C, Python, etc.) so I think it has to be pure C?
Here's what I have so far. Notice I get a segfault when trying to retrieve the value in Python.
hello.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
const char* hello(char* name) {
static char greeting[100] = "Hello, ";
strcat(greeting, name);
strcat(greeting, "!\n");
printf("%s\n", greeting);
return greeting;
}
main.py
import ctypes
hello = ctypes.cdll.LoadLibrary('./hello.so')
name = "Frank"
c_name = ctypes.c_char_p(name)
foo = hello.hello(c_name)
print c_name.value # this comes back fine
print ctypes.c_char_p(foo).value # segfault
I've read that the segfault is caused by C releasing the memory that was initially allocated for the returned string. Maybe I'm just barking up the wrong tree?
What's the proper way to accomplish what I want?
Your problem is that greeting was allocated on the stack, but the stack is destroyed when the function returns. You could allocate the memory dynamically:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
const char* hello(char* name) {
char* greeting = malloc(100);
snprintf("Hello, %s!\n", 100, name)
printf("%s\n", greeting);
return greeting;
}
But that's only part of the battle because now you have a memory leak. You could plug that with another ctypes call to free().
...or a much better approach is to read up on the official C binding to python (python 2.x at http://docs.python.org/2/c-api/ and python 3.x at http://docs.python.org/3/c-api/). Have your C function create a python string object and hand that back. It will be garbage collected by python automatically. Since you are writing the C side, you don't have to play the ctypes game.
...edit..
I didn't compile and test, but I think this .py would work:
import ctypes
# define the interface
hello = ctypes.cdll.LoadLibrary('./hello.so')
# find lib on linux or windows
libc = ctypes.CDLL(ctypes.util.find_library('c'))
# declare the functions we use
hello.hello.argtypes = (ctypes.c_char_p,)
hello.hello.restype = ctypes.c_char_p
libc.free.argtypes = (ctypes.c_void_p,)
# wrap hello to make sure the free is done
def hello(name):
_result = hello.hello(name)
result = _result.value
libc.free(_result)
return result
# do the deed
print hello("Frank")
In hello.c you return a local array. You have to return a pointer to an array, which has to be dynamically allocated using malloc.
char* hello(char* name)
{
char hello[] = "Hello ";
char excla[] = "!\n";
char *greeting = malloc ( sizeof(char) * ( strlen(name) + strlen(hello) + strlen(excla) + 1 ) );
if( greeting == NULL) exit(1);
strcpy( greeting , hello);
strcat(greeting, name);
strcat(greeting, excla);
return greeting;
}
I ran into this same problem today and found you must override the default return type (int) by setting restype on the method. See Return types in the ctype doc here.
import ctypes
hello = ctypes.cdll.LoadLibrary('./hello.so')
name = "Frank"
c_name = ctypes.c_char_p(name)
hello.hello.restype = ctypes.c_char_p # override the default return type (int)
foo = hello.hello(c_name)
print c_name.value
print ctypes.c_char_p(foo).value
I also ran into the same problem but used a different approach. I was suppose to find a string in a list of strings matchin a certain value.
Basically I initalized a char array with the size of longest string in my list. Then passed that as an argument to my function to hold the corresponding value.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void find_gline(char **ganal_lines, /*line array*/
size_t size, /*array size*/
char *idnb, /* id number for check */
char *resline) {
/*Iterates over lines and finds the one that contains idnb
then affects the result to the resline*/
for (size_t i = 0; i < size; i++) {
char *line = ganal_lines[i];
if (strstr(line, idnb) != NULL) {
size_t llen = strlen(line);
for (size_t k = 0; k < llen; k++) {
resline[k] = line[k];
}
return;
}
}
return;
}
This function was wrapped by the corresponding python function:
def find_gline_wrap(lines: list, arg: str, cdll):
""
# set arg types
mlen = maxlen(lines) # gives the length of the longest string in string list
linelen = len(lines)
line_array = ctypes.c_char_p * linelen
cdll.find_gline.argtypes = [
line_array,
ctypes.c_size_t,
ctypes.c_char_p,
ctypes.c_char_p,
]
#
argbyte = bytes(arg, "utf-8")
resbyte = bytes("", "utf-8")
ganal_lines = line_array(*lines)
size = ctypes.c_size_t(linelen)
idnb = ctypes.c_char_p(argbyte)
resline = ctypes.c_char_p(resbyte * mlen)
pdb.set_trace()
result = cdll.find_gline(ganal_lines, size, idnb, resline)
# getting rid of null char at the end
result = resline.value[:-1].decode("utf-8")
return result
Here's what happens. And why it's breaking. When hello() is called, the C stack pointer is moved up, making room for any memory needed by your function. Along with some function call overhead, all of your function locals are managed there. So that static char greeting[100], means that 100 bytes of the increased stack are for that string. You than use some functions that manipulate that memory. At the you place a pointer on the stack to the greeting memory. And then you return from the call, at which point, the stack pointer is retracted back to it's original before call position. So those 100 bytes that were on the stack for the duration of your call, are essentially up for grabs again as the stack is further manipulated. Including the address field which pointed to that value and that you returned. At that point, who knows what happens to it, but it's likely set to zero or some other value. And when you try to access it as if it were still viable memory, you get a segfault.
To get around, you need to manage that memory differently somehow. You can have your function allocate the memory on the heap, but you'll need to make sure it gets free()'ed at a later date, by your binding. OR, you can write your function so that the binding language passes it a glump of memory to be used.

Categories

Resources