Re: emm and fd migration

Snow Cat (snowcat@gd.cs.CSUFresno.EDU)
Wed, 12 Jul 1995 09:19:34 -0700 (PDT)


Louis-D. Dubeau once wrote:
>
>
> I think I saw somewhere that somebody tried to integrate file
> descriptor migration to the kernel (like the I_SENDFD ioctl for
> streams). Have I dreamed or has anybody actually worked on this?
>
> The reason I ask this is that I'd like to implement external memory
> managers in Linux. My plan is not to get the default memory manager
> out of the kernel (not right now anyway) but rather to permit the use
> of emms for DBMSes and other data intensive applications.
>
> Here is an outline of what I have in head:
>
> - the server creates the object with a syscall that returns a fd
> the created file uses a special mmap operator
> - the client gets the object from the server: the fd must
> be sent to the client
> - the client mmaps the object using the fd it received
> - faults and such are sent to the server through ipc
> - the results are returned to the kernel by ipc (????)
>
> I'm not sure that the kernel supports fd transfers between processes.
> Is the multithreading support (through clone) robust enough? (An
> external mem manager needs threads.)
>
> ldd
>

Linux does support passing file descriptors through the /proc filesystem.
The way I implemented it is that client and server are connected through a
normal socket to signal each other about what they are doing. Server process
fork()'s, closes all files except the one it wants to pass and then changes
its uid to make the file descriptor open()able by target client process.
After the client gets fd, it kills the child process of the server as a
synchronization method.

Here is my code:

givefd.c:

#include "runix.h"
#include "net/net.h"
#include "runixd/runixd.h"

/*
* Offer an fd to the new runixd process spawned by fork(). There are at least
* 3 ways to do it on different systems:
*
* ^oo^ BSD socket sendmsg/recvmsg
* ^oo^ SVR4 ioctl
* ^oo^ Linux /proc filesystem
*
* As SVR4 also has networking, complete with sendmsg, I didn't actually
* implement the ioctl method.
*/

#ifdef linux

/*
* Processes running with the same euid can access each other's /proc/pid#/fd
* directories. Unfortunately, changing uid gives access to *all* file
* descriptors, as well as ptrace(). To minimize the danger, I use a separate
* process and close all file descriptors but the one to pass ASAP.
*
* The receiving process kills the sender once it gets the file descriptor.
*/

/*
 * Linux variant: offer `fd` to the client through /proc.
 *
 * The descriptor is duplicated onto descriptor 0 and every other descriptor
 * of this process is closed, so that the only thing reachable through
 * /proc/<pid>/fd is the one file we intend to hand over. The pid of this
 * process (converted with htonl) is then written to the descriptor as the
 * handle the receiver uses to build the /proc path.
 *
 * Meant to be called in a dedicated helper process: on return, descriptors
 * 1 .. FD_SETSIZE-1 of the calling process are closed.
 *
 * fd - descriptor to offer; also the channel the handle is written to.
 */
void givefd(int fd)
{
    ulg handle = htonl((ulg)getpid());
    int i;

    /* Without descriptor 0 in place there is nothing to offer. */
    if (dup2(fd, 0) == -1)
        return;

    /*
     * Close everything except descriptor 0. The loop index must be closed
     * here, not `fd`: closing `fd` over and over would leave every other
     * inherited descriptor open and visible to the peer.
     */
    for (i = 1; i < FD_SETSIZE; i++)
        close(i);

    /* If the handle cannot be delivered the client can never find us. */
    if (write(0, &handle, sizeof(ulg)) != (ssize_t)sizeof(ulg))
        return;

    /*
     * This read makes sure that we go away if the client dies before making
     * the RPC call. It's also a nice way to wait for a signal.
     */
    read(0, &handle, sizeof(ulg));
}

#else

#include <sys/socket.h>
#include <sys/un.h>

/*
 * BSD variant: offer `fd` to the client over a UNIX-domain socket.
 *
 * A listening socket is bound to a fresh name /tmp/<HEX>, the number behind
 * that name (converted with htonl) is written to `fd` as the handle, and the
 * function then waits until either the client connects or `fd` becomes
 * readable. On a connection the descriptor is sent with sendmsg(), using
 * SCM_RIGHTS control data when NEW_SENDMSG is defined and msg_accrights
 * otherwise. Both sockets are closed and the socket file is removed before
 * returning.
 *
 * fd - descriptor to pass; also the channel the handle is written to.
 */
void givefd(int fd)
{
    static struct sockaddr_un un;   /* static, so it starts zero-filled */
    static struct sigaction sa;
    struct stat st;
    fd_set fds;
    ulg handle;
    int s;
    int s0 = -1;

    /* A client that vanishes mid-transfer must not kill us with SIGPIPE. */
    sa.sa_handler = SIG_IGN;
    sigaction(SIGPIPE, &sa, NULL);

    if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
        return;

    un.sun_family = AF_UNIX;

    /*
     * Creating intermediate files can be messy as any given name could
     * already exist and be owned by someone else. Keep trying successive
     * names while the failure is a name collision; any other bind() error
     * will not go away by picking another name, so give up instead of
     * spinning forever.
     */
    for (handle = random();; handle++) {
        sprintf(un.sun_path, "/tmp/%lX", handle);
        if (stat(un.sun_path, &st) != -1)
            continue;
        if (bind(s, (struct sockaddr *)&un, sizeof(un)) != -1)
            break;
        if (errno != EADDRINUSE) {
            close(s);
            return;
        }
    }

    /*
     * Should be no race condition, because we didn't listen() yet?
     */
    chmod(un.sun_path, 0600);
    if (listen(s, 5) == -1)
        goto done;

    handle = htonl(handle);
    if (write(fd, &handle, sizeof(ulg)) != (ssize_t)sizeof(ulg))
        goto done;

    /*
     * Wait for connection, but also keep an eye on fd: if it's ready
     * for read, it means the client exited and so should we. The sets are
     * rebuilt on every attempt so a retry after EINTR starts from a known
     * state.
     */
    for (;;) {
        FD_ZERO(&fds);
        FD_SET(fd, &fds);
        FD_SET(s, &fds);
        if (select(FD_SETSIZE, &fds, NULL, NULL, NULL) != -1)
            break;
        if (errno != EINTR)
            goto done;
    }

    if (FD_ISSET(s, &fds)) {
        static struct msghdr mhdr;
        struct sockaddr_un peer;
        struct iovec io;
        int psize;  /* int rather than socklen_t, as in the original code */

        for (;;) {
            psize = sizeof(peer);
            s0 = accept(s, (struct sockaddr *)&peer, &psize);
            if (s0 != -1)
                break;
            if (errno != EINTR)
                goto done;
        }

        /* The descriptor number itself serves as the ordinary payload. */
        io.iov_base = &fd;
        io.iov_len = sizeof(int);

        mhdr.msg_iov = &io;
        mhdr.msg_iovlen = 1;

#ifdef NEW_SENDMSG
        /*
         * "Advanced Programming in the UNIX Environment" by W. Richard
         * Stevens says that this is how passing fd's works in BSD 4.3Reno
         * and later. The control message is laid out with the CMSG macros
         * so that its length describes exactly one descriptor, and it
         * carries `fd` (not the listening socket).
         */
        {
            static union {
                struct cmsghdr cm;
                char space[CMSG_SPACE(sizeof(int))];
            } ctl;

            ctl.cm.cmsg_len = CMSG_LEN(sizeof(int));
            ctl.cm.cmsg_level = SOL_SOCKET;
            ctl.cm.cmsg_type = SCM_RIGHTS;
            *(int *)CMSG_DATA(&ctl.cm) = fd;

            mhdr.msg_control = ctl.space;
            mhdr.msg_controllen = sizeof(ctl.space);
        }
#else
        mhdr.msg_accrights = (char *)&fd;
        mhdr.msg_accrightslen = sizeof(int);
#endif

        /* The send is common to both flavours; retry only on EINTR. */
        while (sendmsg(s0, &mhdr, 0) == -1 && errno == EINTR)
            ;
    }

done:
    if (s0 != -1)
        close(s0);
    close(s);
    unlink(un.sun_path);
}

#endif

getfd.c:

#include "runix.h"
#include "rpc/runix_prot.h"
#include "svc/runix_server.h"

/*
* See comments in givefd.c
*/

#ifdef linux

/*
 * Linux variant: pick up the descriptor offered by the process whose pid
 * is `handle`, by opening that process's descriptor 0 through /proc.
 *
 * Returns the newly opened descriptor, or -1 if the open fails. When the
 * open succeeds the offering process is sent SIGKILL.
 */
int getfd(ulg handle)
{
    char path[64];
    int passed;

    sprintf(path, "/proc/%lu/fd/0", handle);
    passed = open(path, O_RDWR);

    if (passed != -1) {
        /* Off it goes */
        kill((pid_t)handle, SIGKILL);
    }

    return passed;
}

#else

#include <sys/socket.h>
#include <sys/un.h>

/*
 * BSD variant: receive a descriptor from the UNIX-domain socket bound to
 * /tmp/<handle in upper-case hex>.
 *
 * Connects to that socket and reads one message carrying an int of ordinary
 * data plus the passed descriptor (SCM_RIGHTS control data when NEW_SENDMSG
 * is defined, msg_accrights otherwise).
 *
 * Returns the received descriptor, or -1 if the socket cannot be created or
 * connected, if recvmsg() fails or reports end-of-file, or if the message
 * did not actually carry a descriptor.
 */
int getfd(ulg handle)
{
    static struct sockaddr_un un;   /* static, so it starts zero-filled */
    static struct msghdr mhdr;
    struct iovec io;
    int s;
    int payload;    /* ordinary data sent along with the rights; unused */
    int fd = -1;

#ifdef NEW_SENDMSG
    union {
        struct cmsghdr cm;
        char space[CMSG_SPACE(sizeof(int))];
    } ctl;
#endif

    if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) == -1)
        return -1;

    un.sun_family = AF_UNIX;
    sprintf(un.sun_path, "/tmp/%lX", handle);

    /*
     * This happens inside a section delimited by sigprocmask(), so
     * it should never get EINTR
     */
    if (connect(s, (struct sockaddr *)&un, sizeof(un)) == -1) {
        close(s);
        return -1;
    }

    io.iov_base = &payload;
    io.iov_len = sizeof(int);

    mhdr.msg_iov = &io;
    mhdr.msg_iovlen = 1;

#ifdef NEW_SENDMSG
    mhdr.msg_control = ctl.space;
    mhdr.msg_controllen = sizeof(ctl.space);
#else
    mhdr.msg_accrights = (char *)&fd;
    mhdr.msg_accrightslen = sizeof(int);
#endif

    /* 0 means the peer closed the connection without sending anything. */
    if (recvmsg(s, &mhdr, 0) <= 0) {
        close(s);
        return -1;
    }

    close(s);

    /*
     * Only trust the descriptor if the message really delivered one;
     * otherwise the value would be whatever happened to be in memory.
     */
#ifdef NEW_SENDMSG
    {
        struct cmsghdr *cm = CMSG_FIRSTHDR(&mhdr);

        if (cm == NULL ||
            cm->cmsg_level != SOL_SOCKET ||
            cm->cmsg_type != SCM_RIGHTS ||
            cm->cmsg_len < CMSG_LEN(sizeof(int)))
            return -1;

        fd = *(int *)CMSG_DATA(cm);
    }
#else
    if (mhdr.msg_accrightslen != sizeof(int))
        return -1;
#endif

    return fd;
}

#endif

-- 
     Snow ^oo^ Cat <snowcat@gd.cs.CSUFresno.EDU>
      _  ->  <-    aka Oleg Kibirev <oleg@gd.cs.CSUFresno.EDU>
  ___(_)  _ _)_
 /            _)
 \_.-._
       |___/	Purr!