[an error occurred while processing this directive]

HP OpenVMS Systems

ask the wizard
Content starts here

Debugging Application DECthreads Deadlock?

» close window

The Question is:

 
I am running OpenVMS 6.2 on a multiprocessor DEC machine. I have written a
 Tcp-Server that accepts job requests from a remote client, and passes these
 requests to a "database process" running on the same system as the Tcp Server.
 I am new to all of the concepts required: Threads, C code, ASTs, MBX, etc.
 My code works, but occasionally crashes with a
 "%DECthreads bugcheck (version V2.12-296), terminating execution.
% Running on OpenVMS VAX [OpenVMS V6.2; VAX 7000-640, 4 cpus, 512Mb]
% Reason: set_kernel: deadlock at _$22$DIA1:[CMARTL.SRC]CMA_MUTEX.C;1:2027"
 
My code has a primary thread to accept new connections. It then spawns a child
 thread per new connection. Each child thread serves a remote client. Each
 child thread gets TCP job requests from the client, then delivers the job to my
 "database process" MBX. The "database process" returns job results to the
 child thread's MBX; then, in turn, these results are sent via TCP to the
 client. The TCP messages from the client and the "job result" MBX messages
 from the "database server" are both triggered via ASTs.
 
If the Wizard can look through my code to offer suggestions, I would appreciate
 it. I am new to this, and have pieced this together from many different
 resources.
 
My code follows. I hope it is not inappropriate to post an entire program
 here....
 
Begin Code:
/********************************************************************
*  To compile and link this server:
 
*    $ CC T4
*    $ LINK T4,SYS$INPUT:/OPT
*    MULTINET:MULTINET_SOCKET_LIBRARY/SHARE
*    SYS$SHARE:VAXCRTL/SHARE
*    SYS$SHARE:CMA$LIB_SHR/SHARE
*    ^Z
********************************************************************/
 
#include <ssdef.h>
#include <stsdef.h>
#include <descrip.h>
#include <psldef.h>
#include <lnmdef.h>
#include <lib$routines.h>
#include <starlet.h>
#include <iodef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
#include "multinet_root:[multinet.include.sys]types.h"
#include "multinet_root:[multinet.include.sys]socket.h"
#include "multinet_root:[multinet.include.sys]ioctl.h"
#include "multinet_root:[multinet.include.netinet]in.h"
#include "multinet_root:[multinet.include]netdb.h"
#include "multinet_root:[multinet.include.arpa]inet.h"
#include <pthread.h>
#include <dvidef.h>
 
#ifndef NULL                            /* Just in case this is not defined */
# define NULL       ((void*)0)
#endif
 
/*
 * Fixed-size job request record.  serverProcess() reads one of these from
 * the client socket with a single IO$_READVBLK of requestBufSize bytes, so
 * the field order and sizes are part of the client protocol.
 * NOTE(review): the per-field meanings below are inferred from the field
 * names only -- confirm against the client and "database process" code.
 */
struct request_buffer {
      char SOH;                 /* presumably a start-of-header marker byte -- confirm */
      char clientName[20];      /* presumably the requesting client's name -- confirm */
      char destMbx[15];         /* presumably the destination mailbox name -- confirm */
      int bornOnDate;           /* presumably the request creation time -- confirm units */
      int timeout;              /* presumably a request timeout -- confirm units */
      int receiveDate;          /* presumably the time the request was received -- confirm */
      char replyDevice[20];     /* presumably the device replies are sent to -- confirm */
      char commandBody[512];    /* request payload */
      char EOT;                 /* presumably an end-of-transmission marker byte -- confirm */
    };
 
/*
 * Fixed-size job reply record.  serverProcess() reads one of these from the
 * reply mailbox channel with a single IO$_READVBLK of replyBufSize bytes.
 * The leading fields repeat those of struct request_buffer.
 * NOTE(review): the per-field meanings below are inferred from the field
 * names only -- confirm against the "database process" code.
 */
struct reply_buffer {
      char SOH;                 /* presumably a start-of-header marker byte -- confirm */
      char clientName[20];      /* presumably the requesting client's name -- confirm */
      char destMbx[15];         /* presumably the destination mailbox name -- confirm */
      int bornOnDate;           /* presumably the request creation time -- confirm units */
      int timeout;              /* presumably a request timeout -- confirm units */
      int receiveDate;          /* presumably the time the request was received -- confirm */
      char replyDevice[20];     /* presumably the device replies are sent to -- confirm */
      unsigned short replyMessageLen;   /* presumably the number of valid bytes in replyBody -- confirm */
      char replyBody[19928];    /* reply payload */
    };
 
/*
 * State for the asynchronous socket read of one client request.
 */
struct request_AST {
    struct request_buffer buf;  /* buffer the queued socket read fills */
    char isSet;                 /* set to 1 by serverProcess() just before it queues the read;
                                   presumably cleared by the completion AST -- confirm */
    unsigned short iosb[4];     /* I/O status block passed to sys$qio for the read */
};
 
/*
 * State for the asynchronous reply-mailbox read of one job result.
 */
struct reply_AST {
    struct reply_buffer buf;            /* buffer the queued mailbox read fills */
    unsigned short int clientSocket;    /* client socket, stored by serverProcess() at start-up */
    char isSet;                         /* set to 1 by serverProcess() just before it queues the read;
                                           presumably cleared by the completion AST -- confirm */
    unsigned short iosb[4];             /* I/O status block passed to sys$qio for the read */
};
 
/*
 * Per-connection context.  serverProcess() keeps one of these per client
 * thread and passes its address as the AST parameter of both queued reads.
 */
struct global_AST {
    pthread_cond_t  cond;               /* condition variable serverProcess() waits on (timed wait) */
    struct request_AST requestBuf;      /* socket-read state */
    struct reply_AST replyBuf;          /* reply-mailbox-read state */
    char replyMbxName[30];              /* initialised to "NONE"; while it is "NONE" no mailbox read is queued */
    char replyDeviceName[10];           /* NOTE(review): not referenced in the visible code -- confirm use */
    unsigned short int replyMbxChan;    /* channel used for the reply-mailbox read */
};
 
/*
 * Byte sizes of struct request_buffer and struct reply_buffer.  Set once in
 * main() and used by serverProcess() as the read lengths for the socket and
 * reply-mailbox reads.
 */
unsigned int requestBufSize;
unsigned int replyBufSize;
 
/* Function prototypes */
void *serverProcess (void* arg);
void deleteMbx (char* mbxName);
void createMbx (char* commandBody);
void show_error (unsigned long int inStatus);
void processRequest (struct global_AST* requestBuf);
void processReply (struct global_AST* replyBuf);
char* now();
 
main()
 
    const int MAX_CLIENTS = 200;
    unsigned short serverSocket;
    unsigned short clientSocket;
    struct sockaddr_in addrServer;
    struct sockaddr_in addrClient;
    unsigned long int status;
    pthread_t thread;
    int clientCntr = 0;
    int on=1;
    int length;
    struct request_buffer tmpRequestBuf;
    struct reply_buffer   tmpReplyBuf;
 
    printf ("\nProcess started\n");
    printf ("  %s\n",now());
 
    requestBufSize = sizeof(tmpRequestBuf);
    replyBufSize   = sizeof(tmpReplyBuf);
 
    /********************************************************************
    * CREATE AN IP-FAMILY SOCKET
    ********************************************************************/
    serverSocket = socket(AF_INET, SOCK_STREAM, 0);
    if (serverSocket < 0) {
        socket_perror("acitcpserver: socket");
        exit(0x10000000);
    }
 
    /********************************************************************
    * Set the "REUSEADDR" option on this socket. This will allow us
    * to bind() to it EVEN if there already connections in progress
    * on this port number. Otherwise, we would get an "Address already
    * in use" error.
    ********************************************************************/
    if (setsockopt(serverSocket, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))
        < 0) {
        socket_perror("acitcpserver: setsockopt");
        exit(0x10000000);
    }
 
    /********************************************************************
    *  Create a "sockaddr_in" structure which describes the port we
    *  want to listen to. Address INADDR_ANY means we will accept
    *  connections to any of our local IP addresses.
    ********************************************************************/
    addrServer.sin_family = AF_INET;
    addrServer.sin_addr.s_addr = INADDR_ANY;
    addrServer.sin_port = htons(51111);
 
    /********************************************************************
    *  Bind to that address...
    ********************************************************************/
    if (bind(serverSocket, &addrServer, sizeof (addrServer)) < 0) {
        socket_perror("acitcpserver: bind");
        exit(0x10000000);
    }
 
    /********************************************************************
    *  Declare to the kernel that we want to listen for connections
    *  on this port, and that the kernel may queue up to MAX_CLIENTS such
    *  connections for us.
    ********************************************************************/
    if (listen(serverSocket, MAX_CLIENTS) < 0) {
        socket_perror("acitcpserver: listen");
        exit(0x10000000);
    }
 
    /********************************************************************
    *  Now go into a loop, waiting for Client connections and processing
    *  messages.
    ********************************************************************/
    for (;;) {
        /********************************************************************
        * Call accept to accept a new connection. This 'peels'
        * a connection off of the original socket and returns to us
        * a new socket to the connection. We could now close
        * down the original socket if we didn't want to handle
        * more connections.
        ********************************************************************/
        length = sizeof(addrClient);               /* Pass in the length */
        clientSocket = accept(serverSocket, &addrClient, &length);
        if (clientSocket < 0) {
            socket_perror("acitcpserver: accept");
            exit(0x10000000);
        }
 
        /********************************************************************
        * `addrClient' will be a sockaddr_in structure describing the
        * remote IP address (and port #) which the connection
        * was made from. Before we start to echo data, write a
        * string into the network describing this port.
        ********************************************************************/
        printf ("New connection accepted:\n")  ;
        printf ("  %s\n",now());
        printf ("  IPA         : %s\n", inet_ntoa(addrClient.sin_addr)) ;
        printf ("  Socket      : %d\n", clientSocket) ;
        printf ("  Port        : %u\n\n", htons(addrClient.sin_port)) ;
 
 
        pthread_create ( &thread, pthread_attr_default, serverProcess,
          (void*)clientSocket );
        clientCntr++;
    }
 
 
void *serverProcess (void *arg) {
 
    unsigned short clientSocket = (unsigned short)arg;
    struct global_AST globalBuf;
    int  msgLength;
    unsigned short int serverMbx;
    unsigned long int  status;
    struct dsc$descriptor_s name_d;
    pthread_t me;
 
    static pthread_mutex_t myLock;
    struct timespec abstime;
    struct timespec deltatime;
 
    deltatime.tv_sec = 30;
    deltatime.tv_nsec = 0;
 
    pthread_mutex_init(&myLock, pthread_mutexattr_default);
    pthread_mutex_lock(&myLock);
    pthread_cond_init(&globalBuf.cond, pthread_condattr_default);
 
    me = pthread_self();
    globalBuf.replyBuf.clientSocket = clientSocket;
    name_d.dsc$b_class   = DSC$K_CLASS_S;
    name_d.dsc$b_dtype   = DSC$K_DTYPE_T;
    globalBuf.requestBuf.isSet = 0;
    globalBuf.replyBuf.isSet = 0;
    strcpy(globalBuf.replyMbxName,"NONE");
 
    printf("Host thread started.\n");
    printf ("  %s\n",now());
    for (;;) {
       if (!globalBuf.requestBuf.isSet) {
          globalBuf.requestBuf.isSet = 1;
          status = sys$qio(0, clientSocket, IO$_READVBLK,
                &globalBuf.requestBuf.iosb,
                processRequest,
                &globalBuf,
                &globalBuf.requestBuf.buf,
                requestBufSize, 0, 0, 0, 0);
          if (!(status & STS$M_SUCCESS)) {
             printf ("  (sys_assign) Error setting socket read AST.\n") ;
             show_error (status);
             pthread_exit((void*)status);
          }
       }
       if (!globalBuf.replyBuf.isSet &&
             (strcmp(globalBuf.replyMbxName,"NONE") != 0)) {
          globalBuf.replyBuf.isSet = 1;
          status = sys$qio(0, globalBuf.replyMbxChan, IO$_READVBLK,
                &globalBuf.replyBuf.iosb,
                processReply,
                &globalBuf,
                &globalBuf.replyBuf.buf,
                replyBufSize, 0, 0, 0, 0);
          if (!(status & STS$M_SUCCESS)) {
             printf ("  (sys_assign) Error setting reply MBX read AST.\n") ;
             show_error (status);
             pthread_exit((void*)status);
          }
       }
       printf("\n");
       while (globalBuf.requestBuf.isSet == 1 &&
            (globalBuf.replyBuf.isSet == 1 ||
            (strcmp(globalBuf.replyMbxName,"NONE") == 0))) {
          status = pthread_get_expiration_np(&deltatime, &abstime);
          status = pthread_cond_timedwait(&globalBuf.cond,
                &myLock,
                &abstime);
       }
 
       /******************************************


The Answer is :

 
  Please apply the available mandatory ECO kits for OpenVMS, for
  TCP/IP Services, and any kits relevant to DECthreads and C.
 
  General SMP coding information is available in topic (1661),
  and in topics referenced there.
 
  Please then contact the customer support center for help resolving this
  particular apparent-deadlock matter -- this question is beyond
  what the OpenVMS Wizard can reasonably hope to discuss or debug
  here.
 
 

answer written or last revised on ( 13-AUG-2004 )

» close window