淺讀Advanced Linux Programming的最後一章

講述Unix programming的好書不少，不過通常都厚度驚人，雖然閱讀實作這類書籍不會太費力氣，但我通常還是喜歡透過具體而微的小範例快速獲得大觀念(Top down)，然後再依據興趣與精力，決定如何深入細節(bottom up)。

Advanced Linux Programming(ALP)這本書就很對我的胃口。整本書共11章，前10章用很小的範例說明重要的Linux service與GNU開發環境，然後最後一章將大部分的知識以一個小型的http server來整合，跟著快速走過一遍，馬上能夠從門外漢變成看懂門道的巷內人。:-)

這篇blog主要簡單說明最後一章的http server的實作，算是為閱讀此書作個紀錄。

OK，這個http server的功能如下：

1. 可回應簡單的http GET request
2. 根據request，從模組動態產生網頁
3. 模組可以動態外掛進server
4. 同時處理多個http request
5. 此server不需superuser權限，不過有此權限可以看到更多資訊

讓我們從主程式開始看起:

/***********************************************************************
* Code listing from "Advanced Linux Programming," by CodeSourcery LLC  *
* Copyright (C) 2001 by New Riders Publishing                          *
* See COPYRIGHT for license information.                               *
***********************************************************************/

#include <assert.h>
#include <getopt.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

#include "server.h"

/* Description of long options for getopt_long.  Each entry holds the
   option name, whether it takes an argument (0 = none, 1 = required),
   a flag pointer (NULL here, so getopt_long returns the last field),
   and the equivalent short option character.  */

static const struct option long_options[] = {
  { "address",          1, NULL, 'a' },
  { "help",             0, NULL, 'h' },
  { "module-dir",       1, NULL, 'm' },
  { "port",             1, NULL, 'p' },
  { "verbose",          0, NULL, 'v' },
  /* getopt_long requires the array to end with an all-zero element;
     without it, matching an unknown long option reads past the end of
     the array.  */
  { NULL,               0, NULL, 0   },
};

/* Short-option string passed to getopt_long.  A colon after a letter
   means that option requires an argument.  */

static const char *const short_options = "a:hm:p:v";

/* printf-style template for the usage message.  The single %s is
   replaced with the program name.  */

static const char *const usage_template =
  "Usage: %s [ options ]\n"
  "  -a, --address ADDR        Bind to local address (by default, bind\n"
  "                              to all local addresses).\n"
  "  -h, --help                Print this information.\n"
  "  -m, --module-dir DIR      Load modules from specified directory\n"
  "                              (by default, use executable directory).\n"
  "  -p, --port PORT           Bind to specified port.\n"
  "  -v, --verbose             Print verbose messages.\n";

/* Write the usage summary and terminate the process.  A non-zero
   IS_ERROR selects stderr and exit status 1; zero selects stdout and
   exit status 0.  This function never returns.  */

static void print_usage (int is_error)
{
  FILE* stream = stdout;
  int exit_code = 0;

  if (is_error) {
    stream = stderr;
    exit_code = 1;
  }

  fprintf (stream, usage_template, program_name);
  exit (exit_code);
}

/* Program entry point.  Parses the command line into the option
   variables (program_name, verbose, module_dir, plus the local address
   and port), rejects stray arguments, and then calls server_run.
   Returns 0 if server_run ever returns.  */

int main (int argc, char* const argv[])
{
  struct in_addr local_address;
  uint16_t port;
  int next_option;

  /* Store the program name, which we'll use in error messages.  */
  program_name = argv[0];

  /* Set defaults for options.  Bind the server to all local addresses,
     and assign an unused port automatically.  */
  local_address.s_addr = INADDR_ANY;
  port = 0;
  /* Don't print verbose messages.  */
  verbose = 0;
  /* Load modules from the directory containing this executable.  */
  module_dir = get_self_executable_directory ();
  assert (module_dir != NULL);

  /* Parse options.  */
  do {
    next_option =
      getopt_long (argc, argv, short_options, long_options, NULL);
    switch (next_option) {
    case 'a':
      /* User specified -a or --address.  */
      {
        struct hostent* local_host_name;

        /* Look up the host name the user specified.  */
        local_host_name = gethostbyname (optarg);
        if (local_host_name == NULL
            || local_host_name->h_addrtype != AF_INET
            || local_host_name->h_length
               != (int) sizeof (local_address.s_addr)
            || local_host_name->h_addr_list[0] == NULL)
          /* Could not resolve the name to an IPv4 address.  */
          error (optarg, "invalid host name");
        else
          /* Host name is OK, so use it.  Copy the bytes instead of
             dereferencing a casted pointer: h_addr_list entries are
             char buffers with no alignment guarantee for an integer
             load.  The address is already in network byte order.  */
          memcpy (&local_address.s_addr, local_host_name->h_addr_list[0],
                  sizeof (local_address.s_addr));
      }
      break;

    case 'h':
      /* User specified -h or --help.  */
      print_usage (0);
      /* Not reached: print_usage exits.  */
      break;

    case 'm':
      /* User specified -m or --module-dir.  */
      {
        struct stat dir_info;
        char* dir_copy;

        /* Check that it exists.  */
        if (access (optarg, F_OK) != 0)
          error (optarg, "module directory does not exist");
        /* Check that it is accessible.  */
        if (access (optarg, R_OK | X_OK) != 0)
          error (optarg, "module directory is not accessible");
        /* Make sure that it is a directory.  */
        if (stat (optarg, &dir_info) != 0 || !S_ISDIR (dir_info.st_mode))
          error (optarg, "not a directory");
        /* It looks OK, so use it.  Keep the previous value if the copy
           cannot be allocated, so module_dir is never left NULL.  */
        dir_copy = strdup (optarg);
        if (dir_copy == NULL)
          error (optarg, "out of memory");
        else
          module_dir = dir_copy;
      }
      break;

    case 'p':
      /* User specified -p or --port.  */
      {
        long value;
        char* end;

        value = strtol (optarg, &end, 10);
        if (end == optarg || *end != '\0' || value < 0 || value > 65535)
          /* The argument was empty, contained non-digits, or does not
             fit in a 16-bit port number.  (strtol saturates to
             LONG_MIN/LONG_MAX on overflow, which the range test also
             rejects.)  */
          print_usage (1);
        /* The port number needs to be converted to network (big endian)
           byte order.  */
        port = htons ((uint16_t) value);
      }
      break;

    case 'v':
      /* User specified -v or --verbose.  */
      verbose = 1;
      break;

    case '?':
      /* User specified an unrecognized option.  */
      print_usage (1);
      /* Not reached: print_usage exits.  */
      break;

    case -1:
      /* Done with options.  */
      break;

    default:
      abort ();
    }
  } while (next_option != -1);

  /* This program takes no additional arguments.  Issue an error if the
     user specified any.  */
  if (optind != argc)
    print_usage (1);

  /* Print the module directory, if we're running verbose.  */
  if (verbose)
    printf ("modules will be loaded from %s\n", module_dir);

  /* Run the server.  */
  server_run (local_address, port);

  return 0;
}

很典型的文字模式程式寫法：透過getopt_long()獲取命令列參數，然後設定好相關選項變數，設定好後便把實際動作執行起來（server_run()）。有不少小程式可能會選擇自行parse命令列，不過getopt()/getopt_long()使用並不困難，學習一下總比用苦工刻好多了～

我們在繼續讀下去之前，想想看對於上述程式是否有更好的寫法？我想到的幾點可能改進方式：

1. 將選項變數group進一個struct env，避免global variable，這樣單獨測試會容易一點。
2. 選項對應的動作以table方式對應，方便未來新增選項。

OK，在修改前，讓我們繼續看下去，下一個實作就來看server_run()：

/***********************************************************************
* Code listing from "Advanced Linux Programming," by CodeSourcery LLC  *
* Copyright (C) 2001 by New Riders Publishing                          *
* See COPYRIGHT for license information.                               *
***********************************************************************/

#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <netinet/in.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <unistd.h>

#include "server.h"

/* HTTP status line and header for a successful request.  The page body
   is written separately after this text.  Declared const because it
   points at a string literal that must never be modified.  */

static const char* const ok_response =
  "HTTP/1.0 200 OK\n"
  "Content-type: text/html\n"
  "\n";

/* Complete HTTP response (status line, header, and HTML body)
   indicating that we didn't understand the request.  Declared const
   because it points at a string literal that must never be modified.
   Note that this is a pointer, not an array: use strlen, not sizeof,
   to obtain the number of bytes to send.  */

static const char* const bad_request_response =
  "HTTP/1.0 400 Bad Request\n"
  "Content-type: text/html\n"
  "\n"
  "<html>\n"
  " <body>\n"
  "  <h1>Bad Request</h1>\n"
  "  <p>This server did not understand your request.</p>\n"
  " </body>\n"
  "</html>\n";

/* printf-style template for a complete HTTP 404 response (status line,
   header, and HTML body).  The single %s is replaced with the requested
   URL.  Declared const because it points at a string literal that must
   never be modified.  */

static const char* const not_found_response_template =
  "HTTP/1.0 404 Not Found\n"
  "Content-type: text/html\n"
  "\n"
  "<html>\n"
  " <body>\n"
  "  <h1>Not Found</h1>\n"
  "  <p>The requested URL %s was not found on this server.</p>\n"
  " </body>\n"
  "</html>\n";

/* printf-style template for a complete HTTP 501 response (status line,
   header, and HTML body).  The single %s is replaced with the method
   name the client sent.  Declared const because it points at a string
   literal that must never be modified.  */

static const char* const bad_method_response_template =
  "HTTP/1.0 501 Method Not Implemented\n"
  "Content-type: text/html\n"
  "\n"
  "<html>\n"
  " <body>\n"
  "  <h1>Method Not Implemented</h1>\n"
  "  <p>The method %s is not implemented by this server.</p>\n"
  " </body>\n"
  "</html>\n";

/* Handler for SIGCHLD, to clean up child processes that have
   terminated.  */

static void clean_up_child_process (int signal_number)
{
  int status;
  wait (&status);
}

/* Process an HTTP "GET" request for PAGE, and send the results to the
   file descriptor CONNECTION_FD.

   PAGE must be a slash followed by a name with no further slashes.  The
   name plus ".so" is opened as a server module; on success a 200 header
   is written and the module generates the body.  Any other outcome
   (malformed page, name too long, module not loadable) produces a 404
   response.  */

static void handle_get (int connection_fd, const char* page)
{
  struct server_module* module = NULL;

  /* Make sure the requested page begins with a slash and does not
     contain any additional slashes -- we don't support any
     subdirectories.  */
  if (*page == '/' && strchr (page + 1, '/') == NULL) {
    char module_file_name[64];
    int name_length;

    /* The page name looks OK.  Construct the module name by appending
       ".so" to the page name.  */
    name_length = snprintf (module_file_name, sizeof (module_file_name),
                            "%s.so", page + 1);
    /* Only try to open the module if the whole name fit.  A truncated
       name would lose its ".so" suffix and refer to some other file, so
       treat an over-long page name as not found.  */
    if (name_length >= 0
        && (size_t) name_length < sizeof (module_file_name))
      module = module_open (module_file_name);
  }

  if (module == NULL) {
    /* Either the requested page was malformed, or we couldn't open a
       module with the indicated name.  Either way, return the HTTP
       response 404, Not Found.  */
    char response[1024];

    /* Generate the response message.  snprintf truncates it to fit.
       NOTE(review): PAGE appears to come straight from the client and
       is inserted into the HTML unescaped -- consider escaping it.  */
    snprintf (response, sizeof (response), not_found_response_template, page);
    /* Send it to the client.  */
    write (connection_fd, response, strlen (response));
  }
  else {
    /* The requested module was loaded successfully.  */

    /* Send the HTTP response indicating success, and the HTTP header
       for an HTML page.  */
    write (connection_fd, ok_response, strlen (ok_response));
    /* Invoke the module, which will generate HTML output and send it
       to the client file descriptor.  */
    (*module->generate_function) (connection_fd);
    /* We're done with the module.  */
    module_close (module);
  }
}

/* Handle a client connection on the file descriptor CONNECTION_FD.

   Reads the request, parses the request line into method, URL and
   protocol, drains the remaining header lines, and then answers with
   one of: 400 (unparsable request line or unknown protocol), 501
   (method other than GET), or whatever handle_get produces.  The
   descriptor is left open; the caller closes it.  */

static void handle_connection (int connection_fd)
{
  char buffer[256];
  ssize_t bytes_read;

  /* Read some data from the client.  */
  bytes_read = read (connection_fd, buffer, sizeof (buffer) - 1);
  if (bytes_read > 0) {
    /* Each field is as large as BUFFER, so the unbounded %s conversions
       below cannot overflow: no token can be longer than the
       NUL-terminated input they are scanned from.  */
    char method[sizeof (buffer)];
    char url[sizeof (buffer)];
    char protocol[sizeof (buffer)];
    int fields;

    /* Some data was read successfully.  NUL-terminate the buffer so
       we can use string operations on it.  */
    buffer[bytes_read] = '\0';
    /* The first line the client sends is the HTTP request, which is
       composed of a method, the requested page, and the protocol
       version.  Remember how many fields were actually found; the
       arrays are only valid if all three were.  */
    fields = sscanf (buffer, "%s %s %s", method, url, protocol);
    /* The client may send various header information following the
       request.  For this HTTP implementation, we don't care about it.
       However, we need to read any data the client tries to send.  Keep
       on reading data until we get to the end of the header, which is
       delimited by a blank line.  HTTP specifies CR/LF as the line
       delimiter.  */
    while (strstr (buffer, "\r\n\r\n") == NULL) {
      /* Carry the last three bytes over to the front of the buffer so
         that a terminator split across two reads is still found.  */
      size_t length = strlen (buffer);
      size_t kept = length < 3 ? length : 3;

      memmove (buffer, buffer + length - kept, kept);
      bytes_read = read (connection_fd, buffer + kept,
                         sizeof (buffer) - 1 - kept);
      /* Stop on end-of-file or error; otherwise the unchanged buffer
         would make this loop spin forever.  */
      if (bytes_read <= 0)
        break;
      buffer[kept + (size_t) bytes_read] = '\0';
    }
    /* Make sure the last read didn't fail.  If it did, there's a
       problem with the connection, so give up.  The caller closes the
       descriptor.  */
    if (bytes_read == -1)
      return;

    /* Check that the request line was complete and that the protocol
       is one we understand: HTTP versions 1.0 and 1.1.  */
    if (fields != 3
        || (strcmp (protocol, "HTTP/1.0") != 0
            && strcmp (protocol, "HTTP/1.1") != 0)) {
      /* We don't understand this request.  Report a bad response.
         bad_request_response is a pointer, so its length must come
         from strlen; sizeof would give the pointer size.  */
      write (connection_fd, bad_request_response,
             strlen (bad_request_response));
    }
    else if (strcmp (method, "GET") != 0) {
      /* This server only implements the GET method.  The client
         specified some other method, so report the failure.
         NOTE(review): METHOD is client-supplied and is inserted into
         the HTML unescaped -- consider escaping it.  */
      char response[1024];

      snprintf (response, sizeof (response),
                bad_method_response_template, method);
      write (connection_fd, response, strlen (response));
    }
    else
      /* A valid request.  Process it.  */
      handle_get (connection_fd, url);
  }
  else if (bytes_read == 0)
    /* The client closed the connection before sending any data.
       Nothing to do.  */
    ;
  else
    /* The call to read failed.  */
    system_error ("read");
}


/* Run the server: listen on LOCAL_ADDRESS and PORT, and fork one child
   process per accepted connection to handle it.  PORT is stored into
   the socket address unchanged, so it must already be in network byte
   order; 0 lets the system pick a port.  The accept loop never exits
   on its own.  */

void server_run (struct in_addr local_address, uint16_t port)
{
  struct sockaddr_in socket_address;
  int rval;
  struct sigaction sigchld_action;
  int server_socket;

  /* Install a handler for SIGCHLD that cleans up child processes that
     have terminated.  */
  memset (&sigchld_action, 0, sizeof (sigchld_action));
  sigchld_action.sa_handler = &clean_up_child_process;
  sigaction (SIGCHLD, &sigchld_action, NULL);

  /* Create a TCP socket.  */
  server_socket = socket (PF_INET, SOCK_STREAM, 0);
  if (server_socket == -1)
    system_error ("socket");
  /* Construct a socket address structure for the local address on
     which we want to listen for connections.  */
  memset (&socket_address, 0, sizeof (socket_address));
  socket_address.sin_family = AF_INET;
  socket_address.sin_port = port;
  socket_address.sin_addr = local_address;
  /* Bind the socket to that address.  The socket calls take a generic
     struct sockaddr pointer, so the IPv4 structure is cast
     explicitly.  */
  rval = bind (server_socket, (struct sockaddr*) &socket_address,
               sizeof (socket_address));
  if (rval != 0)
    system_error ("bind");
  /*  Instruct the socket to accept connections.  */
  rval = listen (server_socket, 10);
  if (rval != 0)
    system_error ("listen");

  if (verbose) {
    /* In verbose mode, display the local address and port number
       we're listening on.  */
    socklen_t address_length;

    /* Find the socket's local address.  */
    address_length = sizeof (socket_address);
    rval = getsockname (server_socket, (struct sockaddr*) &socket_address,
                        &address_length);
    assert (rval == 0);
    /* Print a message.  The port number needs to be converted from
       network byte order (big endian) to host byte order.  */
    printf ("server listening on %s:%d\n",
            inet_ntoa (socket_address.sin_addr),
            (int) ntohs (socket_address.sin_port));
  }

  /* Loop forever, handling connections.  */
  while (1) {
    struct sockaddr_in remote_address;
    socklen_t address_length;
    int connection;
    pid_t child_pid;

    /* Accept a connection.  This call blocks until a connection is
       ready.  It also fills in the address of the connecting peer.  */
    address_length = sizeof (remote_address);
    connection = accept (server_socket, (struct sockaddr*) &remote_address,
                         &address_length);
    if (connection == -1) {
      /* The call to accept failed.  */
      if (errno == EINTR)
        /* The call was interrupted by a signal.  Try again.  */
        continue;
      else
        /* Something else went wrong.  */
        system_error ("accept");
    }

    /* We have a connection.  Print a message if we're running in
       verbose mode, using the peer address accept just returned.  */
    if (verbose)
      printf ("connection accepted from %s\n",
              inet_ntoa (remote_address.sin_addr));

    /* Fork a child process to handle the connection.  */
    child_pid = fork ();
    if (child_pid == 0) {
      /* This is the child process.  It shouldn't use stdin or stdout,
         so close them.  */
      close (STDIN_FILENO);
      close (STDOUT_FILENO);
      /* Also this child process shouldn't do anything with the
         listening socket.  */
      close (server_socket);
      /* Handle a request from the connection.  We have our own copy
         of the connected socket descriptor.  */
      handle_connection (connection);
      /* All done; close the connection socket, and end the child
         process.  */
      close (connection);
      exit (0);
    }
    else if (child_pid > 0) {
      /* This is the parent process.  The child process handles the
         connection, so we don't need our copy of the connected socket
         descriptor.  Close it.  Then continue with the loop and
         accept another connection.  */
      close (connection);
    }
    else
      /* Call to fork failed.  */
      system_error ("fork");
  }
}

server_run()基本上只做了幾件事：

1. 開啟一個TCP socket，然後bind() -> listen() -> accept()，accept()會block process，直到有一個connection進來。
2. connection進來後，便fork()出一個child，在child中關閉標準輸入與標準輸出（child不應該使用它們）以及listening socket，然後把connection的fd直接交給handle_connection()處理，後續的程式（包含module）都是對這個fd進行讀寫。
3. fork()後，parent回到accept()，等待下一個connection建立。

當然，除了為每一個connection建立一個process去處理以外，我們也可以考慮用thread來進行多工，thread可以考慮user thread或kernel thread。對socket fd的多工處理，也可以考慮用select()/poll()/epoll()。不同多工模式的優缺點在更完整的書籍裏面有完整的討論。

由於我們的目的是快速獲得http server的處理概觀，所以我們先不特別考慮各種設計上的取捨，只先挑選一種可work的版本即可。OK，handle_connection()作了哪些事？很簡單，就是根據http的協定，處理第一筆request，並且目前只處理GET，如果一切如預期，接著就將實際工作遞交給handle_get()。

handle_get()會根據網址部份的最後一個欄位去判斷要對應處理的模組，這個function寫得頗漂亮，依賴的假設僅僅在於module在此處所需的介面，將實際工作以callback的方式讓模組自行處理。這種手法在C語言中算是最重要的抽象化手法了：透過callback，實現間接性。

所以，接著就讓我們來看看這種間接性是如何產生的吧：

/***********************************************************************
* Code listing from "Advanced Linux Programming," by CodeSourcery LLC  *
* Copyright (C) 2001 by New Riders Publishing                          *
* See COPYRIGHT for license information.                               *
***********************************************************************/

#include <dlfcn.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "server.h"

/* Directory from which server modules are loaded.  module_open joins
   this path and the module file name with a '/'.  */
char* module_dir;

struct server_module* module_open (const char* module_name)
{
  char* module_path;
  void* handle;
  void (* module_generate) (int);
  struct server_module* module;

  /* Construct the full path of the module shared library we'll try to
     load.  */
  module_path = 
    (char*) xmalloc (strlen (module_dir) + strlen (module_name) + 2);
  sprintf (module_path, "%s/%s", module_dir, module_name);

  /* Attempt to open MODULE_PATH as a shared library.  */
  handle = dlopen (module_path, RTLD_NOW);
  free (module_path);
  if (handle == NULL) {
    /* Failed; either this path doesn't exist, or it isn't a shared
       library.  */
    return NULL;
  }

  /* Resolve the module_generate symbol from the shared library.  */
  module_generate = (void (*) (int)) dlsym (handle, "module_generate");
  /* Make sure the symbol was found.  */
  if (module_generate == NULL) {
    /* The symbol is missing.  While this is a shared library, it
       probably isn't a server module.  Close up and indicate failure.  */
    dlclose (handle);
    return NULL;
  }

  /* Allocate and initialize a server_module object.  */
  module = (struct server_module*) xmalloc (sizeof (struct server_module));
  module->handle = handle;
  module->name = xstrdup (module_name);
  module->generate_function = module_generate;
  /* Return it, indicating success.  */
  return module;
}

/* Unload MODULE and release everything module_open allocated for it:
   the shared library handle, the copied name, and the object itself.
   MODULE must not be used afterwards.  */

void module_close (struct server_module* module)
{
  void* library = module->handle;
  char* name = (char*) module->name;

  dlclose (library);
  free (name);
  free (module);
}

雖然這部份的程式只有短短6x行，但幾乎可以算是最常見的C語言技法了。其實從另一個角度看，這就是用C來實現基本的物件導向。在這個例子中，實現的是物件導向中的interface：module本身無法具現任何實作，但提供一個一致介面給實作者定義。用C語言實現物件導向的其他功能也是可以作到的，但那就是另一個有趣話題了。 :-)

上述程式中用到了dl程式庫(dynamic linking)，這是在glibc中提供的一組API，可以讓我們輕鬆地使用.so中的symbol，並在執行期載入其功能。當然啦，並不是一定非得要.so (position-independent code)才能實現動態載入，一般的.o檔也能夠以載入時重定位的方式實現，實際範例可以參考jserv大俠的"親手打造Dynamic Library Loader"，或是Linux的.ko載入機制。

如果你非常好奇執行期如何載入object files以及其引用symbol的流程，可參考"程式設計師的自我修養"一書，以及eli大俠的好文章。

好了，還缺什麼呢？嗯，只缺實際提供服務的module了！！ALP提供了好幾個module的實作，我們先挑一個最簡單的來看：

/***********************************************************************
* Code listing from "Advanced Linux Programming," by CodeSourcery LLC  *
* Copyright (C) 2001 by New Riders Publishing                          *
* See COPYRIGHT for license information.                               *
***********************************************************************/

#include <assert.h>
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include "server.h"

/* printf-style template for the HTML page this module generates.  The
   single %s is replaced with the formatted time; the meta tag asks the
   browser to reload the page every 5 seconds.  Declared const because
   it points at a string literal that must never be modified.  */

static const char* const page_template =
  "<html>\n"
  " <head>\n"
  "  <meta http-equiv=\"refresh\" content=\"5\">\n"
  " </head>\n"
  " <body>\n"
  "  <p>The current time is %s.</p>\n"
  " </body>\n"
  "</html>\n";

/* Module entry point: write an HTML page showing the current local
   time (HH:MM:SS) to the file descriptor FD.

   FD itself is left open for the caller.  If the time cannot be
   formatted or a stream cannot be created, nothing is written.  */

void module_generate (int fd)
{
  struct timeval tv;
  time_t now;
  struct tm* ptm;
  char time_string[40];
  int stream_fd;
  FILE* fp;

  /* Obtain the time of day, and convert it to a tm struct.  */
  gettimeofday (&tv, NULL);
  now = tv.tv_sec;
  ptm = localtime (&now);
  /* Format the time, down to a single second.  */
  if (ptm == NULL
      || strftime (time_string, sizeof (time_string), "%H:%M:%S", ptm) == 0)
    return;

  /* Create a stream on a duplicate of the client descriptor.  Closing
     the stream then releases the FILE object and the duplicate without
     closing FD, which still belongs to the caller.  */
  stream_fd = dup (fd);
  if (stream_fd == -1)
    return;
  fp = fdopen (stream_fd, "w");
  if (fp == NULL) {
    close (stream_fd);
    return;
  }
  /* Generate the HTML output.  */
  fprintf (fp, page_template, time_string);
  /* All done; fclose flushes the buffered output to the client.  */
  fclose (fp);
}

這個module提供目前伺服器時間的資訊。我們可以看到，module本身非常獨立，僅僅需要對fd做基本讀寫即可，漂亮吧？

閱讀這組程式給我們帶來什麼收穫呢？嗯，或許有人會認為這根本就是太簡單的程式了，不值一提。但我認為仔細去思考每一個環節是很重要的，尤其是簡單的表象背後所隱藏的意義。舉例來說，從這組程式碼，我們至少要學習到：

1. Unix的Everything is file的觀念是如何被應用。
2. 動態module如何實現
3. 如何以C語言實現出大程式所需的抽象性。
4. 程式架構的決定性重點在於分析與設計，而非所用語言。

綜合這4點，我們便可用幾百行程式碼實作出有趣的功能，這可不是憑空而來的。不信？你可以試試看寫一遍同樣功能的程式，看看結果如何？ :P

寫這篇文章是為了向已故的C語言與Unix之父 - Dennis Ritchie學習簡約之道。Really fun!! :-)

軟體學徒forever

搜尋此網誌

淺讀Advanced Linux Programming的最後一章

標籤

留言

張貼留言

這個網誌中的熱門文章

誰在呼叫我？不同的backtrace實作說明好文章

淺讀Linux root file system初始化流程

kernel panic之後怎麼辦？