ó
0
Dc           @   sÝ  d  Z  y e Wn e k
 r- d Z d Z n Xd d l Z d d l Z d d l Z d d l Z d d l m	 Z	 d d l
 m Z m Z m Z m Z m Z d Z d Z e j d	 ƒ Z d
 „  Z e e e f Z d „  Z d „  Z d d e e e d d „ Z d d$ d „  ƒ  YZ d d% d „  ƒ  YZ d d& d „  ƒ  YZ d d' d „  ƒ  YZ d d( d „  ƒ  YZ d e f d „  ƒ  YZ d e f d „  ƒ  YZ  d d) d „  ƒ  YZ! d e" f d „  ƒ  YZ# d  e" f d! „  ƒ  YZ$ d" e j f d# „  ƒ  YZ% d S(*   s\  HTML table parsing.

ClientTable is a Python module for generic HTML table parsing.  It is
most useful when used in conjunction with other parsers (htmllib or
HTMLParser, regular expressions, etc.), to divide up the parsing work
between your own code and ClientTable.

RFC 1866: HTML 2.0
RFC 1942: HTML Tables
HTML 4.01 Specification, W3C Recommendation 24 December 1999

Requires Python 2.2.


Copyright 2002-2003 John J. Lee <jjl@pobox.com>

This code is free software; you can redistribute it and/or modify it
under the terms of the MIT License (see the file COPYING included with
the distribution).

i   i    iÿÿÿÿN(   t
   entitydefs(   t
   StringTypet   UnicodeTypet   IntTypet   LongTypet	   FloatTypes   0.0.1ai   s   \s+c         C   s   t  j d |  ƒ S(   Nt    (   t   WHITESPACE_REt   sub(   t   text(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   collapse_whitespaceO   s    c         C   s;   y |  d Wn( t  t f k
 r& d St k
 r6 n Xd S(   s"   Test whether object is a sequence.i    i   (   t	   TypeErrort   KeyErrort
   IndexError(   t   object(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt
   issequenceS   s    c         C   s&   t  |  t ƒ s t  |  t ƒ r" t St S(   N(   t
   isinstanceR   R   t   Truet   False(   R   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   isstringlike]   s    c   
      C   s‘   t  | | | | | | ƒ } xN |  j t ƒ } y | j | ƒ Wn t k
 rR Pn Xt | ƒ t k r Pq q x | j D] }	 |	 j ƒ  qv W| j S(   sƒ  Parse HTML tables and return a list of HTMLTable objects.

    file: file object
    nr_to_parse: stop after parsing this many tables; negative means parse all
     of them
    single_span: return cells that span multiple columns or rows only
     once, rather than once for every row / column they span
    strip_tags: remove HTML tags from cell contents
    collapse_whitespace: collapse consecutive whitespace characters (anything
     matching r"\s")to a single space
    recode_entities: recode HTML entities according to this dict

    Note that passing in the return value of urllib2.urlopen here as the file
    argument is fine.

    (   t   TableParsert   readt   CHUNKt   feedt   ParseFinishedt   lent   tablest   fixup(
   t   filet   nr_to_parset   nr_toplevel_to_parset   single_spant
   strip_tagsR
   t   recode_entitiest   tpt   datat   table(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt	   ParseFileb   s"    	 t	   HTMLTablec           B   s§   e  Z d  Z d Z e d „ Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d „  Z d „  Z d	 „  Z d
 „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   s€  Represents (surprise!) an HTML table.

    At the moment, tables may not be nested.

    HTMLTable instances are iterators over TableRows.  This includes any
    header rows.

    Public attributes: headers_row, headers_col.

    headers_row is a TableRow instance used to index columns in the table.
    headers_col is a TableColumn instance used to index columns in the table.

    c         C   s   | |  _  g  |  _ g  |  _ d  S(   N(   t   _single_spant   _datat   _sub_tables(   t   selfR   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __init__œ   s    		c         C   s   |  j  j | ƒ d  S(   N(   R(   t   append(   R*   t   row(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   push_row¡   s    c         C   s   |  j  j | ƒ d  S(   N(   R(   R,   (   R*   R$   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt
   push_table¤   s    c         C   s   | |  _  d  S(   N(   t	   _max_cols(   R*   t   max_cols(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   set_max_cols§   s    c         C   s  x |  j  D] } | j ƒ  q
 Wi  } t |  j  ƒ } d  g | } xô t | ƒ D]æ } |  j  | } t | d ƒ r› | } t ƒ  } | j | ƒ | |  j  | <n[ xX t t | ƒ ƒ D]D } | | }	 t |	 d ƒ rÓ q® n  | j |	 ƒ rè q® n  d  | |	 <q® W|  j	 t | ƒ }
 x' |
 d k r2| j
 d  ƒ |
 d 8}
 qWqM Wd  |  _ x' |  j  D] } | j rJ| |  _ PqJqJWx |  j  D] } |  j | _ qtWt |  j  d ƒ } |  j  } x8 t | ƒ D]* } |  j | ƒ r³|  j | ƒ |  _ q³q³Wx |  j D] } | j ƒ  qëWd  S(   Nt   is_tablei    i   (   R(   R   R   t   Nonet   ranget   hasattrt   TableRowR/   t   has_keyR0   t	   push_cellt   headers_rowt	   is_headert   _col_is_headert   _col_from_indext   headers_colR)   (   R*   t   objt
   cells_donet   nr_rowst   rowst   iR-   R$   t   jt   cellt   to_padt   nr_colsR#   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR   ª   sJ    	
  				c         C   sN   xG |  j  D]< } y | | j } Wn t k
 r5 t SX| d k r
 t Sq
 Wt S(   Nt   th(   R(   t   element_typet   AttributeErrorR   R   (   R*   t   colnrR-   t   el(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR<   á   s    c         C   sm   t  |  j | ƒ ƒ } xG |  j D]< } | | } t | d ƒ rN | j | ƒ q | j | ƒ q W| j ƒ  | S(   NR3   (   t   TableColumnR<   R(   R6   R/   R9   R   (   R*   RK   t   tcR-   R?   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR=   î   s    

c         C   s$   t  | ƒ r |  j | ƒ S|  j | S(   N(   R   t   get_col_by_nameR(   (   R*   t   key(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __getitem__ù   s    c         C   s:   |  j  d k r t d ‚ n  |  j  j | ƒ } |  j | ƒ S(   s<   Get column by finding its name in the headers_row attribute.s   no header row has been setN(   R:   R4   R   t   indexR=   (   R*   t   colnameRC   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRO   þ   s    c         C   s8   |  j  d k r t d ‚ n  |  j  j | ƒ } |  j | S(   s9   Get row by finding its name in the headers_col attribute.s   no header column has been setN(   R>   R4   R   RR   R(   (   R*   t   rownameRC   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   get_row_by_name  s    c         C   sa   t  ƒ  } xG |  j D]< } | | } t | d ƒ rB | j | ƒ q | j | ƒ q W| j ƒ  | S(   s   Get column by integer index.R3   (   RM   R(   R6   R/   R9   R   (   R*   RK   RN   R-   R?   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   get_col_by_nr  s    	

c         C   s   |  j  | S(   s   Get row by integer index.(   R(   (   R*   t   rownr(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   get_row_by_nr  s    c         C   s   t  |  j ƒ S(   s&   Return iterator over columns of table.(   t   iteratorRV   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   col_iter  s    c         C   s   t  |  j ƒ S(   N(   RY   RX   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __iter__!  s    c         C   sK   g  } x" |  j  D] } | j d | ƒ q Wd |  j j t j | d ƒ f S(   Ns       %ss   %s[
%s]s   
(   R(   R,   t	   __class__t   __name__t   stringt   join(   R*   t   repR-   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __str__#  s    c         C   s   t  |  j ƒ S(   N(   R   R(   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __len__)  s    N(   R]   t
   __module__t   __doc__R4   R3   R   R+   R.   R/   R2   R   R<   R=   RQ   RO   RU   RV   RX   RZ   R[   Ra   Rb   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR&      s$   				7										RY   c           B   s#   e  Z d  „  Z d „  Z d „  Z RS(   c         C   s   d |  _  | |  _ d S(   s  
        index_fn: function behaving like __getitem__ for simple sequence
         object (ie. taking integer argument and returning a corresponding
         object, and raising IndexError if argument is out-of-bounds; valid
         indices must be consecutive)
        i    N(   t   _iterator__it   _iterator__index_fn(   R*   t   index_fn(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+   .  s    	c         C   s   |  S(   N(    (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR[   7  s    c         C   sC   y |  j  |  j ƒ } Wn t k
 r/ t ‚ n X|  j d 7_ | S(   Ni   (   Rf   Re   R   t   StopIteration(   R*   t   r(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   next8  s    
(   R]   Rc   R+   R[   Rj   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRY   -  s   			t   Cellholdc           B   s>   e  Z d  d d d „ Z d „  Z d „  Z d „  Z d „  Z RS(   t   tdi   c         C   sv   t  | ƒ s t d ‚ n  t | t ƒ s3 t d ‚ n  t | t ƒ sN t d ‚ n  | |  _ | |  _ | |  _ | |  _ d  S(   Ns)   a string-like object is required for datas"   an integer is required for rowspans"   an integer is required for colspan(   R   R   R   t   NumericTypesR#   RI   t   rowspant   colspan(   R*   R#   RI   Rn   Ro   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+   B  s    			c         C   s?   t  | t ƒ r( |  j | j k r; d Sn |  j | k r; d Sd S(   Ni    i   (   R   Rk   R#   (   R*   t   other(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __cmp__N  s      c         C   s   |  j  S(   N(   R#   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRa   T  s    c         C   sN   |  j  d k s |  j d k r7 d |  j  |  j f } n d } d | |  j f S(   Ni   s   %dx%d t    s   <%sCellhold[%s]>(   Rn   Ro   R#   (   R*   t   span(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __repr__U  s    c         C   s
   t  |  ƒ S(   N(   t   id(   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __hash__\  s    (   R]   Rc   R+   Rq   Ra   Rt   Rv   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRk   A  s   			t   Cellc           B   sD   e  Z d  Z d d d d „ Z d „  Z d „  Z d „  Z d „  Z RS(   s²   A single cell of an HTML table.

    Note that a single cell may span many rows or columns (or both).

    Public readable attributes: data, rowspan, colspan, element_type.

    Rl   i   c         C   sv   t  | ƒ s t d ‚ n  t | t ƒ s3 t d ‚ n  t | t ƒ sN t d ‚ n  | |  _ | |  _ | |  _ | |  _ d S(   s  
        data: string-lke object: contents of cell
        element_type: HTML element type; should be "td" or "th"
        rowspan: horizontal span of cell (nr. of rows occupied by cell)
        rowspan: horizontal span of cell (nr. of columns occupied by cell)

        s)   a string-like object is required for datas"   an integer is required for rowspans"   an integer is required for colspanN(   R   R   R   Rm   R#   RI   Rn   Ro   (   R*   R#   RI   Rn   Ro   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+   g  s    				c         C   s?   t  | t ƒ r( |  j | j k r; d Sn |  j | k r; d Sd S(   Ni    i   (   R   Rw   R#   (   R*   Rp   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRq   {  s      c         C   s   |  j  S(   N(   R#   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRa     s    c         C   sN   |  j  d k s |  j d k r7 d |  j  |  j f } n d } d | |  j f S(   Ni   s   %dx%d Rr   s   <%sCell[%s]>(   Rn   Ro   R#   (   R*   Rs   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRt   ‚  s    c         C   s
   t  |  ƒ S(   N(   Ru   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRv   ‰  s    (   R]   Rc   Rd   R+   Rq   Ra   Rt   Rv   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRw   _  s   			t   TableSeqc           B   st   e  Z d  Z e d „ Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d „  Z d	 „  Z d
 „  Z d „  Z RS(   s1   Abstract base class for TableRow and TableColumn.c         C   s1   | r t  |  _ n	 t |  _ d  |  _ g  |  _ d  S(   N(   R   R;   R   R4   t   headersR(   (   R*   R;   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+   Ž  s
     		c         C   s   d  S(   N(    (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR   ”  s    c         C   s5   t  | t ƒ s! t | j j ƒ ‚ |  j j | ƒ d  S(   N(   R   R&   t   AssertionErrorR\   R]   R(   R,   (   R*   R$   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR/   –  s    !c         C   sJ   g  } x$ |  j  D] } | j t | ƒ ƒ q Wd |  j j d j | ƒ f S(   Ns   %s[%s]s   , (   R(   R,   t   reprR\   R]   R_   (   R*   R`   t   line(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRa   š  s    c         C   s   | |  j  k S(   N(   R(   (   R*   t   item(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   __contains__Ÿ  s    c         C   s   t  |  j ƒ S(   N(   R   R(   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRb      s    c         C   s   |  j  | S(   N(   R(   (   R*   RC   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRQ   ¡  s    c         C   s8   t  | t ƒ s! t | j j ƒ ‚ |  j j | | ƒ d  S(   N(   R   Rw   Rz   R\   R]   R(   t   insert(   R*   RC   R}   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR   ¢  s    !c         C   sA   t  | t ƒ s! t | j j ƒ ‚ | t _ |  j j | | ƒ d  S(   N(   R   Rw   Rz   R\   R]   R#   R(   R   (   R*   RC   R}   R^   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt
   insertdata¥  s    !	c         C   s   |  j  j | ƒ S(   N(   R(   t   count(   R*   R}   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR   ª  s    c         C   s   |  j  j | ƒ S(   N(   R(   RR   (   R*   R}   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRR   «  s    (   R]   Rc   Rd   R   R+   R   R/   Ra   R~   Rb   RQ   R   R€   R   RR   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRx   Œ  s   									R7   c           B   s>   e  Z d  Z e d „ Z d „  Z d „  Z d „  Z d „  Z RS(   sŽ   Row of an HTML table.

    Indexing with a string gets a cell using the headers_row attribute.
    Indexing with an integer gets a cell.

    c         C   s   t  j |  | ƒ d  |  _ d  S(   N(   Rx   R+   R4   R:   (   R*   R;   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+   ¸  s    c         C   sf   | d  k	 r@ t | t ƒ s- t | j j ƒ ‚ |  j j | ƒ n" | d  k sR t ‚ |  j j | ƒ d  S(   N(   R4   R   Rw   Rz   R\   R]   R(   R,   (   R*   RE   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR9   ¼  s
    !c         C   s$   t  | ƒ r |  j | ƒ S|  j | S(   N(   R   t   get_cell_by_nameR(   (   R*   RP   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRQ   Å  s    c         C   s   |  j  | S(   s   Get cell by integer index.(   R(   (   R*   RK   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   get_cell_by_nrÊ  s    c         C   s<   |  j  d k r t d ‚ n |  j  j | ƒ } |  j | Sd S(   s9   Get row by finding its name in the headers_row attribute.s   no header row has been setN(   R:   R4   R   RR   R(   (   R*   RS   RC   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR‚   Î  s    (	   R]   Rc   Rd   R   R+   R9   RQ   Rƒ   R‚   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR7   ±  s   				RM   c           B   s>   e  Z d  Z e d „ Z d „  Z d „  Z d „  Z d „  Z RS(   sŽ   Row of an HTML table.

    Indexing with a string gets a cell using the headers_col attribute.
    Indexing with an integer gets a cell.

    c         C   s   t  j |  | ƒ d  |  _ d  S(   N(   Rx   R+   R4   R>   (   R*   R;   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+   Þ  s    c         C   s   |  j  j | ƒ d  S(   N(   R(   R,   (   R*   RE   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR9   â  s    c         C   s$   t  | ƒ r |  j | ƒ S|  j | S(   N(   R   R‚   R(   (   R*   RP   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRQ   æ  s    c         C   s   |  j  | S(   s   Get cell by integer index.(   R(   (   R*   RW   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRƒ   ë  s    c         C   s<   |  j  d k r t d ‚ n |  j  j t ƒ } |  j | Sd S(   s<   Get column by finding its name in the headers_col attribute.s   no header column has been setN(   R>   R4   R   RR   RT   R(   (   R*   RS   RC   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR‚   ï  s    (	   R]   Rc   Rd   R   R+   R9   RQ   Rƒ   R‚   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyRM   ×  s   			t
   ParseStatec           B   s   e  Z d  „  Z RS(   c         C   s¦   g  |  _  d  |  _ d  |  _ t |  _ t |  _ t |  _ t |  _ t |  _	 d |  _
 d |  _ t |  _ t |  _ d |  _ d |  _ d |  _ d |  _ d |  _ d |  _ d  S(   Ni    i   (   t	   cell_dataR4   R$   t   tablerowR   t   in_tablet   in_trt   in_tht   in_tdt   in_colgroupt   colR-   t   in_header_rowt   have_colt
   tx_colspant
   tx_rowspant   tx_colst   colgroup_spant   colgroup_colst   col_cols(   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+     s$    																	(   R]   Rc   R+   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR„   
  s   t
   ParseErrorc           B   s   e  Z RS(    (   R]   Rc   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR•   -  s    R   c           B   s   e  Z RS(    (   R]   Rc   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR   .  s    R   c           B   s¶   e  Z d  Z d Z d d e e e d d „ Z d	 „  Z d
 „  Z d „  Z	 d „  Z
 d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z RS(   s   HTML Table parser.R$   t   trRl   RH   RŒ   t   colgroupiÿÿÿÿc         C   s×   t  j  j |  ƒ | d k r( t d ‚ n  | t k r@ t d ‚ n  | |  _ | |  _ | rd t |  _ n	 t |  _ | r t |  _ n	 t |  _ | rš t |  _	 n	 t |  _	 | |  _
 g  |  _ g  |  _ d |  _ t ƒ  |  _ d S(   s  
        nr_to_parse: only parse this number of tables, then stop; if negative,
         parse all tables in the document
        single_span: return cells that span multiple columns or rows only
         once, rather than once for every row / column they span
        collapse_whitespace: convert all consecutive whitespace characters
         to a single space
        # XXX name recode_entities is poor if this is a dict
        recode_entities: recode HTML entities according to this dict
        depth: internal use only

        i    s(   nr_toplevel_to_parse not yet implementeds   single_span not yet implementedN(   t
   HTMLParserR+   t   NotImplementedErrorR   t   _nr_to_parset   _nr_toplevel_to_parseR   R'   t   _strip_tagst   _collapse_whitespacet   _recode_entitiesR   t   _stackt   _depthR„   t   _ps(   R*   R   R   R   R    R
   R!   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR+   8  s.    				 	 	 					c         C   sõ   |  j  j r% | d k r% |  j ƒ  n  |  j st |  j  j sF |  j  j r‡ | |  j k r‡ |  j  j j |  j	 ƒ  ƒ q‡ n |  j  j j d ƒ y t
 |  d | ƒ } WnF t k
 ræ y t
 |  d | ƒ } Wn t k
 rØ qñ X| | ƒ n X| | ƒ d  S(   NRŒ   R   t   start_t   do_(   R¡   R‹   t   end_colgroupRœ   RŠ   R‰   t
   table_tagsR…   R,   t   get_starttag_textt   getattrRJ   (   R*   t   tagt   attrst   method(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   handle_starttagm  s     	c         C   sÊ   |  j  j r% | d k r% |  j ƒ  n  |  j sr |  j  j sF |  j  j r… | |  j k r… |  j  j j d | ƒ q… n |  j  j j d ƒ y t	 |  d | ƒ } Wn t
 k
 rµ d  } n X| rÆ | ƒ  n  d  S(   NRŒ   s   </%s>R   t   end_(   R¡   R‹   R¤   Rœ   RŠ   R‰   R¥   R…   R,   R§   RJ   R4   (   R*   R¨   Rª   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   handle_endtag‹  s    	
c         C   s¼   |  j  j rA |  j  j r% |  j ƒ  n  |  j  j rA |  j ƒ  qA n  |  j d k rY t ‚ n  |  j d 7_ |  j	 j
 |  j  ƒ t ƒ  |  _  t ƒ  |  j  _ t |  j  _ d |  j  _ |  j  _ d  S(   Ni    i   (   R¡   Rˆ   RŠ   t   end_tdR‰   t   end_thRš   R   R    RŸ   R,   R„   R&   R$   R   R‡   R-   RŒ   (   R*   R©   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   start_tableŸ  s      	c         C   s.  |  j  j s t d ‚ n  |  j  j rc |  j  j r= |  j ƒ  n  |  j  j rV |  j ƒ  n  |  j ƒ  n  |  j  j	 } |  j  j
 rŠ |  j  j } n |  j  j } | j | ƒ |  j d 8_ |  j d k rÔ |  j j | ƒ nG |  j j ƒ  |  _  |  j  j r|  j  j j | ƒ n |  j  j	 j | ƒ |  j d 8_ d  S(   Ns   end of TABLE before starti   i    (   R¡   R‡   R•   Rˆ   RŠ   R®   R‰   R¯   t   end_trR$   RŽ   R”   R‘   R2   R    R   R,   RŸ   t   popR†   R/   Rš   (   R*   R$   R1   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt	   end_table°  s*        c         C   su   |  j  j s t d ‚ n  d } x2 | D]* \ } } | d k r% t | d ƒ } q% q% Wt |  j  _ |  j  j | 7_ d  S(   Ns   COL outside of TABLEi   Rs   i
   (   R¡   R‡   R•   t   intR   RŽ   R“   (   R*   R©   Rs   t   kt   v(    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt	   start_colË  s      c         C   s‡   |  j  j s t d ‚ n  t |  j  _ t |  j  _ d |  j  _ d } x2 | D]* \ } } | d k rI t | d ƒ } qI qI W| |  j  _ d  S(   Ns   COL outside of TABLEi    i   Rs   i
   (	   R¡   R‡   R•   R   R‹   RŽ   R“   R´   R’   (   R*   R©   Rs   Rµ   R¶   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   start_colgroupÓ  s      c         C   sm   |  j  j s t d ‚ n  t |  j  _ |  j  j d k rQ |  j  j |  j  j 7_ n |  j  j |  j  j 7_ d  S(   Ns   end of COLGROUP before starti    (   R¡   R‹   R•   R   R“   R”   R’   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR¤   Ü  s    c         C   sŽ   |  j  j s t d ‚ n  |  j  j rc |  j  j r= |  j ƒ  n  |  j  j rV |  j ƒ  n  |  j ƒ  n  t	 ƒ  |  j  _
 t |  j  _ d |  j  _ d  S(   Ns$   start of TR element outside of TABLEi    (   R¡   R‡   R•   Rˆ   RŠ   R®   R‰   R¯   R±   R7   R†   R   RŒ   (   R*   R©   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   start_trå  s      c         C   sÍ   |  j  j s t d ‚ n  |  j  j r1 |  j ƒ  n  |  j  j rJ |  j ƒ  n  t |  j  j t	 |  j  j
 ƒ ƒ |  j  _ |  j  j |  j  j
 _ |  j  j j |  j  j
 ƒ t |  j  _ t |  j  _ |  j  j d 7_ d  S(   Ns"   end of TR element outside of TABLEi   (   R¡   R‡   R•   RŠ   R®   R‰   R¯   t   maxR‘   R   R†   R   R;   R$   R.   R   Rˆ   R-   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR±   ð  s      'c         C   s§   |  j  r3 d j |  j j ƒ j ƒ  } t | ƒ } n d j |  j j ƒ } |  j j r] d } n d } t | | |  j j |  j j	 ƒ } |  j j
 j | ƒ g  |  j _ d S(   s<   Consolidate data for single table entry (including headers).Rr   RH   Rl   N(   R   R_   R¡   R…   t   stripR
   R‰   Rw   R   R   R†   R9   (   R*   R#   RL   RE   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   _process_cell_dataý  s    	 	!c         C   sÛ   |  j  j s t d ‚ n  |  j  j r1 |  j ƒ  n  |  j  j rJ |  j ƒ  n  t |  j  _ t |  j  _ d |  j  _	 |  j  _
 x\ | D]T \ } } | d k r¯ t | d ƒ |  j  _	 n  | d k r t | d ƒ |  j  _
 q q Wd  S(   Ns   TH outside of TABLEi   Ro   i
   Rn   (   R¡   R‡   R•   R‰   R¯   RŠ   R®   R   R   R   R   R´   (   R*   R©   Rµ   R¶   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   start_th  s         c         C   sV   |  j  j s t d ‚ n  |  j  j s* t ‚ |  j ƒ  t |  j  _ |  j  j d 7_ d  S(   Ns   end of TH before starti   (   R¡   R‰   R•   R‡   Rz   R¼   R   RŒ   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR¯     s     
c         C   sÏ   |  j  j s t d ‚ n  |  j  j r1 |  j ƒ  n  |  j  j rJ |  j ƒ  n  t |  j  _ d |  j  _ |  j  _	 x\ | D]T \ } } | d k r£ t
 | d ƒ |  j  _ n  | d k rs t
 | d ƒ |  j  _	 qs qs Wd  S(   Ns   TD outside of TABLEi   Ro   i
   Rn   (   R¡   R‡   R•   RŠ   R®   R‰   R¯   R   R   R   R´   (   R*   R©   Rµ   R¶   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   start_td   s         c         C   sV   |  j  j s t d ‚ n  |  j  j s* t ‚ |  j ƒ  t |  j  _ |  j  j d 7_ d  S(   Ns   end of TD before starti   (   R¡   RŠ   R•   R‡   Rz   R¼   R   RŒ   (   R*   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR®   *  s     
c         C   sB   |  j  j s d  S|  j  j s( |  j  j r> |  j  j j | ƒ n  d  S(   N(   R¡   R‡   RŠ   R‰   R…   R,   (   R*   R#   (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   handle_data2  s     (   s   tables   trs   tds   ths   colR—   N(   R]   Rc   Rd   R¥   R   R4   R+   R«   R­   R°   R³   R·   R¸   R¤   R¹   R±   R¼   R½   R¯   R¾   R®   R¿   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyR   5  s.   0															
	(    (    (    (    (    (    (&   Rd   R   t	   NameErrorR   R˜   t   ret   copyR^   t   htmlentitydefsR    t   typesR   R   R   R   R   t   VERSIONR   t   compileR   R
   Rm   R   R   R4   R%   R&   RY   Rk   Rw   Rx   R7   RM   R„   t	   ExceptionR•   R   R   (    (    (    s9   /opt/htdocs/wiki/en/extensions/wikipdf/src/ClientTable.pyt   <module>   s@   , 
0(		
	% -%&3#