Re: (Updated) Table File Format

Поиск
Список
Период
Сортировка
От Hannu Krosing
Тема Re: (Updated) Table File Format
Дата
Msg-id 3A38C4BA.97812EDF@tm.ee
обсуждение исходный текст
Ответ на (Updated) Table File Format  ("Michael Richards" <miker@interchange.ca>)
Список pgsql-hackers
Michael Richards wrote:
> 
>  I need a little help on the format of the postgres tables.
> 
>  I've got this wonderfully corrupted database where just about everything is
>  fubar. I've tried a number of things to get it back using postgres and
>  related tools with no success. It looks like most of the data is there, but
>  there may be a small amount of corruption that's causing all kinds of
>  problems.

Find attached a python script that I used to get deleted (actually all
;) 
records from a database table.

It was not corrupted, just a simple programming error in client software 
had deleted more than needed. 

Fortunately it was not vacuumed so the data itself (info for web-based
paper 
postcard sending system) was there 

It works as-is only for my table as the field extraction code is
hard-coded, but 
it should be quite easy to modify for your needs

It worked 1 year ago probably on 6.4.x . I hope that the structure had
not 
changed since.

sendcard.py is the actual script used, pgtabdump.py is a somewhat
cleaned-up version


---------------
Hannu#!/usr/bin/python

import sys,os,struct,string

site_base = "/var/lib/pgsql/base"
db_name = "betest"
table_name = "test"

db_path = os.path.join(site_base,db_name)
table_path = os.path.join(db_path,table_name)

page_size = 8*1024

def strbits(s,len):   bits = []   while s:       c = s[0]       s = s[1:]       b = struct.unpack('B',c)[0]       for i
inrange(8):           if b & (1<<i): bits.append(1)           else: bits.append(0)   return
string.join(map(str,bits),'')[:len]


class table_page:   "class to represent a database table page"   def __init__(self,fd,page_nr):
fd.seek(page_nr*page_size)      self.rawdata = fd.read(page_size)       self.lower,\       self.upper,\
self.special,\      self.opaque = struct.unpack('HHHH',self.rawdata[:8])       self.item_pointers=[]
self.items=[]      for i in range(8,self.lower,4):           rawItemIdData = self.rawdata[i:i+4]
ItemIdData_I32= struct.unpack('L',rawItemIdData)[0]           if not ItemIdData_I32: break           lp_len =
int(ItemIdData_I32>> 17)           lp_flags = int((ItemIdData_I32 >> 15) & 3)           lp_off = int(ItemIdData_I32 &
0x7fff)
           self.item_pointers.append(hex(ItemIdData_I32),lp_off,lp_flags,lp_len)           rawItemData =
self.rawdata[lp_off:lp_off+lp_len]          t_oid  = struct.unpack('L', rawItemData[ 0: 4])[0]           t_001  =
struct.unpack('L',rawItemData[ 4: 8])[0]           t_002  = struct.unpack('L', rawItemData[ 8:12])[0]           t_xmin
=struct.unpack('L', rawItemData[12:16])[0]           t_xmax = struct.unpack('L', rawItemData[16:20])[0]
t_ctid= struct.unpack('LH', rawItemData[20:26])             t_fcnt = struct.unpack('H', rawItemData[26:28])[0]
t_xxx  = struct.unpack('2B', rawItemData[28:30])             t_doff = struct.unpack('B', rawItemData[30:31])[0]
 t_mask = strbits(rawItemData[31:t_doff],t_fcnt)   
 
#            t_mask = rawItemData[31:t_doff]           t_data = rawItemData[t_doff:]
self.items.append(t_oid,t_001,t_002,t_xmin,t_xmax,t_ctid,t_fcnt,t_xxx,t_doff,t_mask,t_data)  def __str__(self):
strval= [str((self.lower, self.upper, self.special, self.opaque))]
strval.append(string.join(map(str,self.item_pointers),'\n'))      strval.append(string.join(map(repr,self.items),'\n'))
     return string.join(strval,'\n------------------\n')  
 


if __name__=="__main__":   print '# dumping %s' % table_path   fd = open(table_name)   page = table_page(fd,0)   print
page
#!/usr/bin/python

import sys,os,struct,string

table_name = "sendcard"

page_size = 8*1024

def strbits(s,len):   bits = []   while s:       c = s[0]       s = s[1:]       b = struct.unpack('B',c)[0]       for i
inrange(8):           if b & (1<<i): bits.append(1)           else: bits.append(0)   bits =
string.join(map(str,bits[:len]),'')   return bits
 

"""
rapos=> \d sendcard
Table    = sendcard
+----------------------------------+----------------------------------+-------+
|              Field               |              Type                | Length|
+----------------------------------+----------------------------------+-------+
| card_id                          | int4 default nextval ( 'card_id_ |     4 |
| card_name                        | text                             |   var |
| recipient_name                   | text                             |   var |
| recipient_street_name            | text                             |   var |
| recipient_house_nr               | text                             |   var |
| recipient_apartment_nr           | text                             |   var |
| recipient_city                   | text                             |   var |
| recipient_village                | text                             |   var |
| recipient_state                  | text                             |   var |
| recipient_zip_code               | text                             |   var |
| sender_name                      | text                             |   var |
| sender_email                     | text                             |   var |
| message                          | text                             |   var |
| bank_ref_id                      | text                             |   var |
| delivery_nr                      | text                             |   var |
| delivery_route                   | text                             |   var |
| sender_remote_addr               | text                             |   var |
| card_cost                        | float8                           |     8 |
| cookie_id                        | int4 default nextval ( 'cookie_i |     4 |
| is_payed                         | text                             |   var |
| printing_date                    | datetime                         |     8 |
| delivery_date                    | date                             |     4 |
| payment_date                     | date                             |     4 |
| entered_at                       | timestamp default now ( )        |     4 |
| send_bill_to                     | text                             |   var |
+----------------------------------+----------------------------------+-------+
"""
create_statement = """\
create table savecard(    u_oid                  int,   u_xmin                 int,   u_xmax                 int,
card_id               int4,   card_name              text,   recipient_name         text,   recipient_street_name
text,  recipient_house_nr     text,   recipient_apartment_nr text,   recipient_city         text,   recipient_village
  text,   recipient_state        text,   recipient_zip_code     text,   sender_name            text,   sender_email
     text,   message                text,   bank_ref_id            text,   delivery_nr            text,
delivery_route        text,   sender_remote_addr     text 
 
);
copy savecard from stdin;
"""  
def bin2text(buf,offset):   offset = ((offset + 3)/4)*4   len = int(struct.unpack('L',buf[offset:offset+4])[0])   str =
buf[offset+4:offset+len]  newoff = offset+len   return str,newoff  
 
class table_page:   "class to represent a database table page"   def __init__(self,fd,page_nr):
fd.seek(page_nr*page_size)      self.rawdata = fd.read(page_size)       self.lower,\       self.upper,\
self.special,\      self.opaque = struct.unpack('HHHH',self.rawdata[:8])       self.item_pointers=[]
self.items=[]      for i in range(8,self.lower,4):           rawItemIdData = self.rawdata[i:i+4]
ItemIdData_I32= struct.unpack('L',rawItemIdData)[0]           if not ItemIdData_I32: break           lp_len =
int(ItemIdData_I32>> 17)           lp_flags = int((ItemIdData_I32 >> 15) & 3)           lp_off = int(ItemIdData_I32 &
0x7fff)
           self.item_pointers.append(hex(ItemIdData_I32),lp_off,lp_flags,lp_len)           rawItemData =
self.rawdata[lp_off:lp_off+lp_len]          t_oid  = struct.unpack('L', rawItemData[ 0: 4])[0]           t_001  =
struct.unpack('L',rawItemData[ 4: 8])[0]           t_002  = struct.unpack('L', rawItemData[ 8:12])[0]           t_xmin
=struct.unpack('L', rawItemData[12:16])[0]           t_xmax = struct.unpack('L', rawItemData[16:20])[0]
t_ctid= struct.unpack('LH', rawItemData[20:26])             t_fcnt = struct.unpack('H', rawItemData[26:28])[0]
t_xxx  = struct.unpack('2B', rawItemData[28:30])             t_doff = struct.unpack('B', rawItemData[30:31])[0]
 t_mask = strbits(rawItemData[31:t_doff],t_fcnt)              t_data = rawItemData[t_doff:]           id =
int(struct.unpack('L',t_data[:4])[0])          values = [int(t_oid),int(t_xmin),int(t_xmax),id]           texts = []
      offset = 4           for c in t_mask[1:17]:               if not int(c):                   values.append('')
            continue               text,offset = bin2text(t_data,offset)               values.append(repr(text))
  values=values+texts           self.items.append(values)
 
#            self.items.append(t_oid,t_001,t_002,t_xmin,t_xmax,t_ctid,t_fcnt,t_xxx,t_doff,t_mask,t_data)   def
__str__(self):
#        strval = [str((self.lower, self.upper, self.special, self.opaque))]
#        strval.append(string.join(map(str,self.item_pointers),'\n'))
#        strval.append(string.join(map(repr,self.items),'\n'))
#        return string.join(strval,'\n------------------\n')         rows = []       for record in self.items:
rows.append(string.join(map(str,record),'\t'))       return string.join(rows,'\n')
 

if __name__=="__main__":
#    print '# dumping %s' % table_name   print create_statement   fd = open(table_name)    page_nr = 0   while 1:
  try:              page = table_page(fd,page_nr)           print page       except:           break       page_nr =
page_nr+ 1   print '\\.\n' 

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Philip Hofstetter
Дата:
Сообщение: Thanks!
Следующее
От: Zeugswetter Andreas SB
Дата:
Сообщение: AW: Why vacuum?