Hello, Background: I'm trying to recover text data from my ext3 filesystem that got seriously corrupted. I have dd'ed it to another machine, and I'm working on a spare disk, where I have Linux installed. The metadata seems corrupt, but the data is somewhat intact. I have managed to grep for specific strings in the filesystem, and can see at least parts of my data. I'm trying to do something similar to: http://tldp.org/HOWTO/Tips-HOWTO-3.html#ss3.2 so that I can extract text data, such as C source code. Approach so far: I have a shell script that uses dd to grab each block from the filesystem, and then I pass that to a C program, which determines if the block is binary or text. It copies the block into a buffer and strips certain characters that I'm not interested in. The buffer is then written out to my spare disk, for further analysis. This is done per block group. Currently I have about 32768 files in the output directory, per block group (total filesystem length is 48GB, ext3 block size is 4096 bytes). This is a PITA to look over, especially since there are in total 384 block groups to look at (at most 384 * 32768 files). :'( Theory: In _Understanding the Linux Kernel_ 3rd edition book, chapter 18, page 740 (The Ext2 and Ext3 Filesystems), the authors mention that block fragmentation is being considered for ext3. The book is based on kernel 2.6.11, and I was running 2.6.13.4 when the filesystem corruption happened. Questions: a) Is the book correct that there is no block fragmentation for files? i.e. one block only contains a fragment of *one file*. b) Would kernel 2.6.13.4 have had block fragmentation support? i.e. was ext3 block fragmentation added after kernel 2.6.11? c) If question 'a' above is true, is it a safe assumption that if a block has binary characters, the whole block belongs to a binary file? d) When the filesystem writes a file smaller than the block size to a block, does the remainder of the block get zeroed out?
I am hoping that question c and d would be true, because then I can disregard any block with even the slight bit of binary, and I would have a lot less files to deal with. Code is below. Yes, I know it's very ugly... :^) Many many thanks, Srdjan Todorovic ----------------- block_iterate.sh ---------------- #!/bin/bash # # Iterate through each block group, grabbing each block. A test is performed # on the block to see if it is binary or text data. Text data is kept. # BLOCK_OUTPUT='/data/rescue-blocks' GROUP_DIR='/data/grp_blocks' #BLK_GROUPS=384 # This many block groups BLK_GROUPS=0 START_GROUP=0 END_GROUP=$BLK_GROUPS BLK_SIZE=4096 # block size in bytes #FS_SIZE=51210592272 # Size of the file BYTE_PER_GROUP=134217728 # 128 MB FS_SIZE=$((2 * $BYTE_PER_GROUP)) BLOCKS=$(($FS_SIZE / $BLK_SIZE)) BLOCKS_PER_GROUP=$(($BYTE_PER_GROUP / $BLK_SIZE)) if [ $# -eq 1 ]; then INFILE="$1" fi if [ $# -eq 3 ]; then INFILE="$1" START_GROUP="$2" END_GROUP="$3" fi function pull_out_group() { echo "Trying to pull out group $1" LOCALSKIP=$(($BYTE_PER_GROUP / $((1024*1024)))) dd if="$INFILE" of="$GROUP_DIR/$1.group" skip=$(($1 * $LOCALSKIP)) bs=$((1024*1024)) count=128 } # $1 = block group number # $2 = block number function pull_out_block() { OUT_LOCAL="$BLOCK_OUTPUT/group_$1/group_$1_block_$2.block" # Write block using dd if [ ! -r "$GROUP_DIR/$1.group" ]; then return 1 fi dd if="$GROUP_DIR/$1.group" of="$OUT_LOCAL" skip=$(($2 * 4)) conv=notrunc bs=1024 count=4 sync if [ ! -s "$OUT_LOCAL" ]; then echo "File is 0 bytes..." rm "$OUT_LOCAL" return 0 fi # pass block to block_strings.php type=`block_strings "$OUT_LOCAL" "$OUT_LOCAL.out.txt"` if [ "$type" = "b" ]; then rm "$OUT_LOCAL.out.txt" return 1 fi rm "$OUT_LOCAL" echo "Written group $1 block $2 file." } # Iterate through the group block, extracting blocks, and running # pull_out_block() # $1 = group block function iterate_blocks() { for b in `seq 0 "$BLOCKS_PER_GROUP"`; do pull_out_block $1 $b done } if [ ! 
-r "$INFILE" ]; then echo "Input file ($INFILE) not readable or does not exist." exit 1 fi for i in `seq "$START_GROUP" "$END_GROUP"`; do echo "Starting block $i..." pull_out_group $i mkdir "$BLOCK_OUTPUT/group_$i/" # iterate per block, iterator calls pull_out_block iterate_blocks $i # delete the group file rm "$GROUP_DIR/$i.group" done ---------------- block_strings.c ----------------- #include <stdio.h> #include <stdlib.h> int main(int argc, char **argv) { FILE *in = NULL; FILE *out = NULL; unsigned char buffer[4096]; int flag = 0; int i; int s; unsigned char c; if(argc != 3){ exit(2); } in = fopen(argv[1], "r"); out = fopen(argv[2], "w"); if(!in){ exit(4); } if(!out){ fclose(in); exit(5); } s = fread(buffer, 1024, 4, in); /* process the buffer */ for(i = 0; i < 4096; i++){ c = *(buffer + i); if(c > 128) continue; if(c == 0x7f) continue; if(c >= 0x00 && c < 0x09 ) continue; if(c >= 0x0e && c < 0x20 ) continue; s = fwrite(buffer + i, 1, 1, out); flag = 1; } fclose(in); fclose(out); if(flag) printf("t\n"); else printf("b\n"); return flag; } -- Kernelnewbies: Help each other learn about the Linux kernel. Archive: http://mail.nl.linux.org/kernelnewbies/ FAQ: http://kernelnewbies.org/faq/