> On May 26, 2021, at 10:04 PM, Rob Sargent <robjsargent@xxxxxxxxx> wrote:
>
>> On May 26, 2021, at 4:37 PM, Ian Harding <harding.ian@xxxxxxxxx> wrote:
>>
>> There is an option to send the logs to cloudwatch which makes it less awful to look at them.
>
> I have that but precious little of interest there. Lots of autovac, a smattering of hints to increase wal size!? I have yet to spot anything which corresponds to the “I/O failure” which the middleware gets.
>
> I don’t have query logging on, but I do see reports from my psql session fat-fingering.
>
> As to the logs UI, the search is pretty feeble; I don’t understand why there are four channels of logs; the graphs are wearing the same rose-coloured glasses as the logs.
> And 24 hours without a peep from AWS support. (I don’t call mailing me what I sent them “contact”.)
>
> My guess right now is that the entire tomcat connection pool is in a single transaction? That’s the only way the tables could disappear. I am making separate calls to JDBC getConnection() for each doPost.

We used Aurora (AWS hosted Postgres) and I agree that Cloudwatch search is pretty limited. I wrote a Python script to download cloudwatch logs to my laptop where I can use proper tools like grep to search them. It’s attached to this email. It’s hacky but not too terrible. I hope you find it useful.

Cheers
Philip
import pathlib
from collections import namedtuple
import subprocess
import datetime
import json

import boto3

DB_IDENTIFIER = 'your-db-name-here'

PATH = './logs'

Config = namedtuple('Config', ['access_key', 'secret_key', 'region', 'db_identifier', 'rds_client'])

boto_session = boto3.session.Session()

config = Config(
    access_key=boto_session._session.get_credentials().access_key,
    secret_key=boto_session._session.get_credentials().secret_key,
    region=boto_session._session.get_config_variable('region'),
    db_identifier=DB_IDENTIFIER,
    rds_client=boto_session.client('rds'),
)


class LogFile:
    def __init__(self, aws_name, timestamp, size):
        self.aws_name = aws_name
        self.last_written = datetime.datetime.fromtimestamp(timestamp / 1000)
        self.size = int(size)

        # typical aws_name = error/postgresql.log.2019-06-21-16
        self.local_path = pathlib.Path(PATH, pathlib.Path(aws_name).name + '.txt')

    def download(self, config):
        # Shells out to the AWS CLI, equivalent to:
        #   aws rds download-db-log-file-portion \
        #       --db-instance-identifier wylan-sql \
        #       --log-file-name error/postgresql.log.2019-06-24-14 \
        #       --no-paginate --output text
        cmd = [
            'aws', 'rds', 'download-db-log-file-portion',
            '--db-instance-identifier', config.db_identifier,
            '--log-file-name', self.aws_name,
            '--no-paginate',
        ]

        with open(self.local_path, 'wb') as f:
            self._proc = subprocess.Popen(cmd, stdout=f)
            return_code = self._proc.wait()

        if return_code == 0:
            # Great, the data were written. It's actually in JSON format. All of the interesting
            # info is in the LogFileData element. Grab that and replace the file contents with it.
            with open(self.local_path, encoding='utf-8') as f:
                d = json.load(f)

            log_text = d['LogFileData']

            with open(self.local_path, 'w', encoding='utf-8') as f:
                f.write(log_text)
        else:
            # FIXME provide a more helpful exception
            raise ValueError


def _get_log_files(config, root_directory):
    result = config.rds_client.describe_db_log_files(DBInstanceIdentifier=config.db_identifier)

    # FIXME filter out logs where 'Size' == 0?
    rds_logs = [LogFile(d['LogFileName'], d['LastWritten'], d['Size'])
                for d in result['DescribeDBLogFiles']]

    rds_logs.sort(key=lambda rds_log: rds_log.aws_name)

    if not rds_logs:
        print('No RDS logs found')
    else:
        for rds_log in rds_logs:
            print(f'downloading {rds_log.aws_name}...')
            rds_log.download(config)


if __name__ == '__main__':
    _get_log_files(config, None)
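A quick usage sketch, assuming the script is saved as download_rds_logs.py (the file name is just a placeholder), DB_IDENTIFIER is set to your instance, and the aws CLI and boto3 can find credentials that may read the RDS logs. The ./logs directory has to exist before the first run, since the script opens files there without creating it:

    mkdir -p logs
    python download_rds_logs.py
    grep -in 'error' logs/*.txt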