Extraction et « reporting » via ODBC sous Python
Extraction de données depuis Sybase et « reporting » via pyodbc
Prérequis:
- Python 2.5
- La librairie Python config: Librairie Python config
- La librairie pyodbc: pyodbc
Voici un exemple de script qui va ouvrir une connexion via « pyodbc » sous Windows à Sybase et produire une extraction csv simple.
Tout d’abord il faut préparer les librairies à importer:
... ... # Import lib import os import csv import sys import pyodbc import textwrap import traceback # Lib for config variable try: from lib.config.configobj import ConfigObj obj_config = ConfigObj('config/config.cfg') except ImportError: ConfigObj = None sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() # Lib for helpers try: from lib.helper.helper_misc import LogStats, ToolsBox, ToolsValidator except ImportError: LogStats = None ToolsBox = None sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() ...
Contenu de la classe « LogStats », cette classe va permettre les logs sur la console et un fichier plat en même temps:
... ... """Begin LogStats class""" class LogStats(object): def __init__(self, output_file=os.getcwd()+'\\log\\'+'stats.log'): """ Description: Constructor, init stats log file Call sample: sys.stdout = LogStats() @param output_file: Path to stats log file @return: void """ self.console = sys.stdout self.file = open(output_file, 'w') def write(self, flow): """ Description: Write flow to console and file @param flow: String to write @return: int """ try: if flow <> '': self.console.write(flow) self.file.write(flow) return 0 except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() """End LogStats class""" ...
Voici le contenu des classes ToolsBox et ToolsValidator utilisées dans ce script:
... ... """Begin ToolsBox class""" class ToolsBox(object): @staticmethod def set_type(item): """ Description: Set type for CSV from Sybase SGBD Call sample: ToolsBox.set_type('FRA') @param item: Value from Sybase to cast @return: string """ try: if type(item) <> str: if not item: return 'NULL' else: return str(item) return item.lstrip().rstrip() except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() @staticmethod def set_log(level, key, value, extra_value=''): """ Description: Return log string Call sample: ToolsBox.set_log('ERROR', 'label_section', 'notice_starting_module', 'optional free text') @param level: Level of log, ERROR, INFO ... @param key: Config key from config file @param value: Config value @param extra_value: Extra free text value if needed @return: string """ try: if level and key and value: return time.strftime('%Y/%m/%d - %H:%M:%S') + ' | ' + level + ': %s' % obj_config[key][value] + extra_value except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() @staticmethod def create_sys_dsn(driver, **kwargs): """ Description: Create a system DSN @param driver: ODBC driver name @param kwargs: Driver attributes @return: int(0 - DSN not created / 1 - DSN created) """ try: null = chr(0) attributes = [] for item in kwargs.keys(): attributes.append("%s=%s" % (item, kwargs[item])) return ctypes.windll.ODBCCP32.SQLConfigDataSource(0, ODBC_ADD_SYS_DSN, driver, null.join(attributes)) except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() @staticmethod def init_error_log(): """ Description: Init error log file Call sample: ToolsBox.init_error_log() @return: int """ try: if not os.path.exists('log'): os.makedirs('log') file_socket_error = open(os.getcwd() + '\\log\\' + 'error.log', 'w') sys.stderr = file_socket_error return 0 except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() @staticmethod def init_folder_tree(*kwargs): """ Description: Create folder tree for module Call sample: ToolsBox.init_folder_tree(folder1, folder2, ...) @param *kwargs: folder1, folder2, ... @return: int """ try: for element in kwargs: if not os.path.exists(element): os.mkdir(element) return 0 except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() """End ToolsBox class""" """Begin ToolsValidator class""" class ToolsValidator(object): # Description: Constructor # Init validator # @param self: Constructor parameter # @param from_date: From date # @return: void def __init__(self, from_date=''): self.from_date = from_date # Description: DATE validation # # @param self: Constructor parameter # @return: int def valid_date(self): try: if not self.from_date: raise Exception('%s' % 'DATE cannot be empty') elif len(self.from_date) <> 10: raise Exception('%s' % 'Wrong DATE length' + ' => ' + self.from_date) elif not self.from_date.replace('-', '').isdigit(): raise Exception('%s' % 'Wrong DATE type, should be only numeric => ' + self.from_date) try: datetime.strptime(self.from_date, '%Y-%m-%d') except ValueError: raise ValueError('%s' % 'Incorrect DATE format, should be YYYY-MM-DD, or wrong DATE => ' + self.from_date) return 0 except Exception: sys.stderr.write('ERROR: %s\n' % traceback.format_exc()) sys.exit() """End ToolsValidator class""" ...
Enfin, voici le script principal qui va produire le rapport, le programme va créer un DSN sous Windows (pour la connexion à Sybase) si il n’existe pas déjà:
... ... def exec_module(navire_deb_val_dt): """ Description: Execute Python module - Validate input date - Config object - DSN if not exist - Folder tree - Log files (stats, error) - Build vessels report @return void """ try: # Pre-check, date must be valid if not navire_deb_val_dt: navire_deb_val_dt = '1989-01-01' # Min date validator = ToolsValidator(navire_deb_val_dt) validator.valid_date() # Initialize log files ToolsBox.init_error_log() # Error log sys.stdout = LogStats() # Stats log # Init folder tree for this module ToolsBox.init_folder_tree(obj_config['file_section']['output_folder_results']) # Set output result file variable output_file = obj_config['file_section']['output_folder_results'] + '/' + obj_config['file_section']['output_file_results'] # SQL script file variable sql_file = obj_config['sgbd_section']['sgbd_query_file'] # Create DSN if not exist result = ToolsBox.create_sys_dsn( "Adaptive Server Enterprise", SERVER = obj_config['sgbd_section']['sgbd_server'], PORT = obj_config['sgbd_section']['sgbd_port'], DESCRIPTION = obj_config['sgbd_section']['sgbd_dsn_name'], DSN = obj_config['sgbd_section']['sgbd_dsn_name'], DATABASE = obj_config['sgbd_section']['sgbd_database'], UID = obj_config['sgbd_section']['sgbd_uid'], PASSWORD = obj_config['sgbd_section']['sgbd_password'] ) if result: i = 0 print ToolsBox.set_log('INFO', 'label_section', 'notice_dsn_created_ok') print ToolsBox.set_log('INFO', 'label_section', 'notice_query_running') # Open ODBC connection odbc_connection = pyodbc.connect('DSN=' + obj_config['sgbd_section']['sgbd_dsn_name']) cursor = odbc_connection.cursor() # CSV read / write process file_handle = open(output_file, 'wb') obj_writer = csv.writer(file_handle, delimiter = ';') # Open, read SQL script and replace navire_deb_val_dt date if any sql_file_handle = open(sql_file, 'r').read().replace('YYYY-MM-DD', navire_deb_val_dt) # Execute SQL statement cursor.execute(sql_file_handle) rows = cursor.fetchall() # Write lines from SQL dictionary = {} for line in rows: # Iterate, feed dictionary, remove duplicate on the whole line dictionary[line[1]] = ToolsBox.set_type(line[0]) + '_' + ToolsBox.set_type(line[2]) + '_' + ToolsBox.set_type(line[3]) + '_' + ToolsBox.set_type(line[4]) + '_' + ToolsBox.set_type(line[5]) + '_' + ToolsBox.set_type(line[6]) + '_' + ToolsBox.set_type(line[7]) + '_' + ToolsBox.set_type(line[8]) + '_' + ToolsBox.set_type(line[9])+ '_' + ToolsBox.set_type(line[10]) + '_' + ToolsBox.set_type(line[11])+ '_' + ToolsBox.set_type(line[12]) + '_' + ToolsBox.set_type(line[13]) + '_' + ToolsBox.set_type(line[14])+ '_' + ToolsBox.set_type(line[15]) # Iterate through sorted dictionary obj_writer.writerow(['header_column_01', 'header_column_02', ...]) # You have to change this if you need header into your CSV output file for key in dictionary: row = dictionary[key].split('_') i += 1 obj_writer.writerow([row[0], key, row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9], row[10], row[11], row[12], row[13], row[14]]) # Close files handle file_handle.close() # Check if result is not empty if i <> 0: print ToolsBox.set_log('INFO', 'label_section', 'notice_report_done_ok', ' ' + output_file) print ToolsBox.set_log('INFO', 'label_section', 'notice_nb_lines', ' ' + '(' + str(i) + ')') else: print ToolsBox.set_log('INFO', 'label_section', 'notice_empty_file') os.remove(output_file) # Remove empty file sys.exit() else: print ToolsBox.set_log('ERROR', 'label_section', 'error_dsn_created_ko') except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() def main(): """ Description: Main entry point for this module - Build report for vessels @return void """ try: # Display menu for user while 1: sys.stdout.write( textwrap.dedent("""\ Valid commands are: ------------------------------------------- [E] to exit [get_vessels_report] [YYYY-MM-DD] will produce vessels report Help: If you avoid [YYYY-MM-DD], this will produce all vessels from the beginning """) ) # Read input oCommandLine = raw_input() if oCommandLine == 'E': break elif 'get_vessels_report' in oCommandLine: exec_module(oCommandLine[19:29]) else: print 'Unknown command' sys.exit() except Exception: sys.stderr.write('ERROR: %s' % traceback.format_exc()) sys.exit() if __name__ == '__main__': main() ...
Voici également mon fichier de configuration pour exemple (j’utilise la librairie: ConfigObj):
[file_section]
output_folder_results = ResultFolder
output_file_results = ReportFileName.csv
[label_section]
error_dsn_created_ko = DSN not created => KO
notice_empty_file = No result for this request
notice_dsn_created_ok = DSN created successfully => OK
notice_report_done_ok = Report has been done successfully:
notice_query_running = The SQL statement is running please wait...
notice_nb_lines = line numbers
[sgbd_section]
sgbd_uid = your_login
sgbd_password = your_password
sgbd_server = your_server
sgbd_dsn_name = PRODUCTION_SYBASE_DSN
sgbd_port = your_port_number
sgbd_database = your_database
sgbd_query_file = config/query.sql
Et pour finir un exemple de requête SQL avec une date pour le remplacement à partir du menu utilisateur Python, c’est votre source de données:
SELECT
T.FIELD1,
E.FIELD2,
...
FROM
DB..T_TABLE1 T
LEFT JOIN DB..T_TABLE2 E
ON T.PK = E.FK
...
WHERE
...
AND T.FIELD_DATE >= 'YYYY-MM-DD'
...