Mighty Perl-like regular expressions (Regex) with Glib and Vala

GLib 2.32 was released about a week ago. Inspired by that (or perhaps by something else), I started reading the GLib documentation on regular expression syntax and playing around with it. To use it, a GRegex structure has to be created by calling the g_regex_new function, whose first parameter is the regular expression pattern, followed by the compile flags and the match flags. The last parameter is the address of an error pointer, which is set only if the function fails to interpret the pattern. In the example I want to catch the occurrences of the words “moon”, “soon” and “noon”, summarized by the pattern “(m|n|s)oon”, in the given string.

#include <glib.h>
#include <stdlib.h>


/* Callback type: a function that takes no arguments and returns nothing. */
typedef void (*HandleFunc)( void );

/*
 * Reports a pending error and optionally runs a follow-up callback.
 * Forward declaration; the definition appears further down in this file.
 */
void 
handle_error( GError **error, HandleFunc func_pointer );

/* Terminates the process with exit status 1; passed to handle_error() as the fatal-error callback. */
void
exit_app( void )
{
	exit( 1 );
}

int
main( int argc, char **argv )
{
	GError *error = NULL;
	GRegex *regex;
	GMatchInfo *match_info;
	
	gchar *input_string = "soon you will see the moon in the afternoon";
	gchar *pattern_string = "(m|n|s)oon";
	
	regex = g_regex_new( pattern_string, 0, 0, &error );
	
	handle_error( &error, exit_app );
	
	g_regex_match( regex, input_string, 0, &match_info );
	
	g_print( "searching for a match \"%s\" in the string:\n\"%s\"\n\n", pattern_string, input_string );
	
	
	while( g_match_info_matches( match_info ) ) {
		gchar *match = g_match_info_fetch( match_info, 0 );
		
		g_print( "found match: \"%s\"\n", match );
		
		g_match_info_next( match_info, &error );
		
		handle_error( &error, NULL );
	
		g_free( match );
	}
	
	g_match_info_free( match_info );
	g_regex_unref( regex );
	
	return 0;
}

/*
 * Reports a pending GError, if there is one.
 *
 * error:        address of the error pointer to inspect; may be NULL.
 * func_pointer: callback invoked after the error has been reported;
 *               may be NULL when no follow-up action is wanted.
 *
 * When *error is set, its message is printed, the error is cleared with
 * g_clear_error() and the callback (if any) is run. Otherwise nothing happens.
 */
void
handle_error( GError **error, HandleFunc func_pointer )
{
	/* Nothing to do: no location was given, or no error is stored there. */
	if( error == NULL || *error == NULL ) {
		return;
	}

	g_print( "%s\n", (*error)->message );
	g_clear_error( error );

	if( func_pointer == NULL ) {
		return;
	}

	func_pointer( );
}

The above example can be compiled with the following makefile:

# Name of the executable; it is built from the source file $(APP).c.
APP = regex-test

# "clean" names an action, not a file, so it must run even if a file
# called "clean" happens to exist in this directory.
.PHONY: clean

# Compile and link against GLib, taking the flags from pkg-config.
${APP}: ${APP}.c
	gcc -o $@ $^ $(shell pkg-config --cflags --libs glib-2.0) -Wall -pedantic

# Remove the built executable.
clean:
	rm -rf ${APP}

Here is one more example, written in Vala, which deals with parenthesized expressions: the operands on either side of the operator must contain a balanced number of opening and closing brackets:

/**
 * Matches every sample line against "<left> (and|or|xor) <right>" and, for
 * each match, prints the whole match, the operator and both operands.
 * A RegexError raised while compiling or matching is printed to stderr.
 */
void main( ) {
	// The two operand patterns differ only in the names of their groups
	// (exp1/pn versus exp2/pm); each refers back to its own parenthesis group.
	string left_operand = "(?<exp1>( [^()]* | (?>[^()]*) (?<pn> \\( ( (?>[^()]*) | (?&pn) )* \\) ) [^()]* ))";
	string right_operand = "(?<exp2>( [^()]* | (?>[^()]*) (?<pm> \\( ( (?>[^()]*) | (?&pm) )* \\) ) [^()]* ))";

	// Whole line: left operand, one whitespace, operator, one whitespace, right operand.
	string full_pattern = "^" + left_operand + "\\s(?<opn>(and|or|xor))\\s" + right_operand + "$";

	string[] samples = {
		"true and (x)",
		"a and b",
		"(ab(cd)e or y",
		"(a(b)cde) xor ( s )",
		"(true and x) or (y and (not x ))",
		"xandy",
		" and b",
		" (x  and y) or ( true or y()) ",
		"a and b or c"
	};

	try {
		var matcher = new Regex( full_pattern, RegexCompileFlags.EXTENDED );

		foreach( string sample in samples ) {
			MatchInfo info;

			stdout.printf( "\"%s\"\n", sample );
			matcher.match( sample, 0, out info );

			// Walk over every match found in this line.
			for( ; info.matches( ); info.next( ) ) {
				stdout.printf( "\t\"%s\"\n", info.fetch( 0 ) );
				stdout.printf( "\toperator name: \"%s\"\n", info.fetch_named( "opn" ) );
				stdout.printf( "\t     left exp: \"%s\"\n", info.fetch_named( "exp1" ) );
				stdout.printf( "\t    right exp: \"%s\"\n", info.fetch_named( "exp2" ) );
			}
		}
	} catch( RegexError err ) {
		stderr.printf( "%s\n", err.message );
	}
}

This one can be compiled with:

# valac regex-test.vala

Leave a comment