# Last-edition: 20060505

#
# Html links
#
# Link table for this library. No links are declared in it.
# NOTE(review): the Html rules further down pass the link names entryR and
# PMIDR to $Hlink; presumably those are defined outside this file -- confirm.
$links= {
}

$rules={

#
# Entry
#
# Splits the text of the input file into entries.
# The whole body is optional. When it matches, the GENE_ID literal opens an
# entry: $Fip is stored in $entryFip and a token is written ($Wrt), then the
# rest of that line is appended ($App). The inner group then appends one line
# per iteration and must match at least once.
# NOTE(review): the {$Not} after the inner GENE_ID literal presumably makes the
# group succeed only on lines that do NOT start with GENE_ID, so the loop stops
# at the first line of the next entry -- confirm against the Icarus reference.
entry:	~ { $In:[file:text] $Out pre {$Skip:0} }
	('GENE_ID' {$entryFip=$Fip $Wrt} toln {$App}
	('GENE_ID' {$Not} toln {$App})+ )?
~

#
# Fields
#
# Cuts one entry (input: entry) into its fields. The fields must appear in
# exactly the order listed; every one of them is mandatory except pubmed.
# At least one reference is required, each optionally followed by one pubmed
# field, and the entry is closed by end.
fields:		~ {$In:entry $Out}
	gene_id date author gene
	product taxon species
	keywords activity
	promoter_id sequence_id
	( reference pubmed? )+
	end
~

# One rule per field. Each writes a token named after the field ($Wrt:name),
# matches the upper-case tag literal and then the field text via sp_toln,
# i.e. a space followed by the remainder of the line including its newline.
#
# Fields using a single sp_toln occupy exactly one line.
# Fields using sp_toln+ take the tag line plus any number of further lines
# that begin with a space.

gene_id:	~ {$Wrt:gene_id}
	'GENE_ID' sp_toln ~

date:		~ {$Wrt:date}
	'DATE' sp_toln ~

author:		~ {$Wrt:author}
	'AUTHOR' sp_toln ~

gene:		~ {$Wrt:gene}
	'GENE' sp_toln ~

product:	~ {$Wrt:product}
	'PRODUCT' sp_toln+ ~

taxon:		~ {$Wrt:taxon}
	'TAXON' sp_toln+ ~

species:	~ {$Wrt:species}
	'SPECIES' sp_toln ~

keywords:	~ {$Wrt:keywords}
	'KEYWORDS' sp_toln+ ~

activity:	~ {$Wrt:activity}
	'ACTIVITY' sp_toln+ ~

promoter_id:	~ {$Wrt:promoter_id}
	'PROMOTER_ID' sp_toln+ ~

sequence_id:	~ {$Wrt:sequence_id}
	'SEQUENCE_ID' sp_toln+ ~

reference:	~ {$Wrt:reference}
	'REFERENCE' sp_toln+ ~

pubmed:		~ {$Wrt:pubmed}
	'PUBMED' sp_toln ~

# The terminator line: written without a field name, and matched with toln
# (no leading space is required after the END literal).
end:		~ {$Wrt}
	'END' toln ~


#
# Indexing
#
# Index for gene_id. Reads the gene_id field, skips the tag and its trailing
# space, writes the prefixed identifier matched by id_ext as the index token,
# and expects the line to end right after it.
i_gene_id:	~ {$In:[fields c:gene_id] $Out:gene_id}
	tag_sp
	id_ext {$Wrt}
	ln
~
# Index for date. The value must be three dot-separated numbers directly
# followed by the end of line. The token written is the single number
# $3*10000 + $2*100 + $1, so 5.5.2006 is indexed as 20060505.
# NOTE(review): this presumes a day.month.year layout -- confirm.
i_date:		~ {$In:[fields c:date] $Out:date}
	tag_sp
	/([0-9]+)\.([0-9]+)\.([0-9]+)/
		{ $Wrt:[s:($3*10000+$2*100+$1)] }
	ln
~
# Index for gene. After the tag, the field text is scanned as an alternation
# of wordext2 tokens and separators. A wordext2 token is written to the index
# only when it is at least three characters long.
#
# The separator alternative is the exact complement of the wordext2 character
# class. It used to accept only commas and newlines, so the loop stopped at
# the first other character (a period, semicolon, colon, slash ...) and the
# remainder of the field was never indexed. With the complement class every
# character is consumed by one of the two alternatives and the scan always
# reaches the end of the field. Input made only of wordext2 characters,
# commas and newlines is tokenised exactly as before.
i_gene:		~ {$In:[fields c:gene] $Out:gene} tag_sp
	( wordext2 { if:($StrLen:$Ct >= 3) {$Wrt} }
	| /[^a-zA-Z0-9_()<> -]+/ )*
~



# Index for product. After the tag, the field text is scanned as an
# alternation of wordext2 tokens and separators. A wordext2 token is written
# to the index only when it is at least three characters long.
#
# The separator alternative is the exact complement of the wordext2 character
# class. It used to accept only commas and newlines, so the loop stopped at
# the first other character (for instance a semicolon or period) and the rest
# of the field was never indexed. The complement class also covers the
# semicolon-separated layout that an earlier, commented-out variant of this
# rule tried to handle, so that dead line has been dropped. Input made only of
# wordext2 characters, commas and newlines is tokenised exactly as before.
i_product:	~ {$In:[fields c:product] $Out:product} tag_sp
	( wordext2 { if:($StrLen:$Ct >= 3) {$Wrt} }
	| /[^a-zA-Z0-9_()<> -]+/ )*
~

# Index for taxon. After the tag the text is consumed as a mix of wordext
# tokens and nonword runs; a wordext token is written to the index only when
# it is at least three characters long, nonword runs are discarded.
i_taxon:	~ {$In:[fields c:taxon] $Out:taxon}
	tag_sp
	(
		wordext { if:($StrLen:$Ct >= 3) {$Wrt} }
	|
		nonword
	)*
~
# Index for species. After the tag, the field text is scanned as an
# alternation of wordext2 tokens and separators. A wordext2 token is written
# to the index only when it is at least three characters long.
#
# The separator alternative is the exact complement of the wordext2 character
# class. It used to accept only commas and newlines, so the loop stopped at
# the first other character (for instance the period of an abbreviated name)
# and the rest of the field was never indexed. Input made only of wordext2
# characters, commas and newlines is tokenised exactly as before.
i_species:	~ {$In:[fields c:species] $Out:species} tag_sp
	( wordext2 { if:($StrLen:$Ct >= 3) {$Wrt} }
	| /[^a-zA-Z0-9_()<> -]+/ )*
~
# Index for keywords. After the tag, the field text is scanned as an
# alternation of wordext2 tokens and separators. A wordext2 token is written
# to the index only when it is at least three characters long.
#
# The separator alternative is the exact complement of the wordext2 character
# class. It used to accept only commas and newlines, so the loop stopped at
# the first other character (for instance a semicolon or a closing period) and
# every keyword after it was left out of the index. Input made only of
# wordext2 characters, commas and newlines is tokenised exactly as before.
i_keywords:	~ {$In:[fields c:keywords] $Out:keywords} tag_sp
	( wordext2 { if:($StrLen:$Ct >= 3) {$Wrt} }
	| /[^a-zA-Z0-9_()<> -]+/ )*
~
# Index for activity. After the tag the text is consumed as a mix of wordext
# tokens and nonword runs; a wordext token is written to the index only when
# it is at least three characters long, nonword runs are discarded.
i_activity:	~ {$In:[fields c:activity] $Out:activity}
	tag_sp
	(
		wordext { if:($StrLen:$Ct >= 3) {$Wrt} }
	|
		nonword
	)*
~
# Index for promoter_id. After the tag, every prefixed identifier matched by
# id_ext is written to the index; runs of spaces and newlines between the
# identifiers are skipped.
i_promoter_id:	~ {$In:[fields c:promoter_id] $Out:promoter_id}
	tag_sp
	(
		id_ext {$Wrt}
	|
		sp_or_ln
	)*
~
# Index for sequence_id. After the tag, every prefixed identifier matched by
# id_ext is written to the index; runs of spaces and newlines between the
# identifiers are skipped.
i_sequence_id:	~ {$In:[fields c:sequence_id] $Out:sequence_id}
	tag_sp
	(
		id_ext {$Wrt}
	|
		sp_or_ln
	)*
~

# Index for pubmed. After the tag, every run of digits is written to the
# index and everything between digit runs is skipped.
#
# This replaces the generic wordext2 phrase scan, which was a poor fit for a
# numeric identifier:
#  - wordext2 also accepts spaces, parentheses and angle brackets, so the
#    indexed token could carry characters that are not part of the id;
#  - tokens shorter than three characters were discarded, so a short id was
#    never indexed;
#  - the loop stopped at the first character outside wordext2, comma and
#    newline.
# The digits-only pattern is the same one h_pubmed uses to build the link, so
# the indexed value and the linked value now agree. A field consisting of the
# tag, a space, an id of three or more digits and a newline yields the same
# token as before.
i_pubmed:	~ {$In:[fields c:pubmed] $Out:pubmed} tag_sp
	( /[0-9]+/ {$Wrt}
	| /[^0-9]+/ )*
~

# Index for reference. After the tag, the field text is scanned as an
# alternation of wordext2 tokens and separators. A wordext2 token is written
# to the index only when it is at least three characters long.
#
# The separator alternative is the exact complement of the wordext2 character
# class. It used to accept only commas and newlines, so the loop stopped at
# the first other character. Citation text routinely contains periods and
# colons (author initials, journal abbreviations, volume:page), which meant
# the scan usually ended within the first few characters and most of the
# reference was never indexed. Input made only of wordext2 characters, commas
# and newlines is tokenised exactly as before.
i_reference:	~ {$In:[fields c:reference] $Out:reference} tag_sp
	( wordext2 { if:($StrLen:$Ct >= 3) {$Wrt} }
	| /[^a-zA-Z0-9_()<> -]+/ )*
~
#
# Html
#
# Html view of the promoter_id field.
# The pre-action makes the rule fail when the integer parameter isTable is
# set, so no replacement is done in that case.
# After the tag, each identifier matched by id_ext is replaced ($Rep) by the
# result of $Hlink on entryR, called with: the string parameter userIdOpt,
# the literals TGP_PROMOTER and PromoterID, and the matched identifier twice.
# Spaces and newlines between identifiers are passed over unchanged.
# NOTE(review): the two literals are presumably the target library and field
# names, and the two $Ct values the query value and the link text -- confirm
# against the definition of entryR.
h_promoter_id:	~ { $In:[fields c:promoter_id t:html] pre {if:$ParInt:isTable $Fail} }
	tag ( id_ext
	{$Rep:$Hlink:[entryR p:{"($ParStr:userIdOpt)" 'TGP_PROMOTER' 'PromoterID' $Ct $Ct} ]}
	| sp_or_ln )*
~
# Html view of the sequence_id field.
# The pre-action makes the rule fail when the integer parameter isTable is
# set, so no replacement is done in that case.
# After the tag, each identifier matched by id_ext is replaced ($Rep) by the
# result of $Hlink on entryR, called with: the string parameter userIdOpt,
# the literals TGP_SEQUENCE and SequenceID, and the matched identifier twice.
# Spaces and newlines between identifiers are passed over unchanged.
# NOTE(review): the two literals are presumably the target library and field
# names, and the two $Ct values the query value and the link text -- confirm
# against the definition of entryR.
h_sequence_id:	~ { $In:[fields c:sequence_id t:html] pre {if:$ParInt:isTable $Fail} }
	tag ( id_ext
	{$Rep:$Hlink:[entryR p:{"($ParStr:userIdOpt)" 'TGP_SEQUENCE' 'SequenceID' $Ct $Ct} ]}
	| sp_or_ln )*
~
# Html view of the pubmed field.
# The pre-action makes the rule fail when the integer parameter isTable is
# set, so no replacement is done in that case.
# The field must be the tag, one space, a run of digits and the end of line;
# the digit run is replaced ($Rep) by the result of $Hlink on PMIDR, called
# with the matched digits twice.
# NOTE(review): PMIDR is not defined in this file; presumably it is an
# external link to the PubMed record -- confirm.
h_pubmed:		~ { $In:[fields c:pubmed t:html] pre {if:$ParInt:isTable $Fail} }
	tag_sp /[0-9]+/ {$Rep:$Hlink:[PMIDR p:{$Ct $Ct}]} ln
~

#
# Special
#

# Prefixed identifier: one or more letters, a colon, then one or more
# characters out of letters, digits, underscore, plus, colon, period, hyphen.
# Used by the gene_id, promoter_id and sequence_id index and Html rules.
# NOTE(review): those rules apply it right after the field tag, i.e. in the
# middle of a line, so the leading ^ presumably anchors at the current scan
# position rather than at a line start -- confirm.
id_ext:		~ /^[a-zA-Z]+:[a-zA-Z0-9_+:.-]+/ ~

#
# Common
#
# Shared token rules. Of these, noln, spaces, word, fill and nonfill are not
# referenced by any other rule in this file.

# Rest of the current line, including its newline.
toln:		~ /[^\n]*\n/ ~
# Rest of the current line, excluding the newline; may match nothing.
noln:		~ /[^\n]*/ ~
# A single newline.
ln:		~ /\n/ ~
# One or more spaces and/or newlines.
sp_or_ln:	~ /[ \n]+/ ~
# One space, then the rest of the line including its newline.
sp_toln:	~ / [^\n]*\n/ ~
# Field tag: upper-case letters, digits, underscore, hyphen.
tag:		~ /[A-Z0-9_-]+/ ~
# Field tag followed by exactly one space.
tag_sp:		~ /[A-Z0-9_-]+ / ~
# One or more spaces.
spaces:		~ /[ ]+/ ~
# Run of letters and digits.
word:		~ /[a-zA-Z0-9]+/ ~
# Run of anything except letters and digits; may match nothing.
nonword:	~ /[^a-zA-Z0-9]*/ ~
# Like word, additionally allowing underscore and hyphen.
wordext:	~ /[a-zA-Z0-9_-]+/ ~
# Like wordext, additionally allowing parentheses, angle brackets and spaces,
# so one token can span several space-separated words.
wordext2:	~ /[a-zA-Z0-9_()<> -]+/ ~
# Run of characters other than space, tab and newline.
fill:		~ /[^ \t\n]+/ ~
# Run of spaces, tabs and newlines; may match nothing.
nonfill:	~ /[ \t\n]*/ ~

} # rules