Kaydet (Commit) 038e3ce8 authored tarafından Andre Fischer's avatar Andre Fischer

125035: Added support for actions to the experimental Java parser.

üst 7d096ff2
package org.apache.openoffice.ooxml.parser;
import java.util.Vector;
/** Container of all actions that are associated with a single state.
*/
public class ActionDescriptor
{
    /** Create an empty descriptor for one state.
     *  @param nStateId
     *      Id of the state.  It is accepted but not stored by this class.
     *  @param sName
     *      Name of the state.  Only used by toString().
     */
    public ActionDescriptor (
        final int nStateId,
        final String sName)
    {
        // The three action lists start out as null and are created lazily
        // by the first AddAction() call for the respective trigger.
        msStateName = sName;
    }




    /** Append an action to the list of the given trigger.  The list is
     *  created when this is the first action for that trigger.
     */
    public void AddAction (
        final IAction aAction,
        final ActionTrigger eTrigger)
    {
        final Vector<IAction> aTarget = GetActionsForTrigger(eTrigger, true);
        aTarget.add(aAction);
    }




    /** Return the actions that have been added for the given trigger.
     *  @return
     *      Is null when no action has been added for the trigger.
     */
    public Iterable<IAction> GetActions (
        final ActionTrigger eTrigger)
    {
        return GetActionsForTrigger(eTrigger, false);
    }




    @Override
    public String toString ()
    {
        return "actions for state "+msStateName;
    }




    /** Look up the action list for a trigger.
     *  @param bCreateWhenMissing
     *      When true then a list that does not yet exist is created and
     *      remembered.  When false then a missing list is reported as null.
     */
    private Vector<IAction> GetActionsForTrigger (
        final ActionTrigger eTrigger,
        final boolean bCreateWhenMissing)
    {
        switch(eTrigger)
        {
            case ElementStart:
                if (maElementStartActions==null && bCreateWhenMissing)
                    maElementStartActions = new Vector<>();
                return maElementStartActions;

            case ElementEnd:
                if (maElementEndActions==null && bCreateWhenMissing)
                    maElementEndActions = new Vector<>();
                return maElementEndActions;

            case Text:
                if (maTextActions==null && bCreateWhenMissing)
                    maTextActions = new Vector<>();
                return maTextActions;

            default:
                // A trigger that has no list of its own.
                return null;
        }
    }




    private final String msStateName;
    private Vector<IAction> maElementStartActions;
    private Vector<IAction> maElementEndActions;
    private Vector<IAction> maTextActions;
}
package org.apache.openoffice.ooxml.parser;
import java.util.Iterator;
/** Iterate over two sources of actions, both given as an Iterable<IAction>
* object that can be null.
*/
public class ActionIterator implements Iterable<IAction>
{
    /** Create an iterable that first yields the actions of one state and
     *  then the actions that are bound to all states.
     *  @param aOneStateActions
     *      Actions of a single state.  Can be null.
     *  @param aAllStateActions
     *      Actions that apply to all states.  Can be null.
     */
    public ActionIterator (
        final Iterable<IAction> aOneStateActions,
        final Iterable<IAction> aAllStateActions)
    {
        maOneStateActions = aOneStateActions;
        maAllStateActions = aAllStateActions;
    }




    /** Return a fresh iterator over both sources.  A source that is null is
     *  treated like an empty one.
     */
    @Override public Iterator<IAction> iterator()
    {
        return new Iterator<IAction>()
        {
            /** Iterator of the source of the current phase.  Is null until
             *  the source is first needed and after it has been exhausted.
             */
            Iterator<IAction> maIterator = null;

            /** 0: one-state actions, 1: all-state actions, 2: finished. */
            int mnPhase = 0;

            @Override
            public boolean hasNext()
            {
                while (mnPhase < 2)
                {
                    if (maIterator == null)
                    {
                        final Iterable<IAction> aSource = mnPhase==0
                            ? maOneStateActions
                            : maAllStateActions;
                        if (aSource != null)
                            maIterator = aSource.iterator();
                    }

                    if (maIterator!=null && maIterator.hasNext())
                        return true;

                    // The current source is missing or exhausted.  Move on
                    // to the next phase.
                    maIterator = null;
                    ++mnPhase;
                }
                return false;
            }

            @Override
            public IAction next()
            {
                // Advance via hasNext() so that next() also works when the
                // caller did not call hasNext() first and so that the switch
                // from the first to the second source is not missed.
                if ( ! hasNext())
                    throw new java.util.NoSuchElementException();
                return maIterator.next();
            }

            /** Removing actions through this iterator is not supported. */
            @Override
            public void remove()
            {
                throw new UnsupportedOperationException();
            }
        };
    }




    private final Iterable<IAction> maOneStateActions;
    private final Iterable<IAction> maAllStateActions;
}
package org.apache.openoffice.ooxml.parser;
import java.util.HashMap;
import java.util.Map;
/** Manage actions that are bound to states and XML events.
*/
public class ActionManager
{
    /** Create a manager without any actions.
     *  @param aStateNameToIdMap
     *      Used to translate between state names and state ids.
     */
    ActionManager (
        final NameMap aStateNameToIdMap)
    {
        maStateNameToIdMap = aStateNameToIdMap;
        maAllStatesActions = new ActionDescriptor(0,"*");
        maStateToActionsMap = new HashMap<>();
    }




    /** Register an action that is run on element start.
     *  @param sStateSelector
     *      Elements are selected via state names so that one element that
     *      leads to different complex types can have different actions,
     *      depending on the complex type.
     *      The selector is one of
     *      - "*" to bind the action to all states,
     *      - a full state name (including the namespace prefix and CT
     *        prefix, e.g. w06_CT_Table),
     *      - a regular expression (e.g. .*_CT_Table to match w06_CT_Table
     *        and w12_CT_Table).  Note that a selector is only treated as
     *        regular expression when it contains a '*' or a '?'.  Any other
     *        selector is looked up as a plain state name.
     *      The action is bound to all matching states.
     *  @param aAction
     *      The action to call on entering any of the states that match the
     *      selector.
     */
    public void AddElementStartAction (
        final String sStateSelector,
        final IAction aAction)
    {
        AddAction(sStateSelector, aAction, ActionTrigger.ElementStart);
    }




    /** Register an action that is run on element end.
     *  @see #AddElementStartAction
     */
    public void AddElementEndAction (
        final String sStateSelector,
        final IAction aAction)
    {
        AddAction(sStateSelector, aAction, ActionTrigger.ElementEnd);
    }




    /** Register an action that is run on XML text events.
     *  @see #AddElementStartAction
     */
    public void AddTextAction (
        final String sStateSelector,
        final IAction aAction)
    {
        AddAction(sStateSelector, aAction, ActionTrigger.Text);
    }




    /** Give access to all actions that are bound to the given state and
     *  trigger.  Actions of the state itself come before the actions that
     *  are bound to all states.
     *  @return
     *      Can be null when there are no actions bound to the state and
     *      trigger.
     */
    public Iterable<IAction> GetActions (
        final int nStateId,
        final ActionTrigger eTrigger)
    {
        Iterable<IAction> aStateSpecificActions = null;
        final ActionDescriptor aDescriptor = maStateToActionsMap.get(nStateId);
        if (aDescriptor != null)
            aStateSpecificActions = aDescriptor.GetActions(eTrigger);

        final Iterable<IAction> aGlobalActions = maAllStatesActions.GetActions(eTrigger);

        // Only when both sources exist is a combining iterator necessary.
        if (aStateSpecificActions == null)
            return aGlobalActions;
        if (aGlobalActions == null)
            return aStateSpecificActions;
        return new ActionIterator(aStateSpecificActions, aGlobalActions);
    }




    /** Bind the action to every state that is matched by the selector. */
    private void AddAction (
        final String sStateSelector,
        final IAction aAction,
        final ActionTrigger eTrigger)
    {
        // Simple optimization when an action is defined for all states.
        if (sStateSelector.equals("*"))
        {
            maAllStatesActions.AddAction(aAction, eTrigger);
            return;
        }

        final boolean bHasWildcards =
            sStateSelector.contains("*") || sStateSelector.contains("?");
        if ( ! bHasWildcards)
        {
            // A plain state name selects exactly one state.
            final int nSingleStateId = maStateNameToIdMap.GetIdForName(sStateSelector);
            GetActionDescriptor(nSingleStateId).AddAction(aAction, eTrigger);
            return;
        }

        // The state selector contains wildcards.  All state names have to be
        // checked to find the matching ones.
        for (final int nMatchingStateId : maStateNameToIdMap.GetMatchingStateIds(sStateSelector))
            GetActionDescriptor(nMatchingStateId).AddAction(aAction, eTrigger);
    }




    /** Return the descriptor for the given state.  It is created and
     *  remembered on its first request.
     */
    private ActionDescriptor GetActionDescriptor (final int nStateId)
    {
        final ActionDescriptor aExistingDescriptor = maStateToActionsMap.get(nStateId);
        if (aExistingDescriptor != null)
            return aExistingDescriptor;

        final ActionDescriptor aNewDescriptor = new ActionDescriptor(
            nStateId,
            maStateNameToIdMap.GetNameForId(nStateId));
        maStateToActionsMap.put(nStateId, aNewDescriptor);
        return aNewDescriptor;
    }




    private final NameMap maStateNameToIdMap;
    private final ActionDescriptor maAllStatesActions;
    private final Map<Integer,ActionDescriptor> maStateToActionsMap;
}
package org.apache.openoffice.ooxml.parser;
/** An enumeration of all supported action triggers.
*/
public enum ActionTrigger
{
    /** An element has just been entered. */
    ElementStart,

    /** An element is about to be left. */
    ElementEnd,

    /** Text has been read. */
    Text
}
\ No newline at end of file
......@@ -89,10 +89,12 @@ public class AttributeManager
/** For the state with id nStateId, match the attributes from the document
* with the attribute specifications of that state.
*/
public void ParseAttributes (
public AttributeValues ParseAttributes (
final int nStateId,
final AttributeProvider aDocumentAttributes)
{
final AttributeValues aValues = new AttributeValues();
final Map<Integer,AttributeDescriptor> aAttributesPerState = maStateIdToAttributesMap.get(nStateId);
if (aAttributesPerState == null)
{
......@@ -120,6 +122,8 @@ public class AttributeManager
aEntry[2],
aAttributesPerState);
aUsedAttributes.add(aAttributeDescriptor);
aValues.AddAttribute(aAttributeDescriptor, aEntry[2]);
if (Log.Dbg != null)
{
if (aAttributeDescriptor == null)
......@@ -147,6 +151,8 @@ public class AttributeManager
}
}
}
return aValues;
}
......
package org.apache.openoffice.ooxml.parser;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
/** Container of attribute values of an opening tag.
*/
public class AttributeValues
{
    /** Create an empty container. */
    AttributeValues ()
    {
        maAttributes = new TreeMap<>();
    }




    /** Store the value of one attribute under the name of its descriptor.
     *  A value that is added later under the same name replaces the earlier
     *  one.
     *  @param aAttributeDescriptor
     *      Descriptor of the attribute.  Can be null for an attribute that
     *      could not be matched to a specification.  Such an attribute has
     *      no name to store the value under and is ignored.
     *  @param sValue
     *      The attribute value as read from the document.
     */
    public void AddAttribute (
        final AttributeDescriptor aAttributeDescriptor,
        final String sValue)
    {
        // The caller hands over unmatched attributes as null descriptors.
        // Dereferencing them would abort the parsing of the whole element.
        if (aAttributeDescriptor == null)
            return;

        maAttributes.put(
            aAttributeDescriptor.GetName(),
            sValue);
    }




    /** Return all stored name/value pairs, ordered by attribute name. */
    public Iterable<Entry<String,Object>> GetAttributes ()
    {
        return maAttributes.entrySet();
    }




    /** Return the number of stored attributes. */
    public int GetAttributeCount ()
    {
        return maAttributes.size();
    }




    private final Map<String,Object> maAttributes;
}
package org.apache.openoffice.ooxml.parser;
/** Context that has the same life time (by default) as the element it represents.
* Gives access to the attribute values and the parent context.
*/
public class ElementContext
{
    private final String msElementName;
    private final String msTypeName;
    /** Stored on construction.  This class offers no accessor for it. */
    private final boolean mbIsSkipping;
    private final AttributeValues maAttributeValues;
    private final ElementContext maParentContext;




    /** Create the context for one element.
     *  @param sElementName
     *      Name of the element.
     *  @param sTypeName
     *      Name of the type of the element.
     *  @param bIsSkipping
     *      Skipping flag of the element.
     *  @param aValues
     *      The attribute values of the opening tag.
     *  @param aParentContext
     *      Context of the parent element.  Can be null.
     */
    ElementContext (
        final String sElementName,
        final String sTypeName,
        final boolean bIsSkipping,
        final AttributeValues aValues,
        final ElementContext aParentContext)
    {
        this.msElementName = sElementName;
        this.msTypeName = sTypeName;
        this.mbIsSkipping = bIsSkipping;
        this.maAttributeValues = aValues;
        this.maParentContext = aParentContext;
    }




    /** Return the element name that was given to the constructor. */
    public String GetElementName ()
    {
        return this.msElementName;
    }




    /** Return the type name that was given to the constructor. */
    public String GetTypeName ()
    {
        return this.msTypeName;
    }




    /** Return the attribute values that were given to the constructor. */
    public AttributeValues GetAttributes ()
    {
        return this.maAttributeValues;
    }




    /** Return the context of the parent element.
     *  Can be null when there is no parent element.
     */
    public ElementContext GetParentContext ()
    {
        return this.maParentContext;
    }
}
package org.apache.openoffice.ooxml.parser;
import javax.xml.stream.Location;
/** Interface for actions that are bound to states and triggered by XML events.
*/
public interface IAction
{
    /** Called once for every XML event that the action is bound to.
     *  @param eTrigger
     *      The kind of event.  Equivalent to the XML event type.
     *  @param aContext
     *      Context of the element that the event belongs to: the element
     *      that was just entered (element start), that is about to be left
     *      (element end) or that is currently active (all other events).
     *  @param sText
     *      The text of the event for ActionTrigger.Text.  Is null for all
     *      other triggers.
     *  @param aLocation
     *      Where in the source file the XML event was triggered.
     */
    void Run (
        ActionTrigger eTrigger,
        ElementContext aContext,
        String sText,
        Location aLocation);
}
......@@ -23,6 +23,7 @@ package org.apache.openoffice.ooxml.parser;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Vector;
public class NameMap
......@@ -78,6 +79,22 @@ public class NameMap
/** Return the ids of all states whose names match the given pattern.
 *  @param sPattern
 *      A regular expression.  A state is included when its whole name
 *      matches the expression.
 *  @return
 *      The ids of the matching states.  Is empty when no name matches.
 *  @throws java.util.regex.PatternSyntaxException
 *      When sPattern is not a valid regular expression.
 */
public Vector<Integer> GetMatchingStateIds (final String sPattern)
{
    // Compile the expression once.  String.matches() would compile it anew
    // for every single state name.
    final java.util.regex.Pattern aPattern = java.util.regex.Pattern.compile(sPattern);

    final Vector<Integer> aStateIds = new Vector<>();
    for (final Entry<String,Integer> aEntry : maNameToIdMap.entrySet())
    {
        if (aPattern.matcher(aEntry.getKey()).matches())
            aStateIds.add(aEntry.getValue());
    }
    return aStateIds;
}
private final Map<String,Integer> maNameToIdMap;
private final Vector<String> maIdToNameMap;
}
package org.apache.openoffice.ooxml.parser;
import java.io.InputStream;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
/** This is the actual parser (where OOXMLParser is the front end that handles
* parameters given to the main method).
*/
public class Parser
{
    /** Create a parser that feeds the XML events of the given stream into
     *  the given state machine.
     *  @param aMachine
     *      Receives element start, element end and text events.
     *  @param aIn
     *      The XML input.  When it is null or when no stream reader can be
     *      created for it then Parse() reports an error and does nothing.
     */
    public Parser (
        final StateMachine aMachine,
        final InputStream aIn)
    {
        maMachine = aMachine;
        maReader = GetStreamReader(aIn, "input");
        mnElementCount = 0;
    }




    /** Read the input to its end and forward its events to the state
     *  machine.  Elements that are met while the state machine is in a skip
     *  state, or that the state machine can not process, are skipped
     *  together with their content.
     *  Stream errors are printed and end the parsing.  The reader is closed
     *  in any case.
     */
    void Parse ()
    {
        // GetStreamReader() returns null when it has no input or fails.
        if (maReader == null)
        {
            Log.Err.printf("can't parse: no XML stream reader is available\n");
            return;
        }

        try
        {
            final AttributeProvider aAttributeProvider = new AttributeProvider(maReader);
            while (maReader.hasNext())
            {
                final int nCode = maReader.next();
                switch(nCode)
                {
                    case XMLStreamReader.START_ELEMENT:
                        ++mnElementCount;
                        if (maMachine.IsInSkipState())
                        {
                            if (Log.Dbg != null)
                                Log.Dbg.printf("is skip state -> starting to skip\n");
                            Skip();
                        }
                        else if ( ! maMachine.ProcessStartElement(
                            maReader.getNamespaceURI(),
                            maReader.getLocalName(),
                            maReader.getLocation(),
                            aAttributeProvider))
                        {
                            if (Log.Dbg != null)
                                Log.Dbg.printf("starting to skip to recover from error\n");
                            Skip();
                        }
                        break;

                    case XMLStreamReader.END_ELEMENT:
                        maMachine.ProcessEndElement(
                            maReader.getNamespaceURI(),
                            maReader.getLocalName(),
                            maReader.getLocation());
                        break;

                    case XMLStreamReader.CHARACTERS:
                        maMachine.ProcessCharacters(
                            maReader.getText(),
                            maReader.getLocation());
                        break;

                    case XMLStreamReader.END_DOCUMENT:
                        Log.Std.printf("--- end of document ---\n");
                        break;

                    default:
                        Log.Err.printf("can't handle XML event of type %d\n", nCode);
                }
            }
        }
        catch (final XMLStreamException aException)
        {
            aException.printStackTrace();
        }
        finally
        {
            // Close the reader also when parsing was aborted by an
            // exception.
            CloseReader();
        }
    }




    /** Return the number of start elements that have been read so far,
     *  including the skipped ones.
     */
    public int GetElementCount ()
    {
        return mnElementCount;
    }




    /** Close the stream reader.  A failure to do so is printed but not
     *  propagated so that it can not hide an exception that is already on
     *  its way out of Parse().
     */
    private void CloseReader ()
    {
        try
        {
            maReader.close();
        }
        catch (final XMLStreamException aException)
        {
            aException.printStackTrace();
        }
    }




    /** Read and discard all events up to and including the end tag that
     *  matches the start tag that the reader is currently positioned on.
     *  Skipped start elements are still counted.
     *  @throws RuntimeException
     *      When the document ends before the matching end tag is found.
     */
    private void Skip ()
    {
        if (Log.Dbg != null)
        {
            Log.Dbg.printf("starting to skip on %s at L%dC%d\n",
                maReader.getLocalName(),
                maReader.getLocation().getLineNumber(),
                maReader.getLocation().getColumnNumber());
            Log.Dbg.IncreaseIndentation();
        }

        // We are called when processing a start element.  This means that we are
        // already at relative depth 1.
        int nRelativeDepth = 1;
        try
        {
            while (maReader.hasNext())
            {
                final int nCode = maReader.next();
                switch (nCode)
                {
                    case XMLStreamReader.START_ELEMENT:
                        ++nRelativeDepth;
                        ++mnElementCount;
                        if (Log.Dbg != null)
                        {
                            Log.Dbg.printf("skipping start element %s\n", maReader.getLocalName());
                            Log.Dbg.IncreaseIndentation();
                        }
                        break;

                    case XMLStreamReader.END_ELEMENT:
                        --nRelativeDepth;
                        if (Log.Dbg != null)
                            Log.Dbg.DecreaseIndentation();
                        if (nRelativeDepth <= 0)
                        {
                            // This is the end tag of the element on which
                            // skipping started.
                            if (Log.Dbg != null)
                                Log.Dbg.printf("leaving skip mode on %s\n", maReader.getLocalName());
                            return;
                        }
                        break;

                    case XMLStreamReader.END_DOCUMENT:
                        throw new RuntimeException("saw end of document while skipping elements\n");

                    case XMLStreamReader.CHARACTERS:
                        SkipText(maReader.getText());
                        break;

                    default:
                        if (Log.Dbg != null)
                            Log.Dbg.printf("%s\n", nCode);
                        break;
                }
            }
        }
        catch (final XMLStreamException aException)
        {
            aException.printStackTrace();
        }
    }




    /** Log text that is discarded while skipping.  Does nothing when debug
     *  logging is off.
     */
    private void SkipText (final String sText)
    {
        if (Log.Dbg != null)
            Log.Dbg.printf("skipping text [%s]\n", sText.replace("\n", "\\n"));
    }




    /** Create a stream reader for the given input.  Entity reference
     *  replacement, external entities and coalescing are turned off.
     *  @param sDescription
     *      Passed to the factory as system id of the stream.
     *  @return
     *      Is null when aIn is null or when the reader can not be created.
     */
    private XMLStreamReader GetStreamReader (
        final InputStream aIn,
        final String sDescription)
    {
        if (aIn == null)
            return null;

        try
        {
            final XMLInputFactory aFactory = XMLInputFactory.newInstance();
            aFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
            aFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
            aFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
            return aFactory.createXMLStreamReader(
                sDescription,
                aIn);
        }
        catch (final Exception aException)
        {
            aException.printStackTrace();
            return null;
        }
    }




    private final XMLStreamReader maReader;
    private final StateMachine maMachine;
    private int mnElementCount;
}
......@@ -47,20 +47,21 @@ public class StateMachine
aReader.GetSection("attribute"),
maNamespaceMap,
maNameMap);
System.out.printf("read %d namespace, %d names, %d states (%d skip, %d accept), %d transitions and %d attributes\n",
maNamespaceMap.GetNamespaceCount(),
maNameMap.GetNameCount(),
maStateNameMap.GetNameCount(),
maSkipStates.GetSkipStateCount(),
maAcceptingStates.GetAcceptingStateCount(),
maTransitions.GetTransitionCount(),
maAttributeManager.GetAttributeCount());
mnStartStateId = Integer.parseInt(aReader.GetSection("start-state").firstElement()[1]);
mnEndStateId = Integer.parseInt(aReader.GetSection("end-state").firstElement()[1]);
mnCurrentStateId = mnStartStateId;
maStateStack = new Stack<>();
maElementContextStack = new Stack<>();
maActionManager = new ActionManager(maStateNameMap);
System.out.printf("read %d namespace, %d names, %d states (%d skip, %d accept), %d transitions and %d attributes\n",
maNamespaceMap.GetNamespaceCount(),
maNameMap.GetNameCount(),
maStateNameMap.GetNameCount(),
maSkipStates.GetSkipStateCount(),
maAcceptingStates.GetAcceptingStateCount(),
maTransitions.GetTransitionCount(),
maAttributeManager.GetAttributeCount());
if (Log.Dbg != null)
Log.Dbg.printf("starting in state _start_ (%d)\n", mnCurrentStateId);
......@@ -79,20 +80,20 @@ public class StateMachine
try
{
final NamespaceMap.NamespaceDescriptor aDescriptor = maNamespaceMap.GetDescriptorForURI(sNamespaceURI);
final NamespaceMap.NamespaceDescriptor aNamespaceDescriptor = maNamespaceMap.GetDescriptorForURI(sNamespaceURI);
final int nElementNameId = maNameMap.GetIdForName(sElementName);
if (Log.Dbg != null)
Log.Dbg.printf("%s:%s(%d:%d) L%dC%d\n",
aDescriptor.Prefix,
aNamespaceDescriptor.Prefix,
sElementName,
aDescriptor.Id,
aNamespaceDescriptor.Id,
nElementNameId,
aLocation.getLineNumber(),
aLocation.getColumnNumber());
final Transition aTransition = maTransitions.GetTransition(
mnCurrentStateId,
aDescriptor.Id,
aNamespaceDescriptor.Id,
nElementNameId);
if (aTransition == null)
{
......@@ -100,7 +101,7 @@ public class StateMachine
"can not find transition for state %s(%d) and element %s(%d:%d) at L%dC%d\n",
maStateNameMap.GetNameForId(mnCurrentStateId),
mnCurrentStateId,
aDescriptor.Id,
aNamespaceDescriptor.Id,
maNameMap.GetNameForId(nElementNameId),
nElementNameId,
aLocation.getLineNumber(),
......@@ -123,10 +124,42 @@ public class StateMachine
Log.Dbg.printf("\n");
}
final int nOldState = mnCurrentStateId;
SetCurrentState(aTransition.GetEndStateId());
// Follow the transition to its end state but first process its
// content. We do that by
if (Log.Dbg != null)
Log.Dbg.IncreaseIndentation();
// a) pushing the end state to the state stack so that on the
// end tag that corresponds to the current start tag it will become the current state.
maStateStack.push(aTransition.GetEndStateId());
// b) entering the state that corresponds to start tag that
// we are currently processing.
mnCurrentStateId = aTransition.GetActionId();
// c) Prepare the attributes and store them in the new element context.
final AttributeValues aAttributeValues = maAttributeManager.ParseAttributes(
mnCurrentStateId,
aAttributes);
// d) creating a new ElementContext for the element that just starts.
maElementContextStack.push(maCurrentElementContext);
final ElementContext aPreviousElementContext = maCurrentElementContext;
maCurrentElementContext = new ElementContext(
sElementName,
maStateNameMap.GetNameForId(aTransition.GetActionId()),
false,
aAttributeValues,
aPreviousElementContext);
ExecuteActions(aTransition, aAttributes, nOldState, mnCurrentStateId);
// e) and run all actions that are bound to the current start tag.
ExecuteActions(
mnCurrentStateId,
maCurrentElementContext,
ActionTrigger.ElementStart,
null,
aLocation);
bResult = true;
}
......@@ -161,8 +194,22 @@ public class StateMachine
final NamespaceMap.NamespaceDescriptor aDescriptor = maNamespaceMap.GetDescriptorForURI(sNamespaceURI);
final int nOldStateId = mnCurrentStateId;
SetCurrentState(maStateStack.pop());
// Leave the current element.
final int nPreviousStateId = mnCurrentStateId;
mnCurrentStateId = maStateStack.pop();
if (mnCurrentStateId == mnEndStateId)
mnCurrentStateId = mnStartStateId;
final ElementContext aPreviousElementContext = maCurrentElementContext;
maCurrentElementContext = maElementContextStack.pop();
ExecuteActions(
nPreviousStateId,
aPreviousElementContext,
ActionTrigger.ElementEnd,
null,
aLocation);
if (Log.Dbg != null)
{
......@@ -173,8 +220,8 @@ public class StateMachine
aLocation.getLineNumber(),
aLocation.getColumnNumber());
Log.Dbg.printf(" %s(%d) <- %s(%d)\n",
maStateNameMap.GetNameForId(nOldStateId),
nOldStateId,
maStateNameMap.GetNameForId(nPreviousStateId),
nPreviousStateId,
maStateNameMap.GetNameForId(mnCurrentStateId),
mnCurrentStateId);
}
......@@ -184,8 +231,19 @@ public class StateMachine
public void ProcessCharacters (
final String sText)
final String sText,
final Location aLocation)
{
if (Log.Dbg != null)
Log.Dbg.printf("text [%s]\n", sText.replace("\n", "\\n"));
ExecuteActions(
mnCurrentStateId,
maCurrentElementContext,
ActionTrigger.Text,
sText,
aLocation);
}
......@@ -199,34 +257,25 @@ public class StateMachine
private void SetCurrentState (final int nState)
public ActionManager GetActionManager ()
{
if (mnCurrentStateId != nState)
{
if (nState == mnEndStateId)
mnCurrentStateId = mnStartStateId;
else
mnCurrentStateId = nState;
}
return maActionManager;
}
private void ExecuteActions (
final Transition aTransition,
final AttributeProvider aAttributes,
final int nOldState,
final int nNewState)
final int nStateId,
final ElementContext aElementContext,
final ActionTrigger eTrigger,
final String sText,
final Location aLocation)
{
maStateStack.push(mnCurrentStateId);
if (Log.Dbg != null)
Log.Dbg.IncreaseIndentation();
final int nActionId = aTransition.GetActionId();
SetCurrentState(nActionId);
maAttributeManager.ParseAttributes(
nActionId,
aAttributes);
final Iterable<IAction> aActions = maActionManager.GetActions(nStateId, eTrigger);
if (aActions != null)
for (final IAction aAction : aActions)
aAction.Run(eTrigger, aElementContext, sText, aLocation);
}
......@@ -239,8 +288,11 @@ public class StateMachine
private final AttributeManager maAttributeManager;
private int mnCurrentStateId;
private Stack<Integer> maStateStack;
private ElementContext maCurrentElementContext;
private Stack<ElementContext> maElementContextStack;
private final int mnStartStateId;
private final int mnEndStateId;
private SkipStateTable maSkipStates;
private AcceptingStateTable maAcceptingStates;
private final ActionManager maActionManager;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment