View Javadoc
1   /**
2    * Copyright (C) 2014-2016 Philip Helger (www.helger.com)
3    * philip[at]helger[dot]com
4    *
5    * Licensed under the Apache License, Version 2.0 (the "License");
6    * you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    *         http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package com.helger.schematron;
18  
19  import java.io.IOException;
20  
21  import javax.annotation.Nonnull;
22  import javax.annotation.Nullable;
23  import javax.annotation.concurrent.Immutable;
24  import javax.xml.transform.Source;
25  import javax.xml.transform.dom.DOMSource;
26  
27  import org.oclc.purl.dsdl.svrl.SchematronOutputType;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  import org.w3c.dom.Node;
31  
32  import com.helger.commons.ValueEnforcer;
33  import com.helger.commons.annotation.PresentForCodeCoverage;
34  import com.helger.commons.error.IResourceErrorGroup;
35  import com.helger.commons.error.ResourceErrorGroup;
36  import com.helger.commons.hierarchy.visit.ChildrenProviderHierarchyVisitor;
37  import com.helger.commons.hierarchy.visit.DefaultHierarchyVisitorCallback;
38  import com.helger.commons.hierarchy.visit.EHierarchyVisitorReturn;
39  import com.helger.commons.io.resource.IReadableResource;
40  import com.helger.commons.state.ESuccess;
41  import com.helger.commons.wrapper.Wrapper;
42  import com.helger.schematron.pure.errorhandler.IPSErrorHandler;
43  import com.helger.schematron.pure.errorhandler.LoggingPSErrorHandler;
44  import com.helger.schematron.resolve.DefaultSchematronIncludeResolver;
45  import com.helger.schematron.svrl.SVRLFailedAssert;
46  import com.helger.schematron.svrl.SVRLHelper;
47  import com.helger.schematron.svrl.SVRLResourceError;
48  import com.helger.xml.microdom.IMicroDocument;
49  import com.helger.xml.microdom.IMicroElement;
50  import com.helger.xml.microdom.IMicroNode;
51  import com.helger.xml.microdom.serialize.MicroReader;
52  import com.helger.xml.serialize.read.ISAXReaderSettings;
53  
54  import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
55  
56  /**
57   * This is a helper class that provides a way to easily apply an Schematron
58   * resource on an XML resource.
59   *
60   * @author Philip Helger
61   */
62  @Immutable
63  public final class SchematronHelper
64  {
65    private static final Logger s_aLogger = LoggerFactory.getLogger (SchematronHelper.class);
66  
67    @PresentForCodeCoverage
68    private static final SchematronHelper s_aInstance = new SchematronHelper ();
69  
70    private SchematronHelper ()
71    {}
72  
73    /**
74     * Apply the passed schematron on the passed XML resource using a custom error
75     * handler.
76     *
77     * @param aSchematron
78     *        The Schematron resource. May not be <code>null</code>.
79     * @param aXML
80     *        The XML resource. May not be <code>null</code>.
81     * @return <code>null</code> if either the Schematron or the XML could not be
82     *         read.
83     * @throws IllegalStateException
84     *         if the processing throws an unexpected exception.
85     */
86    @Nullable
87    public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
88                                                        @Nonnull final IReadableResource aXML)
89    {
90      ValueEnforcer.notNull (aSchematron, "SchematronResource");
91      ValueEnforcer.notNull (aXML, "XMLSource");
92  
93      try
94      {
95        // Apply Schematron on XML
96        return aSchematron.applySchematronValidationToSVRL (aXML);
97      }
98      catch (final Exception ex)
99      {
100       throw new IllegalArgumentException ("Failed to apply Schematron " +
101                                           aSchematron.getID () +
102                                           " onto XML resource " +
103                                           aXML.getResourceID (),
104                                           ex);
105     }
106   }
107 
108   /**
109    * Apply the passed schematron on the passed XML resource.
110    *
111    * @param aSchematron
112    *        The Schematron resource. May not be <code>null</code>.
113    * @param aXML
114    *        The XML resource. May not be <code>null</code>.
115    * @return <code>null</code> if either the Schematron or the XML could not be
116    *         read.
117    * @throws IllegalStateException
118    *         if the processing throws an unexpected exception.
119    */
120   @Nullable
121   public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
122                                                       @Nonnull final Source aXML)
123   {
124     ValueEnforcer.notNull (aSchematron, "SchematronResource");
125     ValueEnforcer.notNull (aXML, "XMLSource");
126 
127     try
128     {
129       // Apply Schematron on XML.
130       return aSchematron.applySchematronValidationToSVRL (aXML);
131     }
132     catch (final Exception ex)
133     {
134       throw new IllegalArgumentException ("Failed to apply Schematron " +
135                                           aSchematron.getID () +
136                                           " onto XML source " +
137                                           aXML,
138                                           ex);
139     }
140   }
141 
142   /**
143    * Apply the passed schematron on the passed XML node.
144    *
145    * @param aSchematron
146    *        The Schematron resource. May not be <code>null</code>.
147    * @param aNode
148    *        The XML node. May not be <code>null</code>.
149    * @return <code>null</code> if either the Schematron or the XML could not be
150    *         read.
151    * @throws IllegalStateException
152    *         if the processing throws an unexpected exception.
153    */
154   @Nullable
155   public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
156                                                       @Nonnull final Node aNode)
157   {
158     ValueEnforcer.notNull (aSchematron, "SchematronResource");
159     ValueEnforcer.notNull (aNode, "Node");
160 
161     return applySchematron (aSchematron, new DOMSource (aNode));
162   }
163 
164   /**
165    * Convert a {@link SchematronOutputType} to an {@link IResourceErrorGroup}.
166    *
167    * @param aSchematronOutput
168    *        The result of Schematron validation
169    * @param sResourceName
170    *        The name of the resource that was validated (may be a file path
171    *        etc.)
172    * @return List non-<code>null</code> error list of {@link SVRLResourceError}
173    *         objects.
174    */
175   @Nonnull
176   public static IResourceErrorGroup convertToResourceErrorGroup (@Nonnull final SchematronOutputType aSchematronOutput,
177                                                                  @Nullable final String sResourceName)
178   {
179     ValueEnforcer.notNull (aSchematronOutput, "SchematronOutput");
180 
181     final ResourceErrorGroup ret = new ResourceErrorGroup ();
182     for (final SVRLFailedAssert aFailedAssert : SVRLHelper.getAllFailedAssertions (aSchematronOutput))
183       ret.addResourceError (aFailedAssert.getAsResourceError (sResourceName));
184     return ret;
185   }
186 
187   @SuppressFBWarnings ("RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
188   @Nonnull
189   private static ESuccess _recursiveResolveAllSchematronIncludes (@Nonnull final IMicroElement eRoot,
190                                                                   @Nonnull final IReadableResource aResource,
191                                                                   @Nullable final ISAXReaderSettings aSettings,
192                                                                   @Nonnull final IPSErrorHandler aErrorHandler)
193   {
194     if (eRoot != null)
195     {
196       final DefaultSchematronIncludeResolver aIncludeResolver = new DefaultSchematronIncludeResolver (aResource);
197 
198       for (final IMicroElement aElement : eRoot.getAllChildElementsRecursive ())
199         if (CSchematron.NAMESPACE_SCHEMATRON.equals (aElement.getNamespaceURI ()) &&
200             aElement.getLocalName ().equals (CSchematronXML.ELEMENT_INCLUDE))
201         {
202           String sHref = aElement.getAttributeValue (CSchematronXML.ATTR_HREF);
203           try
204           {
205             final int nHashIndex = sHref.indexOf ('#');
206             String sAnchor = null;
207             if (nHashIndex >= 0)
208             {
209               sAnchor = sHref.substring (nHashIndex + 1);
210               sHref = sHref.substring (0, nHashIndex);
211             }
212 
213             final IReadableResource aIncludeRes = aIncludeResolver.getResolvedSchematronResource (sHref);
214             if (aIncludeRes == null)
215             {
216               aErrorHandler.error (aResource, null, "Failed to resolve include '" + sHref + "'", null);
217               return ESuccess.FAILURE;
218             }
219 
220             if (s_aLogger.isDebugEnabled ())
221               s_aLogger.debug ("Resolved '" +
222                                sHref +
223                                "' relative to '" +
224                                aIncludeResolver.getBaseHref () +
225                                "' as '" +
226                                aIncludeRes.getPath () +
227                                "'");
228 
229             // Read XML to be included
230             final IMicroDocument aIncludedDoc = MicroReader.readMicroXML (aIncludeRes, aSettings);
231             if (aIncludedDoc == null)
232             {
233               aErrorHandler.error (aResource, null, "Failed to parse include " + aIncludeRes, null);
234               return ESuccess.FAILURE;
235             }
236 
237             IMicroElement aIncludedContent;
238             if (sAnchor == null)
239             {
240               // no anchor present - include the whole document
241 
242               // Return the document element
243               aIncludedContent = aIncludedDoc.getDocumentElement ();
244             }
245             else
246             {
247               final String sFinalAnchor = sAnchor;
248               final Wrapper <IMicroElement> aMatch = new Wrapper<> ();
249               // Also include the root element in the search
250               ChildrenProviderHierarchyVisitor.visitFrom (aIncludedDoc.getDocumentElement (),
251                                                           new DefaultHierarchyVisitorCallback <IMicroNode> ()
252                                                           {
253                                                             @Override
254                                                             public EHierarchyVisitorReturn onItemBeforeChildren (final IMicroNode aItem)
255                                                             {
256                                                               if (aItem.isElement ())
257                                                               {
258                                                                 final IMicroElement aCurElement = (IMicroElement) aItem;
259                                                                 final String sID = aCurElement.getAttributeValue ("id");
260                                                                 if (sFinalAnchor.equals (sID))
261                                                                   aMatch.set (aCurElement);
262                                                               }
263                                                               return EHierarchyVisitorReturn.CONTINUE;
264                                                             }
265                                                           },
266                                                           true);
267               aIncludedContent = aMatch.get ();
268               if (aIncludedContent == null)
269               {
270                 aErrorHandler.warn (aResource,
271                                     null,
272                                     "Failed to resolve an element with the ID '" +
273                                           sAnchor +
274                                           "' in " +
275                                           aIncludeRes +
276                                           "! Therefore including the whole document!");
277                 aIncludedContent = aIncludedDoc.getDocumentElement ();
278               }
279             }
280 
281             // Important to detach from parent!
282             aIncludedContent.detachFromParent ();
283 
284             // Check for correct namespace URI of included content
285             if (!CSchematron.NAMESPACE_SCHEMATRON.equals (aIncludedContent.getNamespaceURI ()))
286             {
287               aErrorHandler.error (aResource,
288                                    null,
289                                    "The included resource " +
290                                          aIncludeRes +
291                                          " contains the wrong XML namespace URI '" +
292                                          aIncludedContent.getNamespaceURI () +
293                                          "' but was expected to have '" +
294                                          CSchematron.NAMESPACE_SCHEMATRON +
295                                          "'",
296                                    null);
297               return ESuccess.FAILURE;
298             }
299 
300             // Check that not a whole Schema but only a part is included
301             if (CSchematronXML.ELEMENT_SCHEMA.equals (aIncludedContent.getLocalName ()))
302             {
303               aErrorHandler.warn (aResource,
304                                   null,
305                                   "The included resource " +
306                                         aIncludeRes +
307                                         " seems to be a complete schema. To includes parts of a schema the respective element must be the root element of the included resource.");
308             }
309 
310             // Recursive resolve includes
311             if (_recursiveResolveAllSchematronIncludes (aIncludedContent,
312                                                         aIncludeRes,
313                                                         aSettings,
314                                                         aErrorHandler).isFailure ())
315               return ESuccess.FAILURE;
316 
317             // Now replace "include" element with content in MicroDOM
318             aElement.getParent ().replaceChild (aElement, aIncludedContent);
319           }
320           catch (final IOException ex)
321           {
322             aErrorHandler.error (aResource, null, "Failed to read include '" + sHref + "'", ex);
323             return ESuccess.FAILURE;
324           }
325         }
326     }
327     return ESuccess.SUCCESS;
328   }
329 
330   /**
331    * Resolve all Schematron includes of the passed resource.
332    *
333    * @param aResource
334    *        The Schematron resource to read. May not be <code>null</code>.
335    * @return <code>null</code> if the passed resource could not be read as XML
336    *         document
337    */
338   @Nullable
339   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource)
340   {
341     return getWithResolvedSchematronIncludes (aResource, (ISAXReaderSettings) null, new LoggingPSErrorHandler ());
342   }
343 
344   /**
345    * Resolve all Schematron includes of the passed resource.
346    *
347    * @param aResource
348    *        The Schematron resource to read. May not be <code>null</code>.
349    * @param aSettings
350    *        The SAX reader settings to be used. May be <code>null</code> to use
351    *        the default settings.
352    * @return <code>null</code> if the passed resource could not be read as XML
353    *         document
354    */
355   @Nullable
356   @Deprecated
357   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource,
358                                                                   @Nullable final ISAXReaderSettings aSettings)
359   {
360     return getWithResolvedSchematronIncludes (aResource, aSettings, new LoggingPSErrorHandler ());
361   }
362 
363   /**
364    * Resolve all Schematron includes of the passed resource.
365    *
366    * @param aResource
367    *        The Schematron resource to read. May not be <code>null</code>.
368    * @param aSettings
369    *        The SAX reader settings to be used. May be <code>null</code> to use
370    *        the default settings.
371    * @param aErrorHandler
372    *        The error handler to be used. May not be <code>null</code>.
373    * @return <code>null</code> if the passed resource could not be read as XML
374    *         document
375    */
376   @Nullable
377   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource,
378                                                                   @Nullable final ISAXReaderSettings aSettings,
379                                                                   @Nonnull final IPSErrorHandler aErrorHandler)
380   {
381     final IMicroDocument aDoc = MicroReader.readMicroXML (aResource, aSettings);
382     if (aDoc != null)
383     {
384       // Resolve all Schematron includes
385       if (_recursiveResolveAllSchematronIncludes (aDoc.getDocumentElement (),
386                                                   aResource,
387                                                   aSettings,
388                                                   aErrorHandler).isFailure ())
389       {
390         // Error resolving includes
391         return null;
392       }
393     }
394     return aDoc;
395   }
396 }