1   /*
2    *  Gate.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Hamish Cunningham, 31/07/98
12   *
13   *  $Id: Gate.java,v 1.42 2001/12/03 12:04:10 hamish Exp $
14   */
15  
16  package gate;
17  
18  import java.util.*;
19  import java.net.*;
20  import java.io.*;
21  
22  import gate.util.*;
23  import gate.creole.*;
24  import gate.config.*;
25  import gate.event.*;
26  
27  /** The class is responsible for initialising the GATE libraries, and
28    * providing access to singleton utility objects, such as the GATE class
29    * loader, CREOLE register and so on.
30    */
31  public class Gate implements GateConstants
32  {
33    /** Debug flag */
34    private static final boolean DEBUG = false;
35  
36    /**
37     *  The default StringBuffer size, it seems that we need longer string
38     *  than the StringBuffer class default because of the high number of
39     *  buffer expansions
40     *  */
41    public static final int STRINGBUFFER_SIZE = 1024;
42  
43    /**
44     *  The default size to be used for Hashtable, HashMap and HashSet.
45     *  The defualt is 11 and it leads to big memory usage. Having a default
46     *  load factor of 0.75, table of size 4 can take 3 elements before being
47     *  re-hashed - a values that seems to be optimal for most of the cases.
48     *  */
49    public static final int HASH_STH_SIZE = 4;
50  
51  
52    /**
53     *  The database schema owner (GATEADMIN is default)
54     *  this one should not be hardcoded but set in the
55     *  XML initialization files
56     *
57     *  */
58    public static final String DB_OWNER = "gateadmin";
59  
60  
61    /** The list of builtin URLs to search for CREOLE resources. */
62    private static String builtinCreoleDirectoryUrls[] = {
63      // "http://derwent.dcs.shef.ac.uk/gate.ac.uk/creole/"
64  
65      // this has been moved to initCreoleRegister and made relative to
66      // the base URL returned by getUrl()
67      // "http://gate.ac.uk/creole/"
68    };
69  
70  
71    /** The GATE URI used to interpret custom GATE tags*/
72    public static final String URI = "http://www.gate.ac.uk";
73  
74    /** Minimum version of JDK we support */
75    protected static final String MIN_JDK_VERSION = "1.3";
76  
77    /** Get the minimum supported version of the JDK */
78    public static String getMinJdkVersion() { return MIN_JDK_VERSION; }
79  
80    /** Initialisation - must be called by all clients before using
81      * any other parts of the library. Also initialises the CREOLE
82      * register and reads config data (<TT>gate.xml</TT> files).
83      * @see #initCreoleRegister
84      */
85    public static void init() throws GateException {
86  
87      // register the URL handler  for the "gate://" URLs
88      System.setProperty(
89        "java.protocol.handler.pkgs",
90        System.getProperty("java.protocol.handler.pkgs")
91          + "|" + "gate.util.protocols"
92      );
93  
94      System.setProperty("javax.xml.parsers.SAXParserFactory",
95                               "org.apache.xerces.jaxp.SAXParserFactoryImpl");
96  
97      //initialise the symbols generator
98      lastSym = 0;
99  
100     // create class loader and creole register if they're null
101     if(classLoader == null)
102       classLoader = new GateClassLoader();
103     if(creoleRegister == null)
104       creoleRegister = new CreoleRegisterImpl();
105     // init the creole register
106     initCreoleRegister();
107     // init the data store register
108     initDataStoreRegister();
109     // read gate.xml files; this must come before creole register
110     // initialisation in order for the CREOLE-DIR elements to have and effect
111     initConfigData();
112     // the creoleRegister acts as a proxy for datastore related events
113     dataStoreRegister.addCreoleListener(creoleRegister);
114 
115     // some of the events are actually fired by the {@link gate.Factory}
116     Factory.addCreoleListener(creoleRegister);
117 
118     // check we have a useable JDK
119     if(System.getProperty("java.version").compareTo(MIN_JDK_VERSION) < 0) {
120       throw new GateException(
121         "GATE requires JDK " + MIN_JDK_VERSION + " or newer"
122       );
123     }
124   } // init()
125 
126   /** Initialise the CREOLE register. */
127   public static void initCreoleRegister() throws GateException {
128 
129     // register the builtin CREOLE directories
130     for(int i=0; i<builtinCreoleDirectoryUrls.length; i++)
131       try {
132         creoleRegister.addDirectory(
133           new URL(builtinCreoleDirectoryUrls[i])
134         );
135       } catch(MalformedURLException e) {
136         throw new GateException(e);
137       }
138 
139 /*
140 We'll have to think about this. Right now it points to the creole inside the
141 jar/classpath so it's the same as registerBuiltins
142 */
143 //    // add the GATE base URL creole directory
144 //    creoleRegister.addDirectory(Gate.getUrl("creole/"));
145 //    creoleRegister.registerDirectories();
146 
147     // register the resources that are actually in gate.jar
148     creoleRegister.registerBuiltins();
149   } // initCreoleRegister
150 
151   /** Initialise the DataStore register. */
152   public static void initDataStoreRegister() {
153     dataStoreRegister = new DataStoreRegister();
154   } // initDataStoreRegister()
155 
156   /**
157    * Reads config data (<TT>gate.xml</TT> files). There are three
158    * sorts of these files:
159    * <UL>
160    * <LI>
161    * The builtin file from GATE's resources - this is read first.
162    * <LI>
163    * A site-wide init file given as a command-line argument or as a
164    * <TT>gate.config</TT> property - this is read second.
165    * <LI>
166    * The user's file from their home directory - this is read last.
167    * </UL>
168    * Settings from files read after some settings have already been
169    * made will simply overwrite the previous settings.
170    */
171   public static void initConfigData() throws GateException {
172     ConfigDataProcessor configProcessor = new ConfigDataProcessor();
173 
174     // url of the builtin config data (for error messages)
175     URL configUrl =
176       Gate.getClassLoader().getResource("gate/resources/" + GATE_DOT_XML);
177 
178     // open a stream to the builtin config data file and parse it
179     InputStream configStream = null;
180     try {
181       configStream = Files.getGateResourceAsStream(GATE_DOT_XML);
182     } catch(IOException e) {
183       throw new GateException(
184         "Couldn't open builtin config data file: " + configUrl + " " + e
185       );
186     }
187     configProcessor.parseConfigFile(configStream, configUrl);
188 
189     // parse any command-line initialisation file
190     File siteConfigFile = Gate.getSiteConfigFile();
191     if(siteConfigFile != null) {
192       try {
193         configUrl = siteConfigFile.toURL();
194         configStream = new FileInputStream(Gate.getSiteConfigFile());
195       } catch(IOException e) {
196         throw new GateException(
197           "Couldn't open site config data file: " + configUrl + " " + e
198         );
199       }
200       configProcessor.parseConfigFile(configStream, configUrl);
201     }
202 
203     // parse the user's config file (if it exists)
204     String userConfigName = getUserConfigFileName();
205     File userConfigFile = null;
206     URL userConfigUrl = null;
207     if(DEBUG) { Out.prln("loading user config from " + userConfigName); }
208     configStream = null;
209     boolean userConfigExists = true;
210     try {
211       userConfigFile = new File(userConfigName);
212       configStream = new FileInputStream(userConfigFile);
213       userConfigUrl = userConfigFile.toURL();
214     } catch(IOException e) {
215       userConfigExists = false;
216     }
217     if(userConfigExists)
218       configProcessor.parseConfigFile(configStream, userConfigUrl);
219 
220     // remember the init-time config options
221     originalUserConfig.putAll(userConfig);
222 
223     if(DEBUG) {
224       Out.prln(
225         "user config loaded; DBCONFIG=" + DataStoreRegister.getConfigData()
226       );
227     }
228   } // initConfigData()
229 
230   /**
231    * Attempts to guess the Unicode font for the platform.
232    */
233   public static String guessUnicodeFont(){
234     //guess the Unicode font for the platform
235     String[] fontNames = java.awt.GraphicsEnvironment.
236                          getLocalGraphicsEnvironment().
237                          getAvailableFontFamilyNames();
238     String unicodeFontName = null;
239     for(int i = 0; i < fontNames.length; i++){
240       if(fontNames[i].equalsIgnoreCase("Arial Unicode MS")){
241         unicodeFontName = fontNames[i];
242         break;
243       }
244       if(fontNames[i].toLowerCase().indexOf("unicode") != -1){
245         unicodeFontName = fontNames[i];
246       }
247     }//for(int i = 0; i < fontNames.length; i++)
248     return unicodeFontName;
249   }
250 
251   /** Get a URL that points to either an HTTP server or a file system
252     * that contains GATE files (such as test cases). The following locations
253     * are tried in sequence:
254     * <UL>
255     * <LI>
256     * <TT>http://derwent.dcs.shef.ac.uk/gate.ac.uk/</TT>, a Sheffield-internal
257     * development server (the gate.ac.uk affix is a copy of the file system
258     * present on GATE's main public server - see next item);
259     * <LI>
260     * <TT>http://gate.ac.uk/</TT>, GATE's main public server;
261     * <LI>
262     * <TT>http://localhost/gate.ac.uk/</TT>, a Web server running on the
263     * local machine;
264     * <LI>
265     * the local file system where the binaries for the
266     * current invocation of GATE are stored.
267     * </UL>
268     * In each case we assume that a Web server will be running on port 80,
269     * and that if we can open a socket to that port then the server is
270     * running. (This is a bit of a strong assumption, but this URL is used
271     * largely by the test suite, so we're not betting anything too critical
272     * on it.)
273     * <P>
274     * Note that the value returned will only be calculated when the existing
275     * value recorded by this class is null (which will be the case when
276     * neither setUrlBase nor getUrlBase have been called, or if
277     * setUrlBase(null) has been called).
278     */
279   public static URL getUrl() throws GateException {
280     if(urlBase != null) return urlBase;
281 
282     try {
283 
284        // if we're assuming a net connection, try network servers
285       if(isNetConnected()) {
286         if(
287           tryNetServer("gate-internal.dcs.shef.ac.uk", 80, "/") ||
288    //       tryNetServer("derwent.dcs.shef.ac.uk", 80, "/gate.ac.uk/") ||
289           tryNetServer("gate.ac.uk", 80, "/")
290         ) {
291             if(DEBUG) Out.prln("getUrl() returned " + urlBase);
292             return urlBase;
293         }
294       } // if isNetConnected() ...
295 
296       // no network servers; try for a local host web server.
297       // we use InetAddress to get host name instead of using "localhost" coz
298       // badly configured Windoze IP sometimes doesn't resolve the latter
299       if(
300         isLocalWebServer() &&
301         tryNetServer(
302           InetAddress.getLocalHost().getHostName(), 80, "/gate.ac.uk/"
303         )
304       ) {
305         if(DEBUG) Out.prln("getUrlBase() returned " + urlBase);
306         return urlBase;
307       }
308 
309       // try the local file system
310       tryFileSystem();
311 
312     } catch(MalformedURLException e) {
313       throw new GateException("Bad URL, getUrlBase(): " + urlBase + ": " + e);
314     } catch(UnknownHostException e) {
315       throw new GateException("No host, getUrlBase(): " + urlBase + ": " + e);
316     }
317 
318     // return value will be based on the file system, or null
319     if(DEBUG) Out.prln("getUrlBase() returned " + urlBase);
320     return urlBase;
321   } // getUrl()
322 
323   /** Get a URL that points to either an HTTP server or a file system
324     * that contains GATE files (such as test cases).
325     * Calls <TT>getUrl()</TT> then adds the <TT>path</TT> parameter to
326     * the result.
327     * @param path a path to add to the base URL.
328     * @see #getUrl()
329     */
330   public static URL getUrl(String path) throws GateException {
331     getUrl();
332     if(urlBase == null)
333       return null;
334 
335     URL newUrl = null;
336     try {
337       newUrl = new URL(urlBase, path);
338     } catch(MalformedURLException e) {
339       throw new GateException("Bad URL, getUrl( " + path + "): " + e);
340     }
341 
342     if(DEBUG) Out.prln("getUrl(" + path + ") returned " + newUrl);
343     return newUrl;
344   } // getUrl(path)
345 
346   /** Flag controlling whether we should try to access the net, e.g. when
347     * setting up a base URL.
348     */
349   private static boolean netConnected = true;
350 
351   private static int lastSym;
352 
353   /** Should we assume we're connected to the net? */
354   public static boolean isNetConnected() { return netConnected; }
355 
356   /**
357    * Tell GATE whether to assume we're connected to the net. Has to be
358    * called <B>before</B> {@link #init()}.
359    */
360   public static void setNetConnected(boolean b) { netConnected = b; }
361 
362   /**
363    * Flag controlling whether we should try to access a web server on
364    * localhost, e.g. when setting up a base URL. Has to be
365    * called <B>before</B> {@link #init()}.
366    */
367   private static boolean localWebServer = true;
368 
369   /** Should we assume there's a local web server? */
370   public static boolean isLocalWebServer() { return localWebServer; }
371 
372   /** Tell GATE whether to assume there's a local web server. */
373   public static void setLocalWebServer(boolean b) { localWebServer = b; }
374 
375   /** Try to contact a network server. When sucessfull sets urlBase to an HTTP
376     * URL for the server.
377     * @param hostName the name of the host to try and connect to
378     * @param serverPort the port to try and connect to
379     * @param path a path to append to the URL when we make a successfull
380     * connection. E.g. for host xyz, port 80, path /thing, the resultant URL
381     * would be <TT>http://xyz:80/thing</TT>.
382     */
383   public static boolean tryNetServer(
384     String hostName, int serverPort, String path
385   ) throws MalformedURLException {
386     Socket socket = null;
387     if(DEBUG)
388       Out.prln(
389         "tryNetServer(hostName=" + hostName + ", serverPort=" + serverPort +
390         ", path=" + path +")"
391       );
392 
393     // is the host listening at the port?
394     try{
395       URL url = new URL("http://" + hostName + ":" + serverPort + "/");
396       URLConnection uConn =  url.openConnection();
397       HttpURLConnection huConn = null;
398       if(uConn instanceof HttpURLConnection)
399         huConn = (HttpURLConnection)uConn;
400       if(huConn.getResponseCode() == -1) return false;
401     } catch (IOException e){
402       return false;
403     }
404 
405 //    if(socket != null) {
406       urlBase = new URL("http", hostName, serverPort, path);
407       return true;
408 //    }
409 
410 //    return false;
411   } // tryNetServer()
412 
413   /** Try to find GATE files in the local file system */
414   protected static boolean tryFileSystem() throws MalformedURLException {
415     String urlBaseName = locateGateFiles();
416     if(DEBUG) Out.prln("tryFileSystem: " + urlBaseName);
417 
418     urlBase = new URL(urlBaseName + "gate/resources/gate.ac.uk/");
419     return urlBase == null;
420   } // tryFileSystem()
421 
422   /**
423    * Find the location of the GATE binaries (and resources) in the
424    * local file system.
425    */
426   public static String locateGateFiles() {
427     String aGateResourceName = "gate/resources/creole/creole.xml";
428     URL resourcesUrl = Gate.getClassLoader().getResource(aGateResourceName);
429 
430     StringBuffer basePath = new StringBuffer(resourcesUrl.toExternalForm());
431     String urlBaseName =
432       basePath.substring(0, basePath.length() - aGateResourceName.length());
433 
434     return urlBaseName;
435   } // locateGateFiles
436 
437   /**
438    * Checks whether a particular class is a Gate defined type
439    */
440   public static boolean isGateType(String classname){
441     return getCreoleRegister().containsKey(classname) ||
442            classname.equals("gate.Resource")||
443            classname.equals("gate.LanguageResource")||
444            classname.equals("gate.ProcessingResource")||
445            classname.equals("gate.VisualResource")||
446            classname.equals("gate.Controller")||
447            classname.equals("gate.Document")||
448            classname.equals("gate.Corpus")||
449            classname.equals("gate.DataStore");
450   }
451 
452   /** Returns the value for the HIDDEN attribute of a feature map */
453   static public boolean getHiddenAttribute(FeatureMap fm){
454     if(fm == null) return false;
455     Object value = fm.get("gate.HIDDEN");
456     return value != null &&
457            value instanceof String &&
458            ((String)value).equals("true");
459   }
460 
461   /** Sets the value for the HIDDEN attribute of a feature map */
462   static public void setHiddenAttribute(FeatureMap fm, boolean hidden){
463     if(hidden){
464       fm.put("gate.HIDDEN", "true");
465     }else{
466       fm.remove("gate.HIDDEN");
467     }
468   }
469 
470 
471   /** Registers a {@link gate.event.CreoleListener} with the Gate system
472     */
473   public static synchronized void addCreoleListener(CreoleListener l){
474     creoleRegister.addCreoleListener(l);
475   } // addCreoleListener
476 
477   /** Set the URL base for GATE files, e.g. <TT>http://gate.ac.uk/</TT>. */
478   public static void setUrlBase(URL urlBase) { Gate.urlBase = urlBase; }
479 
480   /** The URL base for GATE files, e.g. <TT>http://gate.ac.uk/</TT>. */
481   private static URL urlBase = null;
482 
483   /** Class loader used e.g. for loading CREOLE modules, of compiling
484     * JAPE rule RHSs.
485     */
486   private static GateClassLoader classLoader = null;
487 
488   /** Get the GATE class loader. */
489   public static GateClassLoader getClassLoader() { return classLoader; }
490 
491   /** The CREOLE register. */
492   private static CreoleRegister creoleRegister = null;
493 
494   /** Get the CREOLE register. */
495   public static CreoleRegister getCreoleRegister() { return creoleRegister; }
496 
497   /** The DataStore register */
498   private static DataStoreRegister dataStoreRegister = null;
499 
500   /**
501    * The current executable under execution.
502    */
503   private static gate.Executable currentExecutable;
504 
505   /** Get the DataStore register. */
506   public static DataStoreRegister getDataStoreRegister() {
507     return dataStoreRegister;
508   } // getDataStoreRegister
509 
510   /**
511    * Sets the {@link Executable} currently under execution.
512    * At a givem time there can be only one executable set. After the executable
513    * has finished its execution this value should be set back to null.
514    * An attempt to set the executable while this value is not null will result
515    * in the method call waiting until the old executable is set to null.
516    */
517   public synchronized static void setExecutable(gate.Executable executable) {
518     if(executable == null) currentExecutable = executable;
519     else{
520       while(getExecutable() != null){
521         try{
522           Thread.currentThread().sleep(200);
523         }catch(InterruptedException ie){
524           throw new LuckyException(ie.toString());
525         }
526       }
527       currentExecutable = executable;
528     }
529   } // setExecutable
530 
531   /**
532    * Returns the curently set executable.
533    * {@see setExecutable()}
534    */
535   public synchronized static gate.Executable getExecutable() {
536     return currentExecutable;
537   } // getExecutable
538 
539   /**
540    * Returns a new unique string
541    */
542   public synchronized static String genSym() {
543     StringBuffer buff = new StringBuffer(Integer.toHexString(lastSym++).
544                                          toUpperCase());
545     for(int i = buff.length(); i <= 4; i++) buff.insert(0, '0');
546     return buff.toString();
547   } // genSym
548 
549   /** GATE development environment configuration data (stored in gate.xml). */
550   private static OptionsMap userConfig = new OptionsMap();
551 
552   /**
553    * This map stores the init-time config data in case we need it later.
554    * GATE development environment configuration data (stored in gate.xml).
555    */
556   private static OptionsMap originalUserConfig = new OptionsMap();
557 
558   /** Name of the XML element for GATE development environment config data. */
559   private static String userConfigElement = "GATECONFIG";
560 
561   /**
562    * Gate the name of the XML element for GATE development environment
563    * config data.
564    */
565   public static String getUserConfigElement() { return userConfigElement; }
566 
567   /**
568    * Get the site config file (generally set during command-line processing
569    * or as a <TT>gate.config</TT> property).
570    * If the config is null, this method checks the <TT>gate.config</TT>
571    * property and uses it if non-null.
572    */
573   public static File getSiteConfigFile() {
574     if(siteConfigFile == null) {
575       String gateConfigProperty = System.getProperty(GATE_CONFIG_PROPERTY);
576       if(gateConfigProperty != null)
577         siteConfigFile = new File(gateConfigProperty);
578     }
579     return siteConfigFile;
580   } // getSiteConfigFile
581 
582   /** Set the site config file (e.g. during command-line processing). */
583   public static void setSiteConfigFile(File siteConfigFile) {
584     Gate.siteConfigFile = siteConfigFile;
585   } // setSiteConfigFile
586 
587   /** Site config file */
588   private static File siteConfigFile;
589 
590   /** Shorthand for local newline */
591   private static String nl = Strings.getNl();
592 
593   /** An empty config data file. */
594   private static String emptyConfigFile =
595     "<?xml version=\"1.0\"?>" + nl +
596     "<!-- " + GATE_DOT_XML + ": GATE configuration data -->" + nl +
597     "<GATE>" + nl +
598     "" + nl +
599     "<!-- NOTE: the next element may be overwritten by the GUI!!! -->" + nl +
600     "<" + userConfigElement + "/>" + nl +
601     "" + nl +
602     "</GATE>" + nl;
603 
604   /**
605    * Get an empty config file. <B>NOTE:</B> this method is intended only
606    * for use by the test suite.
607    */
608   public static String getEmptyConfigFile() { return emptyConfigFile; }
609 
610   /**
611    * Get the GATE development environment configuration data
612    * (initialised from <TT>gate.xml</TT>).
613    */
614   public static OptionsMap getUserConfig() { return userConfig; }
615 
616   /**
617    * Get the original, initialisation-time,
618    * GATE development environment configuration data
619    * (initialised from <TT>gate.xml</TT>).
620    */
621   public static OptionsMap getOriginalUserConfig() {
622     return originalUserConfig;
623   } // getOriginalUserConfig
624 
625   /**
626    * Update the GATE development environment configuration data in the
627    * user's <TT>gate.xml</TT> file (create one if it doesn't exist).
628    */
629   public static void writeUserConfig() throws GateException {
630     // the user's config file
631     String configFileName = getUserConfigFileName();
632     File configFile = new File(configFileName);
633 
634     // create if not there, then update
635     try {
636       // if the file doesn't exist, create one with an empty GATECONFIG
637       if(! configFile.exists()) {
638         FileWriter writer = new FileWriter(configFile);
639         writer.write(emptyConfigFile);
640         writer.close();
641       }
642 
643       // update the config element of the file
644       Files.updateXmlElement(
645         new File(configFileName), userConfigElement, userConfig
646       );
647 
648     } catch(IOException e) {
649       throw new GateException(
650         "problem writing user " + GATE_DOT_XML + ": " + nl + e.toString()
651       );
652     }
653   } // writeUserConfig
654 
655   /**
656    * Get the name of the user's <TT>gate.xml</TT> config file (this
657    * doesn't guarantee that file exists!).
658    */
659   public static String getUserConfigFileName() {
660     String filePrefix = "";
661     if(runningOnUnix()) filePrefix = ".";
662 
663     String userConfigName =
664       System.getProperty("user.home") + Strings.getFileSep() +
665       filePrefix + GATE_DOT_XML;
666     return userConfigName;
667   } // getUserConfigFileName
668 
669   /**
670    * Get the name of the user's <TT>gate.ser</TT> session state file (this
671    * doesn't guarantee that file exists!).
672    */
673   public static String getUserSessionFileName() {
674     String filePrefix = "";
675     if(runningOnUnix()) filePrefix = ".";
676 
677     String userSessionName =
678       System.getProperty("user.home") + Strings.getFileSep() +
679       filePrefix + GATE_DOT_SER;
680     return userSessionName;
681   } // getUserSessionFileName
682 
683   /**
684    * This method tries to guess if we are on a UNIX system. It does this
685    * by checking the value of <TT>System.getProperty("file.separator")</TT>;
686    * if this is "/" it concludes we are on UNIX. <B>This is obviously not
687    * a very good idea in the general case, so nothing much should be made
688    * to depend on this method (e.g. just naming of config file
689    * <TT>.gate.xml</TT> as opposed to <TT>gate.xml</TT>)</B>.
690    */
691   public static boolean runningOnUnix() {
692     return Strings.getFileSep().equals("/");
693   } // runningOnUnix
694 } // class Gate
695