Wednesday, July 13, 2011

Robots.txt enterprise version for EPiServer CMS

Adding a robots.txt file to a single site is easy. Doing the same thing for an enterprise (multi-site) installation is a bit more work. The easiest way to solve the challenge is to add an HttpHandler that writes out the correct content based on the host name of the request. The example code below gets the content from files stored under /Global/Robots/ in the File Manager, but you could easily change it to a different path.

Under the /Global/Robots folder you add files named like this: 
  • mysite.com_robots.txt
  • mysite2.com_robots.txt
Also add a default_robots.txt to have a fallback version. Read robotstxt.org to get an idea of how to write the content of a robots.txt file; a small example follows below.
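
For illustration only (the rules you actually want depend on your site), a mysite.com_robots.txt that keeps crawlers out of the EPiServer UI folders could look like this:

  User-agent: *
  Disallow: /episerver/
  Disallow: /util/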

Create a class and add this code:

using System.IO;
using System.Text;
using System.Web;
using System.Web.Hosting;
using EPiServer.Web.Hosting;

namespace Common.CoreLibrary.Robots
{
    public class RobotsHandler : IHttpHandler
    {
        public void ProcessRequest(HttpContext context)
        {
            string path = context.Request.Url.DnsSafeHost;

            // Treat www.mysite.com and mysite.com as the same site.
            if (path.StartsWith("www."))
                path = path.Substring(4);

            path = "/Global/Robots/" + path + "_robots.txt";
            UnifiedFile file = HostingEnvironment.VirtualPathProvider.GetFile(path) as UnifiedFile;

            if (file != null)
            {
                if (context.Request.ServerVariables["HTTPS"] == "off")
                {
                    // Serve the specific file for each brand.
                    Outfile(context, file);
                }
                else
                {
                    // HTTPS: keep crawlers away from the secure version.
                    context.Response.ContentType = "text/plain";
                    context.Response.Write("User-agent: *\n");
                    context.Response.Write("Disallow: /");
                }
            }
            else
            {
                // Fall back to the default file if no site-specific robots.txt exists.
                path = "/Global/Robots/default_robots.txt";
                file = HostingEnvironment.VirtualPathProvider.GetFile(path) as UnifiedFile;

                if (file == null) return;

                Outfile(context, file);
            }
        }

        private static void Outfile(HttpContext context, UnifiedFile file)
        {
            context.Response.ContentEncoding = Encoding.UTF8;
            context.Response.ContentType = "text/plain";

            // Copy the file directly to the HTTP response stream.
            // A read loop is used because Stream.Read is not guaranteed
            // to return the whole file in a single call.
            using (Stream sourceFile = file.Open(FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                byte[] buffer = new byte[4096];
                int bytesRead;
                while ((bytesRead = sourceFile.Read(buffer, 0, buffer.Length)) > 0)
                    context.Response.OutputStream.Write(buffer, 0, bytesRead);
            }

            context.Response.End();
        }

        #region IHttpHandler Members

        public bool IsReusable
        {
            get { return false; }
        }

        #endregion

    }
}
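
With this in place, a request for http://www.mysite.com/robots.txt is answered with the content of /Global/Robots/mysite.com_robots.txt (the "www." prefix is stripped before the lookup), while the HTTPS version of the same site gets a blanket Disallow so crawlers stick to the HTTP version.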


Add the following line to the web.config inside <system.webServer><handlers>:
<add name="Robots" path="/robots.txt" verb="*" type="Common.CoreLibrary.Robots.RobotsHandler" resourceType="Unspecified" requireAccess="Script" />
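
If the site runs the classic ASP.NET pipeline instead of IIS integrated mode, the handler is registered under <system.web><httpHandlers> instead. A sketch, assuming the same type name:

<add verb="*" path="robots.txt" type="Common.CoreLibrary.Robots.RobotsHandler" />

Depending on your project layout you may also need the assembly-qualified type name, for example type="Common.CoreLibrary.Robots.RobotsHandler, Common.CoreLibrary" (the assembly name here is just an assumption).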

So the next time you request mysite.com/robots.txt or mysite2.com/robots.txt you should receive different content. :-)